@camstack/addon-post-analysis 0.1.20 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/dist/dist-4mTLJ7BJ.mjs +20750 -0
  2. package/dist/dist-CS2K80so.js +20933 -0
  3. package/dist/embedding-encoder/index.js +977 -902
  4. package/dist/embedding-encoder/index.mjs +967 -860
  5. package/dist/enrichment-engine/index.js +834 -833
  6. package/dist/enrichment-engine/index.mjs +828 -832
  7. package/dist/pipeline-analytics/_stub.js +1680 -1396
  8. package/dist/pipeline-analytics/_virtual_mf-localSharedImportMap___mfe_internal__addon_pipeline_analytics_widgets-DOSUJ-U0.mjs +156 -0
  9. package/dist/pipeline-analytics/_virtual_mf___mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_camstack_mf_1_ui_mf_2_library__loadShare__.js-DJvmVCso.mjs +26 -0
  10. package/dist/pipeline-analytics/_virtual_mf___mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_tanstack_mf_1_react_mf_2_query__loadShare__.js-B3Wx5J80.mjs +26 -0
  11. package/dist/pipeline-analytics/_virtual_mf___mfe_internal__addon_pipeline_analytics_widgets__loadShare__react__loadShare__.js-C0AuF9av.mjs +26 -0
  12. package/dist/pipeline-analytics/_virtual_mf___mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_1_jsx_mf_2_runtime__loadShare__.js-Bm-iyjmq.mjs +26 -0
  13. package/dist/pipeline-analytics/dist-CYZr2fwk.mjs +2726 -0
  14. package/dist/pipeline-analytics/hostInit-BazRS2O7.mjs +129 -0
  15. package/dist/pipeline-analytics/index.js +7112 -3100
  16. package/dist/pipeline-analytics/index.mjs +7105 -3100
  17. package/dist/pipeline-analytics/remoteEntry.js +134 -2973
  18. package/dist/pipeline-analytics/remoteEntry.ssr.js +33 -0
  19. package/dist/pipeline-analytics/virtualExposes-BgYzpJZG.mjs +27 -0
  20. package/dist/pipeline-analytics/virtual_mf-exposes-ssr___mfe_internal__addon_pipeline_analytics_widgets__remoteEntry_js-D7qgWCKX.mjs +10 -0
  21. package/dist/resolve-frame-5lMxmeI1.js +57 -0
  22. package/dist/resolve-frame-CT1T1tWy.mjs +44 -0
  23. package/package.json +15 -6
  24. package/dist/embedding-encoder/index.js.map +0 -1
  25. package/dist/embedding-encoder/index.mjs.map +0 -1
  26. package/dist/enrichment-engine/index.js.map +0 -1
  27. package/dist/enrichment-engine/index.mjs.map +0 -1
  28. package/dist/index-B0RhVv1c.js +0 -17107
  29. package/dist/index-B0RhVv1c.js.map +0 -1
  30. package/dist/index-ot5PeFg_.mjs +0 -17108
  31. package/dist/index-ot5PeFg_.mjs.map +0 -1
  32. package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/AudioHistoryChart.d.ts +0 -4
  33. package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/AudioMetricsPanel.d.ts +0 -10
  34. package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/DetectionHistoryChart.d.ts +0 -4
  35. package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/LiveStatsTab.d.ts +0 -5
  36. package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/MotionHistoryChart.d.ts +0 -4
  37. package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/OccupancyHistoryChart.d.ts +0 -4
  38. package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/OccupancyPanel.d.ts +0 -10
  39. package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/chart-utils.d.ts +0 -97
  40. package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/index.d.ts +0 -29
  41. package/dist/pipeline-analytics/@mf-types/widgets.d.ts +0 -2
  42. package/dist/pipeline-analytics/@mf-types.d.ts +0 -3
  43. package/dist/pipeline-analytics/@mf-types.zip +0 -0
  44. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_camstack_mf_1_sdk__loadShare__.mjs-lantnv8e.mjs +0 -12
  45. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_camstack_mf_1_types__loadShare__.mjs-BD3oMNGB.mjs +0 -29
  46. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_camstack_mf_1_ui_mf_2_library__loadShare__.mjs-BgOHCakr.mjs +0 -18
  47. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_tanstack_mf_1_react_mf_2_query__loadShare__.mjs-DoWbefqS.mjs +0 -104
  48. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_trpc_mf_1_client__loadShare__.mjs-52bfkwC8.mjs +0 -85
  49. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_trpc_mf_1_react_mf_2_query__loadShare__.mjs-CVrnrGED.mjs +0 -62
  50. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react__loadShare__.mjs-D1qPKjvR.mjs +0 -89
  51. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react__loadShare__.mjs_commonjs-proxy-B5X50Xa4.mjs +0 -29
  52. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_1_jsx_mf_2_runtime__loadShare__.mjs-BsyrX6NO.mjs +0 -36
  53. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_2_dom__loadShare__.mjs-Dp8hqYOB.mjs +0 -45
  54. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_2_dom__loadShare__.mjs_commonjs-proxy-B10b5k5J.mjs +0 -6
  55. package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_2_dom_mf_1_client__loadShare__.mjs-BZjEt71l.mjs +0 -34
  56. package/dist/pipeline-analytics/_virtual_mf-localSharedImportMap___mfe_internal__addon_pipeline_analytics_widgets-DWB3apaJ.mjs +0 -156
  57. package/dist/pipeline-analytics/client-C6xdgLZU.mjs +0 -9836
  58. package/dist/pipeline-analytics/getErrorShape-BPSzUA7W-TlK8ipWe.mjs +0 -211
  59. package/dist/pipeline-analytics/hostInit-3cyL9eyG.mjs +0 -168
  60. package/dist/pipeline-analytics/index-BCTHeI2m.mjs +0 -1641
  61. package/dist/pipeline-analytics/index-BuWLz0GG.mjs +0 -2603
  62. package/dist/pipeline-analytics/index-CIwq-tQL.mjs +0 -725
  63. package/dist/pipeline-analytics/index-CWBMDbou.mjs +0 -435
  64. package/dist/pipeline-analytics/index-CWkKuNLr.mjs +0 -232
  65. package/dist/pipeline-analytics/index-CZhagnlH.mjs +0 -67784
  66. package/dist/pipeline-analytics/index-D883Q5B8.mjs +0 -185
  67. package/dist/pipeline-analytics/index-DtOI1aTU.mjs +0 -18504
  68. package/dist/pipeline-analytics/index-xncRG7-x.mjs +0 -2713
  69. package/dist/pipeline-analytics/index.js.map +0 -1
  70. package/dist/pipeline-analytics/index.mjs.map +0 -1
  71. package/dist/pipeline-analytics/jsx-runtime-DdLhuHmJ.mjs +0 -55
  72. package/dist/pipeline-analytics/schemas-B7L0qZtq.mjs +0 -3599
  73. package/dist/pipeline-analytics/virtualExposes-8FzWTdq3.mjs +0 -42
@@ -1,934 +1,1009 @@
1
- "use strict";
2
- var __create = Object.create;
3
- var __defProp = Object.defineProperty;
4
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
- var __getOwnPropNames = Object.getOwnPropertyNames;
6
- var __getProtoOf = Object.getPrototypeOf;
7
- var __hasOwnProp = Object.prototype.hasOwnProperty;
8
- var __copyProps = (to, from, except, desc) => {
9
- if (from && typeof from === "object" || typeof from === "function") {
10
- for (let key of __getOwnPropNames(from))
11
- if (!__hasOwnProp.call(to, key) && key !== except)
12
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
13
- }
14
- return to;
15
- };
16
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
17
- // If the importer is in node compatibility mode or this is not an ESM
18
- // file that has been converted to a CommonJS file using a Babel-
19
- // compatible transform (i.e. "__esModule" has not been set), then set
20
- // "default" to the CommonJS "module.exports" for node compatibility.
21
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
22
- mod
23
- ));
24
- Object.defineProperties(exports, { __esModule: { value: true }, [Symbol.toStringTag]: { value: "Module" } });
25
- const index = require("../index-B0RhVv1c.js");
26
- const core = require("@camstack/core");
27
- const path = require("node:path");
28
- const fs = require("node:fs");
29
- const sharp = require("sharp");
30
- const node_child_process = require("node:child_process");
31
- function _interopNamespaceDefault(e) {
32
- const n = Object.create(null, { [Symbol.toStringTag]: { value: "Module" } });
33
- if (e) {
34
- for (const k in e) {
35
- if (k !== "default") {
36
- const d = Object.getOwnPropertyDescriptor(e, k);
37
- Object.defineProperty(n, k, d.get ? d : {
38
- enumerable: true,
39
- get: () => e[k]
40
- });
41
- }
42
- }
43
- }
44
- n.default = e;
45
- return Object.freeze(n);
46
- }
47
- const path__namespace = /* @__PURE__ */ _interopNamespaceDefault(path);
48
- const fs__namespace = /* @__PURE__ */ _interopNamespaceDefault(fs);
49
- const CLIP_IMAGE_MODELS = [
50
- {
51
- id: "clip-vit-b32",
52
- name: "CLIP ViT-B/32",
53
- description: "OpenAI CLIP ViT-B/32 — fast, 512-dim, int8 quantized (85 MB)",
54
- inputSize: { width: 224, height: 224 },
55
- labels: [],
56
- inputLayout: "nchw",
57
- inputNormalization: "none",
58
- formats: {
59
- onnx: {
60
- url: "https://huggingface.co/Xenova/clip-vit-base-patch32/resolve/main/onnx/vision_model_quantized.onnx",
61
- sizeMB: 85
62
- }
63
- }
64
- },
65
- {
66
- id: "clip-vit-b16",
67
- name: "CLIP ViT-B/16",
68
- description: "OpenAI CLIP ViT-B/16 — higher accuracy, 512-dim, int8 quantized (83 MB)",
69
- inputSize: { width: 224, height: 224 },
70
- labels: [],
71
- inputLayout: "nchw",
72
- inputNormalization: "none",
73
- formats: {
74
- onnx: {
75
- url: "https://huggingface.co/Xenova/clip-vit-base-patch16/resolve/main/onnx/vision_model_quantized.onnx",
76
- sizeMB: 83
77
- }
78
- }
79
- },
80
- {
81
- id: "siglip2-b16-256",
82
- name: "SigLIP2 Base/16 256",
83
- description: "Google SigLIP2 — superior scene understanding, 768-dim, int8 quantized (90 MB)",
84
- inputSize: { width: 256, height: 256 },
85
- labels: [],
86
- inputLayout: "nchw",
87
- inputNormalization: "none",
88
- formats: {
89
- onnx: {
90
- url: "https://huggingface.co/onnx-community/siglip2-base-patch16-256-ONNX/resolve/main/onnx/vision_model_quantized.onnx",
91
- sizeMB: 90
92
- }
93
- }
94
- }
1
+ Object.defineProperties(exports, {
2
+ __esModule: { value: true },
3
+ [Symbol.toStringTag]: { value: "Module" }
4
+ });
5
+ const require_dist = require("../dist-CS2K80so.js");
6
+ let sharp = require("sharp");
7
+ sharp = require_dist.__toESM(sharp);
8
+ let _camstack_core = require("@camstack/core");
9
+ let node_path = require("node:path");
10
+ node_path = require_dist.__toESM(node_path);
11
+ let node_fs = require("node:fs");
12
+ node_fs = require_dist.__toESM(node_fs);
13
+ let node_child_process = require("node:child_process");
14
+ //#region src/embedding-encoder/catalogs/embedding-models.ts
15
+ var CLIP_IMAGE_MODELS = [
16
+ {
17
+ id: "clip-vit-b32",
18
+ name: "CLIP ViT-B/32",
19
+ description: "OpenAI CLIP ViT-B/32 — fast, 512-dim, int8 quantized (85 MB)",
20
+ inputSize: {
21
+ width: 224,
22
+ height: 224
23
+ },
24
+ labels: [],
25
+ inputLayout: "nchw",
26
+ inputNormalization: "none",
27
+ formats: { onnx: {
28
+ url: "https://huggingface.co/Xenova/clip-vit-base-patch32/resolve/main/onnx/vision_model_quantized.onnx",
29
+ sizeMB: 85
30
+ } }
31
+ },
32
+ {
33
+ id: "clip-vit-b16",
34
+ name: "CLIP ViT-B/16",
35
+ description: "OpenAI CLIP ViT-B/16 — higher accuracy, 512-dim, int8 quantized (83 MB)",
36
+ inputSize: {
37
+ width: 224,
38
+ height: 224
39
+ },
40
+ labels: [],
41
+ inputLayout: "nchw",
42
+ inputNormalization: "none",
43
+ formats: { onnx: {
44
+ url: "https://huggingface.co/Xenova/clip-vit-base-patch16/resolve/main/onnx/vision_model_quantized.onnx",
45
+ sizeMB: 83
46
+ } }
47
+ },
48
+ {
49
+ id: "siglip2-b16-256",
50
+ name: "SigLIP2 Base/16 256",
51
+ description: "Google SigLIP2 — superior scene understanding, 768-dim, int8 quantized (90 MB)",
52
+ inputSize: {
53
+ width: 256,
54
+ height: 256
55
+ },
56
+ labels: [],
57
+ inputLayout: "nchw",
58
+ inputNormalization: "none",
59
+ formats: { onnx: {
60
+ url: "https://huggingface.co/onnx-community/siglip2-base-patch16-256-ONNX/resolve/main/onnx/vision_model_quantized.onnx",
61
+ sizeMB: 90
62
+ } }
63
+ }
95
64
  ];
96
- const CLIP_TEXT_MODELS = [
97
- {
98
- id: "clip-vit-b32-text",
99
- name: "CLIP ViT-B/32 Text Encoder",
100
- description: "Text encoder for CLIP ViT-B/32, int8 quantized (62 MB)",
101
- inputSize: { width: 0, height: 0 },
102
- labels: [],
103
- formats: {
104
- onnx: {
105
- url: "https://huggingface.co/Xenova/clip-vit-base-patch32/resolve/main/onnx/text_model_quantized.onnx",
106
- sizeMB: 62
107
- }
108
- }
109
- },
110
- {
111
- id: "clip-vit-b16-text",
112
- name: "CLIP ViT-B/16 Text Encoder",
113
- description: "Text encoder for CLIP ViT-B/16, int8 quantized (62 MB)",
114
- inputSize: { width: 0, height: 0 },
115
- labels: [],
116
- formats: {
117
- onnx: {
118
- url: "https://huggingface.co/Xenova/clip-vit-base-patch16/resolve/main/onnx/text_model_quantized.onnx",
119
- sizeMB: 62
120
- }
121
- }
122
- },
123
- {
124
- id: "siglip2-b16-256-text",
125
- name: "SigLIP2 Base/16 256 Text Encoder",
126
- description: "Text encoder for SigLIP2, int8 quantized (270 MB)",
127
- inputSize: { width: 0, height: 0 },
128
- labels: [],
129
- formats: {
130
- onnx: {
131
- url: "https://huggingface.co/onnx-community/siglip2-base-patch16-256-ONNX/resolve/main/onnx/text_model_quantized.onnx",
132
- sizeMB: 270
133
- }
134
- }
135
- }
65
+ var CLIP_TEXT_MODELS = [
66
+ {
67
+ id: "clip-vit-b32-text",
68
+ name: "CLIP ViT-B/32 Text Encoder",
69
+ description: "Text encoder for CLIP ViT-B/32, int8 quantized (62 MB)",
70
+ inputSize: {
71
+ width: 0,
72
+ height: 0
73
+ },
74
+ labels: [],
75
+ formats: { onnx: {
76
+ url: "https://huggingface.co/Xenova/clip-vit-base-patch32/resolve/main/onnx/text_model_quantized.onnx",
77
+ sizeMB: 62
78
+ } }
79
+ },
80
+ {
81
+ id: "clip-vit-b16-text",
82
+ name: "CLIP ViT-B/16 Text Encoder",
83
+ description: "Text encoder for CLIP ViT-B/16, int8 quantized (62 MB)",
84
+ inputSize: {
85
+ width: 0,
86
+ height: 0
87
+ },
88
+ labels: [],
89
+ formats: { onnx: {
90
+ url: "https://huggingface.co/Xenova/clip-vit-base-patch16/resolve/main/onnx/text_model_quantized.onnx",
91
+ sizeMB: 62
92
+ } }
93
+ },
94
+ {
95
+ id: "siglip2-b16-256-text",
96
+ name: "SigLIP2 Base/16 256 Text Encoder",
97
+ description: "Text encoder for SigLIP2, int8 quantized (270 MB)",
98
+ inputSize: {
99
+ width: 0,
100
+ height: 0
101
+ },
102
+ labels: [],
103
+ formats: { onnx: {
104
+ url: "https://huggingface.co/onnx-community/siglip2-base-patch16-256-ONNX/resolve/main/onnx/text_model_quantized.onnx",
105
+ sizeMB: 270
106
+ } }
107
+ }
136
108
  ];
137
- const noop = () => {
138
- };
109
+ //#endregion
110
+ //#region src/embedding-encoder/shared/noop-logger.ts
111
+ var noop = () => {};
139
112
  function createNoopLogger() {
140
- const logger = {
141
- debug: noop,
142
- info: noop,
143
- warn: noop,
144
- error: noop,
145
- child: () => logger,
146
- withTags: (_tags) => logger
147
- };
148
- return logger;
113
+ const logger = {
114
+ debug: noop,
115
+ info: noop,
116
+ warn: noop,
117
+ error: noop,
118
+ child: () => logger,
119
+ withTags: (_tags) => logger
120
+ };
121
+ return logger;
149
122
  }
150
- const BACKEND_TO_DEVICE$1 = {
151
- cpu: "cpu",
152
- coreml: "gpu-mps",
153
- cuda: "gpu-cuda",
154
- tensorrt: "tensorrt"
123
+ //#endregion
124
+ //#region src/embedding-encoder/shared/node-raw-tensor-engine.ts
125
+ var BACKEND_TO_DEVICE$1 = {
126
+ cpu: "cpu",
127
+ coreml: "gpu-mps",
128
+ cuda: "gpu-cuda",
129
+ tensorrt: "tensorrt"
155
130
  };
156
- class NodeRawTensorEngine {
157
- constructor(modelPath, backend, logger) {
158
- this.modelPath = modelPath;
159
- this.backend = backend;
160
- this.device = BACKEND_TO_DEVICE$1[backend] ?? "cpu";
161
- this.log = logger ?? createNoopLogger();
162
- }
163
- runtime = "onnx";
164
- device;
165
- session = null;
166
- log;
167
- async initialize() {
168
- const ort = await import("onnxruntime-node");
169
- const provider = this.backend === "coreml" ? "coreml" : this.backend === "cuda" ? "cuda" : "cpu";
170
- const absModelPath = path__namespace.isAbsolute(this.modelPath) ? this.modelPath : path__namespace.resolve(process.cwd(), this.modelPath);
171
- this.session = await ort.InferenceSession.create(absModelPath, {
172
- executionProviders: [provider]
173
- });
174
- this.log.info("ONNX session loaded", { meta: { modelPath: absModelPath, backend: this.backend, provider } });
175
- }
176
- async run(input, inputShape) {
177
- if (!this.session) {
178
- throw new Error("NodeRawTensorEngine: not initialized — call initialize() first");
179
- }
180
- const ort = await import("onnxruntime-node");
181
- const sess = this.session;
182
- const inputName = sess.inputNames[0];
183
- const tensor = new ort.Tensor("float32", input, [...inputShape]);
184
- const feeds = { [inputName]: tensor };
185
- const start = Date.now();
186
- let results;
187
- try {
188
- results = await sess.run(feeds);
189
- } catch (err) {
190
- const error = err instanceof Error ? err : new Error(String(err));
191
- this.log.error("Inference failed", { meta: { error: error.message } });
192
- throw error;
193
- }
194
- const outputName = sess.outputNames[0];
195
- this.log.debug("Inference complete", { meta: { durationMs: Date.now() - start, outputKeys: [outputName], preprocessMode: "raw-tensor" } });
196
- return results[outputName].data;
197
- }
198
- async dispose() {
199
- this.session = null;
200
- this.log.debug("Session disposed");
201
- }
202
- }
131
+ /**
132
+ * Raw tensor engine — runs ONNX inference on pre-processed Float32Array input.
133
+ * Used by addons that handle their own preprocessing (e.g. CLIP embedding encoder).
134
+ */
135
+ var NodeRawTensorEngine = class {
136
+ modelPath;
137
+ backend;
138
+ runtime = "onnx";
139
+ device;
140
+ session = null;
141
+ log;
142
+ constructor(modelPath, backend, logger) {
143
+ this.modelPath = modelPath;
144
+ this.backend = backend;
145
+ this.device = BACKEND_TO_DEVICE$1[backend] ?? "cpu";
146
+ this.log = logger ?? createNoopLogger();
147
+ }
148
+ async initialize() {
149
+ const ort = await import("onnxruntime-node");
150
+ const provider = this.backend === "coreml" ? "coreml" : this.backend === "cuda" ? "cuda" : "cpu";
151
+ const absModelPath = node_path.isAbsolute(this.modelPath) ? this.modelPath : node_path.resolve(process.cwd(), this.modelPath);
152
+ this.session = await ort.InferenceSession.create(absModelPath, { executionProviders: [provider] });
153
+ this.log.info("ONNX session loaded", { meta: {
154
+ modelPath: absModelPath,
155
+ backend: this.backend,
156
+ provider
157
+ } });
158
+ }
159
+ async run(input, inputShape) {
160
+ if (!this.session) throw new Error("NodeRawTensorEngine: not initialized — call initialize() first");
161
+ const ort = await import("onnxruntime-node");
162
+ const sess = this.session;
163
+ const inputName = sess.inputNames[0];
164
+ const tensor = new ort.Tensor("float32", input, [...inputShape]);
165
+ const feeds = { [inputName]: tensor };
166
+ const start = Date.now();
167
+ let results;
168
+ try {
169
+ results = await sess.run(feeds);
170
+ } catch (err) {
171
+ const error = err instanceof Error ? err : new Error(String(err));
172
+ this.log.error("Inference failed", { meta: { error: error.message } });
173
+ throw error;
174
+ }
175
+ const outputName = sess.outputNames[0];
176
+ this.log.debug("Inference complete", { meta: {
177
+ durationMs: Date.now() - start,
178
+ outputKeys: [outputName],
179
+ preprocessMode: "raw-tensor"
180
+ } });
181
+ return results[outputName].data;
182
+ }
183
+ async dispose() {
184
+ this.session = null;
185
+ this.log.debug("Session disposed");
186
+ }
187
+ };
188
+ //#endregion
189
+ //#region src/embedding-encoder/shared/image-utils.ts
190
+ /** Letterbox resize for YOLO: resize preserving aspect ratio, pad to square */
203
191
  async function letterbox(jpeg, targetSize) {
204
- const meta = await sharp(jpeg).metadata();
205
- const originalWidth = meta.width ?? 0;
206
- const originalHeight = meta.height ?? 0;
207
- const scale = Math.min(targetSize / originalWidth, targetSize / originalHeight);
208
- const scaledWidth = Math.round(originalWidth * scale);
209
- const scaledHeight = Math.round(originalHeight * scale);
210
- const padX = Math.floor((targetSize - scaledWidth) / 2);
211
- const padY = Math.floor((targetSize - scaledHeight) / 2);
212
- const { data } = await sharp(jpeg).resize(scaledWidth, scaledHeight).extend({
213
- top: padY,
214
- bottom: targetSize - scaledHeight - padY,
215
- left: padX,
216
- right: targetSize - scaledWidth - padX,
217
- background: { r: 114, g: 114, b: 114 }
218
- }).removeAlpha().raw().toBuffer({ resolveWithObject: true });
219
- const numPixels = targetSize * targetSize;
220
- const float32 = new Float32Array(3 * numPixels);
221
- for (let i = 0; i < numPixels; i++) {
222
- const srcBase = i * 3;
223
- float32[0 * numPixels + i] = data[srcBase] / 255;
224
- float32[1 * numPixels + i] = data[srcBase + 1] / 255;
225
- float32[2 * numPixels + i] = data[srcBase + 2] / 255;
226
- }
227
- return { data: float32, scale, padX, padY, originalWidth, originalHeight };
192
+ const meta = await (0, sharp.default)(jpeg).metadata();
193
+ const originalWidth = meta.width ?? 0;
194
+ const originalHeight = meta.height ?? 0;
195
+ const scale = Math.min(targetSize / originalWidth, targetSize / originalHeight);
196
+ const scaledWidth = Math.round(originalWidth * scale);
197
+ const scaledHeight = Math.round(originalHeight * scale);
198
+ const padX = Math.floor((targetSize - scaledWidth) / 2);
199
+ const padY = Math.floor((targetSize - scaledHeight) / 2);
200
+ const { data } = await (0, sharp.default)(jpeg).resize(scaledWidth, scaledHeight).extend({
201
+ top: padY,
202
+ bottom: targetSize - scaledHeight - padY,
203
+ left: padX,
204
+ right: targetSize - scaledWidth - padX,
205
+ background: {
206
+ r: 114,
207
+ g: 114,
208
+ b: 114
209
+ }
210
+ }).removeAlpha().raw().toBuffer({ resolveWithObject: true });
211
+ const numPixels = targetSize * targetSize;
212
+ const float32 = new Float32Array(3 * numPixels);
213
+ for (let i = 0; i < numPixels; i++) {
214
+ const srcBase = i * 3;
215
+ float32[0 * numPixels + i] = data[srcBase] / 255;
216
+ float32[1 * numPixels + i] = data[srcBase + 1] / 255;
217
+ float32[2 * numPixels + i] = data[srcBase + 2] / 255;
218
+ }
219
+ return {
220
+ data: float32,
221
+ scale,
222
+ padX,
223
+ padY,
224
+ originalWidth,
225
+ originalHeight
226
+ };
228
227
  }
228
+ /** Resize and normalize to Float32Array */
229
229
  async function resizeAndNormalize(jpeg, targetWidth, targetHeight, normalization, layout) {
230
- const { data } = await sharp(jpeg).resize(targetWidth, targetHeight, { fit: "fill" }).removeAlpha().raw().toBuffer({ resolveWithObject: true });
231
- const numPixels = targetWidth * targetHeight;
232
- const float32 = new Float32Array(3 * numPixels);
233
- const mean = [0.485, 0.456, 0.406];
234
- const std = [0.229, 0.224, 0.225];
235
- if (layout === "nchw") {
236
- for (let i = 0; i < numPixels; i++) {
237
- const srcBase = i * 3;
238
- for (let c = 0; c < 3; c++) {
239
- const raw = data[srcBase + c] / 255;
240
- let val;
241
- if (normalization === "zero-one") {
242
- val = raw;
243
- } else if (normalization === "imagenet") {
244
- val = (raw - mean[c]) / std[c];
245
- } else {
246
- val = data[srcBase + c];
247
- }
248
- float32[c * numPixels + i] = val;
249
- }
250
- }
251
- } else {
252
- for (let i = 0; i < numPixels; i++) {
253
- const srcBase = i * 3;
254
- for (let c = 0; c < 3; c++) {
255
- const raw = data[srcBase + c] / 255;
256
- let val;
257
- if (normalization === "zero-one") {
258
- val = raw;
259
- } else if (normalization === "imagenet") {
260
- val = (raw - mean[c]) / std[c];
261
- } else {
262
- val = data[srcBase + c];
263
- }
264
- float32[i * 3 + c] = val;
265
- }
266
- }
267
- }
268
- return float32;
230
+ const { data } = await (0, sharp.default)(jpeg).resize(targetWidth, targetHeight, { fit: "fill" }).removeAlpha().raw().toBuffer({ resolveWithObject: true });
231
+ const numPixels = targetWidth * targetHeight;
232
+ const float32 = new Float32Array(3 * numPixels);
233
+ const mean = [
234
+ .485,
235
+ .456,
236
+ .406
237
+ ];
238
+ const std = [
239
+ .229,
240
+ .224,
241
+ .225
242
+ ];
243
+ if (layout === "nchw") for (let i = 0; i < numPixels; i++) {
244
+ const srcBase = i * 3;
245
+ for (let c = 0; c < 3; c++) {
246
+ const raw = data[srcBase + c] / 255;
247
+ let val;
248
+ if (normalization === "zero-one") val = raw;
249
+ else if (normalization === "imagenet") val = (raw - mean[c]) / std[c];
250
+ else val = data[srcBase + c];
251
+ float32[c * numPixels + i] = val;
252
+ }
253
+ }
254
+ else for (let i = 0; i < numPixels; i++) {
255
+ const srcBase = i * 3;
256
+ for (let c = 0; c < 3; c++) {
257
+ const raw = data[srcBase + c] / 255;
258
+ let val;
259
+ if (normalization === "zero-one") val = raw;
260
+ else if (normalization === "imagenet") val = (raw - mean[c]) / std[c];
261
+ else val = data[srcBase + c];
262
+ float32[i * 3 + c] = val;
263
+ }
264
+ }
265
+ return float32;
269
266
  }
270
- const BACKEND_TO_PROVIDER = {
271
- cpu: "cpu",
272
- coreml: "coreml",
273
- cuda: "cuda",
274
- tensorrt: "tensorrt",
275
- dml: "dml"
267
+ //#endregion
268
+ //#region src/embedding-encoder/shared/node-engine.ts
269
+ var BACKEND_TO_PROVIDER = {
270
+ cpu: "cpu",
271
+ coreml: "coreml",
272
+ cuda: "cuda",
273
+ tensorrt: "tensorrt",
274
+ dml: "dml"
276
275
  };
277
- const BACKEND_TO_DEVICE = {
278
- cpu: "cpu",
279
- coreml: "gpu-mps",
280
- cuda: "gpu-cuda",
281
- tensorrt: "tensorrt"
276
+ var BACKEND_TO_DEVICE = {
277
+ cpu: "cpu",
278
+ coreml: "gpu-mps",
279
+ cuda: "gpu-cuda",
280
+ tensorrt: "tensorrt"
282
281
  };
283
- class NodeInferenceEngine {
284
- constructor(modelPath, backend, modelMeta, logger) {
285
- this.modelPath = modelPath;
286
- this.backend = backend;
287
- this.modelMeta = modelMeta;
288
- this.device = BACKEND_TO_DEVICE[backend] ?? "cpu";
289
- this.log = logger ?? createNoopLogger();
290
- }
291
- runtime = "onnx";
292
- device;
293
- session = null;
294
- log;
295
- async initialize() {
296
- const ort = await import("onnxruntime-node");
297
- const provider = BACKEND_TO_PROVIDER[this.backend] ?? "cpu";
298
- const absModelPath = path__namespace.isAbsolute(this.modelPath) ? this.modelPath : path__namespace.resolve(process.cwd(), this.modelPath);
299
- const sessionOptions = {
300
- executionProviders: [provider]
301
- };
302
- this.session = await ort.InferenceSession.create(absModelPath, sessionOptions);
303
- this.log.info("ONNX session loaded", { meta: { modelPath: absModelPath, backend: this.backend, provider } });
304
- }
305
- async infer(input) {
306
- const jpeg = input.kind === "jpeg" ? input.data : await this.encodeRawAsJpeg(input.data, input.width, input.height, input.format);
307
- const { data, letterboxMeta } = await this.preprocess(jpeg);
308
- const { inputSize } = this.modelMeta;
309
- const inputShape = this.modelMeta.preprocessMode === "letterbox" ? [1, 3, inputSize.height, inputSize.width] : [1, 3, inputSize.height, inputSize.width];
310
- const start = Date.now();
311
- let result;
312
- try {
313
- result = await this.runSession(data, inputShape);
314
- } catch (err) {
315
- const error = err instanceof Error ? err : new Error(String(err));
316
- this.log.error("Inference failed", { meta: { error: error.message } });
317
- throw error;
318
- }
319
- const durationMs = Date.now() - start;
320
- if ("tensor" in result) {
321
- this.log.debug("Inference complete", { meta: { durationMs, outputKeys: ["tensor"], preprocessMode: this.modelMeta.preprocessMode } });
322
- return { tensor: result.tensor, letterbox: letterboxMeta, inferenceMs: durationMs };
323
- }
324
- this.log.debug("Inference complete", { meta: { durationMs, outputKeys: Object.keys(result.tensors), preprocessMode: this.modelMeta.preprocessMode } });
325
- return { tensors: result.tensors, letterbox: letterboxMeta, inferenceMs: durationMs };
326
- }
327
- /** Preprocess JPEG to Float32Array using the configured mode */
328
- async preprocess(jpeg) {
329
- const { inputSize, inputNormalization, inputLayout, preprocessMode } = this.modelMeta;
330
- if (preprocessMode === "letterbox") {
331
- const targetSize = Math.max(inputSize.width, inputSize.height);
332
- const result = await letterbox(jpeg, targetSize);
333
- const letterboxMeta = {
334
- scale: result.scale,
335
- padX: result.padX,
336
- padY: result.padY,
337
- originalWidth: result.originalWidth,
338
- originalHeight: result.originalHeight
339
- };
340
- return { data: result.data, letterboxMeta };
341
- }
342
- const data = await resizeAndNormalize(
343
- jpeg,
344
- inputSize.width,
345
- inputSize.height,
346
- inputNormalization,
347
- inputLayout
348
- );
349
- return { data };
350
- }
351
- async encodeRawAsJpeg(raw, width, height, format) {
352
- const sharp2 = (await import("sharp")).default;
353
- const channels = format === "gray" ? 1 : 3;
354
- return sharp2(raw, { raw: { width, height, channels } }).jpeg({ quality: 80, mozjpeg: false }).toBuffer();
355
- }
356
- /** Run an ONNX session with a single input, handling both single and multi-output models */
357
- async runSession(input, inputShape) {
358
- if (!this.session) {
359
- throw new Error("NodeInferenceEngine: not initialized — call initialize() first");
360
- }
361
- const ort = await import("onnxruntime-node");
362
- const sess = this.session;
363
- const inputName = sess.inputNames[0];
364
- const tensor = new ort.Tensor("float32", input, [...inputShape]);
365
- const feeds = { [inputName]: tensor };
366
- const results = await sess.run(feeds);
367
- const outputNames = sess.outputNames;
368
- if (outputNames.length === 1) {
369
- const outputName = outputNames[0];
370
- return { tensor: results[outputName].data };
371
- }
372
- const tensors = {};
373
- for (const name of outputNames) {
374
- tensors[name] = results[name].data;
375
- }
376
- return { tensors };
377
- }
378
- async run(input, inputShape) {
379
- const result = await this.runSession(input, inputShape);
380
- if ("tensor" in result) return result.tensor;
381
- const firstKey = Object.keys(result.tensors)[0];
382
- return result.tensors[firstKey];
383
- }
384
- async dispose() {
385
- this.session = null;
386
- this.log.debug("Session disposed");
387
- }
388
- }
389
- class PythonInferenceEngine {
390
- constructor(pythonPath, scriptPath, runtime, modelPath, extraArgs = [], logger) {
391
- this.pythonPath = pythonPath;
392
- this.scriptPath = scriptPath;
393
- this.modelPath = modelPath;
394
- this.extraArgs = extraArgs;
395
- this.runtime = runtime;
396
- const runtimeDeviceMap = {
397
- onnx: "cpu",
398
- coreml: "gpu-mps",
399
- pytorch: "cpu",
400
- openvino: "cpu",
401
- tflite: "cpu"
402
- };
403
- this.device = runtimeDeviceMap[runtime];
404
- this.log = logger ?? createNoopLogger();
405
- }
406
- runtime;
407
- device;
408
- process = null;
409
- receiveBuffer = Buffer.alloc(0);
410
- pendingResolve = null;
411
- pendingReject = null;
412
- log;
413
- async initialize() {
414
- const args = [this.scriptPath, this.modelPath, ...this.extraArgs];
415
- this.process = node_child_process.spawn(this.pythonPath, args, {
416
- stdio: ["pipe", "pipe", "pipe"]
417
- });
418
- if (!this.process.stdout || !this.process.stdin) {
419
- throw new Error("PythonInferenceEngine: failed to create process pipes");
420
- }
421
- this.log.info("Python process started", { meta: { pythonPath: this.pythonPath, scriptPath: this.scriptPath, modelPath: this.modelPath } });
422
- this.process.stderr?.on("data", (chunk) => {
423
- const lines = chunk.toString().split("\n");
424
- for (const line of lines) {
425
- const trimmed = line.trim();
426
- if (trimmed) {
427
- this.log.warn(trimmed);
428
- }
429
- }
430
- });
431
- this.process.on("error", (err) => {
432
- this.log.error("Process error", { meta: { error: err.message } });
433
- this.pendingReject?.(err);
434
- this.pendingReject = null;
435
- this.pendingResolve = null;
436
- });
437
- this.process.on("exit", (code) => {
438
- if (code !== 0) {
439
- this.log.error("Process exited", { meta: { code } });
440
- const err = new Error(`PythonInferenceEngine: process exited with code ${code}`);
441
- this.pendingReject?.(err);
442
- this.pendingReject = null;
443
- this.pendingResolve = null;
444
- }
445
- });
446
- this.process.stdout.on("data", (chunk) => {
447
- this.receiveBuffer = Buffer.concat([this.receiveBuffer, chunk]);
448
- this._tryReceive();
449
- });
450
- await new Promise((resolve, reject) => {
451
- const timeout = setTimeout(() => resolve(), 2e3);
452
- this.process?.on("error", (err) => {
453
- clearTimeout(timeout);
454
- reject(err);
455
- });
456
- this.process?.on("exit", (code) => {
457
- clearTimeout(timeout);
458
- if (code !== 0) {
459
- reject(new Error(`PythonInferenceEngine: process exited early with code ${code}`));
460
- }
461
- });
462
- });
463
- }
464
- _tryReceive() {
465
- if (this.receiveBuffer.length < 4) return;
466
- const length = this.receiveBuffer.readUInt32LE(0);
467
- if (this.receiveBuffer.length < 4 + length) return;
468
- const jsonBytes = this.receiveBuffer.subarray(4, 4 + length);
469
- this.receiveBuffer = this.receiveBuffer.subarray(4 + length);
470
- const resolve = this.pendingResolve;
471
- const reject = this.pendingReject;
472
- this.pendingResolve = null;
473
- this.pendingReject = null;
474
- if (!resolve) return;
475
- try {
476
- const parsed = JSON.parse(jsonBytes.toString("utf8"));
477
- resolve(parsed);
478
- } catch (err) {
479
- reject?.(err instanceof Error ? err : new Error(String(err)));
480
- }
481
- }
482
- /** Run inference, returning structured detection results. Encodes raw input to JPEG when needed. */
483
- async infer(input) {
484
- const start = Date.now();
485
- const jpeg = input.kind === "jpeg" ? input.data : await this.encodeRawAsJpeg(input.data, input.width, input.height, input.format);
486
- const result = await this.sendJpeg(jpeg);
487
- const durationMs = Date.now() - start;
488
- this.log.debug("Inference complete", { meta: { durationMs } });
489
- return { structured: result, inferenceMs: durationMs };
490
- }
491
- async encodeRawAsJpeg(raw, width, height, format) {
492
- const sharp2 = (await import("sharp")).default;
493
- const channels = format === "gray" ? 1 : 3;
494
- return sharp2(raw, { raw: { width, height, channels } }).jpeg({ quality: 80, mozjpeg: false }).toBuffer();
495
- }
496
- /** Send JPEG buffer via binary IPC, receive JSON detection results */
497
- async sendJpeg(jpeg) {
498
- if (!this.process?.stdin) {
499
- throw new Error("PythonInferenceEngine: process not initialized");
500
- }
501
- return new Promise((resolve, reject) => {
502
- this.pendingResolve = resolve;
503
- this.pendingReject = reject;
504
- const lengthBuf = Buffer.allocUnsafe(4);
505
- lengthBuf.writeUInt32LE(jpeg.length, 0);
506
- this.process.stdin.write(Buffer.concat([lengthBuf, jpeg]));
507
- });
508
- }
509
- async dispose() {
510
- const proc = this.process;
511
- if (!proc) return;
512
- this.process = null;
513
- proc.stdin?.end();
514
- proc.kill("SIGTERM");
515
- const exited = await new Promise((resolve) => {
516
- const timer = setTimeout(() => {
517
- resolve(false);
518
- }, 5e3);
519
- proc.once("exit", () => {
520
- clearTimeout(timer);
521
- resolve(true);
522
- });
523
- });
524
- if (!exited) {
525
- try {
526
- proc.kill("SIGKILL");
527
- } catch {
528
- }
529
- this.log.warn("Python process did not exit gracefully — sent SIGKILL");
530
- } else {
531
- this.log.debug("Python process terminated");
532
- }
533
- }
534
- }
535
- const AUTO_BACKEND_PRIORITY = ["coreml", "cuda", "tensorrt", "cpu"];
536
- const BACKEND_TO_FORMAT = index.BACKEND_TO_FORMAT;
537
- const RUNTIME_TO_FORMAT = index.RUNTIME_TO_FORMAT;
282
+ var NodeInferenceEngine = class {
283
+ modelPath;
284
+ backend;
285
+ modelMeta;
286
+ runtime = "onnx";
287
+ device;
288
+ session = null;
289
+ log;
290
+ constructor(modelPath, backend, modelMeta, logger) {
291
+ this.modelPath = modelPath;
292
+ this.backend = backend;
293
+ this.modelMeta = modelMeta;
294
+ this.device = BACKEND_TO_DEVICE[backend] ?? "cpu";
295
+ this.log = logger ?? createNoopLogger();
296
+ }
297
+ async initialize() {
298
+ const ort = await import("onnxruntime-node");
299
+ const provider = BACKEND_TO_PROVIDER[this.backend] ?? "cpu";
300
+ const absModelPath = node_path.isAbsolute(this.modelPath) ? this.modelPath : node_path.resolve(process.cwd(), this.modelPath);
301
+ const sessionOptions = { executionProviders: [provider] };
302
+ this.session = await ort.InferenceSession.create(absModelPath, sessionOptions);
303
+ this.log.info("ONNX session loaded", { meta: {
304
+ modelPath: absModelPath,
305
+ backend: this.backend,
306
+ provider
307
+ } });
308
+ }
309
+ async infer(input) {
310
+ const jpeg = input.kind === "jpeg" ? input.data : await this.encodeRawAsJpeg(input.data, input.width, input.height, input.format);
311
+ const { data, letterboxMeta } = await this.preprocess(jpeg);
312
+ const { inputSize } = this.modelMeta;
313
+ const inputShape = this.modelMeta.preprocessMode === "letterbox" ? [
314
+ 1,
315
+ 3,
316
+ inputSize.height,
317
+ inputSize.width
318
+ ] : [
319
+ 1,
320
+ 3,
321
+ inputSize.height,
322
+ inputSize.width
323
+ ];
324
+ const start = Date.now();
325
+ let result;
326
+ try {
327
+ result = await this.runSession(data, inputShape);
328
+ } catch (err) {
329
+ const error = err instanceof Error ? err : new Error(String(err));
330
+ this.log.error("Inference failed", { meta: { error: error.message } });
331
+ throw error;
332
+ }
333
+ const durationMs = Date.now() - start;
334
+ if ("tensor" in result) {
335
+ this.log.debug("Inference complete", { meta: {
336
+ durationMs,
337
+ outputKeys: ["tensor"],
338
+ preprocessMode: this.modelMeta.preprocessMode
339
+ } });
340
+ return {
341
+ tensor: result.tensor,
342
+ letterbox: letterboxMeta,
343
+ inferenceMs: durationMs
344
+ };
345
+ }
346
+ this.log.debug("Inference complete", { meta: {
347
+ durationMs,
348
+ outputKeys: Object.keys(result.tensors),
349
+ preprocessMode: this.modelMeta.preprocessMode
350
+ } });
351
+ return {
352
+ tensors: result.tensors,
353
+ letterbox: letterboxMeta,
354
+ inferenceMs: durationMs
355
+ };
356
+ }
357
+ /** Preprocess JPEG to Float32Array using the configured mode */
358
+ async preprocess(jpeg) {
359
+ const { inputSize, inputNormalization, inputLayout, preprocessMode } = this.modelMeta;
360
+ if (preprocessMode === "letterbox") {
361
+ const result = await letterbox(jpeg, Math.max(inputSize.width, inputSize.height));
362
+ const letterboxMeta = {
363
+ scale: result.scale,
364
+ padX: result.padX,
365
+ padY: result.padY,
366
+ originalWidth: result.originalWidth,
367
+ originalHeight: result.originalHeight
368
+ };
369
+ return {
370
+ data: result.data,
371
+ letterboxMeta
372
+ };
373
+ }
374
+ return { data: await resizeAndNormalize(jpeg, inputSize.width, inputSize.height, inputNormalization, inputLayout) };
375
+ }
376
+ async encodeRawAsJpeg(raw, width, height, format) {
377
+ const sharp$3 = (await import("sharp")).default;
378
+ return sharp$3(raw, { raw: {
379
+ width,
380
+ height,
381
+ channels: format === "gray" ? 1 : 3
382
+ } }).jpeg({
383
+ quality: 80,
384
+ mozjpeg: false
385
+ }).toBuffer();
386
+ }
387
+ /** Run an ONNX session with a single input, handling both single and multi-output models */
388
+ async runSession(input, inputShape) {
389
+ if (!this.session) throw new Error("NodeInferenceEngine: not initialized call initialize() first");
390
+ const ort = await import("onnxruntime-node");
391
+ const sess = this.session;
392
+ const inputName = sess.inputNames[0];
393
+ const tensor = new ort.Tensor("float32", input, [...inputShape]);
394
+ const feeds = { [inputName]: tensor };
395
+ const results = await sess.run(feeds);
396
+ const outputNames = sess.outputNames;
397
+ if (outputNames.length === 1) return { tensor: results[outputNames[0]].data };
398
+ const tensors = {};
399
+ for (const name of outputNames) tensors[name] = results[name].data;
400
+ return { tensors };
401
+ }
402
+ async run(input, inputShape) {
403
+ const result = await this.runSession(input, inputShape);
404
+ if ("tensor" in result) return result.tensor;
405
+ const firstKey = Object.keys(result.tensors)[0];
406
+ return result.tensors[firstKey];
407
+ }
408
+ async dispose() {
409
+ this.session = null;
410
+ this.log.debug("Session disposed");
411
+ }
412
+ };
413
+ //#endregion
414
+ //#region src/embedding-encoder/shared/python-engine.ts
415
+ var PythonInferenceEngine = class {
416
+ pythonPath;
417
+ scriptPath;
418
+ modelPath;
419
+ extraArgs;
420
+ runtime;
421
+ device;
422
+ process = null;
423
+ receiveBuffer = Buffer.alloc(0);
424
+ pendingResolve = null;
425
+ pendingReject = null;
426
+ log;
427
+ constructor(pythonPath, scriptPath, runtime, modelPath, extraArgs = [], logger) {
428
+ this.pythonPath = pythonPath;
429
+ this.scriptPath = scriptPath;
430
+ this.modelPath = modelPath;
431
+ this.extraArgs = extraArgs;
432
+ this.runtime = runtime;
433
+ const runtimeDeviceMap = {
434
+ onnx: "cpu",
435
+ coreml: "gpu-mps",
436
+ pytorch: "cpu",
437
+ openvino: "cpu",
438
+ tflite: "cpu"
439
+ };
440
+ this.device = runtimeDeviceMap[runtime];
441
+ this.log = logger ?? createNoopLogger();
442
+ }
443
+ async initialize() {
444
+ const args = [
445
+ this.scriptPath,
446
+ this.modelPath,
447
+ ...this.extraArgs
448
+ ];
449
+ this.process = (0, node_child_process.spawn)(this.pythonPath, args, { stdio: [
450
+ "pipe",
451
+ "pipe",
452
+ "pipe"
453
+ ] });
454
+ if (!this.process.stdout || !this.process.stdin) throw new Error("PythonInferenceEngine: failed to create process pipes");
455
+ this.log.info("Python process started", { meta: {
456
+ pythonPath: this.pythonPath,
457
+ scriptPath: this.scriptPath,
458
+ modelPath: this.modelPath
459
+ } });
460
+ this.process.stderr?.on("data", (chunk) => {
461
+ const lines = chunk.toString().split("\n");
462
+ for (const line of lines) {
463
+ const trimmed = line.trim();
464
+ if (trimmed) this.log.warn(trimmed);
465
+ }
466
+ });
467
+ this.process.on("error", (err) => {
468
+ this.log.error("Process error", { meta: { error: err.message } });
469
+ this.pendingReject?.(err);
470
+ this.pendingReject = null;
471
+ this.pendingResolve = null;
472
+ });
473
+ this.process.on("exit", (code) => {
474
+ if (code !== 0) {
475
+ this.log.error("Process exited", { meta: { code } });
476
+ const err = /* @__PURE__ */ new Error(`PythonInferenceEngine: process exited with code ${code}`);
477
+ this.pendingReject?.(err);
478
+ this.pendingReject = null;
479
+ this.pendingResolve = null;
480
+ }
481
+ });
482
+ this.process.stdout.on("data", (chunk) => {
483
+ this.receiveBuffer = Buffer.concat([this.receiveBuffer, chunk]);
484
+ this._tryReceive();
485
+ });
486
+ await new Promise((resolve, reject) => {
487
+ const timeout = setTimeout(() => resolve(), 2e3);
488
+ this.process?.on("error", (err) => {
489
+ clearTimeout(timeout);
490
+ reject(err);
491
+ });
492
+ this.process?.on("exit", (code) => {
493
+ clearTimeout(timeout);
494
+ if (code !== 0) reject(/* @__PURE__ */ new Error(`PythonInferenceEngine: process exited early with code ${code}`));
495
+ });
496
+ });
497
+ }
498
+ _tryReceive() {
499
+ if (this.receiveBuffer.length < 4) return;
500
+ const length = this.receiveBuffer.readUInt32LE(0);
501
+ if (this.receiveBuffer.length < 4 + length) return;
502
+ const jsonBytes = this.receiveBuffer.subarray(4, 4 + length);
503
+ this.receiveBuffer = this.receiveBuffer.subarray(4 + length);
504
+ const resolve = this.pendingResolve;
505
+ const reject = this.pendingReject;
506
+ this.pendingResolve = null;
507
+ this.pendingReject = null;
508
+ if (!resolve) return;
509
+ try {
510
+ resolve(JSON.parse(jsonBytes.toString("utf8")));
511
+ } catch (err) {
512
+ reject?.(err instanceof Error ? err : new Error(String(err)));
513
+ }
514
+ }
515
+ /** Run inference, returning structured detection results. Encodes raw input to JPEG when needed. */
516
+ async infer(input) {
517
+ const start = Date.now();
518
+ const jpeg = input.kind === "jpeg" ? input.data : await this.encodeRawAsJpeg(input.data, input.width, input.height, input.format);
519
+ const result = await this.sendJpeg(jpeg);
520
+ const durationMs = Date.now() - start;
521
+ this.log.debug("Inference complete", { meta: { durationMs } });
522
+ return {
523
+ structured: result,
524
+ inferenceMs: durationMs
525
+ };
526
+ }
527
+ async encodeRawAsJpeg(raw, width, height, format) {
528
+ const sharp$2 = (await import("sharp")).default;
529
+ return sharp$2(raw, { raw: {
530
+ width,
531
+ height,
532
+ channels: format === "gray" ? 1 : 3
533
+ } }).jpeg({
534
+ quality: 80,
535
+ mozjpeg: false
536
+ }).toBuffer();
537
+ }
538
+ /** Send JPEG buffer via binary IPC, receive JSON detection results */
539
+ async sendJpeg(jpeg) {
540
+ if (!this.process?.stdin) throw new Error("PythonInferenceEngine: process not initialized");
541
+ return new Promise((resolve, reject) => {
542
+ this.pendingResolve = resolve;
543
+ this.pendingReject = reject;
544
+ const lengthBuf = Buffer.allocUnsafe(4);
545
+ lengthBuf.writeUInt32LE(jpeg.length, 0);
546
+ this.process.stdin.write(Buffer.concat([lengthBuf, jpeg]));
547
+ });
548
+ }
549
+ async dispose() {
550
+ const proc = this.process;
551
+ if (!proc) return;
552
+ this.process = null;
553
+ proc.stdin?.end();
554
+ proc.kill("SIGTERM");
555
+ if (!await new Promise((resolve) => {
556
+ const timer = setTimeout(() => {
557
+ resolve(false);
558
+ }, 5e3);
559
+ proc.once("exit", () => {
560
+ clearTimeout(timer);
561
+ resolve(true);
562
+ });
563
+ })) {
564
+ try {
565
+ proc.kill("SIGKILL");
566
+ } catch {}
567
+ this.log.warn("Python process did not exit gracefully — sent SIGKILL");
568
+ } else this.log.debug("Python process terminated");
569
+ }
570
+ };
571
+ //#endregion
572
+ //#region src/embedding-encoder/shared/engine-resolver.ts
573
+ /** Priority order for auto-selection of ONNX backends */
574
+ var AUTO_BACKEND_PRIORITY = [
575
+ "coreml",
576
+ "cuda",
577
+ "tensorrt",
578
+ "cpu"
579
+ ];
580
+ var BACKEND_TO_FORMAT = require_dist.BACKEND_TO_FORMAT;
581
+ var RUNTIME_TO_FORMAT = require_dist.RUNTIME_TO_FORMAT;
538
582
  function extractModelMeta(entry) {
539
- return {
540
- inputSize: entry.inputSize,
541
- inputNormalization: entry.inputNormalization ?? "zero-one",
542
- inputLayout: entry.inputLayout ?? "nchw",
543
- preprocessMode: entry.preprocessMode ?? "letterbox"
544
- };
583
+ return {
584
+ inputSize: entry.inputSize,
585
+ inputNormalization: entry.inputNormalization ?? "zero-one",
586
+ inputLayout: entry.inputLayout ?? "nchw",
587
+ preprocessMode: entry.preprocessMode ?? "letterbox"
588
+ };
545
589
  }
546
590
  function modelFilePath(modelsDir, modelEntry, format) {
547
- const formatEntry = modelEntry.formats[format];
548
- if (!formatEntry) {
549
- throw new Error(`Model ${modelEntry.id} has no ${format} format`);
550
- }
551
- const urlParts = formatEntry.url.split("/");
552
- const filename = urlParts[urlParts.length - 1] ?? `${modelEntry.id}.${format}`;
553
- return path__namespace.join(modelsDir, filename);
591
+ const formatEntry = modelEntry.formats[format];
592
+ if (!formatEntry) throw new Error(`Model ${modelEntry.id} has no ${format} format`);
593
+ const urlParts = formatEntry.url.split("/");
594
+ const filename = urlParts[urlParts.length - 1] ?? `${modelEntry.id}.${format}`;
595
+ return node_path.join(modelsDir, filename);
554
596
  }
555
597
  function modelExists(filePath) {
556
- try {
557
- return fs__namespace.existsSync(filePath);
558
- } catch {
559
- return false;
560
- }
598
+ try {
599
+ return node_fs.existsSync(filePath);
600
+ } catch {
601
+ return false;
602
+ }
561
603
  }
562
604
  async function resolveEngine(options) {
563
- const { runtime, backend, modelEntry, modelsDir, models } = options;
564
- const log = options.logger ?? createNoopLogger();
565
- let selectedFormat;
566
- let selectedBackend;
567
- if (runtime === "auto") {
568
- const available = await probeOnnxBackends();
569
- let chosen = null;
570
- for (const b of AUTO_BACKEND_PRIORITY) {
571
- if (!available.includes(b)) continue;
572
- const fmt = BACKEND_TO_FORMAT[b];
573
- if (!fmt) continue;
574
- if (!modelEntry.formats[fmt]) continue;
575
- chosen = { backend: b, format: fmt };
576
- break;
577
- }
578
- if (!chosen) {
579
- throw new Error(
580
- `resolveEngine: no compatible backend found for model ${modelEntry.id}. Available backends: ${available.join(", ")}`
581
- );
582
- }
583
- selectedFormat = chosen.format;
584
- selectedBackend = chosen.backend;
585
- } else {
586
- const fmt = RUNTIME_TO_FORMAT[runtime];
587
- if (!fmt) {
588
- throw new Error(`resolveEngine: unsupported runtime "${runtime}"`);
589
- }
590
- if (!modelEntry.formats[fmt]) {
591
- if (fmt !== "onnx" && modelEntry.formats["onnx"]) {
592
- selectedFormat = "onnx";
593
- selectedBackend = backend || "cpu";
594
- } else {
595
- throw new Error(
596
- `resolveEngine: model ${modelEntry.id} has no ${fmt} format for runtime ${runtime}`
597
- );
598
- }
599
- } else {
600
- selectedFormat = fmt;
601
- selectedBackend = runtime === "onnx" ? backend || "cpu" : runtime;
602
- }
603
- }
604
- let modelPath;
605
- if (models) {
606
- modelPath = await models.ensure(modelEntry.id, selectedFormat);
607
- } else {
608
- modelPath = modelFilePath(modelsDir, modelEntry, selectedFormat);
609
- if (!modelExists(modelPath)) {
610
- throw new Error(
611
- `resolveEngine: model file not found at ${modelPath} and no model service provided`
612
- );
613
- }
614
- }
615
- log.info("Engine resolved", { meta: { format: selectedFormat, backend: selectedBackend, modelId: modelEntry.id } });
616
- if (selectedFormat === "onnx") {
617
- const engine = new NodeInferenceEngine(modelPath, selectedBackend, extractModelMeta(modelEntry), options.logger);
618
- await engine.initialize();
619
- return { engine, format: selectedFormat, modelPath };
620
- }
621
- const effectiveRuntime = runtime === "auto" ? selectedBackend : runtime;
622
- let { pythonPath } = options;
623
- if (!pythonPath) {
624
- const { execFileSync: efs } = await import("node:child_process");
625
- for (const cmd of ["python3", "python"]) {
626
- try {
627
- efs(cmd, ["--version"], { timeout: 3e3, stdio: "ignore" });
628
- pythonPath = cmd;
629
- break;
630
- } catch {
631
- }
632
- }
633
- }
634
- const scriptName = index.PYTHON_SCRIPT[effectiveRuntime];
635
- if (scriptName && pythonPath) {
636
- const candidates = [
637
- path__namespace.join(__dirname, "../../python", scriptName),
638
- path__namespace.join(__dirname, "../python", scriptName),
639
- path__namespace.join(__dirname, "../../../python", scriptName)
640
- ];
641
- const scriptPath = candidates.find((p) => fs__namespace.existsSync(p));
642
- if (!scriptPath) {
643
- throw new Error(
644
- `resolveEngine: Python script "${scriptName}" not found. Searched:
645
- ${candidates.join("\n")}`
646
- );
647
- }
648
- const inputSize = Math.max(modelEntry.inputSize.width, modelEntry.inputSize.height);
649
- const engine = new PythonInferenceEngine(pythonPath, scriptPath, effectiveRuntime, modelPath, [
650
- `--input-size=${inputSize}`,
651
- `--confidence=0.25`
652
- ], options.logger);
653
- await engine.initialize();
654
- return { engine, format: selectedFormat, modelPath };
655
- }
656
- const fallbackPath = modelFilePath(modelsDir, modelEntry, "onnx");
657
- if (modelEntry.formats["onnx"] && modelExists(fallbackPath)) {
658
- const engine = new NodeInferenceEngine(fallbackPath, "cpu", extractModelMeta(modelEntry), options.logger);
659
- await engine.initialize();
660
- return { engine, format: "onnx", modelPath: fallbackPath };
661
- }
662
- throw new Error(
663
- `resolveEngine: format ${selectedFormat} is not yet supported by NodeInferenceEngine, no Python runtime is available, and no ONNX fallback exists`
664
- );
605
+ const { runtime, backend, modelEntry, modelsDir, models } = options;
606
+ const log = options.logger ?? createNoopLogger();
607
+ let selectedFormat;
608
+ let selectedBackend;
609
+ if (runtime === "auto") {
610
+ const available = await probeOnnxBackends();
611
+ let chosen = null;
612
+ for (const b of AUTO_BACKEND_PRIORITY) {
613
+ if (!available.includes(b)) continue;
614
+ const fmt = BACKEND_TO_FORMAT[b];
615
+ if (!fmt) continue;
616
+ if (!modelEntry.formats[fmt]) continue;
617
+ chosen = {
618
+ backend: b,
619
+ format: fmt
620
+ };
621
+ break;
622
+ }
623
+ if (!chosen) throw new Error(`resolveEngine: no compatible backend found for model ${modelEntry.id}. Available backends: ${available.join(", ")}`);
624
+ selectedFormat = chosen.format;
625
+ selectedBackend = chosen.backend;
626
+ } else {
627
+ const fmt = RUNTIME_TO_FORMAT[runtime];
628
+ if (!fmt) throw new Error(`resolveEngine: unsupported runtime "${runtime}"`);
629
+ if (!modelEntry.formats[fmt]) if (fmt !== "onnx" && modelEntry.formats["onnx"]) {
630
+ selectedFormat = "onnx";
631
+ selectedBackend = backend || "cpu";
632
+ } else throw new Error(`resolveEngine: model ${modelEntry.id} has no ${fmt} format for runtime ${runtime}`);
633
+ else {
634
+ selectedFormat = fmt;
635
+ selectedBackend = runtime === "onnx" ? backend || "cpu" : runtime;
636
+ }
637
+ }
638
+ let modelPath;
639
+ if (models) modelPath = await models.ensure(modelEntry.id, selectedFormat);
640
+ else {
641
+ modelPath = modelFilePath(modelsDir, modelEntry, selectedFormat);
642
+ if (!modelExists(modelPath)) throw new Error(`resolveEngine: model file not found at ${modelPath} and no model service provided`);
643
+ }
644
+ log.info("Engine resolved", { meta: {
645
+ format: selectedFormat,
646
+ backend: selectedBackend,
647
+ modelId: modelEntry.id
648
+ } });
649
+ if (selectedFormat === "onnx") {
650
+ const engine = new NodeInferenceEngine(modelPath, selectedBackend, extractModelMeta(modelEntry), options.logger);
651
+ await engine.initialize();
652
+ return {
653
+ engine,
654
+ format: selectedFormat,
655
+ modelPath
656
+ };
657
+ }
658
+ const effectiveRuntime = runtime === "auto" ? selectedBackend : runtime;
659
+ let { pythonPath } = options;
660
+ if (!pythonPath) {
661
+ const { execFileSync: efs } = await import("node:child_process");
662
+ for (const cmd of ["python3", "python"]) try {
663
+ efs(cmd, ["--version"], {
664
+ timeout: 3e3,
665
+ stdio: "ignore"
666
+ });
667
+ pythonPath = cmd;
668
+ break;
669
+ } catch {}
670
+ }
671
+ const scriptName = require_dist.PYTHON_SCRIPT[effectiveRuntime];
672
+ if (scriptName && pythonPath) {
673
+ const candidates = [
674
+ node_path.join(__dirname, "../../python", scriptName),
675
+ node_path.join(__dirname, "../python", scriptName),
676
+ node_path.join(__dirname, "../../../python", scriptName)
677
+ ];
678
+ const scriptPath = candidates.find((p) => node_fs.existsSync(p));
679
+ if (!scriptPath) throw new Error(`resolveEngine: Python script "${scriptName}" not found. Searched:\n${candidates.join("\n")}`);
680
+ const inputSize = Math.max(modelEntry.inputSize.width, modelEntry.inputSize.height);
681
+ const engine = new PythonInferenceEngine(pythonPath, scriptPath, effectiveRuntime, modelPath, [`--input-size=${inputSize}`, `--confidence=0.25`], options.logger);
682
+ await engine.initialize();
683
+ return {
684
+ engine,
685
+ format: selectedFormat,
686
+ modelPath
687
+ };
688
+ }
689
+ const fallbackPath = modelFilePath(modelsDir, modelEntry, "onnx");
690
+ if (modelEntry.formats["onnx"] && modelExists(fallbackPath)) {
691
+ const engine = new NodeInferenceEngine(fallbackPath, "cpu", extractModelMeta(modelEntry), options.logger);
692
+ await engine.initialize();
693
+ return {
694
+ engine,
695
+ format: "onnx",
696
+ modelPath: fallbackPath
697
+ };
698
+ }
699
+ throw new Error(`resolveEngine: format ${selectedFormat} is not yet supported by NodeInferenceEngine, no Python runtime is available, and no ONNX fallback exists`);
665
700
  }
701
+ /** Probe which ONNX execution providers are available on this system */
666
702
  async function probeOnnxBackends() {
667
- const available = ["cpu"];
668
- try {
669
- const ort = await import("onnxruntime-node");
670
- const providers = ort.env?.webgl?.disabled !== void 0 ? ort.InferenceSession.getAvailableProviders?.() ?? [] : [];
671
- for (const p of providers) {
672
- const normalized = p.toLowerCase().replace("executionprovider", "");
673
- if (normalized === "coreml") available.push("coreml");
674
- else if (normalized === "cuda") available.push("cuda");
675
- else if (normalized === "tensorrt") available.push("tensorrt");
676
- }
677
- } catch {
678
- }
679
- if (process.platform === "darwin" && !available.includes("coreml")) {
680
- available.push("coreml");
681
- }
682
- return [...new Set(available)];
703
+ const available = ["cpu"];
704
+ try {
705
+ const ort = await import("onnxruntime-node");
706
+ const providers = ort.env?.webgl?.disabled !== void 0 ? ort.InferenceSession.getAvailableProviders?.() ?? [] : [];
707
+ for (const p of providers) {
708
+ const normalized = p.toLowerCase().replace("executionprovider", "");
709
+ if (normalized === "coreml") available.push("coreml");
710
+ else if (normalized === "cuda") available.push("cuda");
711
+ else if (normalized === "tensorrt") available.push("tensorrt");
712
+ }
713
+ } catch {}
714
+ if (process.platform === "darwin" && !available.includes("coreml")) available.push("coreml");
715
+ return [...new Set(available)];
683
716
  }
684
- const CLIP_MODEL_META = {
685
- "clip-vit-b32": {
686
- imageModelId: "clip-vit-b32",
687
- textModelId: "clip-vit-b32-text",
688
- embeddingDim: 512,
689
- inputSize: 224,
690
- tokenizerType: "clip"
691
- },
692
- "clip-vit-b16": {
693
- imageModelId: "clip-vit-b16",
694
- textModelId: "clip-vit-b16-text",
695
- embeddingDim: 512,
696
- inputSize: 224,
697
- tokenizerType: "clip"
698
- },
699
- "siglip2-b16-256": {
700
- imageModelId: "siglip2-b16-256",
701
- textModelId: "siglip2-b16-256-text",
702
- embeddingDim: 768,
703
- inputSize: 256,
704
- tokenizerType: "siglip"
705
- }
717
+ //#endregion
718
+ //#region src/embedding-encoder/addon/clip-models.ts
719
+ var CLIP_MODEL_META = {
720
+ "clip-vit-b32": {
721
+ imageModelId: "clip-vit-b32",
722
+ textModelId: "clip-vit-b32-text",
723
+ embeddingDim: 512,
724
+ inputSize: 224,
725
+ tokenizerType: "clip"
726
+ },
727
+ "clip-vit-b16": {
728
+ imageModelId: "clip-vit-b16",
729
+ textModelId: "clip-vit-b16-text",
730
+ embeddingDim: 512,
731
+ inputSize: 224,
732
+ tokenizerType: "clip"
733
+ },
734
+ "siglip2-b16-256": {
735
+ imageModelId: "siglip2-b16-256",
736
+ textModelId: "siglip2-b16-256-text",
737
+ embeddingDim: 768,
738
+ inputSize: 256,
739
+ tokenizerType: "siglip"
740
+ }
706
741
  };
707
- const DEFAULT_CLIP_MODEL = "clip-vit-b32";
742
+ var DEFAULT_CLIP_MODEL = "clip-vit-b32";
708
743
  function getModelMeta(modelId) {
709
- return CLIP_MODEL_META[modelId] ?? CLIP_MODEL_META[DEFAULT_CLIP_MODEL];
744
+ return CLIP_MODEL_META[modelId] ?? CLIP_MODEL_META["clip-vit-b32"];
710
745
  }
711
- const CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073];
712
- const CLIP_STD = [0.26862954, 0.26130258, 0.27577711];
746
+ //#endregion
747
+ //#region src/embedding-encoder/addon/clip-preprocessing.ts
748
+ var CLIP_MEAN = [
749
+ .48145466,
750
+ .4578275,
751
+ .40821073
752
+ ];
753
+ var CLIP_STD = [
754
+ .26862954,
755
+ .26130258,
756
+ .27577711
757
+ ];
758
+ /**
759
+ * Preprocess raw RGB buffer for CLIP inference.
760
+ * Resizes (nearest-neighbor for speed), normalizes with CLIP mean/std, outputs NCHW Float32Array.
761
+ * For production use, the caller should use sharp to resize the JPEG to targetW×targetH
762
+ * before calling this with the raw RGB. This function handles normalization + layout.
763
+ */
713
764
  function preprocessForClip(rgb, srcWidth, srcHeight, targetWidth, targetHeight) {
714
- const pixels = targetWidth * targetHeight;
715
- const result = new Float32Array(3 * pixels);
716
- for (let y = 0; y < targetHeight; y++) {
717
- for (let x = 0; x < targetWidth; x++) {
718
- const srcX = Math.min(Math.floor(x / targetWidth * srcWidth), srcWidth - 1);
719
- const srcY = Math.min(Math.floor(y / targetHeight * srcHeight), srcHeight - 1);
720
- const srcIdx = (srcY * srcWidth + srcX) * 3;
721
- const dstIdx = y * targetWidth + x;
722
- for (let c = 0; c < 3; c++) {
723
- const val = (rgb[srcIdx + c] ?? 0) / 255;
724
- result[c * pixels + dstIdx] = (val - CLIP_MEAN[c]) / CLIP_STD[c];
725
- }
726
- }
727
- }
728
- return result;
765
+ const pixels = targetWidth * targetHeight;
766
+ const result = new Float32Array(3 * pixels);
767
+ for (let y = 0; y < targetHeight; y++) for (let x = 0; x < targetWidth; x++) {
768
+ const srcX = Math.min(Math.floor(x / targetWidth * srcWidth), srcWidth - 1);
769
+ const srcIdx = (Math.min(Math.floor(y / targetHeight * srcHeight), srcHeight - 1) * srcWidth + srcX) * 3;
770
+ const dstIdx = y * targetWidth + x;
771
+ for (let c = 0; c < 3; c++) {
772
+ const val = (rgb[srcIdx + c] ?? 0) / 255;
773
+ result[c * pixels + dstIdx] = (val - CLIP_MEAN[c]) / CLIP_STD[c];
774
+ }
775
+ }
776
+ return result;
729
777
  }
778
+ /**
779
+ * L2-normalize a vector in-place and return it.
780
+ */
730
781
  function l2Normalize(vec) {
731
- let norm = 0;
732
- for (let i = 0; i < vec.length; i++) norm += vec[i] * vec[i];
733
- norm = Math.sqrt(norm);
734
- if (norm > 0) {
735
- for (let i = 0; i < vec.length; i++) vec[i] /= norm;
736
- }
737
- return vec;
738
- }
739
- class EmbeddingEncoderAddon extends index.BaseAddon {
740
- imageRawEngine = null;
741
- textRawEngine = null;
742
- imagePythonEngine = null;
743
- textPythonEngine = null;
744
- models = null;
745
- isPython = false;
746
- constructor() {
747
- super({ modelId: DEFAULT_CLIP_MODEL, runtime: "auto", backend: "cpu" });
748
- }
749
- async onInitialize() {
750
- const modelsDir = await this.ctx.api.storage.resolve.query({ location: "models", relativePath: "" }).catch(() => "camstack-data/models");
751
- this.models = new core.ModelDownloadService(modelsDir, []);
752
- return [{ capability: index.embeddingEncoderCapability, provider: this }];
753
- }
754
- async encode(input) {
755
- const { crop, width, height } = input;
756
- await this.ensureImageEngine();
757
- const meta = getModelMeta(this.config.modelId);
758
- const start = Date.now();
759
- if (this.isPython && this.imagePythonEngine) {
760
- const jpegBuffer = Buffer.isBuffer(crop) ? crop : Buffer.from(crop);
761
- const result = await this.imagePythonEngine.infer({ kind: "jpeg", data: jpegBuffer });
762
- const rawEmbedding = result.structured?.["embedding"];
763
- const normalized2 = l2Normalize(new Float32Array(rawEmbedding));
764
- return {
765
- embedding: Array.from(normalized2),
766
- inferenceMs: result.inferenceMs ?? Date.now() - start
767
- };
768
- }
769
- const cropBuffer = Buffer.isBuffer(crop) ? crop : Buffer.from(crop);
770
- const preprocessed = preprocessForClip(cropBuffer, width, height, meta.inputSize, meta.inputSize);
771
- const output = await this.imageRawEngine.run(preprocessed, [1, 3, meta.inputSize, meta.inputSize]);
772
- const sliced = output.length > meta.embeddingDim ? output.slice(0, meta.embeddingDim) : output;
773
- const normalized = l2Normalize(new Float32Array(sliced));
774
- return {
775
- embedding: Array.from(normalized),
776
- inferenceMs: Date.now() - start
777
- };
778
- }
779
- async encodeText(input) {
780
- const { text } = input;
781
- await this.ensureTextEngine();
782
- const meta = getModelMeta(this.config.modelId);
783
- const start = Date.now();
784
- if (this.isPython && this.textPythonEngine) {
785
- const textBuffer = Buffer.from(JSON.stringify({ text }), "utf-8");
786
- const result = await this.textPythonEngine.infer({ kind: "jpeg", data: textBuffer });
787
- const rawEmbedding = result.structured?.["embedding"];
788
- const normalized2 = l2Normalize(new Float32Array(rawEmbedding));
789
- return {
790
- embedding: Array.from(normalized2),
791
- inferenceMs: result.inferenceMs ?? Date.now() - start
792
- };
793
- }
794
- const tokenIds = clipTokenize(text);
795
- const inputTensor = new Float32Array(tokenIds);
796
- const output = await this.textRawEngine.run(inputTensor, [1, tokenIds.length]);
797
- const sliced = output.length > meta.embeddingDim ? output.slice(0, meta.embeddingDim) : output;
798
- const normalized = l2Normalize(new Float32Array(sliced));
799
- return {
800
- embedding: Array.from(normalized),
801
- inferenceMs: Date.now() - start
802
- };
803
- }
804
- async getInfo() {
805
- const meta = getModelMeta(this.config.modelId);
806
- return {
807
- modelId: this.config.modelId,
808
- embeddingDim: meta.embeddingDim,
809
- ready: this.imageRawEngine !== null || this.imagePythonEngine !== null
810
- };
811
- }
812
- async ensureImageEngine() {
813
- if (this.imageRawEngine || this.imagePythonEngine) return;
814
- const meta = getModelMeta(this.config.modelId);
815
- const imageEntry = CLIP_IMAGE_MODELS.find((m) => m.id === meta.imageModelId);
816
- if (!imageEntry) {
817
- throw new Error(`EmbeddingEncoderAddon: unknown image model "${meta.imageModelId}"`);
818
- }
819
- await this.resolveForEntry(imageEntry, "image");
820
- }
821
- async ensureTextEngine() {
822
- if (this.textRawEngine || this.textPythonEngine) return;
823
- const meta = getModelMeta(this.config.modelId);
824
- const textEntry = CLIP_TEXT_MODELS.find((m) => m.id === meta.textModelId);
825
- if (!textEntry) {
826
- throw new Error(`EmbeddingEncoderAddon: unknown text model "${meta.textModelId}"`);
827
- }
828
- await this.resolveForEntry(textEntry, "text");
829
- }
830
- async resolveForEntry(entry, target) {
831
- const runtime = this.config.runtime === "auto" ? "auto" : this.config.runtime === "node" ? "onnx" : this.config.runtime;
832
- const modelsDir = this.models.getModelsDir();
833
- const engineLogger = this.ctx.logger.withTags({
834
- modelId: entry.id,
835
- runtime: this.config.runtime,
836
- backend: this.config.backend
837
- });
838
- await this.models.ensure(entry.id, "onnx");
839
- const resolved = await resolveEngine({
840
- runtime,
841
- backend: this.config.backend,
842
- modelEntry: entry,
843
- modelsDir,
844
- models: this.models ?? void 0,
845
- logger: engineLogger
846
- });
847
- if (resolved.format !== "onnx") {
848
- this.isPython = true;
849
- if (target === "image") {
850
- this.imagePythonEngine = resolved.engine;
851
- } else {
852
- this.textPythonEngine = resolved.engine;
853
- }
854
- } else {
855
- const rawEngine = new NodeRawTensorEngine(resolved.modelPath, this.config.backend, engineLogger);
856
- await rawEngine.initialize();
857
- await resolved.engine.dispose();
858
- if (target === "image") {
859
- this.imageRawEngine = rawEngine;
860
- } else {
861
- this.textRawEngine = rawEngine;
862
- }
863
- }
864
- }
865
- async onShutdown() {
866
- await this.imageRawEngine?.dispose();
867
- await this.textRawEngine?.dispose();
868
- await this.imagePythonEngine?.dispose();
869
- await this.textPythonEngine?.dispose();
870
- }
871
- // ── Three-level settings API (Phase 3) ──────────────────────────────
872
- globalSettingsSchema() {
873
- return this.schema({
874
- sections: [
875
- {
876
- id: "embedding-encoder-settings",
877
- title: "Embedding Encoder",
878
- columns: 2,
879
- fields: [
880
- {
881
- type: "text",
882
- key: "modelId",
883
- label: "Model ID",
884
- description: "CLIP model identifier to use for image/text embedding",
885
- default: DEFAULT_CLIP_MODEL
886
- },
887
- {
888
- type: "select",
889
- key: "runtime",
890
- label: "Runtime",
891
- description: "Inference runtime (auto selects the best available)",
892
- default: "auto",
893
- options: [
894
- { label: "Auto", value: "auto" },
895
- { label: "Node (ONNX)", value: "node" },
896
- { label: "Python", value: "python" }
897
- ]
898
- },
899
- {
900
- type: "select",
901
- key: "backend",
902
- label: "Backend",
903
- description: "Hardware backend for inference acceleration",
904
- default: "cpu",
905
- options: [
906
- { label: "CPU", value: "cpu" },
907
- { label: "CUDA", value: "cuda" },
908
- { label: "CoreML", value: "coreml" }
909
- ]
910
- }
911
- ]
912
- }
913
- ]
914
- });
915
- }
916
- async onConfigChanged() {
917
- }
782
+ let norm = 0;
783
+ for (let i = 0; i < vec.length; i++) norm += vec[i] * vec[i];
784
+ norm = Math.sqrt(norm);
785
+ if (norm > 0) for (let i = 0; i < vec.length; i++) vec[i] /= norm;
786
+ return vec;
918
787
  }
788
+ //#endregion
789
+ //#region src/embedding-encoder/addon/index.ts
790
+ var EmbeddingEncoderAddon = class extends require_dist.BaseAddon {
791
+ imageRawEngine = null;
792
+ textRawEngine = null;
793
+ imagePythonEngine = null;
794
+ textPythonEngine = null;
795
+ models = null;
796
+ isPython = false;
797
+ constructor() {
798
+ super({
799
+ modelId: DEFAULT_CLIP_MODEL,
800
+ runtime: "auto",
801
+ backend: "cpu"
802
+ });
803
+ }
804
+ async onInitialize() {
805
+ const modelsDir = await this.ctx.api.storage.resolve.query({
806
+ location: "models",
807
+ relativePath: ""
808
+ }).catch(() => "camstack-data/models");
809
+ this.models = new _camstack_core.ModelDownloadService(modelsDir, []);
810
+ return [{
811
+ capability: require_dist.embeddingEncoderCapability,
812
+ provider: this
813
+ }];
814
+ }
815
+ async encode(input) {
816
+ const { crop, width, height } = input;
817
+ await this.ensureImageEngine();
818
+ const meta = getModelMeta(this.config.modelId);
819
+ const start = Date.now();
820
+ if (this.isPython && this.imagePythonEngine) {
821
+ const jpegBuffer = Buffer.isBuffer(crop) ? crop : Buffer.from(crop);
822
+ const result = await this.imagePythonEngine.infer({
823
+ kind: "jpeg",
824
+ data: jpegBuffer
825
+ });
826
+ const rawEmbedding = result.structured?.["embedding"];
827
+ const normalized = l2Normalize(new Float32Array(rawEmbedding));
828
+ return {
829
+ embedding: Array.from(normalized),
830
+ inferenceMs: result.inferenceMs ?? Date.now() - start
831
+ };
832
+ }
833
+ const preprocessed = preprocessForClip(Buffer.isBuffer(crop) ? crop : Buffer.from(crop), width, height, meta.inputSize, meta.inputSize);
834
+ const output = await this.imageRawEngine.run(preprocessed, [
835
+ 1,
836
+ 3,
837
+ meta.inputSize,
838
+ meta.inputSize
839
+ ]);
840
+ const sliced = output.length > meta.embeddingDim ? output.slice(0, meta.embeddingDim) : output;
841
+ const normalized = l2Normalize(new Float32Array(sliced));
842
+ return {
843
+ embedding: Array.from(normalized),
844
+ inferenceMs: Date.now() - start
845
+ };
846
+ }
847
+ async encodeText(input) {
848
+ const { text } = input;
849
+ await this.ensureTextEngine();
850
+ const meta = getModelMeta(this.config.modelId);
851
+ const start = Date.now();
852
+ if (this.isPython && this.textPythonEngine) {
853
+ const textBuffer = Buffer.from(JSON.stringify({ text }), "utf-8");
854
+ const result = await this.textPythonEngine.infer({
855
+ kind: "jpeg",
856
+ data: textBuffer
857
+ });
858
+ const rawEmbedding = result.structured?.["embedding"];
859
+ const normalized = l2Normalize(new Float32Array(rawEmbedding));
860
+ return {
861
+ embedding: Array.from(normalized),
862
+ inferenceMs: result.inferenceMs ?? Date.now() - start
863
+ };
864
+ }
865
+ const tokenIds = clipTokenize(text);
866
+ const inputTensor = new Float32Array(tokenIds);
867
+ const output = await this.textRawEngine.run(inputTensor, [1, tokenIds.length]);
868
+ const sliced = output.length > meta.embeddingDim ? output.slice(0, meta.embeddingDim) : output;
869
+ const normalized = l2Normalize(new Float32Array(sliced));
870
+ return {
871
+ embedding: Array.from(normalized),
872
+ inferenceMs: Date.now() - start
873
+ };
874
+ }
875
+ async getInfo() {
876
+ const meta = getModelMeta(this.config.modelId);
877
+ return {
878
+ modelId: this.config.modelId,
879
+ embeddingDim: meta.embeddingDim,
880
+ ready: this.imageRawEngine !== null || this.imagePythonEngine !== null
881
+ };
882
+ }
883
+ async ensureImageEngine() {
884
+ if (this.imageRawEngine || this.imagePythonEngine) return;
885
+ const meta = getModelMeta(this.config.modelId);
886
+ const imageEntry = CLIP_IMAGE_MODELS.find((m) => m.id === meta.imageModelId);
887
+ if (!imageEntry) throw new Error(`EmbeddingEncoderAddon: unknown image model "${meta.imageModelId}"`);
888
+ await this.resolveForEntry(imageEntry, "image");
889
+ }
890
+ async ensureTextEngine() {
891
+ if (this.textRawEngine || this.textPythonEngine) return;
892
+ const meta = getModelMeta(this.config.modelId);
893
+ const textEntry = CLIP_TEXT_MODELS.find((m) => m.id === meta.textModelId);
894
+ if (!textEntry) throw new Error(`EmbeddingEncoderAddon: unknown text model "${meta.textModelId}"`);
895
+ await this.resolveForEntry(textEntry, "text");
896
+ }
897
+ async resolveForEntry(entry, target) {
898
+ const runtime = this.config.runtime === "auto" ? "auto" : this.config.runtime === "node" ? "onnx" : this.config.runtime;
899
+ const modelsDir = this.models.getModelsDir();
900
+ const engineLogger = this.ctx.logger.withTags({
901
+ modelId: entry.id,
902
+ runtime: this.config.runtime,
903
+ backend: this.config.backend
904
+ });
905
+ await this.models.ensure(entry.id, "onnx");
906
+ const resolved = await resolveEngine({
907
+ runtime,
908
+ backend: this.config.backend,
909
+ modelEntry: entry,
910
+ modelsDir,
911
+ models: this.models ?? void 0,
912
+ logger: engineLogger
913
+ });
914
+ if (resolved.format !== "onnx") {
915
+ this.isPython = true;
916
+ if (target === "image") this.imagePythonEngine = resolved.engine;
917
+ else this.textPythonEngine = resolved.engine;
918
+ } else {
919
+ const rawEngine = new NodeRawTensorEngine(resolved.modelPath, this.config.backend, engineLogger);
920
+ await rawEngine.initialize();
921
+ await resolved.engine.dispose();
922
+ if (target === "image") this.imageRawEngine = rawEngine;
923
+ else this.textRawEngine = rawEngine;
924
+ }
925
+ }
926
+ async onShutdown() {
927
+ await this.imageRawEngine?.dispose();
928
+ await this.textRawEngine?.dispose();
929
+ await this.imagePythonEngine?.dispose();
930
+ await this.textPythonEngine?.dispose();
931
+ }
932
+ globalSettingsSchema() {
933
+ return this.schema({ sections: [{
934
+ id: "embedding-encoder-settings",
935
+ title: "Embedding Encoder",
936
+ columns: 2,
937
+ fields: [
938
+ {
939
+ type: "text",
940
+ key: "modelId",
941
+ label: "Model ID",
942
+ description: "CLIP model identifier to use for image/text embedding",
943
+ default: DEFAULT_CLIP_MODEL
944
+ },
945
+ {
946
+ type: "select",
947
+ key: "runtime",
948
+ label: "Runtime",
949
+ description: "Inference runtime (auto selects the best available)",
950
+ default: "auto",
951
+ options: [
952
+ {
953
+ label: "Auto",
954
+ value: "auto"
955
+ },
956
+ {
957
+ label: "Node (ONNX)",
958
+ value: "node"
959
+ },
960
+ {
961
+ label: "Python",
962
+ value: "python"
963
+ }
964
+ ]
965
+ },
966
+ {
967
+ type: "select",
968
+ key: "backend",
969
+ label: "Backend",
970
+ description: "Hardware backend for inference acceleration",
971
+ default: "cpu",
972
+ options: [
973
+ {
974
+ label: "CPU",
975
+ value: "cpu"
976
+ },
977
+ {
978
+ label: "CUDA",
979
+ value: "cuda"
980
+ },
981
+ {
982
+ label: "CoreML",
983
+ value: "coreml"
984
+ }
985
+ ]
986
+ }
987
+ ]
988
+ }] });
989
+ }
990
+ async onConfigChanged() {}
991
+ };
992
+ /**
993
+ * Minimal CLIP tokenizer — encodes ASCII text to token IDs.
994
+ * Production implementations should use a proper BPE tokenizer;
995
+ * this is a simplified placeholder that maps characters to IDs
996
+ * with SOT/EOT tokens for basic functionality.
997
+ */
919
998
  function clipTokenize(text, maxLength = 77) {
920
- const SOT_TOKEN = 49406;
921
- const EOT_TOKEN = 49407;
922
- const tokens = [SOT_TOKEN];
923
- for (let i = 0; i < text.length && tokens.length < maxLength - 1; i++) {
924
- tokens.push(text.charCodeAt(i) + 256);
925
- }
926
- tokens.push(EOT_TOKEN);
927
- while (tokens.length < maxLength) {
928
- tokens.push(0);
929
- }
930
- return tokens;
999
+ const SOT_TOKEN = 49406;
1000
+ const EOT_TOKEN = 49407;
1001
+ const tokens = [SOT_TOKEN];
1002
+ for (let i = 0; i < text.length && tokens.length < maxLength - 1; i++) tokens.push(text.charCodeAt(i) + 256);
1003
+ tokens.push(EOT_TOKEN);
1004
+ while (tokens.length < maxLength) tokens.push(0);
1005
+ return tokens;
931
1006
  }
1007
+ //#endregion
932
1008
  exports.EmbeddingEncoderAddon = EmbeddingEncoderAddon;
933
1009
  exports.default = EmbeddingEncoderAddon;
934
- //# sourceMappingURL=index.js.map