@camstack/addon-post-analysis 0.1.20 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist-4mTLJ7BJ.mjs +20750 -0
- package/dist/dist-CS2K80so.js +20933 -0
- package/dist/embedding-encoder/index.js +977 -902
- package/dist/embedding-encoder/index.mjs +967 -860
- package/dist/enrichment-engine/index.js +834 -833
- package/dist/enrichment-engine/index.mjs +828 -832
- package/dist/pipeline-analytics/_stub.js +1680 -1396
- package/dist/pipeline-analytics/_virtual_mf-localSharedImportMap___mfe_internal__addon_pipeline_analytics_widgets-DOSUJ-U0.mjs +156 -0
- package/dist/pipeline-analytics/_virtual_mf___mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_camstack_mf_1_ui_mf_2_library__loadShare__.js-DJvmVCso.mjs +26 -0
- package/dist/pipeline-analytics/_virtual_mf___mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_tanstack_mf_1_react_mf_2_query__loadShare__.js-B3Wx5J80.mjs +26 -0
- package/dist/pipeline-analytics/_virtual_mf___mfe_internal__addon_pipeline_analytics_widgets__loadShare__react__loadShare__.js-C0AuF9av.mjs +26 -0
- package/dist/pipeline-analytics/_virtual_mf___mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_1_jsx_mf_2_runtime__loadShare__.js-Bm-iyjmq.mjs +26 -0
- package/dist/pipeline-analytics/dist-CYZr2fwk.mjs +2726 -0
- package/dist/pipeline-analytics/hostInit-BazRS2O7.mjs +129 -0
- package/dist/pipeline-analytics/index.js +7112 -3100
- package/dist/pipeline-analytics/index.mjs +7105 -3100
- package/dist/pipeline-analytics/remoteEntry.js +134 -2973
- package/dist/pipeline-analytics/remoteEntry.ssr.js +33 -0
- package/dist/pipeline-analytics/virtualExposes-BgYzpJZG.mjs +27 -0
- package/dist/pipeline-analytics/virtual_mf-exposes-ssr___mfe_internal__addon_pipeline_analytics_widgets__remoteEntry_js-D7qgWCKX.mjs +10 -0
- package/dist/resolve-frame-5lMxmeI1.js +57 -0
- package/dist/resolve-frame-CT1T1tWy.mjs +44 -0
- package/package.json +15 -6
- package/dist/embedding-encoder/index.js.map +0 -1
- package/dist/embedding-encoder/index.mjs.map +0 -1
- package/dist/enrichment-engine/index.js.map +0 -1
- package/dist/enrichment-engine/index.mjs.map +0 -1
- package/dist/index-B0RhVv1c.js +0 -17107
- package/dist/index-B0RhVv1c.js.map +0 -1
- package/dist/index-ot5PeFg_.mjs +0 -17108
- package/dist/index-ot5PeFg_.mjs.map +0 -1
- package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/AudioHistoryChart.d.ts +0 -4
- package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/AudioMetricsPanel.d.ts +0 -10
- package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/DetectionHistoryChart.d.ts +0 -4
- package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/LiveStatsTab.d.ts +0 -5
- package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/MotionHistoryChart.d.ts +0 -4
- package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/OccupancyHistoryChart.d.ts +0 -4
- package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/OccupancyPanel.d.ts +0 -10
- package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/chart-utils.d.ts +0 -97
- package/dist/pipeline-analytics/@mf-types/compiled-types/pipeline-analytics/widgets/index.d.ts +0 -29
- package/dist/pipeline-analytics/@mf-types/widgets.d.ts +0 -2
- package/dist/pipeline-analytics/@mf-types.d.ts +0 -3
- package/dist/pipeline-analytics/@mf-types.zip +0 -0
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_camstack_mf_1_sdk__loadShare__.mjs-lantnv8e.mjs +0 -12
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_camstack_mf_1_types__loadShare__.mjs-BD3oMNGB.mjs +0 -29
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_camstack_mf_1_ui_mf_2_library__loadShare__.mjs-BgOHCakr.mjs +0 -18
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_tanstack_mf_1_react_mf_2_query__loadShare__.mjs-DoWbefqS.mjs +0 -104
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_trpc_mf_1_client__loadShare__.mjs-52bfkwC8.mjs +0 -85
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare___mf_0_trpc_mf_1_react_mf_2_query__loadShare__.mjs-CVrnrGED.mjs +0 -62
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react__loadShare__.mjs-D1qPKjvR.mjs +0 -89
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react__loadShare__.mjs_commonjs-proxy-B5X50Xa4.mjs +0 -29
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_1_jsx_mf_2_runtime__loadShare__.mjs-BsyrX6NO.mjs +0 -36
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_2_dom__loadShare__.mjs-Dp8hqYOB.mjs +0 -45
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_2_dom__loadShare__.mjs_commonjs-proxy-B10b5k5J.mjs +0 -6
- package/dist/pipeline-analytics/__mfe_internal__addon_pipeline_analytics_widgets__loadShare__react_mf_2_dom_mf_1_client__loadShare__.mjs-BZjEt71l.mjs +0 -34
- package/dist/pipeline-analytics/_virtual_mf-localSharedImportMap___mfe_internal__addon_pipeline_analytics_widgets-DWB3apaJ.mjs +0 -156
- package/dist/pipeline-analytics/client-C6xdgLZU.mjs +0 -9836
- package/dist/pipeline-analytics/getErrorShape-BPSzUA7W-TlK8ipWe.mjs +0 -211
- package/dist/pipeline-analytics/hostInit-3cyL9eyG.mjs +0 -168
- package/dist/pipeline-analytics/index-BCTHeI2m.mjs +0 -1641
- package/dist/pipeline-analytics/index-BuWLz0GG.mjs +0 -2603
- package/dist/pipeline-analytics/index-CIwq-tQL.mjs +0 -725
- package/dist/pipeline-analytics/index-CWBMDbou.mjs +0 -435
- package/dist/pipeline-analytics/index-CWkKuNLr.mjs +0 -232
- package/dist/pipeline-analytics/index-CZhagnlH.mjs +0 -67784
- package/dist/pipeline-analytics/index-D883Q5B8.mjs +0 -185
- package/dist/pipeline-analytics/index-DtOI1aTU.mjs +0 -18504
- package/dist/pipeline-analytics/index-xncRG7-x.mjs +0 -2713
- package/dist/pipeline-analytics/index.js.map +0 -1
- package/dist/pipeline-analytics/index.mjs.map +0 -1
- package/dist/pipeline-analytics/jsx-runtime-DdLhuHmJ.mjs +0 -55
- package/dist/pipeline-analytics/schemas-B7L0qZtq.mjs +0 -3599
- package/dist/pipeline-analytics/virtualExposes-8FzWTdq3.mjs +0 -42
|
@@ -1,894 +1,1001 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { a as PYTHON_SCRIPT, f as embeddingEncoderCapability, n as BaseAddon, o as RUNTIME_TO_FORMAT$1, t as BACKEND_TO_FORMAT$1 } from "../dist-4mTLJ7BJ.mjs";
|
|
2
|
+
import sharp from "sharp";
|
|
2
3
|
import { ModelDownloadService } from "@camstack/core";
|
|
3
4
|
import * as path from "node:path";
|
|
4
5
|
import * as fs from "node:fs";
|
|
5
|
-
import sharp from "sharp";
|
|
6
6
|
import { spawn } from "node:child_process";
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
7
|
+
//#region src/embedding-encoder/catalogs/embedding-models.ts
|
|
8
|
+
/**
 * Catalog of image (vision tower) embedding models, ONNX only.
 *
 * Every entry takes a square input (`side` × `side`), declares an "nchw"
 * layout with normalization "none", and carries an empty label list.
 * The rows below hold only what differs between models; the shared fields
 * are filled in by the mapper.
 */
var CLIP_IMAGE_MODELS = [
	{
		id: "clip-vit-b32",
		name: "CLIP ViT-B/32",
		description: "OpenAI CLIP ViT-B/32 — fast, 512-dim, int8 quantized (85 MB)",
		side: 224,
		url: "https://huggingface.co/Xenova/clip-vit-base-patch32/resolve/main/onnx/vision_model_quantized.onnx",
		sizeMB: 85
	},
	{
		id: "clip-vit-b16",
		name: "CLIP ViT-B/16",
		description: "OpenAI CLIP ViT-B/16 — higher accuracy, 512-dim, int8 quantized (83 MB)",
		side: 224,
		url: "https://huggingface.co/Xenova/clip-vit-base-patch16/resolve/main/onnx/vision_model_quantized.onnx",
		sizeMB: 83
	},
	{
		id: "siglip2-b16-256",
		name: "SigLIP2 Base/16 256",
		description: "Google SigLIP2 — superior scene understanding, 768-dim, int8 quantized (90 MB)",
		side: 256,
		url: "https://huggingface.co/onnx-community/siglip2-base-patch16-256-ONNX/resolve/main/onnx/vision_model_quantized.onnx",
		sizeMB: 90
	}
].map(({ side, url, sizeMB, ...identity }) => ({
	...identity,
	inputSize: {
		width: side,
		height: side
	},
	labels: [],
	inputLayout: "nchw",
	inputNormalization: "none",
	formats: { onnx: {
		url,
		sizeMB
	} }
}));
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
58
|
+
/**
 * Catalog of text encoder models, ONNX only.
 *
 * Text encoders take no image, so every entry reports a 0 × 0 `inputSize`
 * and an empty label list; no layout or normalization field is set.
 * The rows below hold only what differs between models.
 */
var CLIP_TEXT_MODELS = [
	{
		id: "clip-vit-b32-text",
		name: "CLIP ViT-B/32 Text Encoder",
		description: "Text encoder for CLIP ViT-B/32, int8 quantized (62 MB)",
		url: "https://huggingface.co/Xenova/clip-vit-base-patch32/resolve/main/onnx/text_model_quantized.onnx",
		sizeMB: 62
	},
	{
		id: "clip-vit-b16-text",
		name: "CLIP ViT-B/16 Text Encoder",
		description: "Text encoder for CLIP ViT-B/16, int8 quantized (62 MB)",
		url: "https://huggingface.co/Xenova/clip-vit-base-patch16/resolve/main/onnx/text_model_quantized.onnx",
		sizeMB: 62
	},
	{
		id: "siglip2-b16-256-text",
		name: "SigLIP2 Base/16 256 Text Encoder",
		description: "Text encoder for SigLIP2, int8 quantized (270 MB)",
		url: "https://huggingface.co/onnx-community/siglip2-base-patch16-256-ONNX/resolve/main/onnx/text_model_quantized.onnx",
		sizeMB: 270
	}
].map(({ url, sizeMB, ...identity }) => ({
	...identity,
	inputSize: {
		width: 0,
		height: 0
	},
	labels: [],
	formats: { onnx: {
		url,
		sizeMB
	} }
}));
|
|
95
|
-
|
|
96
|
-
|
|
102
|
+
//#endregion
|
|
103
|
+
//#region src/embedding-encoder/shared/noop-logger.ts
|
|
104
|
+
/** Shared do-nothing callback: ignores its arguments and returns undefined. */
var noop = () => {};
|
|
97
105
|
/**
 * Builds a logger that silently discards everything.
 *
 * The four level methods (`debug`, `info`, `warn`, `error`) do nothing, and
 * both `child()` and `withTags()` hand back the very same logger, so the
 * result can stand in wherever a real logger is optional.
 *
 * @returns {object} A fresh no-op logger.
 */
function createNoopLogger() {
	const ignore = () => {};
	const self = {
		debug: ignore,
		info: ignore,
		warn: ignore,
		error: ignore
	};
	// Derivation methods are attached afterwards so they can close over `self`.
	self.child = () => self;
	self.withTags = (_tags) => self;
	return self;
}
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
116
|
+
//#endregion
|
|
117
|
+
//#region src/embedding-encoder/shared/node-raw-tensor-engine.ts
|
|
118
|
+
/**
 * Maps a backend identifier to the device label reported by the raw tensor
 * engine. Backends missing from this table yield `undefined` on lookup.
 */
var BACKEND_TO_DEVICE$1 = Object.fromEntries([
	["cpu", "cpu"],
	["coreml", "gpu-mps"],
	["cuda", "gpu-cuda"],
	["tensorrt", "tensorrt"]
]);
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
124
|
+
/**
 * Raw tensor engine — runs ONNX inference on pre-processed Float32Array input.
 * Used by addons that handle their own preprocessing (e.g. CLIP embedding encoder).
 *
 * Lifecycle: construct → `initialize()` → `run()` any number of times → `dispose()`.
 */
var NodeRawTensorEngine = class {
	modelPath;
	backend;
	runtime = "onnx";
	device;
	session = null;
	log;
	/**
	 * @param {string} modelPath Path to the ONNX model; relative paths are resolved against `process.cwd()` in `initialize()`.
	 * @param {string} backend Backend identifier (e.g. "cpu", "coreml", "cuda").
	 * @param {object} [logger] Optional logger; a no-op logger is used when omitted.
	 */
	constructor(modelPath, backend, logger) {
		this.modelPath = modelPath;
		this.backend = backend;
		this.device = BACKEND_TO_DEVICE$1[backend] ?? "cpu";
		this.log = logger ?? createNoopLogger();
	}
	/** Loads onnxruntime-node and creates the inference session for the configured model. */
	async initialize() {
		const ort = await import("onnxruntime-node");
		// NOTE(review): only coreml and cuda select a hardware execution provider here;
		// every other backend (including "tensorrt") runs on the CPU provider — confirm intended.
		const provider = this.backend === "coreml" ? "coreml" : this.backend === "cuda" ? "cuda" : "cpu";
		const absModelPath = path.isAbsolute(this.modelPath) ? this.modelPath : path.resolve(process.cwd(), this.modelPath);
		this.session = await ort.InferenceSession.create(absModelPath, { executionProviders: [provider] });
		this.log.info("ONNX session loaded", { meta: {
			modelPath: absModelPath,
			backend: this.backend,
			provider
		} });
	}
	/**
	 * Runs one inference with `input` fed to the model's first input.
	 *
	 * @param {Float32Array} input Pre-processed tensor data.
	 * @param {number[]} inputShape Tensor dimensions matching `input`.
	 * @returns {Promise<*>} The `.data` of the model's first output.
	 * @throws {Error} When the engine is not initialized or the session run fails.
	 */
	async run(input, inputShape) {
		if (!this.session) throw new Error("NodeRawTensorEngine: not initialized — call initialize() first");
		const ort = await import("onnxruntime-node");
		const sess = this.session;
		const inputName = sess.inputNames[0];
		const tensor = new ort.Tensor("float32", input, [...inputShape]);
		const feeds = { [inputName]: tensor };
		const start = Date.now();
		let results;
		try {
			results = await sess.run(feeds);
		} catch (err) {
			const error = err instanceof Error ? err : new Error(String(err));
			this.log.error("Inference failed", { meta: { error: error.message } });
			throw error;
		}
		const outputName = sess.outputNames[0];
		this.log.debug("Inference complete", { meta: {
			durationMs: Date.now() - start,
			outputKeys: [outputName],
			preprocessMode: "raw-tensor"
		} });
		return results[outputName].data;
	}
	/**
	 * Releases the underlying session and marks the engine as uninitialized.
	 * Safe to call repeatedly; a failure while releasing is logged, not thrown.
	 */
	async dispose() {
		const session = this.session;
		// Clear the reference first so a concurrent run() fails fast instead of using a released session.
		this.session = null;
		// Dropping the reference alone leaves the native session to the garbage collector;
		// release() frees it deterministically. Guarded for runtimes that lack the method.
		if (typeof session?.release === "function") try {
			await session.release();
		} catch (err) {
			const error = err instanceof Error ? err : new Error(String(err));
			this.log.warn("Session release failed", { meta: { error: error.message } });
		}
		this.log.debug("Session disposed");
	}
};
|
|
181
|
+
//#endregion
|
|
182
|
+
//#region src/embedding-encoder/shared/image-utils.ts
|
|
183
|
+
/**
 * Letterbox resize for YOLO: resize preserving aspect ratio, pad to square.
 *
 * The image is scaled to fit inside a `targetSize` × `targetSize` canvas,
 * centred, and padded with grey (114, 114, 114). The result is returned as a
 * planar (channel-first) Float32Array with values divided by 255.
 *
 * @param {Buffer} jpeg Encoded image accepted by sharp.
 * @param {number} targetSize Side length of the square output, a positive integer.
 * @returns {Promise<{data: Float32Array, scale: number, padX: number, padY: number, originalWidth: number, originalHeight: number}>}
 *   Pixel data plus the scale and left/top padding applied to the original image.
 * @throws {Error} When `targetSize` is not a positive integer or the image dimensions cannot be read.
 */
async function letterbox(jpeg, targetSize) {
	if (!Number.isInteger(targetSize) || targetSize <= 0) throw new Error(`letterbox: targetSize must be a positive integer (got ${targetSize})`);
	const meta = await sharp(jpeg).metadata();
	const originalWidth = meta.width ?? 0;
	const originalHeight = meta.height ?? 0;
	// Without real dimensions the scale becomes Infinity and every derived size NaN; fail with a clear message instead.
	if (originalWidth <= 0 || originalHeight <= 0) throw new Error(`letterbox: could not read image dimensions (got ${originalWidth}x${originalHeight})`);
	const scale = Math.min(targetSize / originalWidth, targetSize / originalHeight);
	// Extreme aspect ratios can round the short side down to 0, which sharp rejects; keep at least one pixel.
	const scaledWidth = Math.max(1, Math.round(originalWidth * scale));
	const scaledHeight = Math.max(1, Math.round(originalHeight * scale));
	const padX = Math.floor((targetSize - scaledWidth) / 2);
	const padY = Math.floor((targetSize - scaledHeight) / 2);
	const { data } = await sharp(jpeg).resize(scaledWidth, scaledHeight).extend({
		top: padY,
		// The far side absorbs the odd pixel when the padding is not evenly divisible.
		bottom: targetSize - scaledHeight - padY,
		left: padX,
		right: targetSize - scaledWidth - padX,
		background: {
			r: 114,
			g: 114,
			b: 114
		}
	}).removeAlpha().raw().toBuffer({ resolveWithObject: true });
	const numPixels = targetSize * targetSize;
	const float32 = new Float32Array(3 * numPixels);
	// De-interleave RGBRGB… into three consecutive planes (R, G, B), scaled to [0, 1].
	for (let i = 0; i < numPixels; i++) {
		const srcBase = i * 3;
		float32[i] = data[srcBase] / 255;
		float32[numPixels + i] = data[srcBase + 1] / 255;
		float32[2 * numPixels + i] = data[srcBase + 2] / 255;
	}
	return {
		data: float32,
		scale,
		padX,
		padY,
		originalWidth,
		originalHeight
	};
}
|
|
221
|
+
/**
 * Resize and normalize to Float32Array.
 *
 * The image is stretched to exactly `targetWidth` × `targetHeight` (sharp
 * `fit: "fill"`), its alpha channel is dropped, and each RGB byte is
 * converted according to `normalization`:
 *   - "zero-one": byte / 255
 *   - "imagenet": (byte / 255 - mean[c]) / std[c]
 *   - anything else: the raw byte value, unchanged
 *
 * @param {Buffer} jpeg Encoded image accepted by sharp.
 * @param {number} targetWidth Output width in pixels.
 * @param {number} targetHeight Output height in pixels.
 * @param {string} normalization Normalization mode (see above).
 * @param {string} layout "nchw" writes three channel planes; any other value writes interleaved pixels.
 * @returns {Promise<Float32Array>} 3 × width × height values.
 */
async function resizeAndNormalize(jpeg, targetWidth, targetHeight, normalization, layout) {
	const stretched = sharp(jpeg).resize(targetWidth, targetHeight, { fit: "fill" });
	const { data: pixels } = await stretched.removeAlpha().raw().toBuffer({ resolveWithObject: true });
	const pixelCount = targetWidth * targetHeight;
	const out = new Float32Array(3 * pixelCount);
	const channelMean = [.485, .456, .406];
	const channelStd = [.229, .224, .225];
	// Pick the per-value conversion once instead of re-testing the mode for every sample.
	let convert;
	switch (normalization) {
		case "zero-one":
			convert = (byte) => byte / 255;
			break;
		case "imagenet":
			convert = (byte, channel) => (byte / 255 - channelMean[channel]) / channelStd[channel];
			break;
		default: convert = (byte) => byte;
	}
	const planar = layout === "nchw";
	for (let pixel = 0; pixel < pixelCount; pixel++) for (let channel = 0; channel < 3; channel++) {
		const src = pixel * 3 + channel;
		// Planar output groups values by channel; interleaved output keeps the source order.
		const dst = planar ? channel * pixelCount + pixel : src;
		out[dst] = convert(pixels[src], channel);
	}
	return out;
}
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
260
|
+
//#endregion
|
|
261
|
+
//#region src/embedding-encoder/shared/node-engine.ts
|
|
262
|
+
/**
 * Maps a backend identifier to the ONNX Runtime execution provider name
 * requested when the session is created.
 */
var BACKEND_TO_PROVIDER = Object.fromEntries([
	"cpu",
	"coreml",
	"cuda",
	"tensorrt",
	"dml"
].map((backend) => [backend, backend]));
/**
 * Maps a backend identifier to the device label reported by the engine.
 * Backends missing from this table (e.g. "dml") yield `undefined` on lookup.
 */
var BACKEND_TO_DEVICE = Object.fromEntries([
	["cpu", "cpu"],
	["coreml", "gpu-mps"],
	["cuda", "gpu-cuda"],
	["tensorrt", "tensorrt"]
]);
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
275
|
+
/**
 * ONNX inference engine that accepts image frames, preprocesses them
 * according to the model metadata, and returns the raw output tensor(s).
 *
 * Lifecycle: construct → `initialize()` → `infer()` / `run()` → `dispose()`.
 */
var NodeInferenceEngine = class {
	modelPath;
	backend;
	modelMeta;
	runtime = "onnx";
	device;
	session = null;
	log;
	/**
	 * @param {string} modelPath Path to the ONNX model; relative paths are resolved against `process.cwd()` in `initialize()`.
	 * @param {string} backend Backend identifier, looked up in the provider and device tables (unknown values fall back to "cpu").
	 * @param {object} modelMeta Model metadata; `inputSize`, `inputNormalization`, `inputLayout` and `preprocessMode` are read.
	 * @param {object} [logger] Optional logger; a no-op logger is used when omitted.
	 */
	constructor(modelPath, backend, modelMeta, logger) {
		this.modelPath = modelPath;
		this.backend = backend;
		this.modelMeta = modelMeta;
		this.device = BACKEND_TO_DEVICE[backend] ?? "cpu";
		this.log = logger ?? createNoopLogger();
	}
	/** Loads onnxruntime-node and creates the inference session for the configured model. */
	async initialize() {
		const ort = await import("onnxruntime-node");
		const provider = BACKEND_TO_PROVIDER[this.backend] ?? "cpu";
		const absModelPath = path.isAbsolute(this.modelPath) ? this.modelPath : path.resolve(process.cwd(), this.modelPath);
		const sessionOptions = { executionProviders: [provider] };
		this.session = await ort.InferenceSession.create(absModelPath, sessionOptions);
		this.log.info("ONNX session loaded", { meta: {
			modelPath: absModelPath,
			backend: this.backend,
			provider
		} });
	}
	/**
	 * Computes the tensor shape that matches what `preprocess()` produces.
	 *
	 * - letterbox: a square canvas of side max(width, height), channel-first;
	 * - explicit "nhwc" layout: [1, H, W, 3], matching the interleaved data;
	 * - otherwise: [1, 3, H, W].
	 *
	 * @returns {number[]} Input tensor dimensions.
	 */
	resolveInputShape() {
		const { inputSize, inputLayout, preprocessMode } = this.modelMeta;
		if (preprocessMode === "letterbox") {
			// Must mirror the target size preprocess() passes to the letterbox helper.
			const side = Math.max(inputSize.width, inputSize.height);
			return [
				1,
				3,
				side,
				side
			];
		}
		if (inputLayout === "nhwc") return [
			1,
			inputSize.height,
			inputSize.width,
			3
		];
		return [
			1,
			3,
			inputSize.height,
			inputSize.width
		];
	}
	/**
	 * Runs inference on one frame.
	 *
	 * @param {object} input Either `{ kind: "jpeg", data }` or a raw frame with `data`, `width`, `height`, `format`.
	 * @returns {Promise<object>} `{ tensor, letterbox, inferenceMs }` for single-output models,
	 *   `{ tensors, letterbox, inferenceMs }` otherwise; `letterbox` is undefined unless letterbox preprocessing ran.
	 * @throws {Error} When the session run fails (also logged at error level).
	 */
	async infer(input) {
		const jpeg = input.kind === "jpeg" ? input.data : await this.encodeRawAsJpeg(input.data, input.width, input.height, input.format);
		const { data, letterboxMeta } = await this.preprocess(jpeg);
		const inputShape = this.resolveInputShape();
		const start = Date.now();
		let result;
		try {
			result = await this.runSession(data, inputShape);
		} catch (err) {
			const error = err instanceof Error ? err : new Error(String(err));
			this.log.error("Inference failed", { meta: { error: error.message } });
			throw error;
		}
		const durationMs = Date.now() - start;
		const singleOutput = "tensor" in result;
		this.log.debug("Inference complete", { meta: {
			durationMs,
			outputKeys: singleOutput ? ["tensor"] : Object.keys(result.tensors),
			preprocessMode: this.modelMeta.preprocessMode
		} });
		if (singleOutput) return {
			tensor: result.tensor,
			letterbox: letterboxMeta,
			inferenceMs: durationMs
		};
		return {
			tensors: result.tensors,
			letterbox: letterboxMeta,
			inferenceMs: durationMs
		};
	}
	/** Preprocess JPEG to Float32Array using the configured mode */
	async preprocess(jpeg) {
		const { inputSize, inputNormalization, inputLayout, preprocessMode } = this.modelMeta;
		if (preprocessMode === "letterbox") {
			const result = await letterbox(jpeg, Math.max(inputSize.width, inputSize.height));
			const letterboxMeta = {
				scale: result.scale,
				padX: result.padX,
				padY: result.padY,
				originalWidth: result.originalWidth,
				originalHeight: result.originalHeight
			};
			return {
				data: result.data,
				letterboxMeta
			};
		}
		return { data: await resizeAndNormalize(jpeg, inputSize.width, inputSize.height, inputNormalization, inputLayout) };
	}
	/**
	 * Encodes a raw pixel buffer as JPEG (quality 80).
	 * A "gray" format is treated as 1 channel; every other format as 3 channels.
	 */
	async encodeRawAsJpeg(raw, width, height, format) {
		const sharp = (await import("sharp")).default;
		return sharp(raw, { raw: {
			width,
			height,
			channels: format === "gray" ? 1 : 3
		} }).jpeg({
			quality: 80,
			mozjpeg: false
		}).toBuffer();
	}
	/** Run an ONNX session with a single input, handling both single and multi-output models */
	async runSession(input, inputShape) {
		if (!this.session) throw new Error("NodeInferenceEngine: not initialized — call initialize() first");
		const ort = await import("onnxruntime-node");
		const sess = this.session;
		const inputName = sess.inputNames[0];
		const tensor = new ort.Tensor("float32", input, [...inputShape]);
		const feeds = { [inputName]: tensor };
		const results = await sess.run(feeds);
		const outputNames = sess.outputNames;
		if (outputNames.length === 1) return { tensor: results[outputNames[0]].data };
		const tensors = {};
		for (const name of outputNames) tensors[name] = results[name].data;
		return { tensors };
	}
	/**
	 * Runs the session on already-preprocessed data and returns a single tensor:
	 * the only output, or the first one for multi-output models.
	 */
	async run(input, inputShape) {
		const result = await this.runSession(input, inputShape);
		if ("tensor" in result) return result.tensor;
		const firstKey = Object.keys(result.tensors)[0];
		return result.tensors[firstKey];
	}
	/**
	 * Releases the underlying session and marks the engine as uninitialized.
	 * Safe to call repeatedly; a failure while releasing is logged, not thrown.
	 */
	async dispose() {
		const session = this.session;
		// Clear the reference first so a concurrent call fails fast instead of using a released session.
		this.session = null;
		// Dropping the reference alone leaves the native session to the garbage collector;
		// release() frees it deterministically. Guarded for runtimes that lack the method.
		if (typeof session?.release === "function") try {
			await session.release();
		} catch (err) {
			const error = err instanceof Error ? err : new Error(String(err));
			this.log.warn("Session release failed", { meta: { error: error.message } });
		}
		this.log.debug("Session disposed");
	}
};
|
|
406
|
+
//#endregion
|
|
407
|
+
//#region src/embedding-encoder/shared/python-engine.ts
|
|
408
|
+
var PythonInferenceEngine = class {
|
|
409
|
+
pythonPath;
|
|
410
|
+
scriptPath;
|
|
411
|
+
modelPath;
|
|
412
|
+
extraArgs;
|
|
413
|
+
runtime;
|
|
414
|
+
device;
|
|
415
|
+
process = null;
|
|
416
|
+
receiveBuffer = Buffer.alloc(0);
|
|
417
|
+
pendingResolve = null;
|
|
418
|
+
pendingReject = null;
|
|
419
|
+
log;
|
|
420
|
+
constructor(pythonPath, scriptPath, runtime, modelPath, extraArgs = [], logger) {
|
|
421
|
+
this.pythonPath = pythonPath;
|
|
422
|
+
this.scriptPath = scriptPath;
|
|
423
|
+
this.modelPath = modelPath;
|
|
424
|
+
this.extraArgs = extraArgs;
|
|
425
|
+
this.runtime = runtime;
|
|
426
|
+
const runtimeDeviceMap = {
|
|
427
|
+
onnx: "cpu",
|
|
428
|
+
coreml: "gpu-mps",
|
|
429
|
+
pytorch: "cpu",
|
|
430
|
+
openvino: "cpu",
|
|
431
|
+
tflite: "cpu"
|
|
432
|
+
};
|
|
433
|
+
this.device = runtimeDeviceMap[runtime];
|
|
434
|
+
this.log = logger ?? createNoopLogger();
|
|
435
|
+
}
|
|
436
|
+
async initialize() {
|
|
437
|
+
const args = [
|
|
438
|
+
this.scriptPath,
|
|
439
|
+
this.modelPath,
|
|
440
|
+
...this.extraArgs
|
|
441
|
+
];
|
|
442
|
+
this.process = spawn(this.pythonPath, args, { stdio: [
|
|
443
|
+
"pipe",
|
|
444
|
+
"pipe",
|
|
445
|
+
"pipe"
|
|
446
|
+
] });
|
|
447
|
+
if (!this.process.stdout || !this.process.stdin) throw new Error("PythonInferenceEngine: failed to create process pipes");
|
|
448
|
+
this.log.info("Python process started", { meta: {
|
|
449
|
+
pythonPath: this.pythonPath,
|
|
450
|
+
scriptPath: this.scriptPath,
|
|
451
|
+
modelPath: this.modelPath
|
|
452
|
+
} });
|
|
453
|
+
this.process.stderr?.on("data", (chunk) => {
|
|
454
|
+
const lines = chunk.toString().split("\n");
|
|
455
|
+
for (const line of lines) {
|
|
456
|
+
const trimmed = line.trim();
|
|
457
|
+
if (trimmed) this.log.warn(trimmed);
|
|
458
|
+
}
|
|
459
|
+
});
|
|
460
|
+
this.process.on("error", (err) => {
|
|
461
|
+
this.log.error("Process error", { meta: { error: err.message } });
|
|
462
|
+
this.pendingReject?.(err);
|
|
463
|
+
this.pendingReject = null;
|
|
464
|
+
this.pendingResolve = null;
|
|
465
|
+
});
|
|
466
|
+
this.process.on("exit", (code) => {
|
|
467
|
+
if (code !== 0) {
|
|
468
|
+
this.log.error("Process exited", { meta: { code } });
|
|
469
|
+
const err = /* @__PURE__ */ new Error(`PythonInferenceEngine: process exited with code ${code}`);
|
|
470
|
+
this.pendingReject?.(err);
|
|
471
|
+
this.pendingReject = null;
|
|
472
|
+
this.pendingResolve = null;
|
|
473
|
+
}
|
|
474
|
+
});
|
|
475
|
+
this.process.stdout.on("data", (chunk) => {
|
|
476
|
+
this.receiveBuffer = Buffer.concat([this.receiveBuffer, chunk]);
|
|
477
|
+
this._tryReceive();
|
|
478
|
+
});
|
|
479
|
+
await new Promise((resolve, reject) => {
|
|
480
|
+
const timeout = setTimeout(() => resolve(), 2e3);
|
|
481
|
+
this.process?.on("error", (err) => {
|
|
482
|
+
clearTimeout(timeout);
|
|
483
|
+
reject(err);
|
|
484
|
+
});
|
|
485
|
+
this.process?.on("exit", (code) => {
|
|
486
|
+
clearTimeout(timeout);
|
|
487
|
+
if (code !== 0) reject(/* @__PURE__ */ new Error(`PythonInferenceEngine: process exited early with code ${code}`));
|
|
488
|
+
});
|
|
489
|
+
});
|
|
490
|
+
}
|
|
491
|
+
_tryReceive() {
|
|
492
|
+
if (this.receiveBuffer.length < 4) return;
|
|
493
|
+
const length = this.receiveBuffer.readUInt32LE(0);
|
|
494
|
+
if (this.receiveBuffer.length < 4 + length) return;
|
|
495
|
+
const jsonBytes = this.receiveBuffer.subarray(4, 4 + length);
|
|
496
|
+
this.receiveBuffer = this.receiveBuffer.subarray(4 + length);
|
|
497
|
+
const resolve = this.pendingResolve;
|
|
498
|
+
const reject = this.pendingReject;
|
|
499
|
+
this.pendingResolve = null;
|
|
500
|
+
this.pendingReject = null;
|
|
501
|
+
if (!resolve) return;
|
|
502
|
+
try {
|
|
503
|
+
resolve(JSON.parse(jsonBytes.toString("utf8")));
|
|
504
|
+
} catch (err) {
|
|
505
|
+
reject?.(err instanceof Error ? err : new Error(String(err)));
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
/** Run inference, returning structured detection results. Encodes raw input to JPEG when needed. */
|
|
509
|
+
async infer(input) {
|
|
510
|
+
const start = Date.now();
|
|
511
|
+
const jpeg = input.kind === "jpeg" ? input.data : await this.encodeRawAsJpeg(input.data, input.width, input.height, input.format);
|
|
512
|
+
const result = await this.sendJpeg(jpeg);
|
|
513
|
+
const durationMs = Date.now() - start;
|
|
514
|
+
this.log.debug("Inference complete", { meta: { durationMs } });
|
|
515
|
+
return {
|
|
516
|
+
structured: result,
|
|
517
|
+
inferenceMs: durationMs
|
|
518
|
+
};
|
|
519
|
+
}
|
|
520
|
+
async encodeRawAsJpeg(raw, width, height, format) {
|
|
521
|
+
const sharp = (await import("sharp")).default;
|
|
522
|
+
return sharp(raw, { raw: {
|
|
523
|
+
width,
|
|
524
|
+
height,
|
|
525
|
+
channels: format === "gray" ? 1 : 3
|
|
526
|
+
} }).jpeg({
|
|
527
|
+
quality: 80,
|
|
528
|
+
mozjpeg: false
|
|
529
|
+
}).toBuffer();
|
|
530
|
+
}
|
|
531
|
+
/** Send JPEG buffer via binary IPC, receive JSON detection results */
|
|
532
|
+
async sendJpeg(jpeg) {
|
|
533
|
+
if (!this.process?.stdin) throw new Error("PythonInferenceEngine: process not initialized");
|
|
534
|
+
return new Promise((resolve, reject) => {
|
|
535
|
+
this.pendingResolve = resolve;
|
|
536
|
+
this.pendingReject = reject;
|
|
537
|
+
const lengthBuf = Buffer.allocUnsafe(4);
|
|
538
|
+
lengthBuf.writeUInt32LE(jpeg.length, 0);
|
|
539
|
+
this.process.stdin.write(Buffer.concat([lengthBuf, jpeg]));
|
|
540
|
+
});
|
|
541
|
+
}
|
|
542
|
+
async dispose() {
|
|
543
|
+
const proc = this.process;
|
|
544
|
+
if (!proc) return;
|
|
545
|
+
this.process = null;
|
|
546
|
+
proc.stdin?.end();
|
|
547
|
+
proc.kill("SIGTERM");
|
|
548
|
+
if (!await new Promise((resolve) => {
|
|
549
|
+
const timer = setTimeout(() => {
|
|
550
|
+
resolve(false);
|
|
551
|
+
}, 5e3);
|
|
552
|
+
proc.once("exit", () => {
|
|
553
|
+
clearTimeout(timer);
|
|
554
|
+
resolve(true);
|
|
555
|
+
});
|
|
556
|
+
})) {
|
|
557
|
+
try {
|
|
558
|
+
proc.kill("SIGKILL");
|
|
559
|
+
} catch {}
|
|
560
|
+
this.log.warn("Python process did not exit gracefully — sent SIGKILL");
|
|
561
|
+
} else this.log.debug("Python process terminated");
|
|
562
|
+
}
|
|
563
|
+
};
|
|
564
|
+
//#endregion
|
|
565
|
+
//#region src/embedding-encoder/shared/engine-resolver.ts
|
|
566
|
+
/** Backends tried, first to last, when the runtime is "auto". */
var AUTO_BACKEND_PRIORITY = ["coreml", "cuda", "tensorrt", "cpu"];
/** Local alias of the shared backend-to-format table. */
var BACKEND_TO_FORMAT = BACKEND_TO_FORMAT$1;
/** Local alias of the shared runtime-to-format table. */
var RUNTIME_TO_FORMAT = RUNTIME_TO_FORMAT$1;
|
|
496
575
|
/**
 * Build the preprocessing metadata for a model entry.
 *
 * Missing (null or undefined) fields fall back to "zero-one" normalization,
 * "nchw" layout and "letterbox" preprocessing. `inputSize` is passed through.
 *
 * @param {object} entry - Model catalog entry.
 * @returns {{inputSize: unknown, inputNormalization: string, inputLayout: string, preprocessMode: string}}
 */
function extractModelMeta(entry) {
	const { inputSize } = entry;
	const inputNormalization = entry.inputNormalization ?? "zero-one";
	const inputLayout = entry.inputLayout ?? "nchw";
	const preprocessMode = entry.preprocessMode ?? "letterbox";
	return {
		inputSize,
		inputNormalization,
		inputLayout,
		preprocessMode
	};
}
|
|
504
583
|
/**
 * Compute the local file path for one format of a model.
 *
 * The file name is the last `/`-separated segment of the format's download
 * URL. When that segment is empty (for example a URL ending in `/`) the name
 * falls back to `<model id>.<format>` so the result never points at the
 * models directory itself.
 *
 * @param {string} modelsDir - Directory that holds downloaded models.
 * @param {object} modelEntry - Catalog entry with `id` and `formats`.
 * @param {string} format - Key into `modelEntry.formats`.
 * @returns {string} Path of the model file inside `modelsDir`.
 * @throws {Error} If the model has no entry for `format`.
 */
function modelFilePath(modelsDir, modelEntry, format) {
	const formatEntry = modelEntry.formats[format];
	if (!formatEntry) throw new Error(`Model ${modelEntry.id} has no ${format} format`);
	const lastSegment = formatEntry.url.split("/").at(-1);
	// split() always yields a string, so test for emptiness rather than nullishness.
	const filename = lastSegment || `${modelEntry.id}.${format}`;
	return path.join(modelsDir, filename);
}
|
|
513
590
|
/**
 * Report whether a model file is present on disk.
 *
 * @param {string} filePath - Path to check.
 * @returns {boolean} `true` if the path exists; `false` if it does not or the
 *   check itself throws.
 */
function modelExists(filePath) {
	let present = false;
	try {
		present = fs.existsSync(filePath);
	} catch {
		present = false;
	}
	return present;
}
|
|
520
597
|
/**
 * Pick a model format and backend, make sure the model file is available, and
 * return an initialized inference engine.
 *
 * Selection:
 * - `runtime === "auto"`: the first backend in `AUTO_BACKEND_PRIORITY` that is
 *   both reported by `probeOnnxBackends()` and has a matching format on the
 *   model entry.
 * - otherwise: the format mapped from `runtime`; if the model lacks it but has
 *   an ONNX format, ONNX is used with `backend` (default "cpu").
 *
 * Engine:
 * - ONNX formats use `NodeInferenceEngine`.
 * - Other formats use `PythonInferenceEngine` when a Python interpreter and
 *   the runtime's script are found.
 * - As a last resort an already-downloaded ONNX file is run on CPU.
 *
 * @param {object} options
 * @param {string} options.runtime - "auto" or a concrete runtime name.
 * @param {string} [options.backend] - Backend for the ONNX runtime.
 * @param {object} options.modelEntry - Catalog entry (`id`, `formats`, `inputSize`, ...).
 * @param {string} [options.modelsDir] - Directory used when `models` is not given.
 * @param {object} [options.models] - Model service exposing `ensure(id, format)`.
 * @param {string} [options.pythonPath] - Interpreter to use; probed when omitted.
 * @param {object} [options.logger] - Logger; a no-op logger is used when omitted.
 * @returns {Promise<{engine: object, format: string, modelPath: string}>}
 * @throws {Error} If no format/backend matches, the model file is missing, the
 *   Python script cannot be found, or no engine can run the selected format.
 */
async function resolveEngine(options) {
	const { runtime, backend, modelEntry, modelsDir, models } = options;
	const log = options.logger ?? createNoopLogger();
	let selectedFormat;
	let selectedBackend;
	if (runtime === "auto") {
		const available = await probeOnnxBackends();
		let chosen = null;
		for (const b of AUTO_BACKEND_PRIORITY) {
			if (!available.includes(b)) continue;
			const fmt = BACKEND_TO_FORMAT[b];
			if (!fmt) continue;
			if (!modelEntry.formats[fmt]) continue;
			chosen = {
				backend: b,
				format: fmt
			};
			break;
		}
		if (!chosen) throw new Error(`resolveEngine: no compatible backend found for model ${modelEntry.id}. Available backends: ${available.join(", ")}`);
		selectedFormat = chosen.format;
		selectedBackend = chosen.backend;
	} else {
		const fmt = RUNTIME_TO_FORMAT[runtime];
		if (!fmt) throw new Error(`resolveEngine: unsupported runtime "${runtime}"`);
		if (modelEntry.formats[fmt]) {
			selectedFormat = fmt;
			selectedBackend = runtime === "onnx" ? backend || "cpu" : runtime;
		} else if (fmt !== "onnx" && modelEntry.formats["onnx"]) {
			// The requested runtime has no artifact for this model; use ONNX instead.
			selectedFormat = "onnx";
			selectedBackend = backend || "cpu";
		} else throw new Error(`resolveEngine: model ${modelEntry.id} has no ${fmt} format for runtime ${runtime}`);
	}
	let modelPath;
	if (models) modelPath = await models.ensure(modelEntry.id, selectedFormat);
	else {
		modelPath = modelFilePath(modelsDir, modelEntry, selectedFormat);
		if (!modelExists(modelPath)) throw new Error(`resolveEngine: model file not found at ${modelPath} and no model service provided`);
	}
	log.info("Engine resolved", { meta: {
		format: selectedFormat,
		backend: selectedBackend,
		modelId: modelEntry.id
	} });
	if (selectedFormat === "onnx") {
		const engine = new NodeInferenceEngine(modelPath, selectedBackend, extractModelMeta(modelEntry), options.logger);
		await engine.initialize();
		return {
			engine,
			format: selectedFormat,
			modelPath
		};
	}
	const effectiveRuntime = runtime === "auto" ? selectedBackend : runtime;
	let { pythonPath } = options;
	if (!pythonPath) {
		const { execFileSync: efs } = await import("node:child_process");
		// Best-effort probe: a missing or broken interpreter just means "try the next one".
		for (const cmd of ["python3", "python"]) try {
			efs(cmd, ["--version"], {
				timeout: 3e3,
				stdio: "ignore"
			});
			pythonPath = cmd;
			break;
		} catch {}
	}
	const scriptName = PYTHON_SCRIPT[effectiveRuntime];
	if (scriptName && pythonPath) {
		const candidates = [
			path.join(__dirname, "../../python", scriptName),
			path.join(__dirname, "../python", scriptName),
			path.join(__dirname, "../../../python", scriptName)
		];
		const scriptPath = candidates.find((p) => fs.existsSync(p));
		if (!scriptPath) throw new Error(`resolveEngine: Python script "${scriptName}" not found. Searched:\n${candidates.join("\n")}`);
		const inputSize = Math.max(modelEntry.inputSize.width, modelEntry.inputSize.height);
		const engine = new PythonInferenceEngine(pythonPath, scriptPath, effectiveRuntime, modelPath, [`--input-size=${inputSize}`, `--confidence=0.25`], options.logger);
		await engine.initialize();
		return {
			engine,
			format: selectedFormat,
			modelPath
		};
	}
	// Only build the fallback path when an ONNX artifact is declared and a
	// directory is known; otherwise modelFilePath()/path.join() would throw
	// and hide the explanatory error below.
	if (modelEntry.formats["onnx"] && typeof modelsDir === "string") {
		const fallbackPath = modelFilePath(modelsDir, modelEntry, "onnx");
		if (modelExists(fallbackPath)) {
			const engine = new NodeInferenceEngine(fallbackPath, "cpu", extractModelMeta(modelEntry), options.logger);
			await engine.initialize();
			return {
				engine,
				format: "onnx",
				modelPath: fallbackPath
			};
		}
	}
	throw new Error(`resolveEngine: format ${selectedFormat} is not yet supported by NodeInferenceEngine, no Python runtime is available, and no ONNX fallback exists`);
}
|
|
694
|
+
/**
 * Detect which ONNX execution backends can be offered on this machine.
 *
 * "cpu" is always included. Providers reported by `onnxruntime-node` are
 * mapped to "coreml", "cuda" or "tensorrt"; if the package cannot be loaded or
 * queried, probing is skipped. On macOS "coreml" is always included.
 *
 * @returns {Promise<string[]>} Unique backend names, "cpu" first.
 */
async function probeOnnxBackends() {
	const accelerated = ["coreml", "cuda", "tensorrt"];
	// A Set keeps first-seen order and removes duplicates in one step.
	const found = new Set(["cpu"]);
	try {
		const ort = await import("onnxruntime-node");
		let providers = [];
		if (ort.env?.webgl?.disabled !== void 0) providers = ort.InferenceSession.getAvailableProviders?.() ?? [];
		for (const provider of providers) {
			const name = provider.toLowerCase().replace("executionprovider", "");
			if (accelerated.includes(name)) found.add(name);
		}
	} catch {}
	if (process.platform === "darwin") found.add("coreml");
	return [...found];
}
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
710
|
+
//#endregion
|
|
711
|
+
//#region src/embedding-encoder/addon/clip-models.ts
|
|
712
|
+
/**
 * Metadata for each supported embedding model, keyed by model id: the ids of
 * its image and text sub-models, the embedding dimension, the square input
 * size in pixels, and the tokenizer family.
 */
var CLIP_MODEL_META = {
	"clip-vit-b32": {
		imageModelId: "clip-vit-b32",
		textModelId: "clip-vit-b32-text",
		embeddingDim: 512,
		inputSize: 224,
		tokenizerType: "clip"
	},
	"clip-vit-b16": {
		imageModelId: "clip-vit-b16",
		textModelId: "clip-vit-b16-text",
		embeddingDim: 512,
		inputSize: 224,
		tokenizerType: "clip"
	},
	"siglip2-b16-256": {
		imageModelId: "siglip2-b16-256",
		textModelId: "siglip2-b16-256-text",
		embeddingDim: 768,
		inputSize: 256,
		tokenizerType: "siglip"
	}
};
/** Model id used when none is configured or the configured id is unknown. */
var DEFAULT_CLIP_MODEL = "clip-vit-b32";
/**
 * Look up the metadata for a model id.
 *
 * Only the table's own keys count as known ids, so inherited names such as
 * "toString" or "constructor" resolve to the default model instead of to a
 * member of `Object.prototype`.
 *
 * @param {string} modelId - Configured model id.
 * @returns {object} The matching entry, or the `DEFAULT_CLIP_MODEL` entry.
 */
function getModelMeta(modelId) {
	const knownId = Object.hasOwn(CLIP_MODEL_META, modelId) ? modelId : DEFAULT_CLIP_MODEL;
	return CLIP_MODEL_META[knownId];
}
|
|
669
|
-
|
|
670
|
-
|
|
739
|
+
//#endregion
|
|
740
|
+
//#region src/embedding-encoder/addon/clip-preprocessing.ts
|
|
741
|
+
/** Per-channel mean subtracted from 0..1 pixel values (channel order R, G, B). */
var CLIP_MEAN = [.48145466, .4578275, .40821073];
/** Per-channel standard deviation the centered pixel values are divided by. */
var CLIP_STD = [.26862954, .26130258, .27577711];
/**
 * Convert an interleaved 3-bytes-per-pixel buffer into a normalized planar
 * Float32Array of length `3 * targetWidth * targetHeight`.
 *
 * Each target pixel samples its nearest source pixel, scales it to 0..1 and
 * applies `(value - CLIP_MEAN[c]) / CLIP_STD[c]`. Channel planes are written
 * one after another. Bytes missing from `rgb` are read as 0.
 *
 * @param {ArrayLike<number>} rgb - Interleaved source pixels.
 * @param {number} srcWidth - Source width in pixels.
 * @param {number} srcHeight - Source height in pixels.
 * @param {number} targetWidth - Output width in pixels.
 * @param {number} targetHeight - Output height in pixels.
 * @returns {Float32Array} Planar, normalized pixel data.
 */
function preprocessForClip(rgb, srcWidth, srcHeight, targetWidth, targetHeight) {
	const planeSize = targetWidth * targetHeight;
	const tensor = new Float32Array(3 * planeSize);
	const lastCol = srcWidth - 1;
	const lastRow = srcHeight - 1;
	for (let row = 0; row < targetHeight; row++) {
		const srcRow = Math.min(Math.floor(row / targetHeight * srcHeight), lastRow);
		for (let col = 0; col < targetWidth; col++) {
			const srcCol = Math.min(Math.floor(col / targetWidth * srcWidth), lastCol);
			const srcOffset = (srcRow * srcWidth + srcCol) * 3;
			const dstOffset = row * targetWidth + col;
			for (let channel = 0; channel < 3; channel++) {
				const unit = (rgb[srcOffset + channel] ?? 0) / 255;
				tensor[channel * planeSize + dstOffset] = (unit - CLIP_MEAN[channel]) / CLIP_STD[channel];
			}
		}
	}
	return tensor;
}
|
|
771
|
+
/**
 * Scale a vector to unit Euclidean length, modifying it in place.
 *
 * A vector whose norm is not greater than zero is left untouched.
 *
 * @param {number[]|Float32Array} vec - Vector to normalize.
 * @returns {number[]|Float32Array} The same `vec` instance.
 */
function l2Normalize(vec) {
	const size = vec.length;
	let sumOfSquares = 0;
	for (let k = 0; k < size; k++) {
		const component = vec[k];
		sumOfSquares += component * component;
	}
	const magnitude = Math.sqrt(sumOfSquares);
	if (!(magnitude > 0)) return vec;
	for (let k = 0; k < size; k++) vec[k] /= magnitude;
	return vec;
}
|
|
781
|
+
//#endregion
|
|
782
|
+
//#region src/embedding-encoder/addon/index.ts
|
|
783
|
+
/**
 * Addon that provides the embedding-encoder capability: it turns image crops
 * and text into L2-normalized embedding vectors.
 *
 * Engines are created lazily on first use, separately for images and text.
 * A target is served either by a raw ONNX tensor engine or, when
 * `resolveEngine` selects a non-ONNX format, by a Python engine.
 */
var EmbeddingEncoderAddon = class extends BaseAddon {
	imageRawEngine = null;
	textRawEngine = null;
	imagePythonEngine = null;
	textPythonEngine = null;
	models = null;
	isPython = false;
	/** In-flight image engine load, shared by concurrent callers. */
	imageEngineLoad = null;
	/** In-flight text engine load, shared by concurrent callers. */
	textEngineLoad = null;
	constructor() {
		super({
			modelId: DEFAULT_CLIP_MODEL,
			runtime: "auto",
			backend: "cpu"
		});
	}
	/**
	 * Resolve the models directory (falling back to "camstack-data/models" if
	 * the storage query fails), create the download service and register this
	 * addon as the embedding-encoder provider.
	 */
	async onInitialize() {
		const modelsDir = await this.ctx.api.storage.resolve.query({
			location: "models",
			relativePath: ""
		}).catch(() => "camstack-data/models");
		this.models = new ModelDownloadService(modelsDir, []);
		return [{
			capability: embeddingEncoderCapability,
			provider: this
		}];
	}
	/**
	 * Encode an image crop.
	 *
	 * @param {{crop: Buffer|Uint8Array, width: number, height: number}} input -
	 *   On the Python path `crop` is forwarded as JPEG bytes; on the ONNX path
	 *   it is read as raw 3-channel pixels of the given size.
	 * @returns {Promise<{embedding: number[], inferenceMs: number}>}
	 * @throws {Error} If the engine cannot be created or returns no embedding.
	 */
	async encode(input) {
		const { crop, width, height } = input;
		await this.ensureImageEngine();
		const meta = getModelMeta(this.config.modelId);
		const start = Date.now();
		const cropBuffer = Buffer.isBuffer(crop) ? crop : Buffer.from(crop);
		if (this.isPython && this.imagePythonEngine) {
			const result = await this.imagePythonEngine.infer({
				kind: "jpeg",
				data: cropBuffer
			});
			return this.pythonResultToEmbedding(result, start);
		}
		const preprocessed = preprocessForClip(cropBuffer, width, height, meta.inputSize, meta.inputSize);
		const output = await this.imageRawEngine.run(preprocessed, [
			1,
			3,
			meta.inputSize,
			meta.inputSize
		]);
		return this.rawOutputToEmbedding(output, meta, start);
	}
	/**
	 * Encode a text string.
	 *
	 * @param {{text: string}} input
	 * @returns {Promise<{embedding: number[], inferenceMs: number}>}
	 * @throws {Error} If the engine cannot be created or returns no embedding.
	 */
	async encodeText(input) {
		const { text } = input;
		await this.ensureTextEngine();
		const meta = getModelMeta(this.config.modelId);
		const start = Date.now();
		if (this.isPython && this.textPythonEngine) {
			// The Python engine only exposes a byte-payload channel, so the text
			// travels as a JSON document tagged with the "jpeg" input kind.
			const textBuffer = Buffer.from(JSON.stringify({ text }), "utf-8");
			const result = await this.textPythonEngine.infer({
				kind: "jpeg",
				data: textBuffer
			});
			return this.pythonResultToEmbedding(result, start);
		}
		const tokenIds = clipTokenize(text);
		const inputTensor = new Float32Array(tokenIds);
		const output = await this.textRawEngine.run(inputTensor, [1, tokenIds.length]);
		return this.rawOutputToEmbedding(output, meta, start);
	}
	/** Normalize a Python engine result; fails loudly when no embedding came back. */
	pythonResultToEmbedding(result, start) {
		const rawEmbedding = result.structured?.["embedding"];
		// Without this check a missing field would silently become an empty vector.
		if (!(rawEmbedding?.length > 0)) throw new Error("EmbeddingEncoderAddon: Python engine returned no embedding");
		const normalized = l2Normalize(new Float32Array(rawEmbedding));
		return {
			embedding: Array.from(normalized),
			inferenceMs: result.inferenceMs ?? Date.now() - start
		};
	}
	/** Trim a raw engine output to the model's embedding size and normalize it. */
	rawOutputToEmbedding(output, meta, start) {
		const sliced = output.length > meta.embeddingDim ? output.slice(0, meta.embeddingDim) : output;
		const normalized = l2Normalize(new Float32Array(sliced));
		return {
			embedding: Array.from(normalized),
			inferenceMs: Date.now() - start
		};
	}
	/** Report the configured model, its embedding size and whether an image engine is loaded. */
	async getInfo() {
		const meta = getModelMeta(this.config.modelId);
		return {
			modelId: this.config.modelId,
			embeddingDim: meta.embeddingDim,
			ready: this.imageRawEngine !== null || this.imagePythonEngine !== null
		};
	}
	/** Create the image engine on first use; concurrent callers share one load. */
	async ensureImageEngine() {
		if (this.imageRawEngine || this.imagePythonEngine) return;
		this.imageEngineLoad ??= this.loadImageEngine().finally(() => {
			// Cleared on success and failure alike so a failed load can be retried.
			this.imageEngineLoad = null;
		});
		await this.imageEngineLoad;
	}
	/** Create the text engine on first use; concurrent callers share one load. */
	async ensureTextEngine() {
		if (this.textRawEngine || this.textPythonEngine) return;
		this.textEngineLoad ??= this.loadTextEngine().finally(() => {
			this.textEngineLoad = null;
		});
		await this.textEngineLoad;
	}
	/** Find the image model entry for the configured model and resolve its engine. */
	async loadImageEngine() {
		const meta = getModelMeta(this.config.modelId);
		const imageEntry = CLIP_IMAGE_MODELS.find((m) => m.id === meta.imageModelId);
		if (!imageEntry) throw new Error(`EmbeddingEncoderAddon: unknown image model "${meta.imageModelId}"`);
		await this.resolveForEntry(imageEntry, "image");
	}
	/** Find the text model entry for the configured model and resolve its engine. */
	async loadTextEngine() {
		const meta = getModelMeta(this.config.modelId);
		const textEntry = CLIP_TEXT_MODELS.find((m) => m.id === meta.textModelId);
		if (!textEntry) throw new Error(`EmbeddingEncoderAddon: unknown text model "${meta.textModelId}"`);
		await this.resolveForEntry(textEntry, "text");
	}
	/**
	 * Resolve an engine for a model entry and store it for the given target.
	 *
	 * @param {object} entry - Model catalog entry.
	 * @param {"image"|"text"} target - Which engine slot to fill.
	 */
	async resolveForEntry(entry, target) {
		const runtime = this.config.runtime === "auto" ? "auto" : this.config.runtime === "node" ? "onnx" : this.config.runtime;
		const modelsDir = this.models.getModelsDir();
		const engineLogger = this.ctx.logger.withTags({
			modelId: entry.id,
			runtime: this.config.runtime,
			backend: this.config.backend
		});
		await this.models.ensure(entry.id, "onnx");
		const resolved = await resolveEngine({
			runtime,
			backend: this.config.backend,
			modelEntry: entry,
			modelsDir,
			models: this.models ?? void 0,
			logger: engineLogger
		});
		if (resolved.format !== "onnx") {
			this.isPython = true;
			if (target === "image") this.imagePythonEngine = resolved.engine;
			else this.textPythonEngine = resolved.engine;
			return;
		}
		// For ONNX only the resolved model path is used; the engine returned by
		// resolveEngine is temporary and must be released even if the raw
		// engine fails to initialize.
		const rawEngine = new NodeRawTensorEngine(resolved.modelPath, this.config.backend, engineLogger);
		try {
			await rawEngine.initialize();
		} finally {
			await resolved.engine.dispose();
		}
		if (target === "image") this.imageRawEngine = rawEngine;
		else this.textRawEngine = rawEngine;
	}
	/** Dispose every loaded engine and forget it, so `getInfo().ready` turns false. */
	async onShutdown() {
		const engines = [
			this.imageRawEngine,
			this.textRawEngine,
			this.imagePythonEngine,
			this.textPythonEngine
		];
		// Drop the references first so a repeated shutdown cannot dispose twice.
		this.imageRawEngine = null;
		this.textRawEngine = null;
		this.imagePythonEngine = null;
		this.textPythonEngine = null;
		this.isPython = false;
		for (const engine of engines) await engine?.dispose();
	}
	/** Settings form: model id, runtime and backend. */
	globalSettingsSchema() {
		return this.schema({ sections: [{
			id: "embedding-encoder-settings",
			title: "Embedding Encoder",
			columns: 2,
			fields: [
				{
					type: "text",
					key: "modelId",
					label: "Model ID",
					description: "CLIP model identifier to use for image/text embedding",
					default: DEFAULT_CLIP_MODEL
				},
				{
					type: "select",
					key: "runtime",
					label: "Runtime",
					description: "Inference runtime (auto selects the best available)",
					default: "auto",
					options: [
						{
							label: "Auto",
							value: "auto"
						},
						{
							label: "Node (ONNX)",
							value: "node"
						},
						{
							label: "Python",
							value: "python"
						}
					]
				},
				{
					type: "select",
					key: "backend",
					label: "Backend",
					description: "Hardware backend for inference acceleration",
					default: "cpu",
					options: [
						{
							label: "CPU",
							value: "cpu"
						},
						{
							label: "CUDA",
							value: "cuda"
						},
						{
							label: "CoreML",
							value: "coreml"
						}
					]
				}
			]
		}] });
	}
	/** Intentionally a no-op: already-loaded engines are kept when the config changes. */
	async onConfigChanged() {}
};
|
|
985
|
+
/**
 * Placeholder CLIP tokenizer: one token per UTF-16 code unit.
 *
 * Output layout: start-of-text (49406), then `charCode + 256` for each
 * character that fits, then end-of-text (49407), then zero padding up to
 * `maxLength`. This is not a real BPE tokenizer.
 *
 * @param {string} text - Text to tokenize.
 * @param {number} [maxLength=77] - Target sequence length.
 * @returns {number[]} Token ids.
 */
function clipTokenize(text, maxLength = 77) {
	const START_OF_TEXT = 49406;
	const END_OF_TEXT = 49407;
	const CHAR_OFFSET = 256;
	const ids = [START_OF_TEXT];
	// Leave one slot free for the end-of-text marker.
	const charLimit = maxLength - 1;
	let pos = 0;
	while (pos < text.length && ids.length < charLimit) {
		ids.push(text.charCodeAt(pos) + CHAR_OFFSET);
		pos += 1;
	}
	ids.push(END_OF_TEXT);
	for (let missing = maxLength - ids.length; missing > 0; missing--) ids.push(0);
	return ids;
}
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
EmbeddingEncoderAddon as default
|
|
893
|
-
};
|
|
894
|
-
//# sourceMappingURL=index.mjs.map
|
|
1000
|
+
//#endregion
|
|
1001
|
+
export { EmbeddingEncoderAddon, EmbeddingEncoderAddon as default };
|