rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Body solution - body pose estimation with 17 or 26 keypoints
3
+ */
4
+
5
+ import { YOLOX } from '../models/yolox';
6
+ import { RTMPose } from '../models/rtmpose';
7
+ import { BBox, ModeType, ModelConfig } from '../types/index';
8
+
9
+ export class Body {
10
+ private detModel: YOLOX;
11
+ private poseModel: RTMPose;
12
+
13
+ private static readonly MODE: Record<ModeType, ModelConfig> = {
14
+ performance: {
15
+ det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_l_8xb8-300e_humanart-ce1d7a62.zip',
16
+ detInputSize: [640, 640],
17
+ pose: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/rtmpose-l_simcc-body7_pt-body7_420e-384x288-3f5a1437_20230504.zip',
18
+ poseInputSize: [288, 384],
19
+ },
20
+ lightweight: {
21
+ det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_nano_8xb8-300e_humanart-40f6f0d0.zip',
22
+ detInputSize: [416, 416],
23
+ pose: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/rtmpose-t_simcc-body7_pt-body7_420e-256x192-026a1439_20230504.zip',
24
+ poseInputSize: [192, 256],
25
+ },
26
+ balanced: {
27
+ det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_m_8xb8-300e_humanart-c2c7a14a.zip',
28
+ detInputSize: [640, 640],
29
+ pose: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/rtmpose-m_simcc-body7_pt-body7_420e-256x192-e48f03d0_20230504.zip',
30
+ poseInputSize: [192, 256],
31
+ },
32
+ };
33
+
34
+ constructor(
35
+ det: string | null = null,
36
+ detInputSize: [number, number] = [640, 640],
37
+ pose: string | null = null,
38
+ poseInputSize: [number, number] = [288, 384],
39
+ mode: ModeType = 'balanced',
40
+ toOpenpose: boolean = false,
41
+ backend: 'onnxruntime' = 'onnxruntime',
42
+ device: string = 'cpu'
43
+ ) {
44
+ let finalDet = det;
45
+ let finalDetInputSize = detInputSize;
46
+ let finalPose = pose;
47
+ let finalPoseInputSize = poseInputSize;
48
+
49
+ if (det === null) {
50
+ finalDet = Body.MODE[mode].det;
51
+ finalDetInputSize = Body.MODE[mode].detInputSize;
52
+ }
53
+
54
+ if (pose === null) {
55
+ finalPose = Body.MODE[mode].pose;
56
+ finalPoseInputSize = Body.MODE[mode].poseInputSize;
57
+ }
58
+
59
+ this.detModel = new YOLOX(
60
+ finalDet!,
61
+ finalDetInputSize,
62
+ 0.45,
63
+ 0.7,
64
+ backend
65
+ );
66
+
67
+ this.poseModel = new RTMPose(
68
+ finalPose!,
69
+ finalPoseInputSize,
70
+ toOpenpose,
71
+ backend
72
+ );
73
+ }
74
+
75
+ async init(): Promise<void> {
76
+ await this.detModel.init();
77
+ await this.poseModel.init();
78
+ }
79
+
80
+ async call(
81
+ image: Uint8Array,
82
+ imgWidth: number,
83
+ imgHeight: number
84
+ ): Promise<{ keypoints: number[][]; scores: number[] }> {
85
+ const bboxes = await this.detModel.call(image, imgWidth, imgHeight);
86
+ const result = await this.poseModel.call(image, imgWidth, imgHeight, bboxes);
87
+ return result;
88
+ }
89
+ }
@@ -0,0 +1,89 @@
1
+ /**
2
+ * BodyWithFeet solution - body pose estimation with 26 keypoints (including feet)
3
+ */
4
+
5
+ import { YOLOX } from '../models/yolox';
6
+ import { RTMPose } from '../models/rtmpose';
7
+ import { ModeType, ModelConfig } from '../types/index';
8
+
9
+ export class BodyWithFeet {
10
+ private detModel: YOLOX;
11
+ private poseModel: RTMPose;
12
+
13
+ private static readonly MODE: Record<ModeType, ModelConfig> = {
14
+ performance: {
15
+ det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_l_8xb8-300e_humanart-ce1d7a62.zip',
16
+ detInputSize: [640, 640],
17
+ pose: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/rtmpose-l_simcc-body7_pt-body7-halpe26_700e-384x288-734182ce_20230605.zip',
18
+ poseInputSize: [288, 384],
19
+ },
20
+ lightweight: {
21
+ det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_nano_8xb8-300e_humanart-40f6f0d0.zip',
22
+ detInputSize: [416, 416],
23
+ pose: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/rtmpose-t_simcc-body7_pt-body7-halpe26_700e-256x192-6020f8a6_20230605.zip',
24
+ poseInputSize: [192, 256],
25
+ },
26
+ balanced: {
27
+ det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_m_8xb8-300e_humanart-c2c7a14a.zip',
28
+ detInputSize: [640, 640],
29
+ pose: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/rtmpose-m_simcc-body7_pt-body7-halpe26_700e-256x192-4d3e73dd_20230605.zip',
30
+ poseInputSize: [192, 256],
31
+ },
32
+ };
33
+
34
+ constructor(
35
+ det: string | null = null,
36
+ detInputSize: [number, number] = [640, 640],
37
+ pose: string | null = null,
38
+ poseInputSize: [number, number] = [288, 384],
39
+ mode: ModeType = 'balanced',
40
+ toOpenpose: boolean = false,
41
+ backend: 'onnxruntime' = 'onnxruntime',
42
+ device: string = 'cpu'
43
+ ) {
44
+ let finalDet = det;
45
+ let finalDetInputSize = detInputSize;
46
+ let finalPose = pose;
47
+ let finalPoseInputSize = poseInputSize;
48
+
49
+ if (det === null) {
50
+ finalDet = BodyWithFeet.MODE[mode].det;
51
+ finalDetInputSize = BodyWithFeet.MODE[mode].detInputSize;
52
+ }
53
+
54
+ if (pose === null) {
55
+ finalPose = BodyWithFeet.MODE[mode].pose;
56
+ finalPoseInputSize = BodyWithFeet.MODE[mode].poseInputSize;
57
+ }
58
+
59
+ this.detModel = new YOLOX(
60
+ finalDet!,
61
+ finalDetInputSize,
62
+ 0.45,
63
+ 0.7,
64
+ backend
65
+ );
66
+
67
+ this.poseModel = new RTMPose(
68
+ finalPose!,
69
+ finalPoseInputSize,
70
+ toOpenpose,
71
+ backend
72
+ );
73
+ }
74
+
75
+ async init(): Promise<void> {
76
+ await this.detModel.init();
77
+ await this.poseModel.init();
78
+ }
79
+
80
+ async call(
81
+ image: Uint8Array,
82
+ imgWidth: number,
83
+ imgHeight: number
84
+ ): Promise<{ keypoints: number[][]; scores: number[] }> {
85
+ const bboxes = await this.detModel.call(image, imgWidth, imgHeight);
86
+ const result = await this.poseModel.call(image, imgWidth, imgHeight, bboxes);
87
+ return result;
88
+ }
89
+ }
@@ -0,0 +1,474 @@
1
+ /**
2
+ * CustomDetector - Maximum flexibility detector for any ONNX model
3
+ * Provides low-level API for custom model inference
4
+ *
5
+ * @example
6
+ * ```typescript
7
+ * // Simple usage with auto-config
8
+ * const detector = new CustomDetector({
9
+ * model: 'path/to/model.onnx',
10
+ * });
11
+ * await detector.init();
12
+ * const results = await detector.run(imageData, width, height);
13
+ *
14
+ * // Advanced usage with custom preprocessing
15
+ * const detector = new CustomDetector({
16
+ * model: 'path/to/model.onnx',
17
+ * inputName: 'input',
18
+ * outputNames: ['output1', 'output2'],
19
+ * preprocessing: (data) => customPreprocess(data),
20
+ * postprocessing: (outputs) => customPostprocess(outputs),
21
+ * });
22
+ * ```
23
+ */
24
+
25
+ import * as ort from 'onnxruntime-web';
26
+ import { getCachedModel, isModelCached } from '../core/modelCache';
27
+
28
+ // Configure ONNX Runtime Web
29
+ ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/';
30
+ ort.env.wasm.simd = true;
31
+ ort.env.wasm.proxy = false;
32
+
33
+ /**
34
+ * Configuration options for CustomDetector
35
+ */
36
+ export interface CustomDetectorConfig {
37
+ /** Path to ONNX model (required) */
38
+ model: string;
39
+ /** Input tensor name (optional - auto-detected if not specified) */
40
+ inputName?: string;
41
+ /** Output tensor names (optional - auto-detected if not specified) */
42
+ outputNames?: string[];
43
+ /** Expected input shape [batch, channels, height, width] (optional) */
44
+ inputShape?: [number, number, number, number];
45
+ /** Custom preprocessing function */
46
+ preprocessing?: (data: ImageData, config: CustomDetectorConfig) => Float32Array | ort.Tensor;
47
+ /** Custom postprocessing function */
48
+ postprocessing?: (outputs: Record<string, ort.Tensor>, metadata: any) => any;
49
+ /** Execution backend (default: 'wasm') */
50
+ backend?: 'wasm' | 'webgpu';
51
+ /** Enable model caching (default: true) */
52
+ cache?: boolean;
53
+ /** Custom metadata for postprocessing */
54
+ metadata?: any;
55
+ /** Input normalization (default: { mean: [0, 0, 0], std: [1, 1, 1] }) */
56
+ normalization?: {
57
+ mean: number[];
58
+ std: number[];
59
+ };
60
+ /** Input size for automatic preprocessing (optional) */
61
+ inputSize?: [number, number];
62
+ /** Keep aspect ratio during preprocessing (default: true) */
63
+ keepAspectRatio?: boolean;
64
+ /** Background color for letterbox (default: black) */
65
+ backgroundColor?: string;
66
+ }
67
+
68
+ /**
69
+ * Detection result with metadata
70
+ */
71
+ export interface DetectionResult<T = any> {
72
+ /** Raw model outputs */
73
+ outputs: Record<string, ort.Tensor>;
74
+ /** Processed results */
75
+ data: T;
76
+ /** Inference time in ms */
77
+ inferenceTime: number;
78
+ /** Input shape used */
79
+ inputShape: number[];
80
+ }
81
+
82
+ /**
83
+ * Default configuration
84
+ */
85
+ const DEFAULT_CONFIG: Partial<CustomDetectorConfig> = {
86
+ backend: 'webgpu', // Default to WebGPU for better performance
87
+ cache: true,
88
+ keepAspectRatio: true,
89
+ backgroundColor: '#000000',
90
+ normalization: {
91
+ mean: [0, 0, 0],
92
+ std: [1, 1, 1],
93
+ },
94
+ };
95
+
96
+ export class CustomDetector {
97
+ private config: Required<CustomDetectorConfig>;
98
+ private session: ort.InferenceSession | null = null;
99
+ private initialized = false;
100
+ private canvas: HTMLCanvasElement | null = null;
101
+ private ctx: CanvasRenderingContext2D | null = null;
102
+
103
+ constructor(config: CustomDetectorConfig) {
104
+ this.config = {
105
+ ...DEFAULT_CONFIG,
106
+ ...config,
107
+ outputNames: config.outputNames || [],
108
+ inputShape: config.inputShape || [1, 3, 224, 224],
109
+ normalization: config.normalization || { mean: [0, 0, 0], std: [1, 1, 1] },
110
+ } as Required<CustomDetectorConfig>;
111
+ }
112
+
113
+ /**
114
+ * Initialize the model
115
+ */
116
+ async init(): Promise<void> {
117
+ if (this.initialized) return;
118
+
119
+ try {
120
+ console.log(`[CustomDetector] Loading model from: ${this.config.model}`);
121
+ let modelBuffer: ArrayBuffer;
122
+
123
+ if (this.config.cache) {
124
+ const cached = await isModelCached(this.config.model);
125
+ console.log(`[CustomDetector] Cache ${cached ? 'hit' : 'miss'}`);
126
+ modelBuffer = await getCachedModel(this.config.model);
127
+ } else {
128
+ const response = await fetch(this.config.model);
129
+ if (!response.ok) {
130
+ throw new Error(`Failed to fetch model: HTTP ${response.status}`);
131
+ }
132
+ modelBuffer = await response.arrayBuffer();
133
+ }
134
+
135
+ this.session = await ort.InferenceSession.create(modelBuffer, {
136
+ executionProviders: [this.config.backend],
137
+ graphOptimizationLevel: 'all',
138
+ });
139
+
140
+ // Auto-detect input/output names if not specified
141
+ if (!this.config.inputName && this.session.inputNames.length > 0) {
142
+ console.log(`[CustomDetector] Auto-detected input name: ${this.session.inputNames[0]}`);
143
+ }
144
+
145
+ if (this.config.outputNames.length === 0 && this.session.outputNames.length > 0) {
146
+ this.config.outputNames = [...this.session.outputNames];
147
+ console.log(`[CustomDetector] Auto-detected output names: ${this.config.outputNames}`);
148
+ }
149
+
150
+ console.log(`[CustomDetector] ✅ Initialized (${this.config.backend})`);
151
+ this.initialized = true;
152
+ } catch (error) {
153
+ console.error('[CustomDetector] ❌ Initialization failed:', error);
154
+ throw error;
155
+ }
156
+ }
157
+
158
+ /**
159
+ * Run inference on canvas
160
+ */
161
+ async runFromCanvas<T = any>(canvas: HTMLCanvasElement): Promise<DetectionResult<T>> {
162
+ const ctx = canvas.getContext('2d');
163
+ if (!ctx) {
164
+ throw new Error('Could not get 2D context from canvas');
165
+ }
166
+
167
+ const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
168
+ return this.run<T>(imageData, canvas.width, canvas.height);
169
+ }
170
+
171
+ /**
172
+ * Run inference on video
173
+ */
174
+ async runFromVideo<T = any>(
175
+ video: HTMLVideoElement,
176
+ targetCanvas?: HTMLCanvasElement
177
+ ): Promise<DetectionResult<T>> {
178
+ if (video.readyState < 2) {
179
+ throw new Error('Video not ready');
180
+ }
181
+
182
+ const canvas = targetCanvas || document.createElement('canvas');
183
+ canvas.width = video.videoWidth;
184
+ canvas.height = video.videoHeight;
185
+
186
+ const ctx = canvas.getContext('2d');
187
+ if (!ctx) {
188
+ throw new Error('Could not get 2D context from canvas');
189
+ }
190
+
191
+ ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
192
+ const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
193
+
194
+ return this.run<T>(imageData, canvas.width, canvas.height);
195
+ }
196
+
197
+ /**
198
+ * Run inference on image
199
+ */
200
+ async runFromImage<T = any>(
201
+ image: HTMLImageElement,
202
+ targetCanvas?: HTMLCanvasElement
203
+ ): Promise<DetectionResult<T>> {
204
+ if (!image.complete || !image.naturalWidth) {
205
+ throw new Error('Image not loaded');
206
+ }
207
+
208
+ const canvas = targetCanvas || document.createElement('canvas');
209
+ canvas.width = image.naturalWidth;
210
+ canvas.height = image.naturalHeight;
211
+
212
+ const ctx = canvas.getContext('2d');
213
+ if (!ctx) {
214
+ throw new Error('Could not get 2D context from canvas');
215
+ }
216
+
217
+ ctx.drawImage(image, 0, 0);
218
+ const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
219
+
220
+ return this.run<T>(imageData, canvas.width, canvas.height);
221
+ }
222
+
223
+ /**
224
+ * Run inference on bitmap
225
+ */
226
+ async runFromBitmap<T = any>(
227
+ bitmap: ImageBitmap,
228
+ targetCanvas?: HTMLCanvasElement
229
+ ): Promise<DetectionResult<T>> {
230
+ const canvas = targetCanvas || document.createElement('canvas');
231
+ canvas.width = bitmap.width;
232
+ canvas.height = bitmap.height;
233
+
234
+ const ctx = canvas.getContext('2d');
235
+ if (!ctx) {
236
+ throw new Error('Could not get 2D context from canvas');
237
+ }
238
+
239
+ ctx.drawImage(bitmap, 0, 0);
240
+ const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
241
+
242
+ return this.run<T>(imageData, canvas.width, canvas.height);
243
+ }
244
+
245
+ /**
246
+ * Run inference on file
247
+ */
248
+ async runFromFile<T = any>(
249
+ file: File,
250
+ targetCanvas?: HTMLCanvasElement
251
+ ): Promise<DetectionResult<T>> {
252
+ return new Promise((resolve, reject) => {
253
+ const img = new Image();
254
+ img.onload = async () => {
255
+ try {
256
+ const result = await this.runFromImage<T>(img, targetCanvas);
257
+ resolve(result);
258
+ } catch (error) {
259
+ reject(error);
260
+ }
261
+ };
262
+ img.onerror = () => reject(new Error('Failed to load image'));
263
+ img.src = URL.createObjectURL(file);
264
+ });
265
+ }
266
+
267
+ /**
268
+ * Run inference on blob
269
+ */
270
+ async runFromBlob<T = any>(
271
+ blob: Blob,
272
+ targetCanvas?: HTMLCanvasElement
273
+ ): Promise<DetectionResult<T>> {
274
+ const bitmap = await createImageBitmap(blob);
275
+ const result = await this.runFromBitmap<T>(bitmap, targetCanvas);
276
+ bitmap.close();
277
+ return result;
278
+ }
279
+
280
+ /**
281
+ * Run inference with custom preprocessing
282
+ */
283
+ async run<T = any>(
284
+ imageData: ImageData,
285
+ width: number,
286
+ height: number,
287
+ metadata?: any
288
+ ): Promise<DetectionResult<T>> {
289
+ if (!this.initialized) {
290
+ await this.init();
291
+ }
292
+
293
+ const startTime = performance.now();
294
+
295
+ // Preprocess
296
+ let inputTensor: ort.Tensor;
297
+
298
+ if (this.config.preprocessing) {
299
+ // Custom preprocessing
300
+ const result = this.config.preprocessing(imageData, this.config);
301
+ if (result instanceof Float32Array) {
302
+ const [h, w] = this.config.inputSize || [height, width];
303
+ inputTensor = new ort.Tensor('float32', result, [1, 3, h, w]);
304
+ } else {
305
+ inputTensor = result;
306
+ }
307
+ } else if (this.config.inputSize) {
308
+ // Automatic preprocessing with letterbox
309
+ inputTensor = this.preprocess(imageData, width, height, this.config.inputSize);
310
+ } else {
311
+ // Simple preprocessing - just normalize
312
+ inputTensor = this.simplePreprocess(imageData);
313
+ }
314
+
315
+ // Get input name
316
+ const inputName = this.config.inputName || this.session!.inputNames[0];
317
+
318
+ // Run inference
319
+ const feeds: Record<string, ort.Tensor> = {};
320
+ feeds[inputName] = inputTensor;
321
+
322
+ const results = await this.session!.run(feeds);
323
+
324
+ // Postprocess
325
+ let data: T;
326
+ if (this.config.postprocessing) {
327
+ data = this.config.postprocessing(results, metadata || this.config.metadata);
328
+ } else {
329
+ // Return raw outputs
330
+ data = results as any;
331
+ }
332
+
333
+ const inferenceTime = performance.now() - startTime;
334
+
335
+ return {
336
+ outputs: results,
337
+ data,
338
+ inferenceTime,
339
+ inputShape: [...inputTensor.dims],
340
+ };
341
+ }
342
+
343
+ /**
344
+ * Get model info
345
+ */
346
+ getModelInfo(): {
347
+ inputNames: string[];
348
+ outputNames: string[];
349
+ inputCount: number;
350
+ outputCount: number;
351
+ } {
352
+ if (!this.session) {
353
+ throw new Error('Model not initialized. Call init() first.');
354
+ }
355
+
356
+ return {
357
+ inputNames: [...this.session.inputNames],
358
+ outputNames: [...this.session.outputNames],
359
+ inputCount: this.session.inputNames.length,
360
+ outputCount: this.session.outputNames.length,
361
+ };
362
+ }
363
+
364
+ /**
365
+ * Get tensor by name from outputs
366
+ */
367
+ getOutputTensor<T extends ort.Tensor = ort.Tensor>(
368
+ outputs: Record<string, ort.Tensor>,
369
+ name?: string
370
+ ): T {
371
+ const tensorName = name || this.config.outputNames[0] || this.session!.outputNames[0];
372
+ return outputs[tensorName] as T;
373
+ }
374
+
375
+ /**
376
+ * Simple preprocessing - just normalize to [0, 1] and convert to CHW
377
+ */
378
+ private simplePreprocess(imageData: ImageData): ort.Tensor {
379
+ const { width, height, data } = imageData;
380
+ const tensor = new Float32Array(3 * width * height);
381
+
382
+ for (let i = 0; i < data.length; i += 4) {
383
+ const pixelIdx = i / 4;
384
+ tensor[pixelIdx] = data[i] / 255;
385
+ tensor[pixelIdx + width * height] = data[i + 1] / 255;
386
+ tensor[pixelIdx + 2 * width * height] = data[i + 2] / 255;
387
+ }
388
+
389
+ return new ort.Tensor('float32', tensor, [1, 3, height, width]);
390
+ }
391
+
392
+ /**
393
+ * Preprocess with letterbox and normalization
394
+ */
395
+ private preprocess(
396
+ imageData: ImageData,
397
+ imgWidth: number,
398
+ imgHeight: number,
399
+ inputSize: [number, number]
400
+ ): ort.Tensor {
401
+ const [inputW, inputH] = inputSize;
402
+
403
+ if (!this.canvas || !this.ctx) {
404
+ this.canvas = document.createElement('canvas');
405
+ this.canvas.width = inputW;
406
+ this.canvas.height = inputH;
407
+ this.ctx = this.canvas.getContext('2d', { willReadFrequently: true, alpha: false })!;
408
+ }
409
+
410
+ const ctx = this.ctx;
411
+ ctx.fillStyle = this.config.backgroundColor;
412
+ ctx.fillRect(0, 0, inputW, inputH);
413
+
414
+ // Calculate letterbox
415
+ const aspectRatio = imgWidth / imgHeight;
416
+ const targetAspectRatio = inputW / inputH;
417
+
418
+ let drawWidth: number, drawHeight: number, offsetX: number, offsetY: number;
419
+
420
+ if (this.config.keepAspectRatio) {
421
+ if (aspectRatio > targetAspectRatio) {
422
+ drawWidth = inputW;
423
+ drawHeight = Math.floor(inputW / aspectRatio);
424
+ offsetX = 0;
425
+ offsetY = Math.floor((inputH - drawHeight) / 2);
426
+ } else {
427
+ drawHeight = inputH;
428
+ drawWidth = Math.floor(inputH * aspectRatio);
429
+ offsetX = Math.floor((inputW - drawWidth) / 2);
430
+ offsetY = 0;
431
+ }
432
+ } else {
433
+ drawWidth = inputW;
434
+ drawHeight = inputH;
435
+ offsetX = 0;
436
+ offsetY = 0;
437
+ }
438
+
439
+ // Create source canvas
440
+ const srcCanvas = document.createElement('canvas');
441
+ const srcCtx = srcCanvas.getContext('2d')!;
442
+ srcCanvas.width = imgWidth;
443
+ srcCanvas.height = imgHeight;
444
+
445
+ srcCtx.putImageData(imageData, 0, 0);
446
+
447
+ // Draw with letterbox
448
+ ctx.drawImage(srcCanvas, 0, 0, imgWidth, imgHeight, offsetX, offsetY, drawWidth, drawHeight);
449
+
450
+ const paddedData = ctx.getImageData(0, 0, inputW, inputH);
451
+ const tensor = new Float32Array(inputW * inputH * 3);
452
+ const { mean, std } = this.config.normalization;
453
+
454
+ for (let i = 0; i < paddedData.data.length; i += 4) {
455
+ const pixelIdx = i / 4;
456
+ tensor[pixelIdx] = (paddedData.data[i] - mean[0]) / std[0];
457
+ tensor[pixelIdx + inputW * inputH] = (paddedData.data[i + 1] - mean[1]) / std[1];
458
+ tensor[pixelIdx + 2 * inputW * inputH] = (paddedData.data[i + 2] - mean[2]) / std[2];
459
+ }
460
+
461
+ return new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
462
+ }
463
+
464
+ /**
465
+ * Dispose resources
466
+ */
467
+ dispose(): void {
468
+ if (this.session) {
469
+ this.session.release();
470
+ this.session = null;
471
+ }
472
+ this.initialized = false;
473
+ }
474
+ }
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Hand solution - hand pose estimation with 21 keypoints
3
+ */
4
+
5
+ import { YOLOX } from '../models/yolox';
6
+ import { RTMPose } from '../models/rtmpose';
7
+ import { BBox } from '../types/index';
8
+
9
+ export class Hand {
10
+ private detModel: YOLOX;
11
+ private poseModel: RTMPose;
12
+
13
+ constructor(
14
+ det: string = 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/rtmdet_nano_8xb32-300e_hand-267f9c8f.zip',
15
+ detInputSize: [number, number] = [320, 320],
16
+ pose: string = 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.zip',
17
+ poseInputSize: [number, number] = [256, 256],
18
+ toOpenpose: boolean = false,
19
+ backend: 'onnxruntime' = 'onnxruntime',
20
+ device: string = 'cpu'
21
+ ) {
22
+ this.detModel = new YOLOX(
23
+ det,
24
+ detInputSize,
25
+ 0.45,
26
+ 0.5,
27
+ backend
28
+ );
29
+
30
+ this.poseModel = new RTMPose(
31
+ pose,
32
+ poseInputSize,
33
+ toOpenpose,
34
+ backend
35
+ );
36
+ }
37
+
38
+ async init(): Promise<void> {
39
+ await this.detModel.init();
40
+ await this.poseModel.init();
41
+ }
42
+
43
+ async call(
44
+ image: Uint8Array,
45
+ imgWidth: number,
46
+ imgHeight: number
47
+ ): Promise<{ keypoints: number[][]; scores: number[] }> {
48
+ const bboxes = await this.detModel.call(image, imgWidth, imgHeight);
49
+ const result = await this.poseModel.call(image, imgWidth, imgHeight, bboxes);
50
+ return result;
51
+ }
52
+ }