ai-cli 0.0.12 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib/mp4.ts ADDED
@@ -0,0 +1,560 @@
1
/**
 * Everything `extractKeyframe` hands back for one H.264 keyframe.
 */
export interface KeyframeData {
  /** First sequence parameter set copied out of the track's avcC record. */
  sps: Uint8Array;
  /** First picture parameter set copied out of the track's avcC record. */
  pps: Uint8Array;
  /** The IDR slice NAL unit (type 5) without its length prefix. */
  sliceData: Uint8Array;
  /** Picture width decoded from the SPS; 0 when the SPS could not be parsed. */
  width: number;
  /** Picture height decoded from the SPS; 0 when the SPS could not be parsed. */
  height: number;
}
8
+
9
/** Header information for one ISO BMFF box, as decoded by `readBoxHeader`. */
interface Box {
  /** Four-character box code, e.g. "moov". */
  type: string;
  /** Byte position of the first header byte inside the DataView. */
  offset: number;
  /** Total box length in bytes, header included. */
  size: number;
  /** Header length: 8 normally, 16 when a 64-bit size is present. */
  headerSize: number;
}
15
+
16
/**
 * Decodes the box header that begins at `offset`.
 *
 * A 32-bit size of 1 means a 64-bit size follows the type field; a size of 0
 * means the box runs to the end of `view`.
 *
 * @param view   View over the whole container.
 * @param offset Byte position of the header.
 * @returns The decoded header, or null when the header does not fit in `view`.
 */
function readBoxHeader(view: DataView, offset: number): Box | null {
  const available = view.byteLength;
  if (available < offset + 8) return null;

  const declared = view.getUint32(offset);
  const type = [4, 5, 6, 7]
    .map((i) => String.fromCharCode(view.getUint8(offset + i)))
    .join("");

  if (declared === 1) {
    // Extended header: the real size is the 64-bit field after the type.
    if (available < offset + 16) return null;
    const size = Number(view.getBigUint64(offset + 8));
    return { type, offset, size, headerSize: 16 };
  }

  const size = declared === 0 ? available - offset : declared;
  return { type, offset, size, headerSize: 8 };
}
35
+
36
/**
 * Scans sibling boxes in `[start, end)` and returns the first one whose
 * four-character code equals `type`.
 *
 * The scan gives up (returns null) as soon as a header cannot be read or
 * declares a size below 8, since the next sibling cannot be located then.
 */
function findBox(
  view: DataView,
  start: number,
  end: number,
  type: string
): Box | null {
  for (let cursor = start; cursor < end; ) {
    const candidate = readBoxHeader(view, cursor);
    if (candidate === null || candidate.size < 8) return null;
    if (candidate.type === type) return candidate;
    cursor += candidate.size;
  }
  return null;
}
51
+
52
/**
 * Walks a chain of nested boxes, e.g. `["moov", "trak", "mdia"]`, looking for
 * each path element among the children of the previous match.
 *
 * Plain container boxes keep their children directly after the header.
 * `stsd` and `dref` instead start with version(1) + flags(3) + entry_count(4),
 * so those 8 bytes are skipped before searching their children.
 *
 * @param view  View over the whole container.
 * @param start First byte of the range searched for `path[0]`.
 * @param end   End (exclusive) of that range.
 * @param path  Four-character codes, outermost first.
 * @returns The box matching the last path element, or null if any element is
 *          missing (or `path` is empty).
 */
function findBoxPath(
  view: DataView,
  start: number,
  end: number,
  path: string[]
): Box | null {
  let found: Box | null = null;
  let rangeStart = start;
  let rangeEnd = end;
  for (const type of path) {
    found = findBox(view, rangeStart, rangeEnd, type);
    if (!found) return null;
    rangeStart = found.offset + found.headerSize;
    rangeEnd = found.offset + found.size;
    if (type === "stsd" || type === "dref") {
      // Children follow the full-box fields and the entry count.
      rangeStart += 8;
    }
  }
  return found;
}
79
+
80
/**
 * Reports whether the trak payload in `[trakStart, trakEnd)` is a video track,
 * i.e. whether its mdia/hdlr box carries the handler type "vide".
 *
 * Returns false when mdia or hdlr is missing or the handler field lies beyond
 * the end of `view`.
 */
function isVideoTrack(
  view: DataView,
  trakStart: number,
  trakEnd: number
): boolean {
  const mdia = findBox(view, trakStart, trakEnd, "mdia");
  const hdlr =
    mdia &&
    findBox(view, mdia.offset + mdia.headerSize, mdia.offset + mdia.size, "hdlr");
  if (!hdlr) return false;

  const payload = hdlr.offset + hdlr.headerSize;
  if (view.byteLength < payload + 12) return false;

  // Payload layout: version(1) + flags(3) + pre_defined(4) + handler_type(4)
  let handlerType = "";
  for (let i = 8; i < 12; i++) {
    handlerType += String.fromCharCode(view.getUint8(payload + i));
  }
  return handlerType === "vide";
}
105
+
106
/**
 * Returns the first `trak` box inside `[moovStart, moovEnd)` that
 * `isVideoTrack` accepts, or null when there is none.
 *
 * Scanning stops at the first header that cannot be read or that declares a
 * size below 8.
 */
function findVideoTrack(
  view: DataView,
  moovStart: number,
  moovEnd: number
): Box | null {
  for (let cursor = moovStart; cursor < moovEnd; ) {
    const child = readBoxHeader(view, cursor);
    if (child === null || child.size < 8) return null;

    const isVideoTrak =
      child.type === "trak" &&
      isVideoTrack(view, child.offset + child.headerSize, child.offset + child.size);
    if (isVideoTrak) return child;

    cursor += child.size;
  }
  return null;
}
126
+
127
/** The parts of an avcC record that `parseAvcC` extracts. */
interface AvcCData {
  /** Bytes of the first SPS entry. */
  sps: Uint8Array;
  /** Bytes of the first PPS entry. */
  pps: Uint8Array;
  /** Width in bytes (1-4) of the length prefix in front of every NAL unit. */
  nalLengthSize: number;
}
132
+
133
/**
 * Parses the payload of an `avcC` box and returns its first SPS, its first
 * PPS and the NAL length-prefix width.
 *
 * Every read is bounds-checked, so a truncated or inconsistent record yields
 * null rather than a RangeError.
 *
 * @param buf    Buffer holding the container.
 * @param offset Position of the avcC payload inside `buf`.
 * @param size   Payload length in bytes.
 * @returns The parsed data, or null when the record has the wrong version,
 *          has no SPS/PPS, or does not fit inside `size` / `buf`.
 */
function parseAvcC(
  buf: Uint8Array,
  offset: number,
  size: number
): AvcCData | null {
  if (size < 8 || offset < 0 || offset + size > buf.length) return null;
  const view = new DataView(buf.buffer, buf.byteOffset + offset, size);
  if (view.getUint8(0) !== 1) return null; // configurationVersion

  const nalLengthSize = (view.getUint8(4) & 0x03) + 1;
  const numSPS = view.getUint8(5) & 0x1f;
  if (numSPS < 1) return null;

  let pos = 6;

  // Reads one 16-bit-length-prefixed parameter set at `pos` and advances past it.
  const readParameterSet = (): Uint8Array | null => {
    if (pos + 2 > size) return null;
    const len = view.getUint16(pos);
    pos += 2;
    if (pos + len > size) return null;
    const data = buf.slice(offset + pos, offset + pos + len);
    pos += len;
    return data;
  };

  const sps = readParameterSet();
  if (sps === null) return null;

  // Only the first SPS is kept; the rest are stepped over to reach the PPS list.
  for (let i = 1; i < numSPS; i++) {
    if (readParameterSet() === null) return null;
  }

  if (pos + 1 > size) return null;
  const numPPS = view.getUint8(pos);
  pos += 1;
  if (numPPS < 1) return null;

  const pps = readParameterSet();
  if (pps === null) return null;

  return { sps, pps, nalLengthSize };
}
169
+
170
/**
 * Looks inside an `stsd` payload for an avc1/avc3 sample entry and parses the
 * `avcC` box nested in it.
 *
 * Only the first sample entry is examined. All offsets are checked against the
 * stsd payload and the view, so corrupt sizes produce null instead of a
 * RangeError.
 *
 * @param view       View over the whole container.
 * @param buf        The same bytes as `view`, used to copy out SPS/PPS.
 * @param stsdOffset Position of the stsd payload (just after the box header).
 * @param stsdSize   Payload length in bytes.
 * @returns Parsed avcC data, or null when the first entry is not avc1/avc3,
 *          has no avcC child, or the data is truncated.
 */
function readStsdAvcC(
  view: DataView,
  buf: Uint8Array,
  stsdOffset: number,
  stsdSize: number
): AvcCData | null {
  const stsdEnd = Math.min(stsdOffset + stsdSize, view.byteLength);
  // Need version(1)+flags(3) + entry_count(4) + the first entry's 8-byte header.
  if (stsdOffset < 0 || stsdOffset + 16 > stsdEnd) return null;

  const entryCount = view.getUint32(stsdOffset + 4);
  if (entryCount < 1) return null;

  const entryOffset = stsdOffset + 8;
  const entrySize = view.getUint32(entryOffset);
  const entryType = String.fromCharCode(
    view.getUint8(entryOffset + 4),
    view.getUint8(entryOffset + 5),
    view.getUint8(entryOffset + 6),
    view.getUint8(entryOffset + 7)
  );
  if (entryType !== "avc1" && entryType !== "avc3") return null;

  // avc1 box: SampleEntry(8) + VisualSampleEntry(70) = 78 bytes fixed after box header
  const childrenStart = entryOffset + 8 + 78;
  // An entry cannot extend past the stsd payload that contains it.
  const childrenEnd = Math.min(entryOffset + entrySize, stsdEnd);

  let pos = childrenStart;
  while (pos + 8 <= childrenEnd) {
    const childBox = readBoxHeader(view, pos);
    if (!childBox || childBox.size < childBox.headerSize) break;
    if (childBox.type === "avcC") {
      if (childBox.offset + childBox.size > childrenEnd) return null;
      return parseAvcC(
        buf,
        childBox.offset + childBox.headerSize,
        childBox.size - childBox.headerSize
      );
    }
    pos += childBox.size;
  }
  return null;
}
211
+
212
/**
 * Reads `count` consecutive big-endian 32-bit unsigned integers beginning at
 * `offset`.
 *
 * No bounds checking is done here; reading past the end of `view` throws the
 * DataView's RangeError.
 */
function readUint32Array(
  view: DataView,
  offset: number,
  count: number
): number[] {
  const values: number[] = [];
  while (values.length < count) {
    values.push(view.getUint32(offset + values.length * 4));
  }
  return values;
}
223
+
224
/** Sample-table data collected by `readSampleTable` from an stbl box. */
interface SampleTableInfo {
  /** Sample numbers listed in stss (1-based), or null when there is no stss box. */
  syncSamples: number[] | null;
  /** Size in bytes of every sample, taken from stsz. */
  sampleSizes: number[];
  /** stsc entries in file order. */
  sampleToChunk: Array<{
    /** 1-based index of the first chunk the entry applies to. */
    firstChunk: number;
    /** Number of samples in each chunk of the run. */
    samplesPerChunk: number;
    /** Third 32-bit field of the stsc entry (sample description index). */
    sdi: number;
  }>;
  /** Chunk positions from stco, or from co64 when stco is absent. */
  chunkOffsets: number[];
  /** Sample count declared in stsz. */
  totalSamples: number;
}
235
+
236
/**
 * Collects the sample tables (stss, stsz, stsc, stco/co64) stored in the stbl
 * payload `[stblStart, stblEnd)`.
 *
 * Each table's declared entry count is checked against the box that holds it
 * (clamped to the bytes actually present), so a truncated or corrupt table
 * makes the function return null instead of throwing a RangeError.
 *
 * @returns The tables, or null when stsz, stsc or both chunk-offset boxes are
 *          missing, or any table does not fit inside its box.
 */
function readSampleTable(
  view: DataView,
  stblStart: number,
  stblEnd: number
): SampleTableInfo | null {
  // End of a box, limited to the data that really exists in the view.
  const boxLimit = (box: Box): number =>
    Math.min(box.offset + box.size, view.byteLength);

  // stss (sync sample) - optional
  let syncSamples: number[] | null = null;
  const stss = findBox(view, stblStart, stblEnd, "stss");
  if (stss) {
    const d = stss.offset + stss.headerSize;
    const limit = boxLimit(stss);
    if (d + 8 > limit) return null;
    const count = view.getUint32(d + 4);
    if (d + 8 + count * 4 > limit) return null;
    syncSamples = readUint32Array(view, d + 8, count);
  }

  // stsz (sample size): version/flags(4) + sample_size(4) + sample_count(4)
  const stsz = findBox(view, stblStart, stblEnd, "stsz");
  if (!stsz) return null;
  const stszData = stsz.offset + stsz.headerSize;
  const stszLimit = boxLimit(stsz);
  if (stszData + 12 > stszLimit) return null;
  const uniformSize = view.getUint32(stszData + 4);
  const sampleCount = view.getUint32(stszData + 8);
  let sampleSizes: number[];
  if (uniformSize !== 0) {
    // No per-sample table is stored when all samples share one size.
    // NOTE(review): sampleCount is not bounded by any table here, so a hostile
    // file can still request a very large array.
    sampleSizes = Array.from({ length: sampleCount }, () => uniformSize);
  } else {
    if (stszData + 12 + sampleCount * 4 > stszLimit) return null;
    sampleSizes = readUint32Array(view, stszData + 12, sampleCount);
  }

  // stsc (sample-to-chunk): 12 bytes per entry
  const stsc = findBox(view, stblStart, stblEnd, "stsc");
  if (!stsc) return null;
  const stscData = stsc.offset + stsc.headerSize;
  const stscLimit = boxLimit(stsc);
  if (stscData + 8 > stscLimit) return null;
  const stscCount = view.getUint32(stscData + 4);
  if (stscData + 8 + stscCount * 12 > stscLimit) return null;
  const sampleToChunk: SampleTableInfo["sampleToChunk"] = [];
  for (let i = 0; i < stscCount; i++) {
    const off = stscData + 8 + i * 12;
    sampleToChunk.push({
      firstChunk: view.getUint32(off),
      samplesPerChunk: view.getUint32(off + 4),
      sdi: view.getUint32(off + 8),
    });
  }

  // stco (32-bit offsets) or co64 (64-bit offsets)
  let chunkOffsets: number[];
  const stco = findBox(view, stblStart, stblEnd, "stco");
  if (stco) {
    const stcoData = stco.offset + stco.headerSize;
    const limit = boxLimit(stco);
    if (stcoData + 8 > limit) return null;
    const chunkCount = view.getUint32(stcoData + 4);
    if (stcoData + 8 + chunkCount * 4 > limit) return null;
    chunkOffsets = readUint32Array(view, stcoData + 8, chunkCount);
  } else {
    const co64 = findBox(view, stblStart, stblEnd, "co64");
    if (!co64) return null;
    const co64Data = co64.offset + co64.headerSize;
    const limit = boxLimit(co64);
    if (co64Data + 8 > limit) return null;
    const chunkCount = view.getUint32(co64Data + 4);
    if (co64Data + 8 + chunkCount * 8 > limit) return null;
    chunkOffsets = [];
    for (let i = 0; i < chunkCount; i++) {
      chunkOffsets.push(Number(view.getBigUint64(co64Data + 8 + i * 8)));
    }
  }

  return {
    syncSamples,
    sampleSizes,
    sampleToChunk,
    chunkOffsets,
    totalSamples: sampleCount,
  };
}
304
+
305
/**
 * Computes the byte position of one sample from the chunk tables.
 *
 * Chunks are visited in stsc order while counting the samples they hold. Once
 * the chunk containing `sampleIndex` is reached, the sizes of the samples that
 * precede it in that chunk are added to the chunk's offset.
 *
 * @param info        Tables produced by `readSampleTable`.
 * @param sampleIndex 0-based sample index.
 * @returns The sample's offset; if no chunk contains the sample, the first
 *          chunk offset (or 0 when there are no chunks).
 */
function computeSampleOffset(
  info: SampleTableInfo,
  sampleIndex: number
): number {
  const { sampleToChunk: runs, chunkOffsets, sampleSizes } = info;
  let samplesBefore = 0;

  for (const [runIdx, run] of runs.entries()) {
    // A run lasts until the next entry's first chunk, or through the last chunk.
    const nextRunFirstChunk =
      runIdx + 1 < runs.length
        ? runs[runIdx + 1].firstChunk
        : chunkOffsets.length + 1;

    // firstChunk is 1-based, chunkOffsets is 0-based.
    for (let chunk = run.firstChunk - 1; chunk < nextRunFirstChunk - 1; chunk++) {
      if (samplesBefore + run.samplesPerChunk > sampleIndex) {
        let position = chunkOffsets[chunk];
        for (let s = samplesBefore; s < sampleIndex; s++) {
          position += sampleSizes[s];
        }
        return position;
      }
      samplesBefore += run.samplesPerChunk;
    }
  }

  return chunkOffsets[0] ?? 0;
}
340
+
341
/**
 * Extracts one H.264 keyframe from an MP4 buffer: the SPS/PPS of the first
 * video track plus the IDR slice of the sync sample closest to the middle of
 * the track.
 *
 * @param buf Bytes of the MP4 file (or at least the part holding moov and the
 *            chosen sample).
 * @returns The keyframe data, or null when the file has no usable AVC video
 *          track, the chosen sample is outside `buf`, it holds no IDR slice,
 *          or the container is truncated/corrupt.
 */
export function extractKeyframe(buf: Uint8Array): KeyframeData | null {
  try {
    return extractKeyframeFromBoxes(buf);
  } catch (err: unknown) {
    // DataView accessors throw RangeError when corrupt sizes point past the
    // buffer; report that like every other "cannot extract" case.
    if (err instanceof RangeError) return null;
    throw err;
  }
}

/** Picks the 0-based sample to decode: the sync sample nearest the track midpoint. */
function pickKeyframeSample(info: SampleTableInfo): number {
  const midSample = Math.floor(info.totalSamples / 2);
  const syncSamples = info.syncSamples;
  // No stss means every sample is a sync sample
  if (!syncSamples || syncSamples.length === 0) return midSample;

  let best = syncSamples[0] - 1; // stss numbers are 1-based
  for (const oneBased of syncSamples) {
    const candidate = oneBased - 1;
    if (Math.abs(candidate - midSample) < Math.abs(best - midSample)) {
      best = candidate;
    }
  }
  return best;
}

/** Box walk behind `extractKeyframe`; may throw RangeError on corrupt input. */
function extractKeyframeFromBoxes(buf: Uint8Array): KeyframeData | null {
  if (buf.length < 8) return null;
  const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);

  const moov = findBox(view, 0, buf.length, "moov");
  if (!moov) return null;

  const trak = findVideoTrack(
    view,
    moov.offset + moov.headerSize,
    moov.offset + moov.size
  );
  if (!trak) return null;

  // Navigate trak -> mdia -> minf -> stbl
  const mdia = findBox(
    view,
    trak.offset + trak.headerSize,
    trak.offset + trak.size,
    "mdia"
  );
  if (!mdia) return null;
  const minf = findBox(
    view,
    mdia.offset + mdia.headerSize,
    mdia.offset + mdia.size,
    "minf"
  );
  if (!minf) return null;
  const stbl = findBox(
    view,
    minf.offset + minf.headerSize,
    minf.offset + minf.size,
    "stbl"
  );
  if (!stbl) return null;
  const stblStart = stbl.offset + stbl.headerSize;
  const stblEnd = stbl.offset + stbl.size;

  // Decoder configuration (SPS/PPS) lives in stsd -> avc1/avc3 -> avcC
  const stsd = findBox(view, stblStart, stblEnd, "stsd");
  if (!stsd) return null;
  const avcC = readStsdAvcC(
    view,
    buf,
    stsd.offset + stsd.headerSize,
    stsd.size - stsd.headerSize
  );
  if (!avcC) return null;

  const tableInfo = readSampleTable(view, stblStart, stblEnd);
  if (!tableInfo || tableInfo.totalSamples === 0) return null;

  const targetSample = pickKeyframeSample(tableInfo);
  if (targetSample < 0 || targetSample >= tableInfo.totalSamples) return null;

  const sampleOffset = computeSampleOffset(tableInfo, targetSample);
  const sampleSize = tableInfo.sampleSizes[targetSample];
  // Inconsistent tables can yield NaN/negative positions; treat as unusable.
  if (!Number.isFinite(sampleOffset) || sampleOffset < 0) return null;
  if (!Number.isFinite(sampleSize) || sampleOffset + sampleSize > buf.length) {
    return null;
  }

  // Find the IDR slice NAL (type 5) among the length-prefixed NALUs
  const idrNal = findIDRNal(buf, sampleOffset, sampleSize, avcC.nalLengthSize);
  if (!idrNal) return null;

  const dims = parseSPSDimensions(avcC.sps);

  return {
    sps: avcC.sps,
    pps: avcC.pps,
    sliceData: idrNal,
    width: dims?.width ?? 0,
    height: dims?.height ?? 0,
  };
}
429
+
430
/**
 * Searches a sample made of length-prefixed NAL units for the first IDR slice
 * (nal_unit_type 5) and returns a copy of that NAL unit without its prefix.
 *
 * The length prefix is decoded as an unsigned big-endian integer, so a 4-byte
 * length with the top bit set is seen as "too large" rather than as a negative
 * number that would move the cursor backwards. Zero-length units are skipped.
 *
 * @param buf           Buffer holding the sample.
 * @param offset        Position of the sample inside `buf`.
 * @param size          Sample length in bytes.
 * @param nalLengthSize Width of each length prefix in bytes.
 * @returns The IDR NAL unit, or null when there is none or the sample is
 *          malformed.
 */
function findIDRNal(
  buf: Uint8Array,
  offset: number,
  size: number,
  nalLengthSize: number
): Uint8Array | null {
  if (!(offset >= 0) || !(nalLengthSize >= 1)) return null;

  // Never look past the bytes that actually exist.
  const end = Math.min(offset + size, buf.length);
  let pos = offset;

  while (pos + nalLengthSize <= end) {
    let nalLen = 0;
    for (let i = 0; i < nalLengthSize; i++) {
      // Multiplication keeps the value unsigned; `<<` would wrap at 2^31.
      nalLen = nalLen * 256 + buf[pos + i];
    }
    pos += nalLengthSize;
    if (pos + nalLen > end) break;

    // An empty unit has no header byte to classify.
    if (nalLen > 0 && (buf[pos] & 0x1f) === 5) {
      return buf.slice(pos, pos + nalLen);
    }
    pos += nalLen;
  }
  return null;
}
455
+
456
/**
 * Decodes the cropped picture size from an H.264 sequence parameter set.
 *
 * Differences from a naive field walk that matter for correctness:
 * - emulation-prevention bytes (the 0x03 in 00 00 03) are removed first;
 * - scaling-list deltas are read as signed Exp-Golomb values, because their
 *   value decides whether further deltas follow;
 * - crop offsets are scaled by the chroma format and by field/frame coding;
 * - running out of bits makes the result null instead of garbage.
 *
 * @param sps SPS NAL unit including its one-byte NAL header.
 * @returns Width and height in pixels, or null when the SPS is too short or
 *          cannot be parsed.
 */
function parseSPSDimensions(
  sps: Uint8Array
): { width: number; height: number } | null {
  if (sps.length < 4) return null;

  // Drop emulation_prevention_three_byte: a 0x03 that follows two zero bytes.
  const rbsp: number[] = [];
  let zeroRun = 0;
  for (const byte of sps) {
    if (zeroRun >= 2 && byte === 0x03) {
      zeroRun = 0;
      continue;
    }
    rbsp.push(byte);
    zeroRun = byte === 0 ? zeroRun + 1 : 0;
  }

  const totalBits = rbsp.length * 8;
  let bitPos = 0;
  let invalid = false; // set once a read runs past the data or a code is too long

  const readBit = (): number => {
    if (bitPos >= totalBits) {
      invalid = true;
      return 0;
    }
    const bit = (rbsp[bitPos >> 3] >> (7 - (bitPos & 7))) & 1;
    bitPos++;
    return bit;
  };

  const readBits = (n: number): number => {
    let val = 0;
    for (let i = 0; i < n; i++) val = val * 2 + readBit();
    return val;
  };

  // ue(v): unsigned Exp-Golomb
  const readUE = (): number => {
    let zeros = 0;
    while (readBit() === 0) {
      zeros++;
      if (invalid || zeros > 31) {
        invalid = true;
        return 0;
      }
    }
    return zeros === 0 ? 0 : 2 ** zeros - 1 + readBits(zeros);
  };

  // se(v): signed Exp-Golomb, mapped 1,2,3,4,... -> +1,-1,+2,-2,...
  const readSE = (): number => {
    const k = readUE();
    return k % 2 === 1 ? (k + 1) / 2 : -(k / 2);
  };

  // NAL header: forbidden_zero_bit(1) + nal_ref_idc(2) + nal_unit_type(5)
  readBits(8);
  const profileIdc = readBits(8);
  readBits(8); // constraint flags
  readBits(8); // level_idc
  readUE(); // seq_parameter_set_id

  let chromaFormatIdc = 1; // 4:2:0 is implied when the field is absent
  let separateColourPlane = 0;
  if (
    [100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134].includes(
      profileIdc
    )
  ) {
    chromaFormatIdc = readUE();
    if (chromaFormatIdc > 3) return null;
    if (chromaFormatIdc === 3) separateColourPlane = readBit();
    readUE(); // bit_depth_luma_minus8
    readUE(); // bit_depth_chroma_minus8
    readBit(); // qpprime_y_zero_transform_bypass_flag
    if (readBit()) {
      // seq_scaling_matrix_present_flag
      const listCount = chromaFormatIdc !== 3 ? 8 : 12;
      for (let i = 0; i < listCount; i++) {
        if (!readBit()) continue; // seq_scaling_list_present_flag[i]
        const size = i < 6 ? 16 : 64;
        let lastScale = 8;
        let nextScale = 8;
        for (let j = 0; j < size; j++) {
          if (nextScale !== 0) {
            nextScale = (lastScale + readSE() + 256) % 256;
          }
          lastScale = nextScale === 0 ? lastScale : nextScale;
        }
      }
    }
  }

  readUE(); // log2_max_frame_num_minus4
  const picOrderCntType = readUE();
  if (picOrderCntType === 0) {
    readUE(); // log2_max_pic_order_cnt_lsb_minus4
  } else if (picOrderCntType === 1) {
    readBit(); // delta_pic_order_always_zero_flag
    readSE(); // offset_for_non_ref_pic
    readSE(); // offset_for_top_to_bottom_field
    const refFramesInCycle = readUE();
    if (refFramesInCycle > 255) return null; // outside the range H.264 allows
    for (let i = 0; i < refFramesInCycle; i++) readSE(); // offset_for_ref_frame
  }
  readUE(); // max_num_ref_frames
  readBit(); // gaps_in_frame_num_value_allowed_flag

  const picWidthInMbs = readUE() + 1;
  const picHeightInMapUnits = readUE() + 1;
  const frameMbsOnly = readBit();
  if (!frameMbsOnly) readBit(); // mb_adaptive_frame_field_flag

  readBit(); // direct_8x8_inference_flag

  let cropLeft = 0;
  let cropRight = 0;
  let cropTop = 0;
  let cropBottom = 0;
  if (readBit()) {
    // frame_cropping_flag
    cropLeft = readUE();
    cropRight = readUE();
    cropTop = readUE();
    cropBottom = readUE();
  }

  if (invalid) return null;

  // Crop offsets are in chroma sample units; vertical ones double for field coding.
  const chromaArrayType = separateColourPlane ? 0 : chromaFormatIdc;
  const subWidthC = chromaArrayType === 1 || chromaArrayType === 2 ? 2 : 1;
  const subHeightC = chromaArrayType === 1 ? 2 : 1;
  const cropUnitX = subWidthC;
  const cropUnitY = subHeightC * (2 - frameMbsOnly);

  const width = picWidthInMbs * 16 - (cropLeft + cropRight) * cropUnitX;
  const height =
    (2 - frameMbsOnly) * picHeightInMapUnits * 16 -
    (cropTop + cropBottom) * cropUnitY;
  if (width <= 0 || height <= 0) return null;

  return { width, height };
}
@@ -0,0 +1,28 @@
1
/**
 * Shape of the object the default-exported `Module()` factory resolves to.
 *
 * NOTE(review): the member names suggest an Emscripten build of an OpenH264
 * decoder wrapper; the semantics of each call are defined by that build and
 * are not visible from this declaration - confirm against the wasm sources.
 */
interface OpenH264ModuleInstance {
  // Decoder control
  _decoder_init(): number;
  _decoder_feed(ptr: number, len: number): number;
  _decoder_flush(): number;
  _decoder_destroy(): void;

  // Decoded-frame accessors
  _get_has_frame(): number;
  _get_width(): number;
  _get_height(): number;
  _get_y_stride(): number;
  _get_uv_stride(): number;
  _get_y_ptr(): number;
  _get_u_ptr(): number;
  _get_v_ptr(): number;

  // Memory helpers and the byte view they operate on
  _malloc(size: number): number;
  _free(ptr: number): void;
  HEAPU8: Uint8Array;
}
18
+
19
/** Optional settings accepted by the `Module()` factory. */
interface ModuleOptions {
  /** Callback for module output (presumably stdout - confirm against the build). */
  print?: (...args: unknown[]) => void;
  /** Callback for module error output (presumably stderr - confirm against the build). */
  printErr?: (...args: unknown[]) => void;
  /** Pre-loaded wasm bytes supplied by the caller. */
  wasmBinary?: ArrayBufferLike | Uint8Array;
}
24
+
25
/**
 * Factory for the decoder module.
 *
 * @param options Optional output callbacks and wasm binary.
 * @returns A promise for the instantiated module.
 */
declare function Module(
  options?: ModuleOptions
): Promise<OpenH264ModuleInstance>;

export default Module;