@aim-packages/subtitle 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1615 -1615
- package/dist/index.cjs.js +54 -54
- package/dist/index.d.ts +84 -10
- package/dist/index.es.js +1170 -1150
- package/package.json +33 -33
package/dist/index.d.ts
CHANGED
|
@@ -222,15 +222,41 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
222
222
|
*
|
|
223
223
|
* @returns A new parser instance, with `parse`, `end` and `reset` methods.
|
|
224
224
|
* @public
|
|
225
|
+
*
|
|
226
|
+
* Whisper字幕解析器
|
|
227
|
+
*
|
|
228
|
+
* 用于解析Whisper模型输出的字幕格式,支持VAD(Voice Activity Detection)时间调整
|
|
229
|
+
* 输入格式示例:
|
|
230
|
+
* [00:00:00.000 --> 00:00:03.000] 这是第一句话
|
|
231
|
+
* [00:00:03.000 --> 00:00:06.000] 这是第二句话
|
|
232
|
+
* [--Single--] // 单个文件结束标记
|
|
233
|
+
* [--Full--] // 所有文件结束标记
|
|
234
|
+
* [--end--] // 解析结束标记
|
|
235
|
+
*
|
|
236
|
+
* @param options - 解析器配置选项
|
|
237
|
+
* @param options.vad - VAD时间片段数组,用于调整字幕时间戳
|
|
238
|
+
* @param options.vadPadding - VAD时间调整的填充值 [前填充, 后填充],默认 [0.4, 0]
|
|
239
|
+
* @returns 返回一个Parser实例,包含feed、reset、end方法
|
|
225
240
|
*/
|
|
226
|
-
declare function create(options?: ParserOptions<AimSegments[], AimSegments[], string> & {
|
|
241
|
+
declare function create(options?: ParserOptions<AimSegments[], AimSegments[], string, AimSegments[]> & {
|
|
227
242
|
vad?: Segment[];
|
|
228
243
|
vadPadding?: [number, number];
|
|
229
244
|
}): Parser<string>;
|
|
230
245
|
|
|
231
|
-
declare function create_2(options?: ParserOptions<RequiredByKey<Partial<AimSegments>, "index">[], Partial<AimSegments>[], string>): Parser<string>;
|
|
246
|
+
declare function create_2(options?: ParserOptions<RequiredByKey<Partial<AimSegments>, "index">[], Partial<AimSegments>[], string, RequiredByKey<Partial<AimSegments>, "index">[]>): Parser<string>;
|
|
232
247
|
|
|
233
|
-
|
|
248
|
+
/**
|
|
249
|
+
* 创建字幕分段解析器
|
|
250
|
+
* 主要功能:
|
|
251
|
+
* 1. 将输入的字幕片段按句子长度进行分组
|
|
252
|
+
* 2. 检测和处理重复内容
|
|
253
|
+
* 3. 支持多语言分句
|
|
254
|
+
* 4. 流式处理字幕数据
|
|
255
|
+
*
|
|
256
|
+
* @param options 解析器配置选项
|
|
257
|
+
* @returns 解析器实例
|
|
258
|
+
*/
|
|
259
|
+
declare function create_3(options?: ParserOptions<AimSegments[], AimSegments[], AimSegments[], AimSegments[]> & {
|
|
234
260
|
sentenceLength?: number;
|
|
235
261
|
repeatString?: string[];
|
|
236
262
|
}): Parser<AimSegments[]>;
|
|
@@ -274,6 +300,33 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
274
300
|
}
|
|
275
301
|
}
|
|
276
302
|
|
|
303
|
+
/**
|
|
304
|
+
* 查找文本中重复子串的位置
|
|
305
|
+
* @param text 要搜索的文本
|
|
306
|
+
* @param substring 要查找的重复子串
|
|
307
|
+
* @returns 重复子串在文本中的位置数组
|
|
308
|
+
*/
|
|
309
|
+
declare function findRepeatedSubstringPositions(text: string, substring: string): {
|
|
310
|
+
start: number;
|
|
311
|
+
end: number;
|
|
312
|
+
match: string;
|
|
313
|
+
}[];
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* 修复whisper json的解码问题
|
|
317
|
+
*
|
|
318
|
+
* @export
|
|
319
|
+
* @param {Buffer} inputBuffer 读取的json文件
|
|
320
|
+
* @return {Promise<any>}
|
|
321
|
+
* @example
|
|
322
|
+
*
|
|
323
|
+
* const { readFile, writeFile } = require("fs/promises");
|
|
324
|
+
* const inputJsonBuffer = await readFile(inputPath)
|
|
325
|
+
* await fixWhisperJsonDecode(inputJsonBuffer)
|
|
326
|
+
*
|
|
327
|
+
*/
|
|
328
|
+
declare function fixWhisperJsonDecode(inputBuffer: Buffer): Promise<any>;
|
|
329
|
+
|
|
277
330
|
/**
|
|
278
331
|
* 将秒数转换为 xx:xx:xx.xxx 格式
|
|
279
332
|
*
|
|
@@ -285,6 +338,13 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
285
338
|
|
|
286
339
|
export declare type ISegment = [string, string, string, string | undefined];
|
|
287
340
|
|
|
341
|
+
/**
|
|
342
|
+
* 将多个子片段合并成一个片段
|
|
343
|
+
* @param s 要合并的子片段数组
|
|
344
|
+
* @returns 合并后的片段
|
|
345
|
+
*/
|
|
346
|
+
declare function joinAimSegmentItems(s: AimSegments[]): AimSegments | undefined;
|
|
347
|
+
|
|
288
348
|
export declare type LanguageCode = "auto" | "none" | "zh" | "zh_cn" | "zh_tw" | "yue" | "en" | "ja" | "ko" | "fr" | "es" | "ru" | "de" | "it" | "tr" | "pt" | "vi" | "id" | "th" | "ms" | "ar" | "hi" | "ro" | "ug" | "uz" | "kk" | "az" | "ky" | "fa" | "tg";
|
|
289
349
|
|
|
290
350
|
/**
|
|
@@ -375,12 +435,13 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
375
435
|
*
|
|
376
436
|
* @public
|
|
377
437
|
*/
|
|
378
|
-
declare type ParseCallback<T> = (event: ParsedEvent<T>) => void;
|
|
438
|
+
declare type ParseCallback<T, R = never> = (event: ParsedEvent<T, R>) => void;
|
|
379
439
|
|
|
380
|
-
declare interface ParsedEvent<T> {
|
|
440
|
+
declare interface ParsedEvent<T, R = never> {
|
|
381
441
|
type: "event";
|
|
382
442
|
event: "start" | "message" | "end";
|
|
383
443
|
data?: T;
|
|
444
|
+
result?: R;
|
|
384
445
|
}
|
|
385
446
|
|
|
386
447
|
/**
|
|
@@ -429,17 +490,18 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
429
490
|
* @template S - 解析开始时的状态类型
|
|
430
491
|
* @template P - 解析过程中使用的状态类型
|
|
431
492
|
* @template E - 解析结束时的状态类型
|
|
493
|
+
* @template R - 解析结果类型(用于 onEnd 回调)
|
|
432
494
|
*
|
|
433
495
|
* @property {ParseCallback<S>} [onStart] - 解析开始时的回调函数
|
|
434
496
|
* @property {ParseCallback<P>} [onParse] - 解析过程中调用的回调函数
|
|
435
497
|
* @property {ParseCallback<P>} [onProgress] - 解析进度更新时的回调函数
|
|
436
|
-
* @property {ParseCallback<E>} [onEnd] - 解析结束时的回调函数
|
|
498
|
+
* @property {ParseCallback<E, R>} [onEnd] - 解析结束时的回调函数
|
|
437
499
|
*/
|
|
438
|
-
declare type ParserOptions<S, P, E> = {
|
|
500
|
+
declare type ParserOptions<S, P, E, R = never> = {
|
|
439
501
|
onStart?: ParseCallback<S>;
|
|
440
502
|
onParse?: ParseCallback<P>;
|
|
441
503
|
onProgress?: ParseCallback<P>;
|
|
442
|
-
onEnd?: ParseCallback<E>;
|
|
504
|
+
onEnd?: ParseCallback<E, R>;
|
|
443
505
|
};
|
|
444
506
|
|
|
445
507
|
export declare type PartialByKey<T, K extends keyof T> = Omit<T, K> & Partial<Pick<T, K>>;
|
|
@@ -497,6 +559,14 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
497
559
|
end: number;
|
|
498
560
|
};
|
|
499
561
|
|
|
562
|
+
/**
|
|
563
|
+
* 从片段中移除指定数量的子项
|
|
564
|
+
* @param segment 父片段
|
|
565
|
+
* @param count 要移除的子项数量
|
|
566
|
+
* @returns 被移除的子项合并后的新片段
|
|
567
|
+
*/
|
|
568
|
+
declare function shiftAimSegmentItems(segment: AimSegments, count?: number): AimSegments | undefined;
|
|
569
|
+
|
|
500
570
|
export declare interface SpeakerData {
|
|
501
571
|
settings: Record<string, {
|
|
502
572
|
spk: string;
|
|
@@ -651,7 +721,8 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
651
721
|
outputLrc,
|
|
652
722
|
outputTxt,
|
|
653
723
|
outputMarkdown,
|
|
654
|
-
outputAss
|
|
724
|
+
outputAss,
|
|
725
|
+
fixWhisperJsonDecode
|
|
655
726
|
}
|
|
656
727
|
}
|
|
657
728
|
|
|
@@ -661,6 +732,7 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
661
732
|
formatTime,
|
|
662
733
|
convertToSeconds,
|
|
663
734
|
cleanTimeDisplay,
|
|
735
|
+
findRepeatedSubstringPositions,
|
|
664
736
|
containsCJKCharacters,
|
|
665
737
|
languageCodeToName,
|
|
666
738
|
convertHexColorToAssFormat,
|
|
@@ -668,7 +740,9 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
668
740
|
convertHexColorToFFmpegFormat,
|
|
669
741
|
chunkArrayStrings,
|
|
670
742
|
chunkSegmentStringsWithIndex,
|
|
671
|
-
consolidateSegments
|
|
743
|
+
consolidateSegments,
|
|
744
|
+
joinAimSegmentItems,
|
|
745
|
+
shiftAimSegmentItems
|
|
672
746
|
}
|
|
673
747
|
}
|
|
674
748
|
|