@aim-packages/subtitle 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1615 -1509
- package/dist/index.cjs.js +59 -51
- package/dist/index.d.ts +91 -9
- package/dist/index.es.js +1299 -1268
- package/package.json +33 -33
package/dist/index.d.ts
CHANGED
|
@@ -107,6 +107,26 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
107
107
|
indexResult: number[];
|
|
108
108
|
};
|
|
109
109
|
|
|
110
|
+
/**
|
|
111
|
+
* 清理时间显示格式,移除毫秒部分和多余的小时前缀
|
|
112
|
+
*
|
|
113
|
+
* 作用:
|
|
114
|
+
* 1. 移除时间字符串末尾的毫秒部分(支持 .xxx、,xxx 等分隔符)
|
|
115
|
+
* 2. 移除时间字符串开头多余的 "00:" 小时前缀
|
|
116
|
+
* 3. 保持时间格式的简洁性和可读性
|
|
117
|
+
*
|
|
118
|
+
* 示例:
|
|
119
|
+
* 输入:"00:01:30.500" -> 输出:"01:30"
|
|
120
|
+
* 输入:"01:45:20,123" -> 输出:"01:45:20"
|
|
121
|
+
* 输入:"00:00:05.00" -> 输出:"00:05"
|
|
122
|
+
* 输入:"01:30:45,50" -> 输出:"01:30:45"
|
|
123
|
+
*
|
|
124
|
+
* @export
|
|
125
|
+
* @param {string} timeString - 输入的时间字符串
|
|
126
|
+
* @return {string} 清理后的时间字符串
|
|
127
|
+
*/
|
|
128
|
+
declare function cleanTimeDisplay(timeString: string): string;
|
|
129
|
+
|
|
110
130
|
/**
|
|
111
131
|
* 合并字幕片段,优化字幕的时间轴
|
|
112
132
|
*
|
|
@@ -202,15 +222,41 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
202
222
|
*
|
|
203
223
|
* @returns A new parser, with `parse`, `end` and `reset` methods.
|
|
204
224
|
* @public
|
|
225
|
+
*
|
|
226
|
+
* Whisper字幕解析器
|
|
227
|
+
*
|
|
228
|
+
* 用于解析Whisper模型输出的字幕格式,支持VAD(Voice Activity Detection)时间调整
|
|
229
|
+
* 输入格式示例:
|
|
230
|
+
* [00:00:00.000 --> 00:00:03.000] 这是第一句话
|
|
231
|
+
* [00:00:03.000 --> 00:00:06.000] 这是第二句话
|
|
232
|
+
* [--Single--] // 单个文件结束标记
|
|
233
|
+
* [--Full--] // 所有文件结束标记
|
|
234
|
+
* [--end--] // 解析结束标记
|
|
235
|
+
*
|
|
236
|
+
* @param options - 解析器配置选项
|
|
237
|
+
* @param options.vad - VAD时间片段数组,用于调整字幕时间戳
|
|
238
|
+
* @param options.vadPadding - VAD时间调整的填充值 [前填充, 后填充],默认 [0.4, 0]
|
|
239
|
+
* @returns 返回一个Parser实例,包含feed、reset、end方法
|
|
205
240
|
*/
|
|
206
|
-
declare function create(options?: ParserOptions<AimSegments[], AimSegments[], string> & {
|
|
241
|
+
declare function create(options?: ParserOptions<AimSegments[], AimSegments[], string, AimSegments[]> & {
|
|
207
242
|
vad?: Segment[];
|
|
208
243
|
vadPadding?: [number, number];
|
|
209
244
|
}): Parser<string>;
|
|
210
245
|
|
|
211
|
-
declare function create_2(options?: ParserOptions<RequiredByKey<Partial<AimSegments>, "index">[], Partial<AimSegments>[], string>): Parser<string>;
|
|
246
|
+
declare function create_2(options?: ParserOptions<RequiredByKey<Partial<AimSegments>, "index">[], Partial<AimSegments>[], string, RequiredByKey<Partial<AimSegments>, "index">[]>): Parser<string>;
|
|
212
247
|
|
|
213
|
-
|
|
248
|
+
/**
|
|
249
|
+
* 创建字幕分段解析器
|
|
250
|
+
* 主要功能:
|
|
251
|
+
* 1. 将输入的字幕片段按句子长度进行分组
|
|
252
|
+
* 2. 检测和处理重复内容
|
|
253
|
+
* 3. 支持多语言分句
|
|
254
|
+
* 4. 流式处理字幕数据
|
|
255
|
+
*
|
|
256
|
+
* @param options 解析器配置选项
|
|
257
|
+
* @returns 解析器实例
|
|
258
|
+
*/
|
|
259
|
+
declare function create_3(options?: ParserOptions<AimSegments[], AimSegments[], AimSegments[], AimSegments[]> & {
|
|
214
260
|
sentenceLength?: number;
|
|
215
261
|
repeatString?: string[];
|
|
216
262
|
}): Parser<AimSegments[]>;
|
|
@@ -254,6 +300,18 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
254
300
|
}
|
|
255
301
|
}
|
|
256
302
|
|
|
303
|
+
/**
|
|
304
|
+
* 查找文本中重复子串的位置
|
|
305
|
+
* @param text 要搜索的文本
|
|
306
|
+
* @param substring 要查找的重复子串
|
|
307
|
+
* @returns 重复子串在文本中的位置数组
|
|
308
|
+
*/
|
|
309
|
+
declare function findRepeatedSubstringPositions(text: string, substring: string): {
|
|
310
|
+
start: number;
|
|
311
|
+
end: number;
|
|
312
|
+
match: string;
|
|
313
|
+
}[];
|
|
314
|
+
|
|
257
315
|
/**
|
|
258
316
|
* 将秒数转换为 xx:xx:xx.xxx 格式
|
|
259
317
|
*
|
|
@@ -265,6 +323,13 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
265
323
|
|
|
266
324
|
export declare type ISegment = [string, string, string, string | undefined];
|
|
267
325
|
|
|
326
|
+
/**
|
|
327
|
+
* 将多个子片段合并成一个片段
|
|
328
|
+
* @param s 要合并的子片段数组
|
|
329
|
+
* @returns 合并后的片段
|
|
330
|
+
*/
|
|
331
|
+
declare function joinAimSegmentItems(s: AimSegments[]): AimSegments | undefined;
|
|
332
|
+
|
|
268
333
|
export declare type LanguageCode = "auto" | "none" | "zh" | "zh_cn" | "zh_tw" | "yue" | "en" | "ja" | "ko" | "fr" | "es" | "ru" | "de" | "it" | "tr" | "pt" | "vi" | "id" | "th" | "ms" | "ar" | "hi" | "ro" | "ug" | "uz" | "kk" | "az" | "ky" | "fa" | "tg";
|
|
269
334
|
|
|
270
335
|
/**
|
|
@@ -321,6 +386,8 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
321
386
|
|
|
322
387
|
declare function outputLrc({ segments1, segments2, speakerData }: OutputTextParams): string;
|
|
323
388
|
|
|
389
|
+
declare function outputMarkdown({ segments1, segments2, header, isMd, chunkSize, speakerData, locale }: OutputTextParams): string;
|
|
390
|
+
|
|
324
391
|
declare function outputSrt({ segments1, segments2, speakerData }: OutputTextParams): string;
|
|
325
392
|
|
|
326
393
|
export declare interface OutputTextParams {
|
|
@@ -353,12 +420,13 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
353
420
|
*
|
|
354
421
|
* @public
|
|
355
422
|
*/
|
|
356
|
-
declare type ParseCallback<T> = (event: ParsedEvent<T>) => void;
|
|
423
|
+
declare type ParseCallback<T, R = never> = (event: ParsedEvent<T, R>) => void;
|
|
357
424
|
|
|
358
|
-
declare interface ParsedEvent<T> {
|
|
425
|
+
declare interface ParsedEvent<T, R = never> {
|
|
359
426
|
type: "event";
|
|
360
427
|
event: "start" | "message" | "end";
|
|
361
428
|
data?: T;
|
|
429
|
+
result?: R;
|
|
362
430
|
}
|
|
363
431
|
|
|
364
432
|
/**
|
|
@@ -407,17 +475,18 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
407
475
|
* @template S - 解析开始时的状态类型
|
|
408
476
|
* @template P - 解析过程中使用的状态类型
|
|
409
477
|
* @template E - 解析结束时的状态类型
|
|
478
|
+
* @template R - 解析结果类型(用于 onEnd 回调)
|
|
410
479
|
*
|
|
411
480
|
* @property {ParseCallback<S>} [onStart] - 解析开始时的回调函数
|
|
412
481
|
* @property {ParseCallback<P>} [onParse] - 解析过程中调用的回调函数
|
|
413
482
|
* @property {ParseCallback<P>} [onProgress] - 解析进度更新时的回调函数
|
|
414
|
-
* @property {ParseCallback<E>} [onEnd] - 解析结束时的回调函数
|
|
483
|
+
* @property {ParseCallback<E, R>} [onEnd] - 解析结束时的回调函数
|
|
415
484
|
*/
|
|
416
|
-
declare type ParserOptions<S, P, E> = {
|
|
485
|
+
declare type ParserOptions<S, P, E, R = never> = {
|
|
417
486
|
onStart?: ParseCallback<S>;
|
|
418
487
|
onParse?: ParseCallback<P>;
|
|
419
488
|
onProgress?: ParseCallback<P>;
|
|
420
|
-
onEnd?: ParseCallback<E>;
|
|
489
|
+
onEnd?: ParseCallback<E, R>;
|
|
421
490
|
};
|
|
422
491
|
|
|
423
492
|
export declare type PartialByKey<T, K extends keyof T> = Omit<T, K> & Partial<Pick<T, K>>;
|
|
@@ -475,6 +544,14 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
475
544
|
end: number;
|
|
476
545
|
};
|
|
477
546
|
|
|
547
|
+
/**
|
|
548
|
+
* 从片段中移除指定数量的子项
|
|
549
|
+
* @param segment 父片段
|
|
550
|
+
* @param count 要移除的子项数量
|
|
551
|
+
* @returns 被移除的子项合并后的新片段
|
|
552
|
+
*/
|
|
553
|
+
declare function shiftAimSegmentItems(segment: AimSegments, count?: number): AimSegments | undefined;
|
|
554
|
+
|
|
478
555
|
export declare interface SpeakerData {
|
|
479
556
|
settings: Record<string, {
|
|
480
557
|
spk: string;
|
|
@@ -628,6 +705,7 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
628
705
|
outputVtt,
|
|
629
706
|
outputLrc,
|
|
630
707
|
outputTxt,
|
|
708
|
+
outputMarkdown,
|
|
631
709
|
outputAss
|
|
632
710
|
}
|
|
633
711
|
}
|
|
@@ -637,6 +715,8 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
637
715
|
padNumber,
|
|
638
716
|
formatTime,
|
|
639
717
|
convertToSeconds,
|
|
718
|
+
cleanTimeDisplay,
|
|
719
|
+
findRepeatedSubstringPositions,
|
|
640
720
|
containsCJKCharacters,
|
|
641
721
|
languageCodeToName,
|
|
642
722
|
convertHexColorToAssFormat,
|
|
@@ -644,7 +724,9 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
644
724
|
convertHexColorToFFmpegFormat,
|
|
645
725
|
chunkArrayStrings,
|
|
646
726
|
chunkSegmentStringsWithIndex,
|
|
647
|
-
consolidateSegments
|
|
727
|
+
consolidateSegments,
|
|
728
|
+
joinAimSegmentItems,
|
|
729
|
+
shiftAimSegmentItems
|
|
648
730
|
}
|
|
649
731
|
}
|
|
650
732
|
|