@aim-packages/subtitle 0.3.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1615 -1615
- package/dist/index.cjs.js +57 -56
- package/dist/index.d.ts +47 -0
- package/dist/index.es.js +1023 -912
- package/package.json +33 -33
package/dist/index.d.ts
CHANGED
|
@@ -67,6 +67,12 @@ export declare interface AimSegments {
|
|
|
67
67
|
|
|
68
68
|
declare function assToAimSegments(text: string): Promise<AimSegments[]>;
|
|
69
69
|
|
|
70
|
+
export declare type Chunk = {
|
|
71
|
+
content: string;
|
|
72
|
+
index: number;
|
|
73
|
+
count: number;
|
|
74
|
+
};
|
|
75
|
+
|
|
70
76
|
/**
|
|
71
77
|
* 将字符串数组按字符限制分块
|
|
72
78
|
*
|
|
@@ -107,6 +113,13 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
107
113
|
indexResult: number[];
|
|
108
114
|
};
|
|
109
115
|
|
|
116
|
+
/**
|
|
117
|
+
* Simple character-based chunker with overlap (mirrors main/embedding/chunker)
|
|
118
|
+
* - maxChars: max characters per chunk
|
|
119
|
+
* - overlap: overlapping characters between adjacent chunks to keep context
|
|
120
|
+
*/
|
|
121
|
+
declare function chunkText(text: string, maxChars?: number, overlap?: number): Chunk[];
|
|
122
|
+
|
|
110
123
|
/**
|
|
111
124
|
* 清理时间显示格式,移除毫秒部分和多余的小时前缀
|
|
112
125
|
*
|
|
@@ -497,6 +510,32 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
497
510
|
*/
|
|
498
511
|
declare function padNumber(num: number, length?: number): string;
|
|
499
512
|
|
|
513
|
+
/**
|
|
514
|
+
* Parakeet 转写结果的类型定义
|
|
515
|
+
*
|
|
516
|
+
* Parakeet 是 NVIDIA 推出的语音转写模型(可离线运行),输出的 JSON 包含:
|
|
517
|
+
* - 全文 text
|
|
518
|
+
* - 每个 sub-word token 的时间戳和置信度
|
|
519
|
+
*/
|
|
520
|
+
declare interface ParakeetResult {
|
|
521
|
+
confidence: number;
|
|
522
|
+
duration: number;
|
|
523
|
+
processingTime: number;
|
|
524
|
+
rtfx: number;
|
|
525
|
+
text: string;
|
|
526
|
+
tokenTimings: Array<ParakeetToken>;
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
declare function parakeetToAimSegments(json: ParakeetResult): Promise<AimSegments[]>;
|
|
530
|
+
|
|
531
|
+
declare interface ParakeetToken {
|
|
532
|
+
confidence: number;
|
|
533
|
+
endTime: number;
|
|
534
|
+
startTime: number;
|
|
535
|
+
token: string;
|
|
536
|
+
tokenId: number;
|
|
537
|
+
}
|
|
538
|
+
|
|
500
539
|
/**
|
|
501
540
|
* Callback passed as the `onParse` callback to a parser
|
|
502
541
|
*
|
|
@@ -556,6 +595,7 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
556
595
|
tingwuToAimSegments,
|
|
557
596
|
openaiToAimSegments,
|
|
558
597
|
whisperJsonToAimSegments,
|
|
598
|
+
parakeetToAimSegments,
|
|
559
599
|
detectSubtitleType,
|
|
560
600
|
parseSubtitle,
|
|
561
601
|
create as createWhisperStreamParser,
|
|
@@ -662,6 +702,11 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
662
702
|
*/
|
|
663
703
|
declare function shiftAimSegmentItems(segment: AimSegments, count?: number): AimSegments | undefined;
|
|
664
704
|
|
|
705
|
+
/**
|
|
706
|
+
* Optional paragraph-aware splitter: split by paragraph/sentence first, then pack.
|
|
707
|
+
*/
|
|
708
|
+
declare function smartChunks(text: string, maxChars?: number, overlap?: number): Chunk[];
|
|
709
|
+
|
|
665
710
|
export declare interface SpeakerData {
|
|
666
711
|
settings: Record<string, {
|
|
667
712
|
spk: string;
|
|
@@ -834,6 +879,8 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
|
|
|
834
879
|
convertTimeToAssFormat,
|
|
835
880
|
convertHexColorToFFmpegFormat,
|
|
836
881
|
chunkArrayStrings,
|
|
882
|
+
chunkText,
|
|
883
|
+
smartChunks,
|
|
837
884
|
chunkSegmentStringsWithIndex,
|
|
838
885
|
consolidateSegments,
|
|
839
886
|
joinAimSegmentItems,
|