@aim-packages/subtitle 0.3.2 → 0.5.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -67,6 +67,12 @@ export declare interface AimSegments {
67
67
 
68
68
  declare function assToAimSegments(text: string): Promise<AimSegments[]>;
69
69
 
70
+ export declare type Chunk = {
71
+ content: string;
72
+ index: number;
73
+ count: number;
74
+ };
75
+
70
76
  /**
71
77
  * 将字符串数组按字符限制分块
72
78
  *
@@ -107,6 +113,13 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
107
113
  indexResult: number[];
108
114
  };
109
115
 
116
+ /**
117
+ * Simple character-based chunker with overlap (mirrors main/embedding/chunker)
118
+ * - maxChars: max characters per chunk
119
+ * - overlap: overlapping characters between adjacent chunks to keep context
120
+ */
121
+ declare function chunkText(text: string, maxChars?: number, overlap?: number): Chunk[];
122
+
110
123
  /**
111
124
  * 清理时间显示格式,移除毫秒部分和多余的小时前缀
112
125
  *
@@ -497,6 +510,32 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
497
510
  */
498
511
  declare function padNumber(num: number, length?: number): string;
499
512
 
513
+ /**
514
+ * Parakeet 转写结果的类型定义
515
+ *
516
+ * Parakeet 是 Apple 提供的离线语音转写模型,输出的 JSON 包含:
517
+ * - 全文 text
518
+ * - 每个 sub-word token 的时间戳和置信度
519
+ */
520
+ declare interface ParakeetResult {
521
+ confidence: number;
522
+ duration: number;
523
+ processingTime: number;
524
+ rtfx: number;
525
+ text: string;
526
+ tokenTimings: Array<ParakeetToken>;
527
+ }
528
+
529
+ declare function parakeetToAimSegments(json: ParakeetResult): Promise<AimSegments[]>;
530
+
531
+ declare interface ParakeetToken {
532
+ confidence: number;
533
+ endTime: number;
534
+ startTime: number;
535
+ token: string;
536
+ tokenId: number;
537
+ }
538
+
500
539
  /**
501
540
  * Callback passed as the `onParse` callback to a parser
502
541
  *
@@ -556,6 +595,7 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
556
595
  tingwuToAimSegments,
557
596
  openaiToAimSegments,
558
597
  whisperJsonToAimSegments,
598
+ parakeetToAimSegments,
559
599
  detectSubtitleType,
560
600
  parseSubtitle,
561
601
  create as createWhisperStreamParser,
@@ -662,6 +702,11 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
662
702
  */
663
703
  declare function shiftAimSegmentItems(segment: AimSegments, count?: number): AimSegments | undefined;
664
704
 
705
+ /**
706
+ * Optional paragraph-aware splitter: split by paragraph/sentence first, then pack.
707
+ */
708
+ declare function smartChunks(text: string, maxChars?: number, overlap?: number): Chunk[];
709
+
665
710
  export declare interface SpeakerData {
666
711
  settings: Record<string, {
667
712
  spk: string;
@@ -834,6 +879,8 @@ declare function chunkArrayStrings(strings: string[], characterLimit: number): s
834
879
  convertTimeToAssFormat,
835
880
  convertHexColorToFFmpegFormat,
836
881
  chunkArrayStrings,
882
+ chunkText,
883
+ smartChunks,
837
884
  chunkSegmentStringsWithIndex,
838
885
  consolidateSegments,
839
886
  joinAimSegmentItems,