@deepcitation/deepcitation-js 1.1.49 → 1.1.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +73 -157
  2. package/lib/chunk-2HINOG74.js +3 -0
  3. package/lib/chunk-2HINOG74.js.map +1 -0
  4. package/lib/chunk-4UWAUWYL.cjs +3 -0
  5. package/lib/chunk-4UWAUWYL.cjs.map +1 -0
  6. package/lib/chunk-5XGN7UAV.js +2 -0
  7. package/lib/chunk-5XGN7UAV.js.map +1 -0
  8. package/lib/chunk-7TORYXU4.cjs +3 -0
  9. package/lib/chunk-7TORYXU4.cjs.map +1 -0
  10. package/lib/chunk-BDSA6VGC.js +125 -0
  11. package/lib/chunk-BDSA6VGC.js.map +1 -0
  12. package/lib/chunk-BEYJEW3Y.js +2 -0
  13. package/lib/chunk-BEYJEW3Y.js.map +1 -0
  14. package/lib/chunk-BWQLFMTV.js +2 -0
  15. package/lib/chunk-BWQLFMTV.js.map +1 -0
  16. package/lib/{chunk-F2MMVEVC.cjs → chunk-BYLIBOAU.cjs} +2 -1
  17. package/lib/chunk-BYLIBOAU.cjs.map +1 -0
  18. package/lib/chunk-DS6SOU4L.cjs +2 -0
  19. package/lib/chunk-DS6SOU4L.cjs.map +1 -0
  20. package/lib/{chunk-UUR2SQKU.cjs → chunk-HL3AXCDL.cjs} +2 -1
  21. package/lib/chunk-HL3AXCDL.cjs.map +1 -0
  22. package/lib/chunk-N7FTXSGM.js +3 -0
  23. package/lib/chunk-N7FTXSGM.js.map +1 -0
  24. package/lib/chunk-WS4CQVDI.cjs +125 -0
  25. package/lib/chunk-WS4CQVDI.cjs.map +1 -0
  26. package/lib/client/index.cjs +2 -1
  27. package/lib/client/index.cjs.map +1 -0
  28. package/lib/client/index.d.cts +58 -13
  29. package/lib/client/index.d.ts +58 -13
  30. package/lib/client/index.js +2 -1
  31. package/lib/client/index.js.map +1 -0
  32. package/lib/{index-fvVBZYVK.d.ts → index-BHjI8Bh1.d.cts} +61 -22
  33. package/lib/{index-fvVBZYVK.d.cts → index-BHjI8Bh1.d.ts} +61 -22
  34. package/lib/index.cjs +2 -1
  35. package/lib/index.cjs.map +1 -0
  36. package/lib/index.d.cts +137 -16
  37. package/lib/index.d.ts +137 -16
  38. package/lib/index.js +2 -1
  39. package/lib/index.js.map +1 -0
  40. package/lib/prompts/index.cjs +2 -1
  41. package/lib/prompts/index.cjs.map +1 -0
  42. package/lib/prompts/index.d.cts +177 -55
  43. package/lib/prompts/index.d.ts +177 -55
  44. package/lib/prompts/index.js +2 -1
  45. package/lib/prompts/index.js.map +1 -0
  46. package/lib/react/index.cjs +8 -5
  47. package/lib/react/index.cjs.map +1 -0
  48. package/lib/react/index.d.cts +348 -21
  49. package/lib/react/index.d.ts +348 -21
  50. package/lib/react/index.js +8 -5
  51. package/lib/react/index.js.map +1 -0
  52. package/lib/styles.css +1 -1
  53. package/lib/types/index.cjs +2 -1
  54. package/lib/types/index.cjs.map +1 -0
  55. package/lib/types/index.d.cts +1 -1
  56. package/lib/types/index.d.ts +1 -1
  57. package/lib/types/index.js +2 -1
  58. package/lib/types/index.js.map +1 -0
  59. package/lib/{utils-q6anRKO_.d.cts → utils-CCi9_JTv.d.cts} +5 -5
  60. package/lib/{utils-DuacFTtu.d.ts → utils-CoSP-i76.d.ts} +5 -5
  61. package/package.json +165 -152
  62. package/src/tailwind.css +5 -5
  63. package/lib/chunk-2PRW5PVT.cjs +0 -2
  64. package/lib/chunk-3XSZLKJW.js +0 -2
  65. package/lib/chunk-D2TKEF6D.cjs +0 -2
  66. package/lib/chunk-DHVODVIA.cjs +0 -71
  67. package/lib/chunk-HRCAI3NV.js +0 -1
  68. package/lib/chunk-ND6LFDGK.js +0 -71
  69. package/lib/chunk-O2XFH626.js +0 -1
  70. package/lib/chunk-PKXMJNRX.js +0 -2
package/lib/index.d.ts CHANGED
@@ -1,8 +1,9 @@
1
- export { CitationInput, DeepCitation, DeepCitationConfig, FileDataPart, FileInput, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsOptions, VerifyCitationsResponse, verifyAll } from './client/index.js';
2
- import { C as Citation, V as Verification, a as CitationStatus } from './index-fvVBZYVK.js';
3
- export { B as BLANK_VERIFICATION, b as CitationType, f as ContentMatchStatus, D as DEFAULT_OUTPUT_IMAGE_FORMAT, k as DeepTextItem, I as IVertex, N as NOT_FOUND_VERIFICATION_INDEX, O as OutputImageFormat, P as PENDING_VERIFICATION_INDEX, j as ScreenBox, i as SearchAttempt, h as SearchMethod, g as SearchStatus, e as SourceMeta, S as SourceType, U as UrlAccessStatus, c as VerifyCitationRequest, d as VerifyCitationResponse } from './index-fvVBZYVK.js';
4
- export { C as CITATION_X_PADDING, c as CITATION_Y_PADDING, b as generateCitationInstanceId, g as generateCitationKey, a as generateVerificationKey } from './utils-DuacFTtu.js';
5
- export { AV_CITATION_MARKDOWN_SYNTAX_PROMPT, CITATION_AV_BASED_JSON_OUTPUT_FORMAT, CITATION_AV_REMINDER, CITATION_JSON_OUTPUT_FORMAT, CITATION_MARKDOWN_SYNTAX_PROMPT, CITATION_REMINDER, CompressedResult, WrapCitationPromptOptions, WrapCitationPromptResult, WrapSystemPromptOptions, compressPromptIds, decompressPromptIds, wrapCitationPrompt, wrapSystemCitationPrompt } from './prompts/index.js';
1
+ export { CitationInput, DeepCitation, DeepCitationConfig, FileDataPart, FileInput, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsOptions, VerifyCitationsResponse } from './client/index.js';
2
+ import { C as Citation, V as Verification, a as CitationStatus } from './index-BHjI8Bh1.js';
3
+ export { B as BLANK_VERIFICATION, b as CitationType, c as ContentMatchStatus, D as DEFAULT_OUTPUT_IMAGE_FORMAT, d as DeepTextItem, I as IVertex, N as NOT_FOUND_VERIFICATION_INDEX, O as OutputImageFormat, P as PENDING_VERIFICATION_INDEX, S as ScreenBox, e as SearchAttempt, f as SearchMethod, g as SearchStatus, h as SourceMeta, i as SourceType, U as UrlAccessStatus, j as VerifyCitationRequest, k as VerifyCitationResponse } from './index-BHjI8Bh1.js';
4
+ import { CitationData, ParsedCitationResponse } from './prompts/index.js';
5
+ export { AV_CITATION_PROMPT, CITATION_AV_JSON_OUTPUT_FORMAT, CITATION_AV_REMINDER, CITATION_DATA_END_DELIMITER, CITATION_DATA_START_DELIMITER, CITATION_JSON_OUTPUT_FORMAT, CITATION_PROMPT, CITATION_REMINDER, CompressedResult, WrapCitationPromptOptions, WrapCitationPromptResult, WrapSystemPromptOptions, compressPromptIds, decompressPromptIds, wrapCitationPrompt, wrapSystemCitationPrompt } from './prompts/index.js';
6
+ export { C as CITATION_X_PADDING, a as CITATION_Y_PADDING, g as generateCitationInstanceId, b as generateCitationKey, c as generateVerificationKey } from './utils-CoSP-i76.js';
6
7
 
7
8
  /**
8
9
  * Calculates the verification status of a citation based on the found highlight and search state.
@@ -82,12 +83,136 @@ declare function groupCitationsByAttachmentIdObject(citations: Citation[] | {
82
83
  };
83
84
  };
84
85
 
86
+ /**
87
+ * Citation Parser
88
+ *
89
+ * Implements the "Split & Parse" strategy for the deferred JSON citation pattern.
90
+ * This parser extracts citations from LLM responses that use [N] markers in text
91
+ * and include a JSON data block at the end.
92
+ *
93
+ * Algorithm:
94
+ * 1. Detection: Look for the start delimiter <<<CITATION_DATA>>>
95
+ * 2. Splitting: Separate visible content from the citation data block
96
+ * 3. Data Extraction: Extract the JSON string between delimiters
97
+ * 4. Sanitization: Parse with JSON.parse, with fallback repair for common issues
98
+ * 5. Hydration: Map the JSON objects to a usable format
99
+ */
100
+
101
+ /**
102
+ * Parses a citation response from an LLM.
103
+ *
104
+ * This function:
105
+ * 1. Finds the <<<CITATION_DATA>>> delimiter in the response
106
+ * 2. Splits the response into visible text and citation data
107
+ * 3. Parses the JSON citation data
108
+ * 4. Returns a structured result with both
109
+ *
110
+ * @param llmResponse - The full LLM response text
111
+ * @returns ParsedCitationResponse with visible text and parsed citations
112
+ *
113
+ * @example
114
+ * ```typescript
115
+ * const response = `
116
+ * The company grew 45% [1].
117
+ *
118
+ * <<<CITATION_DATA>>>
119
+ * [{"id": 1, "attachment_id": "abc", "full_phrase": "grew 45%", "anchor_text": "45%"}]
120
+ * <<<END_CITATION_DATA>>>
121
+ * `;
122
+ *
123
+ * const parsed = parseDeferredCitationResponse(response);
124
+ * console.log(parsed.visibleText); // "The company grew 45% [1]."
125
+ * console.log(parsed.citations); // [{id: 1, attachment_id: "abc", ...}]
126
+ * ```
127
+ */
128
+ declare function parseDeferredCitationResponse(llmResponse: string): ParsedCitationResponse;
129
+ /**
130
+ * Converts a CitationData object to the standard Citation format.
131
+ *
132
+ * @param data - The citation data
133
+ * @param citationNumber - Optional override for citation number (defaults to data.id)
134
+ * @returns Standard Citation object
135
+ */
136
+ declare function deferredCitationToCitation(data: CitationData, citationNumber?: number): Citation;
137
+ /**
138
+ * Extracts all citations from a citation response and returns them
139
+ * in the standard dictionary format used by the verification API.
140
+ *
141
+ * This function parses the response, converts each citation to the standard
142
+ * Citation format, and generates deterministic keys for each.
143
+ *
144
+ * @param llmResponse - The full LLM response with citation block
145
+ * @returns Dictionary of parsed Citation objects keyed by citation key
146
+ *
147
+ * @example
148
+ * ```typescript
149
+ * const citations = getAllCitationsFromDeferredResponse(llmOutput);
150
+ * // Returns: { "abc123...": { attachmentId: "...", fullPhrase: "...", ... }, ... }
151
+ * ```
152
+ */
153
+ declare function getAllCitationsFromDeferredResponse(llmResponse: string): {
154
+ [key: string]: Citation;
155
+ };
156
+ /**
157
+ * Checks if a response contains a deferred citation data block (detected via the citation data delimiter).
158
+ *
159
+ * @param response - The LLM response to check
160
+ * @returns True if the response contains the citation data delimiter
161
+ */
162
+ declare function hasDeferredCitations(response: string): boolean;
163
+ /**
164
+ * Extracts just the visible text from a response,
165
+ * removing the citation data block.
166
+ *
167
+ * @param llmResponse - The full LLM response
168
+ * @returns The visible text portion only
169
+ */
170
+ declare function extractVisibleText(llmResponse: string): string;
171
+ /**
172
+ * Replaces [N] citation markers in text with optional content.
173
+ *
174
+ * @param text - The text containing [N] markers
175
+ * @param options - Configuration for replacement
176
+ * @returns The text with markers replaced
177
+ *
178
+ * @example
179
+ * ```typescript
180
+ * const text = "Revenue grew 45% [1] in Q4 [2].";
181
+ *
182
+ * // Remove markers entirely
183
+ * replaceDeferredMarkers(text);
184
+ * // Returns: "Revenue grew 45% in Q4."
185
+ *
186
+ * // Replace with anchor texts
187
+ * replaceDeferredMarkers(text, {
188
+ * citationMap: new Map([[1, { anchor_text: "45%" }], [2, { anchor_text: "Q4" }]]),
189
+ * showAnchorText: true,
190
+ * });
191
+ * // Returns: "Revenue grew 45% 45% in Q4 Q4."
192
+ * ```
193
+ */
194
+ declare function replaceDeferredMarkers(text: string, options?: {
195
+ /** Map of citation IDs to their data */
196
+ citationMap?: Map<number, CitationData>;
197
+ /** Whether to show the anchor text after the marker */
198
+ showAnchorText?: boolean;
199
+ /** Custom replacement function */
200
+ replacer?: (id: number, data?: CitationData) => string;
201
+ }): string;
202
+ /**
203
+ * Gets all citation marker IDs found in a text.
204
+ *
205
+ * @param text - The text to scan for [N] markers
206
+ * @returns Array of citation IDs in order of appearance
207
+ */
208
+ declare function getCitationMarkerIds(text: string): number[];
209
+
85
210
  interface ReplaceCitationsOptions {
86
211
  /**
87
- * If true, leaves the key_span text behind when removing citations.
212
+ * If true, leaves the anchor_text behind when removing citations.
88
213
  * @default false
89
214
  */
90
- leaveKeySpanBehind?: boolean;
215
+ leaveAnchorTextBehind?: boolean;
91
216
  /**
92
217
  * Map of citation keys to verification results.
93
218
  * Used to determine verification status for each citation.
@@ -120,12 +245,12 @@ declare const getVerificationTextIndicator: (verification: Verification | null |
120
245
  * // Remove all citations
121
246
  * const clean = replaceCitations(llmOutput);
122
247
  *
123
- * // Leave key_span text behind
124
- * const withKeySpans = replaceCitations(llmOutput, { leaveKeySpanBehind: true });
248
+ * // Leave anchor_text behind
249
+ * const withAnchorTexts = replaceCitations(llmOutput, { leaveAnchorTextBehind: true });
125
250
  *
126
251
  * // Show verification status indicators
127
252
  * const withStatus = replaceCitations(llmOutput, {
128
- * leaveKeySpanBehind: true,
253
+ * leaveAnchorTextBehind: true,
129
254
  * verifications: verificationMap,
130
255
  * showVerificationStatus: true,
131
256
  * });
@@ -133,13 +258,9 @@ declare const getVerificationTextIndicator: (verification: Verification | null |
133
258
  * ```
134
259
  */
135
260
  declare const replaceCitations: (markdownWithCitations: string, options?: ReplaceCitationsOptions) => string;
136
- /**
137
- * @deprecated Use `replaceCitations` instead. This function is kept for backward compatibility.
138
- */
139
- declare const removeCitations: (markdownWithCitations: string, leaveKeySpanBehind?: boolean) => string;
140
261
  declare const removePageNumberMetadata: (pageText: string) => string;
141
262
  declare const removeLineIdMetadata: (pageText: string) => string;
142
- declare const getCitationPageNumber: (startPageKey?: string | null) => number | null;
263
+ declare const getCitationPageNumber: (startPageId?: string | null) => number | null;
143
264
  declare const normalizeCitations: (response: string) => string;
144
265
 
145
266
  declare const isGeminiGarbage: (content: string) => boolean;
@@ -156,4 +277,4 @@ declare function cleanRepeatingLastSentence(text: string): string;
156
277
  */
157
278
  declare function sha1Hash(data: string | any): string;
158
279
 
159
- export { Citation, CitationStatus, type ReplaceCitationsOptions, Verification, cleanRepeatingLastSentence, getAllCitationsFromLlmOutput, getCitationPageNumber, getCitationStatus, getVerificationTextIndicator, groupCitationsByAttachmentId, groupCitationsByAttachmentIdObject, isGeminiGarbage, normalizeCitations, parseCitation, removeCitations, removeLineIdMetadata, removePageNumberMetadata, replaceCitations, sha1Hash };
280
+ export { Citation, CitationData, CitationStatus, ParsedCitationResponse, type ReplaceCitationsOptions, Verification, cleanRepeatingLastSentence, deferredCitationToCitation, extractVisibleText, getAllCitationsFromDeferredResponse, getAllCitationsFromLlmOutput, getCitationMarkerIds, getCitationPageNumber, getCitationStatus, getVerificationTextIndicator, groupCitationsByAttachmentId, groupCitationsByAttachmentIdObject, hasDeferredCitations, isGeminiGarbage, normalizeCitations, parseCitation, parseDeferredCitationResponse, removeLineIdMetadata, removePageNumberMetadata, replaceCitations, replaceDeferredMarkers, sha1Hash };
package/lib/index.js CHANGED
@@ -1 +1,2 @@
1
- export{a as DeepCitation}from'./chunk-3XSZLKJW.js';export{b as AV_CITATION_MARKDOWN_SYNTAX_PROMPT,h as CITATION_AV_BASED_JSON_OUTPUT_FORMAT,d as CITATION_AV_REMINDER,g as CITATION_JSON_OUTPUT_FORMAT,a as CITATION_MARKDOWN_SYNTAX_PROMPT,c as CITATION_REMINDER,i as compressPromptIds,j as decompressPromptIds,f as wrapCitationPrompt,e as wrapSystemCitationPrompt}from'./chunk-ND6LFDGK.js';export{d as BLANK_VERIFICATION,a as DEFAULT_OUTPUT_IMAGE_FORMAT,b as NOT_FOUND_VERIFICATION_INDEX,c as PENDING_VERIFICATION_INDEX}from'./chunk-HRCAI3NV.js';export{k as CITATION_X_PADDING,l as CITATION_Y_PADDING,f as generateCitationInstanceId,d as generateCitationKey,e as generateVerificationKey,v as getAllCitationsFromLlmOutput,r as getCitationPageNumber,t as getCitationStatus,m as getVerificationTextIndicator,w as groupCitationsByAttachmentId,x as groupCitationsByAttachmentIdObject,s as normalizeCitations,u as parseCitation,o as removeCitations,q as removeLineIdMetadata,p as removePageNumberMetadata,n as replaceCitations,a as sha1Hash}from'./chunk-PKXMJNRX.js';import'./chunk-O2XFH626.js';var z=t=>{if(!t)return false;let e=t.trim();if(e.length<64)return false;let a=e?.[0];for(let r=1;r<e.length;r++)if(e[r]!==a)return false;return true};function J(t){t=t.trim();let e=2,a=10,r=/[.?!](?=\s+|$)/g,n=[],c;for(;(c=r.exec(t))!==null;)n.push(c.index);if(n.length<2)return t;let I=n[n.length-1],f=n[n.length-2],o=t.substring(f+1,I+1),p=o.length;if(o.trim().slice(0,-1).length<a||p<=0||t.length<p*e)return t;let m=0,s=I+1;t.endsWith(o)&&(s=t.length);let T=-1;for(;;){let i=s-p;if(i<0)break;if(t.substring(i,s)===o)m++,T=i,s=i;else break}return m>=e?t.substring(0,T)+o:t}export{J as cleanRepeatingLastSentence,z as isGeminiGarbage};
1
+ export{a as DeepCitation}from'./chunk-2HINOG74.js';export{a as compressPromptIds,b as decompressPromptIds}from'./chunk-BWQLFMTV.js';export{d as BLANK_VERIFICATION,a as DEFAULT_OUTPUT_IMAGE_FORMAT,b as NOT_FOUND_VERIFICATION_INDEX,c as PENDING_VERIFICATION_INDEX}from'./chunk-BEYJEW3Y.js';export{k as CITATION_X_PADDING,l as CITATION_Y_PADDING,t as deferredCitationToCitation,w as extractVisibleText,f as generateCitationInstanceId,d as generateCitationKey,e as generateVerificationKey,u as getAllCitationsFromDeferredResponse,B as getAllCitationsFromLlmOutput,y as getCitationMarkerIds,q as getCitationPageNumber,z as getCitationStatus,m as getVerificationTextIndicator,C as groupCitationsByAttachmentId,D as groupCitationsByAttachmentIdObject,v as hasDeferredCitations,r as normalizeCitations,A as parseCitation,s as parseDeferredCitationResponse,p as removeLineIdMetadata,o as removePageNumberMetadata,n as replaceCitations,x as replaceDeferredMarkers,a as sha1Hash}from'./chunk-N7FTXSGM.js';export{d as AV_CITATION_PROMPT,j as CITATION_AV_JSON_OUTPUT_FORMAT,f as CITATION_AV_REMINDER,b as CITATION_DATA_END_DELIMITER,a as CITATION_DATA_START_DELIMITER,i as CITATION_JSON_OUTPUT_FORMAT,c as CITATION_PROMPT,e as CITATION_REMINDER,h as wrapCitationPrompt,g as wrapSystemCitationPrompt}from'./chunk-BDSA6VGC.js';import'./chunk-5XGN7UAV.js';var rt=t=>{if(!t)return false;let e=t.trim();if(e.length<64)return false;let a=e?.[0];for(let n=1;n<e.length;n++)if(e[n]!==a)return false;return true};function nt(t){t=t.trim();let e=2,a=10,n=/[.?!](?=\s+|$)/g,r=[],I;for(;(I=n.exec(t))!==null;)r.push(I.index);if(r.length<2)return t;let c=r[r.length-1],C=r[r.length-2],i=t.substring(C+1,c+1),p=i.length;if(i.trim().slice(0,-1).length<a||p<=0||t.length<p*e)return t;let m=0,s=c+1;t.endsWith(i)&&(s=t.length);let T=-1;for(;;){let o=s-p;if(o<0)break;if(t.substring(o,s)===i)m++,T=o,s=o;else break}return m>=e?t.substring(0,T)+i:t}export{nt as cleanRepeatingLastSentence,rt as isGeminiGarbage};//# 
sourceMappingURL=index.js.map
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/parsing/parseWorkAround.ts"],"names":["isGeminiGarbage","content","trimmedContent","firstCharacter","i","cleanRepeatingLastSentence","text","MIN_REPETITIONS","MIN_SENTENCE_CONTENT_LENGTH","sentenceEndRegex","sentenceEndIndices","match","lastTerminatorIndex","secondLastTerminatorIndex","repeatingUnit","unitLength","repetitionsFound","currentCheckEndIndex","firstRepetitionStartIndex","checkStartIndex"],"mappings":"4zCAEO,IAAMA,GAAmBC,CAAAA,EAAoB,CAClD,GAAI,CAACA,EAAS,OAAO,MAAA,CACrB,IAAMC,CAAAA,CAAiBD,EAAQ,IAAA,EAAK,CACpC,GAAIC,CAAAA,CAAe,MAAA,CAAS,GAAuC,OAAO,MAAA,CAE1E,IAAMC,CAAAA,CAAiBD,IAAiB,CAAC,CAAA,CAEzC,QAASE,CAAAA,CAAI,CAAA,CAAGA,EAAIF,CAAAA,CAAe,MAAA,CAAQE,CAAAA,EAAAA,CACzC,GAAIF,EAAeE,CAAC,CAAA,GAAMD,EAAgB,OAAO,MAAA,CAEnD,OAAO,KACT,EAGO,SAASE,EAAAA,CAA2BC,EAAsB,CAC/DA,CAAAA,CAAOA,CAAAA,CAAK,IAAA,GACZ,IAAMC,CAAAA,CAAkB,CAAA,CAClBC,CAAAA,CAA8B,GAE9BC,CAAAA,CAAmB,iBAAA,CACnBC,EAA+B,EAAC,CAClCC,EACJ,KAAA,CAAQA,CAAAA,CAAQF,CAAAA,CAAiB,IAAA,CAAKH,CAAI,CAAA,IAAO,IAAA,EAC/CI,EAAmB,IAAA,CAAKC,CAAAA,CAAM,KAAK,CAAA,CAGrC,GAAID,CAAAA,CAAmB,MAAA,CAAS,EAC9B,OAAOJ,CAAAA,CAGT,IAAMM,CAAAA,CAAsBF,CAAAA,CAAmBA,EAAmB,MAAA,CAAS,CAAC,CAAA,CACtEG,CAAAA,CAA4BH,EAAmBA,CAAAA,CAAmB,MAAA,CAAS,CAAC,CAAA,CAE5EI,EAAgBR,CAAAA,CAAK,SAAA,CAAUO,CAAAA,CAA4B,CAAA,CAAGD,EAAsB,CAAC,CAAA,CACrFG,EAAaD,CAAAA,CAAc,MAAA,CAUjC,GARwBA,CAAAA,CAAc,IAAA,EAAK,CAAE,KAAA,CAAM,EAAG,EAAE,CAAA,CACpC,OAASN,CAAAA,EAGzBO,CAAAA,EAAc,GAIdT,CAAAA,CAAK,MAAA,CAASS,CAAAA,CAAaR,CAAAA,CAC7B,OAAOD,CAAAA,CAGT,IAAIU,EAAmB,CAAA,CACnBC,CAAAA,CAAuBL,EAAsB,CAAA,CAC7CN,CAAAA,CAAK,QAAA,CAASQ,CAAa,IAC7BG,CAAAA,CAAuBX,CAAAA,CAAK,QAG9B,IAAIY,CAAAA,CAA4B,GAEhC,OAAa,CACX,IAAMC,CAAAA,CAAkBF,EAAuBF,CAAAA,CAE/C,GAAII,EAAkB,CAAA,CACpB,MAKF,GAFcb,CAAAA,CAAK,SAAA,CAAUa,CAAAA,CAAiBF,CAAoB,IAEpDH,CAAAA,CACZE,CAAAA,EAAAA,CACAE,EAA4BC,CAAAA,CAC5BF,CAAAA,CAAuBE,OAEvB,KAEJ,CAEA,OAAIH,CAAAA,EAAoBT,EACQD,CAAAA,CAAK,SAAA,CAAU,EAAGY,CAAyB,CAAA,CAClCJ,EAGhCR,CAEX","file":"index.js","sourcesContent":["//flash and flash 
lite get super confused if we ask for a MD table and infinite loop\nconst MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE = 64;\nexport const isGeminiGarbage = (content: string) => {\n if (!content) return false;\n const trimmedContent = content.trim();\n if (trimmedContent.length < MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE) return false;\n\n const firstCharacter = trimmedContent?.[0];\n\n for (let i = 1; i < trimmedContent.length; i++) {\n if (trimmedContent[i] !== firstCharacter) return false;\n }\n return true;\n};\n\n// helps clean up infinite rambling bug output from gemini\nexport function cleanRepeatingLastSentence(text: string): string {\n text = text.trim();\n const MIN_REPETITIONS = 2;\n const MIN_SENTENCE_CONTENT_LENGTH = 10;\n\n const sentenceEndRegex = /[.?!](?=\\s+|$)/g;\n const sentenceEndIndices: number[] = [];\n let match;\n while ((match = sentenceEndRegex.exec(text)) !== null) {\n sentenceEndIndices.push(match.index);\n }\n\n if (sentenceEndIndices.length < 2) {\n return text;\n }\n\n const lastTerminatorIndex = sentenceEndIndices[sentenceEndIndices.length - 1];\n const secondLastTerminatorIndex = sentenceEndIndices[sentenceEndIndices.length - 2];\n\n const repeatingUnit = text.substring(secondLastTerminatorIndex + 1, lastTerminatorIndex + 1);\n const unitLength = repeatingUnit.length;\n\n const sentenceContent = repeatingUnit.trim().slice(0, -1);\n if (sentenceContent.length < MIN_SENTENCE_CONTENT_LENGTH) {\n return text;\n }\n if (unitLength <= 0) {\n return text;\n }\n\n if (text.length < unitLength * MIN_REPETITIONS) {\n return text;\n }\n\n let repetitionsFound = 0;\n let currentCheckEndIndex = lastTerminatorIndex + 1;\n if (text.endsWith(repeatingUnit)) {\n currentCheckEndIndex = text.length;\n }\n\n let firstRepetitionStartIndex = -1;\n\n while (true) {\n const checkStartIndex = currentCheckEndIndex - unitLength;\n\n if (checkStartIndex < 0) {\n break;\n }\n\n const chunk = text.substring(checkStartIndex, currentCheckEndIndex);\n\n if (chunk === 
repeatingUnit) {\n repetitionsFound++;\n firstRepetitionStartIndex = checkStartIndex;\n currentCheckEndIndex = checkStartIndex;\n } else {\n break;\n }\n }\n\n if (repetitionsFound >= MIN_REPETITIONS) {\n const textBeforeRepetitions = text.substring(0, firstRepetitionStartIndex);\n const result = textBeforeRepetitions + repeatingUnit;\n return result;\n } else {\n return text;\n }\n}\n"]}
@@ -1 +1,2 @@
1
- 'use strict';var chunkDHVODVIA_cjs=require('../chunk-DHVODVIA.cjs');require('../chunk-F2MMVEVC.cjs');Object.defineProperty(exports,"AV_CITATION_MARKDOWN_SYNTAX_PROMPT",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.b}});Object.defineProperty(exports,"CITATION_AV_BASED_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.h}});Object.defineProperty(exports,"CITATION_AV_REMINDER",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.d}});Object.defineProperty(exports,"CITATION_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.g}});Object.defineProperty(exports,"CITATION_MARKDOWN_SYNTAX_PROMPT",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.a}});Object.defineProperty(exports,"CITATION_REMINDER",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.c}});Object.defineProperty(exports,"compressPromptIds",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.i}});Object.defineProperty(exports,"decompressPromptIds",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.j}});Object.defineProperty(exports,"wrapCitationPrompt",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.f}});Object.defineProperty(exports,"wrapSystemCitationPrompt",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.e}});
1
+ 'use strict';var chunkDS6SOU4L_cjs=require('../chunk-DS6SOU4L.cjs'),chunkWS4CQVDI_cjs=require('../chunk-WS4CQVDI.cjs');require('../chunk-BYLIBOAU.cjs');Object.defineProperty(exports,"compressPromptIds",{enumerable:true,get:function(){return chunkDS6SOU4L_cjs.a}});Object.defineProperty(exports,"decompressPromptIds",{enumerable:true,get:function(){return chunkDS6SOU4L_cjs.b}});Object.defineProperty(exports,"AV_CITATION_PROMPT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.d}});Object.defineProperty(exports,"CITATION_AV_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.j}});Object.defineProperty(exports,"CITATION_AV_REMINDER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.f}});Object.defineProperty(exports,"CITATION_DATA_END_DELIMITER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.b}});Object.defineProperty(exports,"CITATION_DATA_START_DELIMITER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.a}});Object.defineProperty(exports,"CITATION_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.i}});Object.defineProperty(exports,"CITATION_PROMPT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.c}});Object.defineProperty(exports,"CITATION_REMINDER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.e}});Object.defineProperty(exports,"wrapCitationPrompt",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.h}});Object.defineProperty(exports,"wrapSystemCitationPrompt",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.g}});//# sourceMappingURL=index.cjs.map
2
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"names":[],"mappings":"","file":"index.cjs"}
@@ -1,14 +1,48 @@
1
- declare const CITATION_MARKDOWN_SYNTAX_PROMPT = "\n<citation-instructions priority=\"critical\">\n## REQUIRED: Citation Format\n\n### Scope (Where to use)\nFor every claim, value, or fact in your chat response; you MUST cite the attachment using this exact syntax:\n\n<cite attachment_id='ID' reasoning='why this supports the claim' full_phrase='verbatim quote' key_span='1-3 key words' start_page_key='page_number_N_index_I' line_ids='X-Y' />\n\n### Syntax Rules (MUST follow)\n\n1. **attachment_id**: Use the exact ID from the source document\n2. **reasoning**: Brief explanation of why this citation supports your claim (think first!)\n3. **full_phrase**: Copy text VERBATIM from source. Escape quotes (\\') and newlines (\\n).\n4. **key_span**: The 1-3 most important words or value from full_phrase\n5. **start_page_key**: ONLY use format `page_number_N_index_I` from page tags (e.g., `<page_number_1_index_0>`). Never extract page numbers from document content.\n6. **line_ids**: Inclusive range (e.g., '2-6' or '4'). Infer intermediate lines since only every 5th line is shown.\n\n### Placement Rules\n\n- Place <cite /> inline, typically at the end of a claim\n- One citation per distinct idea, concept, or value (a sentence citing 3 different values needs 3 citations)\n- Do NOT group citations at the end of the document\n- The <cite /> tag is self-closing - never use <cite>...</cite>\n\n### Example Citation 1\n\nThe company reported strong growth<cite attachment_id='abc123' reasoning='directly states revenue growth percentage' full_phrase='Revenue increased 45% year-over-year to $2.3 billion' key_span='increased 45%' start_page_key='page_number_2_index_1' line_ids='12-14' />\n\n### Example Citation 2\n\nThe total amount is $500 USD <cite attachment_id='abc123' reasoning='directly states the total amount' full_phrase='The total amount is $500 USD' key_span='$500 USD' start_page_key='page_number_2_index_1' line_ids='12-14' />\n</citation-instructions>\n\n";
2
- declare const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = "\n<citation-instructions priority=\"critical\">\n## REQUIRED: Audio/Video Citation Format\n\nFor every claim, value, or fact; you MUST cite the attachment using this exact syntax:\n\n<cite attachment_id='ID' reasoning='why this supports the claim' full_phrase='verbatim transcript quote' key_span='1-3 key words' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' />\n\n### Syntax Rules (MUST follow)\n\n1. **attachment_id**: Use the exact ID from the source\n2. **reasoning**: Brief explanation of why this citation supports your claim (think first!)\n3. **full_phrase**: Copy transcript text VERBATIM. Escape quotes (\\') and newlines (\\n).\n4. **key_span**: The 1-3 most important words or value from full_phrase\n5. **timestamps**: Start and end time with milliseconds (e.g., '00:01:23.456-00:01:45.789')\n\n### Placement Rules\n\n- Place <cite /> inline, typically at the end of a claim\n- One citation per distinct idea, concept, or value (a sentence citing 3 different values needs 3 citations)\n- Do NOT group citations at the end of the document\n- The <cite /> tag is self-closing - never use <cite>...</cite>\n\n</citation-instructions>\n";
1
+ /**
2
+ * Citation Prompts
3
+ *
4
+ * This module provides the "Deferred JSON Pattern" for citation output.
5
+ * The LLM uses lightweight markers (e.g., [1], [2]) in the text and outputs
6
+ * a structured JSON block at the end of the response.
7
+ *
8
+ * Benefits:
9
+ * - **Robustness**: JSON.parse handles escaping naturally, avoiding quote-escaping issues
10
+ * - **Streaming Latency**: No mid-sentence pausing for hidden metadata generation
11
+ * - **Token Efficiency**: ~40% reduction in tokens per citation
12
+ */
13
+ /** Start delimiter for the citation data block */
14
+ declare const CITATION_DATA_START_DELIMITER = "<<<CITATION_DATA>>>";
15
+ /** End delimiter for the citation data block */
16
+ declare const CITATION_DATA_END_DELIMITER = "<<<END_CITATION_DATA>>>";
17
+ /**
18
+ * Citation prompt for document-based citations.
19
+ * Uses [N] markers in text with JSON metadata at the end.
20
+ * Citations are grouped by attachment_id to avoid repetition.
21
+ *
22
+ * Shorthand key mapping (optional):
23
+ * - n: id, r: reasoning, f: full_phrase
24
+ * - k: anchor_text, p: page_id, l: line_ids
25
+ */
26
+ declare const CITATION_PROMPT = "\n<citation-instructions priority=\"critical\">\n## REQUIRED: Citation Format\n\n### In-Text Markers\nFor every claim, value, or fact from attachments, place a sequential integer marker like [1], [2], [3] at the end of the claim.\n\n### Citation Data Block\nAt the END of your response, append a citation block. Group citations by attachment_id to avoid repetition.\n\n### Format\n```\n<<<CITATION_DATA>>>\n{\n \"attachment_id_here\": [\n {\"id\": 1, \"reasoning\": \"why\", \"full_phrase\": \"quote\", \"anchor_text\": \"key\", \"page_id\": \"2_1\", \"line_ids\": [12]}\n ]\n}\n<<<END_CITATION_DATA>>>\n```\n\n### Shorthand (Optional)\nTo save tokens: n=id, r=reasoning, f=full_phrase, k=anchor_text, p=page_id, l=line_ids\n\n### JSON Field Rules\n\n1. **Group key**: The attachment_id (exact ID from source document)\n2. **id** (or n): Must match the [N] marker in your text (integer)\n3. **reasoning** (or r): Brief explanation connecting the citation to your claim (think first!)\n4. **full_phrase** (or f): Copy text VERBATIM from source. Use proper JSON escaping for quotes.\n5. **anchor_text** (or k): The 1-3 most important words from full_phrase\n6. **page_id** (or p): Format \"N_I\" where N=page number, I=index (from `<page_number_N_index_I>` tags)\n7. **line_ids** (or l): Array of line numbers. Infer intermediate lines since only every 5th is shown.\n\n### Placement Rules\n\n- Place [N] markers inline, typically at the end of a claim\n- One marker per distinct idea, concept, or value\n- Use sequential numbering starting from [1]\n- The JSON block MUST appear at the very end of your response\n\n### Example Response\n\nThe company reported strong growth [1]. Revenue increased significantly in Q4 [2]. 
The competitor also grew [3].\n\n<<<CITATION_DATA>>>\n{\n \"abc123\": [\n {\"id\": 1, \"reasoning\": \"directly states growth metrics\", \"full_phrase\": \"The company achieved 45% year-over-year growth\", \"anchor_text\": \"45% year-over-year growth\", \"page_id\": \"2_1\", \"line_ids\": [12, 13]},\n {\"id\": 2, \"reasoning\": \"states Q4 revenue figure\", \"full_phrase\": \"Q4 revenue reached $2.3 billion, up from $1.8 billion\", \"anchor_text\": \"$2.3 billion\", \"page_id\": \"3_2\", \"line_ids\": [5, 6, 7]}\n ],\n \"def456\": [\n {\"id\": 3, \"reasoning\": \"competitor data\", \"full_phrase\": \"Competitor X reported 20% growth\", \"anchor_text\": \"20% growth\", \"page_id\": \"1_0\", \"line_ids\": [8]}\n ]\n}\n<<<END_CITATION_DATA>>>\n</citation-instructions>\n\n";
27
+ /**
28
+ * Citation prompt for audio/video content.
29
+ * Uses timestamps instead of page/line references.
30
+ * Citations are grouped by attachment_id to avoid repetition.
31
+ *
32
+ * Shorthand key mapping (optional):
33
+ * - n: id, r: reasoning, f: full_phrase
34
+ * - k: anchor_text, t: timestamps (with s: start_time, e: end_time)
35
+ */
36
+ declare const AV_CITATION_PROMPT = "\n<citation-instructions priority=\"critical\">\n## REQUIRED: Audio/Video Citation Format\n\n### In-Text Markers\nFor every claim, value, or fact from media content, place a sequential integer marker like [1], [2], [3] at the end of the claim.\n\n### Citation Data Block\nAt the END of your response, append a citation block. Group citations by attachment_id to avoid repetition.\n\n### Format\n```\n<<<CITATION_DATA>>>\n{\n \"attachment_id_here\": [\n {\"id\": 1, \"reasoning\": \"why\", \"full_phrase\": \"quote\", \"anchor_text\": \"key\", \"timestamps\": {\"start_time\": \"HH:MM:SS.SSS\", \"end_time\": \"HH:MM:SS.SSS\"}}\n ]\n}\n<<<END_CITATION_DATA>>>\n```\n\n### Shorthand (Optional)\nTo save tokens: n=id, r=reasoning, f=full_phrase, k=anchor_text, t=timestamps (with s=start_time, e=end_time)\n\n### JSON Field Rules\n\n1. **Group key**: The attachment_id (exact ID from source media)\n2. **id** (or n): Must match the [N] marker in your text (integer)\n3. **reasoning** (or r): Brief explanation connecting the citation to your claim (think first!)\n4. **full_phrase** (or f): Copy transcript text VERBATIM. Use proper JSON escaping.\n5. **anchor_text** (or k): The 1-3 most important words from full_phrase\n6. **timestamps** (or t): Object with start_time/s and end_time/e in HH:MM:SS.SSS format\n\n### Placement Rules\n\n- Place [N] markers inline, typically at the end of a claim\n- One marker per distinct idea, concept, or value\n- Use sequential numbering starting from [1]\n- The JSON block MUST appear at the very end of your response\n\n### Example Response\n\nThe speaker discussed exercise benefits [1]. 
They recommended specific techniques [2].\n\n<<<CITATION_DATA>>>\n{\n \"video123\": [\n {\"id\": 1, \"reasoning\": \"speaker directly states health benefits\", \"full_phrase\": \"Regular exercise improves cardiovascular health by 30%\", \"anchor_text\": \"cardiovascular health\", \"timestamps\": {\"start_time\": \"00:05:23.000\", \"end_time\": \"00:05:45.500\"}},\n {\"id\": 2, \"reasoning\": \"demonstrates proper form\", \"full_phrase\": \"Keep your back straight and engage your core\", \"anchor_text\": \"engage your core\", \"timestamps\": {\"start_time\": \"00:12:10.200\", \"end_time\": \"00:12:25.800\"}}\n ]\n}\n<<<END_CITATION_DATA>>>\n</citation-instructions>\n\n";
3
37
  /**
4
38
  * A brief reminder to reinforce citation requirements in user messages.
5
39
  * Use this when you want to add emphasis without repeating full instructions.
6
40
  */
7
- declare const CITATION_REMINDER = "<citation-reminder>STOP and CHECK: Did you use <cite /> tags with all required attributes for every claim, value, or fact from attachments?</citation-reminder>";
41
+ declare const CITATION_REMINDER = "<citation-reminder>STOP and CHECK: Did you use [N] markers for every claim and include the <<<CITATION_DATA>>> JSON block at the end?</citation-reminder>";
8
42
  /**
9
43
  * Audio/video version of the citation reminder.
10
44
  */
11
- declare const CITATION_AV_REMINDER = "<citation-reminder>STOP and CHECK: Did you use <cite /> tags with timestamps for every claim, value, or fact from source media?</citation-reminder>";
45
+ declare const CITATION_AV_REMINDER = "<citation-reminder>STOP and CHECK: Did you use [N] markers for every claim and include the <<<CITATION_DATA>>> JSON block with timestamps at the end?</citation-reminder>";
12
46
  interface WrapSystemPromptOptions {
13
47
  /** The original system prompt to wrap with citation instructions */
14
48
  systemPrompt: string;
@@ -48,12 +82,12 @@ interface WrapCitationPromptResult {
48
82
  *
49
83
  * ### 2. Chain-of-Thought (CoT) Attribute Ordering
50
84
  * The citation attributes are ordered to encourage the model to "think first":
51
- * `attachment_id` → `reasoning` → `full_phrase` → `key_span` → `start_page_key` → `line_ids`
85
+ * `attachment_id` → `reasoning` → `full_phrase` → `anchor_text` → `page_id` → `line_ids`
52
86
  *
53
87
  * By placing `reasoning` early, the model must articulate WHY it's citing before
54
- * specifying WHAT it's citing. Then `full_phrase` comes before `key_span` so the model
55
- * first produces the complete verbatim quote, then extracts the key span from it,
56
- * ensuring `key_span` is always a valid substring of `full_phrase`.
88
+ * specifying WHAT it's citing. Then `full_phrase` comes before `anchor_text` so the model
89
+ * first produces the complete verbatim quote, then extracts the anchor text from it,
90
+ * ensuring `anchor_text` is always a valid substring of `full_phrase`.
57
91
  *
58
92
  * ### Why Not Just Append?
59
93
  * In large system prompts, appended instructions can get "lost" in the middle of the
@@ -107,72 +141,160 @@ declare function wrapSystemCitationPrompt(options: WrapSystemPromptOptions): str
107
141
  * ```
108
142
  */
109
143
  declare function wrapCitationPrompt(options: WrapCitationPromptOptions): WrapCitationPromptResult;
144
+ /**
145
+ * JSON schema for citation data (for structured output LLMs).
146
+ * This can be used with OpenAI's response_format or similar features.
147
+ */
110
148
  declare const CITATION_JSON_OUTPUT_FORMAT: {
111
- type: string;
112
- properties: {
113
- attachmentId: {
114
- type: string;
149
+ readonly type: "object";
150
+ readonly properties: {
151
+ readonly id: {
152
+ readonly type: "integer";
153
+ readonly description: "Citation marker number matching [N] in text";
154
+ };
155
+ readonly attachment_id: {
156
+ readonly type: "string";
157
+ readonly description: "Exact attachment ID from source document";
115
158
  };
116
- reasoning: {
117
- type: string;
118
- description: string;
159
+ readonly reasoning: {
160
+ readonly type: "string";
161
+ readonly description: "Brief explanation of why this supports the claim";
119
162
  };
120
- fullPhrase: {
121
- type: string;
122
- description: string;
163
+ readonly full_phrase: {
164
+ readonly type: "string";
165
+ readonly description: "Verbatim quote from source document";
123
166
  };
124
- keySpan: {
125
- type: string;
126
- description: string;
167
+ readonly anchor_text: {
168
+ readonly type: "string";
169
+ readonly description: "1-3 key words from full_phrase";
127
170
  };
128
- startPageKey: {
129
- type: string;
130
- description: string;
171
+ readonly page_id: {
172
+ readonly type: "string";
173
+ readonly description: "Page ID in format 'N_I' (pageNumber_index)";
131
174
  };
132
- lineIds: {
133
- type: string;
134
- items: {
135
- type: string;
175
+ readonly line_ids: {
176
+ readonly type: "array";
177
+ readonly items: {
178
+ readonly type: "integer";
136
179
  };
137
- description: string;
180
+ readonly description: "Array of line numbers for the citation";
138
181
  };
139
182
  };
140
- required: string[];
183
+ readonly required: readonly ["id", "attachment_id", "full_phrase", "anchor_text"];
141
184
  };
142
- declare const CITATION_AV_BASED_JSON_OUTPUT_FORMAT: {
143
- type: string;
144
- properties: {
145
- attachmentId: {
146
- type: string;
185
+ /**
186
+ * JSON schema for AV citation data.
187
+ */
188
+ declare const CITATION_AV_JSON_OUTPUT_FORMAT: {
189
+ readonly type: "object";
190
+ readonly properties: {
191
+ readonly id: {
192
+ readonly type: "integer";
193
+ readonly description: "Citation marker number matching [N] in text";
194
+ };
195
+ readonly attachment_id: {
196
+ readonly type: "string";
197
+ readonly description: "Exact attachment ID from source media";
147
198
  };
148
- startPageKey: {
149
- type: string;
150
- description: string;
199
+ readonly reasoning: {
200
+ readonly type: "string";
201
+ readonly description: "Brief explanation of why this supports the claim";
151
202
  };
152
- fullPhrase: {
153
- type: string;
154
- description: string;
203
+ readonly full_phrase: {
204
+ readonly type: "string";
205
+ readonly description: "Verbatim transcript quote";
155
206
  };
156
- keySpan: {
157
- type: string;
158
- description: string;
207
+ readonly anchor_text: {
208
+ readonly type: "string";
209
+ readonly description: "1-3 key words from full_phrase";
159
210
  };
160
- timestamps: {
161
- type: string;
162
- properties: {
163
- startTime: {
164
- type: string;
211
+ readonly timestamps: {
212
+ readonly type: "object";
213
+ readonly properties: {
214
+ readonly start_time: {
215
+ readonly type: "string";
216
+ readonly description: "Start time in HH:MM:SS.SSS format";
165
217
  };
166
- endTime: {
167
- type: string;
218
+ readonly end_time: {
219
+ readonly type: "string";
220
+ readonly description: "End time in HH:MM:SS.SSS format";
168
221
  };
169
222
  };
170
- required: string[];
171
- description: string;
223
+ readonly required: readonly ["start_time", "end_time"];
172
224
  };
173
225
  };
174
- required: string[];
226
+ readonly required: readonly ["id", "attachment_id", "full_phrase", "anchor_text", "timestamps"];
175
227
  };
228
+ /**
229
+ * Compact citation data format from LLM output.
230
+ * Uses single-character keys for token efficiency.
231
+ */
232
+ interface CompactCitationData {
233
+ /** Citation number (n) - matches [N] marker */
234
+ n: number;
235
+ /** Attachment ID (a) */
236
+ a?: string;
237
+ /** Reasoning (r) */
238
+ r?: string;
239
+ /** Full phrase (f) - verbatim quote */
240
+ f?: string;
241
+ /** Key phrase (k) - anchor text */
242
+ k?: string;
243
+ /** Page ID (p) - format "N_I" */
244
+ p?: string;
245
+ /** Line IDs (l) */
246
+ l?: number[];
247
+ /** Timestamps (t) for AV citations */
248
+ t?: {
249
+ /** Start time (s) */
250
+ s?: string;
251
+ /** End time (e) */
252
+ e?: string;
253
+ };
254
+ }
255
+ /**
256
+ * Interface for citation data from JSON block.
257
+ * This is the normalized/expanded format used internally after parsing.
258
+ * The parser expands compact keys (n,a,r,f,k,p,l,t) to these full names.
259
+ */
260
+ interface CitationData {
261
+ /** Citation marker number (matches [N] in text). Compact key: n */
262
+ id: number;
263
+ /** Attachment ID from source document. Compact key: a */
264
+ attachment_id?: string;
265
+ /** Reasoning for the citation. Compact key: r */
266
+ reasoning?: string;
267
+ /** Verbatim quote from source. Compact key: f */
268
+ full_phrase?: string;
269
+ /** Anchor text (1-3 words). Compact key: k */
270
+ anchor_text?: string;
271
+ /** Page ID in format "N_I" or legacy "page_number_N_index_I". Compact key: p */
272
+ page_id?: string;
273
+ /** Line IDs array. Compact key: l */
274
+ line_ids?: number[];
275
+ /** Timestamps for AV citations. Compact key: t */
276
+ timestamps?: {
277
+ /** Start time. Compact key: s */
278
+ start_time?: string;
279
+ /** End time. Compact key: e */
280
+ end_time?: string;
281
+ };
282
+ }
283
+ /**
284
+ * Result of parsing a citation response.
285
+ */
286
+ interface ParsedCitationResponse {
287
+ /** The clean text meant for display (content before the delimiter) */
288
+ visibleText: string;
289
+ /** The structured citation data from the JSON block */
290
+ citations: CitationData[];
291
+ /** Helper map for O(1) lookups by ID */
292
+ citationMap: Map<number, CitationData>;
293
+ /** Whether parsing was successful */
294
+ success: boolean;
295
+ /** Error message if parsing failed */
296
+ error?: string;
297
+ }
176
298
 
177
299
  interface CompressedResult<T> {
178
300
  compressed: T;
@@ -193,4 +315,4 @@ declare function compressPromptIds<T>(obj: T, ids: string[] | undefined): Compre
193
315
  */
194
316
  declare function decompressPromptIds<T>(compressed: T | string, prefixMap: Record<string, string>): T | string;
195
317
 
196
- export { AV_CITATION_MARKDOWN_SYNTAX_PROMPT, CITATION_AV_BASED_JSON_OUTPUT_FORMAT, CITATION_AV_REMINDER, CITATION_JSON_OUTPUT_FORMAT, CITATION_MARKDOWN_SYNTAX_PROMPT, CITATION_REMINDER, type CompressedResult, type WrapCitationPromptOptions, type WrapCitationPromptResult, type WrapSystemPromptOptions, compressPromptIds, decompressPromptIds, wrapCitationPrompt, wrapSystemCitationPrompt };
318
+ export { AV_CITATION_PROMPT, CITATION_AV_JSON_OUTPUT_FORMAT, CITATION_AV_REMINDER, CITATION_DATA_END_DELIMITER, CITATION_DATA_START_DELIMITER, CITATION_JSON_OUTPUT_FORMAT, CITATION_PROMPT, CITATION_REMINDER, type CitationData, type CompactCitationData, type CompressedResult, type ParsedCitationResponse, type WrapCitationPromptOptions, type WrapCitationPromptResult, type WrapSystemPromptOptions, compressPromptIds, decompressPromptIds, wrapCitationPrompt, wrapSystemCitationPrompt };