@deepcitation/deepcitation-js 1.1.49 → 1.1.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +73 -157
  2. package/lib/chunk-2HINOG74.js +3 -0
  3. package/lib/chunk-2HINOG74.js.map +1 -0
  4. package/lib/chunk-4UWAUWYL.cjs +3 -0
  5. package/lib/chunk-4UWAUWYL.cjs.map +1 -0
  6. package/lib/chunk-5XGN7UAV.js +2 -0
  7. package/lib/chunk-5XGN7UAV.js.map +1 -0
  8. package/lib/chunk-7TORYXU4.cjs +3 -0
  9. package/lib/chunk-7TORYXU4.cjs.map +1 -0
  10. package/lib/chunk-BDSA6VGC.js +125 -0
  11. package/lib/chunk-BDSA6VGC.js.map +1 -0
  12. package/lib/chunk-BEYJEW3Y.js +2 -0
  13. package/lib/chunk-BEYJEW3Y.js.map +1 -0
  14. package/lib/chunk-BWQLFMTV.js +2 -0
  15. package/lib/chunk-BWQLFMTV.js.map +1 -0
  16. package/lib/{chunk-F2MMVEVC.cjs → chunk-BYLIBOAU.cjs} +2 -1
  17. package/lib/chunk-BYLIBOAU.cjs.map +1 -0
  18. package/lib/chunk-DS6SOU4L.cjs +2 -0
  19. package/lib/chunk-DS6SOU4L.cjs.map +1 -0
  20. package/lib/{chunk-UUR2SQKU.cjs → chunk-HL3AXCDL.cjs} +2 -1
  21. package/lib/chunk-HL3AXCDL.cjs.map +1 -0
  22. package/lib/chunk-N7FTXSGM.js +3 -0
  23. package/lib/chunk-N7FTXSGM.js.map +1 -0
  24. package/lib/chunk-WS4CQVDI.cjs +125 -0
  25. package/lib/chunk-WS4CQVDI.cjs.map +1 -0
  26. package/lib/client/index.cjs +2 -1
  27. package/lib/client/index.cjs.map +1 -0
  28. package/lib/client/index.d.cts +58 -13
  29. package/lib/client/index.d.ts +58 -13
  30. package/lib/client/index.js +2 -1
  31. package/lib/client/index.js.map +1 -0
  32. package/lib/{index-fvVBZYVK.d.ts → index-BHjI8Bh1.d.cts} +61 -22
  33. package/lib/{index-fvVBZYVK.d.cts → index-BHjI8Bh1.d.ts} +61 -22
  34. package/lib/index.cjs +2 -1
  35. package/lib/index.cjs.map +1 -0
  36. package/lib/index.d.cts +137 -16
  37. package/lib/index.d.ts +137 -16
  38. package/lib/index.js +2 -1
  39. package/lib/index.js.map +1 -0
  40. package/lib/prompts/index.cjs +2 -1
  41. package/lib/prompts/index.cjs.map +1 -0
  42. package/lib/prompts/index.d.cts +177 -55
  43. package/lib/prompts/index.d.ts +177 -55
  44. package/lib/prompts/index.js +2 -1
  45. package/lib/prompts/index.js.map +1 -0
  46. package/lib/react/index.cjs +8 -5
  47. package/lib/react/index.cjs.map +1 -0
  48. package/lib/react/index.d.cts +348 -21
  49. package/lib/react/index.d.ts +348 -21
  50. package/lib/react/index.js +8 -5
  51. package/lib/react/index.js.map +1 -0
  52. package/lib/styles.css +1 -1
  53. package/lib/types/index.cjs +2 -1
  54. package/lib/types/index.cjs.map +1 -0
  55. package/lib/types/index.d.cts +1 -1
  56. package/lib/types/index.d.ts +1 -1
  57. package/lib/types/index.js +2 -1
  58. package/lib/types/index.js.map +1 -0
  59. package/lib/{utils-q6anRKO_.d.cts → utils-CCi9_JTv.d.cts} +5 -5
  60. package/lib/{utils-DuacFTtu.d.ts → utils-CoSP-i76.d.ts} +5 -5
  61. package/package.json +165 -152
  62. package/src/tailwind.css +5 -5
  63. package/lib/chunk-2PRW5PVT.cjs +0 -2
  64. package/lib/chunk-3XSZLKJW.js +0 -2
  65. package/lib/chunk-D2TKEF6D.cjs +0 -2
  66. package/lib/chunk-DHVODVIA.cjs +0 -71
  67. package/lib/chunk-HRCAI3NV.js +0 -1
  68. package/lib/chunk-ND6LFDGK.js +0 -71
  69. package/lib/chunk-O2XFH626.js +0 -1
  70. package/lib/chunk-PKXMJNRX.js +0 -2
@@ -1,4 +1,4 @@
1
- import { C as Citation, V as Verification } from '../index-fvVBZYVK.js';
1
+ import { C as Citation, V as Verification } from '../index-BHjI8Bh1.js';
2
2
 
3
3
  /**
4
4
  * Configuration options for the DeepCitation client
@@ -47,6 +47,30 @@ interface UploadFileOptions {
47
47
  /** Optional custom filename (uses File.name if not provided) */
48
48
  filename?: string;
49
49
  }
50
+ /**
51
+ * Options for preparing a URL for citation verification.
52
+ * URLs and Office files take ~30s to process vs. <1s for images/PDFs.
53
+ */
54
+ interface PrepareUrlOptions {
55
+ /** The URL to convert and prepare for citation verification */
56
+ url: string;
57
+ /** Optional custom attachment ID to use instead of auto-generated one */
58
+ attachmentId?: string;
59
+ /** Optional custom filename for the converted document */
60
+ filename?: string;
61
+ /**
62
+ * UNSAFE: Skip PDF conversion and extract text directly from HTML.
63
+ *
64
+ * This is much faster (<1s vs ~30s) but VULNERABLE to:
65
+ * - Hidden text (CSS display:none, tiny fonts, etc.)
66
+ * - Fine print that users can't see
67
+ * - Prompt injection attacks embedded in the page
68
+ *
69
+ * Only use this for trusted URLs where you control the content.
70
+ * Default: false (uses safe PDF conversion)
71
+ */
72
+ unsafeFastUrlOutput?: boolean;
73
+ }
50
74
  /**
51
75
  * Response from verifying citations
52
76
  */
@@ -93,12 +117,6 @@ interface FileDataPart {
93
117
  interface PrepareFilesResult {
94
118
  /** Array of file references for verification (includes deepTextPromptPortion for each file) */
95
119
  fileDataParts: FileDataPart[];
96
- /**
97
- * Array of formatted text content for LLM prompts (with page markers and line IDs).
98
- * @deprecated Use fileDataParts[].deepTextPromptPortion instead for single source of truth.
99
- * This is kept for backwards compatibility but will be removed in a future version.
100
- */
101
- deepTextPromptPortion: string[];
102
120
  }
103
121
  /**
104
122
  * Input for verify method
@@ -111,10 +129,6 @@ interface VerifyInput {
111
129
  /** Output image format for verification screenshots */
112
130
  outputImageFormat?: "jpeg" | "png" | "avif";
113
131
  }
114
- /**
115
- * @deprecated Use VerifyInput instead. This alias is kept for backwards compatibility.
116
- */
117
- type verifyAll = VerifyInput;
118
132
  /**
119
133
  * Input for convertFile - convert URL or Office file to PDF
120
134
  */
@@ -271,6 +285,37 @@ declare class DeepCitation {
271
285
  * ```
272
286
  */
273
287
  prepareConvertedFile(options: PrepareConvertedFileOptions): Promise<UploadFileResponse>;
288
+ /**
289
+ * Prepare a URL for citation verification.
290
+ *
291
+ * This is a convenience method that handles URL conversion and text extraction
292
+ * in a single call. The API will convert the URL to PDF and extract text content
293
+ * for citation verification.
294
+ *
295
+ * Note: URLs and Office files take ~30s to process vs. <1s for images/PDFs.
296
+ *
297
+ * @param options - URL and optional settings
298
+ * @returns Upload response with attachmentId and extracted text for LLM prompts
299
+ *
300
+ * @example
301
+ * ```typescript
302
+ * // Prepare a URL for citation verification
303
+ * const { attachmentId, deepTextPromptPortion } = await deepcitation.prepareUrl({
304
+ * url: "https://example.com/article"
305
+ * });
306
+ *
307
+ * // Use deepTextPromptPortion in your LLM prompt
308
+ * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
309
+ * systemPrompt,
310
+ * userPrompt: question,
311
+ * deepTextPromptPortion,
312
+ * });
313
+ *
314
+ * // Verify citations
315
+ * const verified = await deepcitation.verifyAttachment(attachmentId, citations);
316
+ * ```
317
+ */
318
+ prepareUrl(options: PrepareUrlOptions): Promise<UploadFileResponse>;
274
319
  /**
275
320
  * Upload multiple files for citation verification and get structured content.
276
321
  * This is the recommended way to prepare files for LLM prompts.
@@ -293,7 +338,7 @@ declare class DeepCitation {
293
338
  * });
294
339
  *
295
340
  * // Use fileDataParts later for verification
296
- * const result = await deepcitation.verifyAll({ llmOutput, fileDataParts });
341
+ * const result = await deepcitation.verify({ llmOutput, fileDataParts });
297
342
  * ```
298
343
  */
299
344
  prepareFiles(files: FileInput[]): Promise<PrepareFilesResult>;
@@ -355,4 +400,4 @@ declare class DeepCitation {
355
400
  }): Promise<VerifyCitationsResponse>;
356
401
  }
357
402
 
358
- export { type CitationInput, type ConvertFileInput, type ConvertFileResponse, DeepCitation, type DeepCitationConfig, type FileDataPart, type FileInput, type PrepareConvertedFileOptions, type PrepareFilesResult, type UploadFileOptions, type UploadFileResponse, type VerifyCitationsOptions, type VerifyCitationsResponse, type VerifyInput, type verifyAll };
403
+ export { type CitationInput, type ConvertFileInput, type ConvertFileResponse, DeepCitation, type DeepCitationConfig, type FileDataPart, type FileInput, type PrepareConvertedFileOptions, type PrepareFilesResult, type PrepareUrlOptions, type UploadFileOptions, type UploadFileResponse, type VerifyCitationsOptions, type VerifyCitationsResponse, type VerifyInput };
@@ -1 +1,2 @@
1
- export{a as DeepCitation}from'../chunk-3XSZLKJW.js';import'../chunk-PKXMJNRX.js';import'../chunk-O2XFH626.js';
1
+ export{a as DeepCitation}from'../chunk-2HINOG74.js';import'../chunk-N7FTXSGM.js';import'../chunk-BDSA6VGC.js';import'../chunk-5XGN7UAV.js';//# sourceMappingURL=index.js.map
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"names":[],"mappings":"","file":"index.js"}
@@ -10,23 +10,56 @@ interface ScreenBox extends IVertex {
10
10
  height: number;
11
11
  }
12
12
 
13
- type SearchStatus = "loading" | "pending" | "not_found" | "partial_text_found" | "found" | "found_key_span_only" | "found_phrase_missed_value" | "found_on_other_page" | "found_on_other_line" | "first_word_found" | "timestamp_wip" | "skipped";
14
- type SearchMethod = "exact_line_match" | "line_with_buffer" | "current_page" | "keyspan_fallback" | "adjacent_pages" | "expanded_window" | "regex_search" | "bm25_search" | "fuzzy_regex" | "first_word_fallback";
13
+ type SearchStatus = "loading" | "pending" | "not_found" | "partial_text_found" | "found" | "found_anchor_text_only" | "found_phrase_missed_anchor_text" | "found_on_other_page" | "found_on_other_line" | "first_word_found" | "timestamp_wip" | "skipped";
14
+ type SearchMethod = "exact_line_match" | "line_with_buffer" | "current_page" | "anchor_text_fallback" | "adjacent_pages" | "expanded_window" | "regex_search" | "first_word_fallback";
15
+ /**
16
+ * Indicates which variation of the citation was matched.
17
+ * Trust decreases as we fall back from fullPhrase to anchorText to partial matches.
18
+ *
19
+ * HIGH TRUST (green indicator):
20
+ * - exact_full_phrase: Exact match on the full phrase
21
+ * - normalized_full_phrase: Full phrase matched with whitespace/case normalization
22
+ *
23
+ * MEDIUM TRUST (green indicator, shows context in popover):
24
+ * - exact_anchor_text: anchorText matched exactly, but fullPhrase was not found
25
+ * - normalized_anchor_text: anchorText matched with normalization
26
+ *
27
+ * LOW TRUST (amber indicator):
28
+ * - partial_full_phrase: Only part of fullPhrase matched (tables, columns, line breaks)
29
+ * - partial_anchor_text: Only part of anchorText matched
30
+ * - first_word_only: Only first word matched (lowest trust)
31
+ */
32
+ type MatchedVariation = "exact_full_phrase" | "normalized_full_phrase" | "exact_anchor_text" | "normalized_anchor_text" | "partial_full_phrase" | "partial_anchor_text" | "first_word_only";
15
33
  interface SearchAttempt {
16
34
  method: SearchMethod;
17
35
  success: boolean;
18
- searchPhrases: string[];
36
+ /** The primary phrase searched for */
37
+ searchPhrase: string;
38
+ /** Additional variations tried (e.g., ["$4.89", "4.89"]) */
39
+ searchVariations?: string[];
40
+ /** What searchPhrase contains: "full_phrase" or "anchor_text" */
41
+ searchPhraseType?: "full_phrase" | "anchor_text";
19
42
  pageSearched?: number;
20
- matchedPhrases?: string[];
21
- matchedVariation?: string;
22
- phraseVariations?: string[];
23
- matchQuality?: string;
24
- isPartialMatch?: boolean;
25
- matchScore?: number;
26
- matchSnippet?: string;
27
- notes?: string;
28
- startTime?: number;
29
- endTime?: number;
43
+ /** Line ID(s) searched within the page */
44
+ lineSearched?: number | number[];
45
+ /** Search scope: specific line, entire page, or whole document */
46
+ searchScope?: "line" | "page" | "document";
47
+ /** Where we expected to find the match */
48
+ expectedLocation?: {
49
+ page: number;
50
+ line?: number;
51
+ };
52
+ /** Where we actually found the match (if success: true) */
53
+ foundLocation?: {
54
+ page: number;
55
+ line?: number;
56
+ };
57
+ /** Which variation matched + trust level (only if success: true) */
58
+ matchedVariation?: MatchedVariation;
59
+ /** The actual text found in document (may exist even if success: false, for rejected matches) */
60
+ matchedText?: string;
61
+ /** e.g., "not found on expected page (2)" */
62
+ note?: string;
30
63
  durationMs?: number;
31
64
  }
32
65
 
@@ -56,12 +89,18 @@ interface Verification {
56
89
  endTime?: string;
57
90
  } | null;
58
91
  verifiedFullPhrase?: string | null;
59
- verifiedKeySpan?: string | null;
92
+ verifiedAnchorText?: string | null;
60
93
  verifiedMatchSnippet?: string | null;
61
94
  hitIndexWithinPage?: number | null;
62
95
  phraseMatchDeepItem?: DeepTextItem;
63
- keySpanMatchDeepItem?: DeepTextItem;
96
+ /** Multiple boxes for anchorText highlighting when the anchorText spans multiple PDF items/words */
97
+ anchorTextMatchDeepItems?: DeepTextItem[];
64
98
  verificationImageBase64?: string | null;
99
+ /** Dimensions of the verification image (for coordinate mapping) */
100
+ verificationImageDimensions?: {
101
+ width: number;
102
+ height: number;
103
+ } | null;
65
104
  verifiedAt?: Date;
66
105
  /** The URL that was verified (from Citation.url when type: "url") */
67
106
  verifiedUrl?: string | null;
@@ -138,7 +177,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
138
177
  *
139
178
  * Common fields (used by both types):
140
179
  * - `fullPhrase`: The full context/excerpt containing the cited information
141
- * - `keySpan`: The specific key phrase being cited (must be substring of fullPhrase)
180
+ * - `anchorText`: The specific key phrase being cited (must be substring of fullPhrase)
142
181
  * - `citationNumber`: Citation number for display (e.g., [1], [2])
143
182
  *
144
183
  * @example Document citation
@@ -149,7 +188,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
149
188
  * pageNumber: 5,
150
189
  * lineIds: [12, 13],
151
190
  * fullPhrase: "Revenue increased by 15% in Q4.",
152
- * keySpan: "increased by 15%",
191
+ * anchorText: "increased by 15%",
153
192
  * citationNumber: 1,
154
193
  * };
155
194
  * ```
@@ -162,7 +201,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
162
201
  * domain: "example.com",
163
202
  * title: "Q4 Financial Report",
164
203
  * fullPhrase: "The TGU transitions require control, not brute strength.",
165
- * keySpan: "require control, not brute strength",
204
+ * anchorText: "require control, not brute strength",
166
205
  * citationNumber: 1,
167
206
  * };
168
207
  * ```
@@ -177,7 +216,7 @@ interface Citation {
177
216
  /** The full context/excerpt containing the cited information */
178
217
  fullPhrase?: string | null;
179
218
  /** The specific key phrase being cited (should be substring of fullPhrase) */
180
- keySpan?: string | null;
219
+ anchorText?: string | null;
181
220
  /** Citation number for display (e.g., [1], [2], [3]) */
182
221
  citationNumber?: number;
183
222
  /** Reasoning for why this citation was included */
@@ -190,8 +229,8 @@ interface Citation {
190
229
  pageNumber?: number | null;
191
230
  /** Line IDs within the page */
192
231
  lineIds?: number[] | null;
193
- /** Start page key for multi-page citations */
194
- startPageKey?: string | null;
232
+ /** Start page ID for multi-page citations */
233
+ startPageId?: string | null;
195
234
  /** Selection box coordinates in the document */
196
235
  selection?: ScreenBox | null;
197
236
  /** The source URL */
@@ -259,4 +298,4 @@ interface SourceMeta {
259
298
  accessedAt?: Date | string;
260
299
  }
261
300
 
262
- export { BLANK_VERIFICATION as B, type Citation as C, DEFAULT_OUTPUT_IMAGE_FORMAT as D, type IVertex as I, NOT_FOUND_VERIFICATION_INDEX as N, type OutputImageFormat as O, PENDING_VERIFICATION_INDEX as P, type SourceType as S, type UrlAccessStatus as U, type Verification as V, type CitationStatus as a, type CitationType as b, type VerifyCitationRequest as c, type VerifyCitationResponse as d, type SourceMeta as e, type ContentMatchStatus as f, type SearchStatus as g, type SearchMethod as h, type SearchAttempt as i, type ScreenBox as j, type DeepTextItem as k };
301
+ export { BLANK_VERIFICATION as B, type Citation as C, DEFAULT_OUTPUT_IMAGE_FORMAT as D, type IVertex as I, type MatchedVariation as M, NOT_FOUND_VERIFICATION_INDEX as N, type OutputImageFormat as O, PENDING_VERIFICATION_INDEX as P, type ScreenBox as S, type UrlAccessStatus as U, type Verification as V, type CitationStatus as a, type CitationType as b, type ContentMatchStatus as c, type DeepTextItem as d, type SearchAttempt as e, type SearchMethod as f, type SearchStatus as g, type SourceMeta as h, type SourceType as i, type VerifyCitationRequest as j, type VerifyCitationResponse as k };
@@ -10,23 +10,56 @@ interface ScreenBox extends IVertex {
10
10
  height: number;
11
11
  }
12
12
 
13
- type SearchStatus = "loading" | "pending" | "not_found" | "partial_text_found" | "found" | "found_key_span_only" | "found_phrase_missed_value" | "found_on_other_page" | "found_on_other_line" | "first_word_found" | "timestamp_wip" | "skipped";
14
- type SearchMethod = "exact_line_match" | "line_with_buffer" | "current_page" | "keyspan_fallback" | "adjacent_pages" | "expanded_window" | "regex_search" | "bm25_search" | "fuzzy_regex" | "first_word_fallback";
13
+ type SearchStatus = "loading" | "pending" | "not_found" | "partial_text_found" | "found" | "found_anchor_text_only" | "found_phrase_missed_anchor_text" | "found_on_other_page" | "found_on_other_line" | "first_word_found" | "timestamp_wip" | "skipped";
14
+ type SearchMethod = "exact_line_match" | "line_with_buffer" | "current_page" | "anchor_text_fallback" | "adjacent_pages" | "expanded_window" | "regex_search" | "first_word_fallback";
15
+ /**
16
+ * Indicates which variation of the citation was matched.
17
+ * Trust decreases as we fall back from fullPhrase to anchorText to partial matches.
18
+ *
19
+ * HIGH TRUST (green indicator):
20
+ * - exact_full_phrase: Exact match on the full phrase
21
+ * - normalized_full_phrase: Full phrase matched with whitespace/case normalization
22
+ *
23
+ * MEDIUM TRUST (green indicator, shows context in popover):
24
+ * - exact_anchor_text: anchorText matched exactly, but fullPhrase was not found
25
+ * - normalized_anchor_text: anchorText matched with normalization
26
+ *
27
+ * LOW TRUST (amber indicator):
28
+ * - partial_full_phrase: Only part of fullPhrase matched (tables, columns, line breaks)
29
+ * - partial_anchor_text: Only part of anchorText matched
30
+ * - first_word_only: Only first word matched (lowest trust)
31
+ */
32
+ type MatchedVariation = "exact_full_phrase" | "normalized_full_phrase" | "exact_anchor_text" | "normalized_anchor_text" | "partial_full_phrase" | "partial_anchor_text" | "first_word_only";
15
33
  interface SearchAttempt {
16
34
  method: SearchMethod;
17
35
  success: boolean;
18
- searchPhrases: string[];
36
+ /** The primary phrase searched for */
37
+ searchPhrase: string;
38
+ /** Additional variations tried (e.g., ["$4.89", "4.89"]) */
39
+ searchVariations?: string[];
40
+ /** What searchPhrase contains: "full_phrase" or "anchor_text" */
41
+ searchPhraseType?: "full_phrase" | "anchor_text";
19
42
  pageSearched?: number;
20
- matchedPhrases?: string[];
21
- matchedVariation?: string;
22
- phraseVariations?: string[];
23
- matchQuality?: string;
24
- isPartialMatch?: boolean;
25
- matchScore?: number;
26
- matchSnippet?: string;
27
- notes?: string;
28
- startTime?: number;
29
- endTime?: number;
43
+ /** Line ID(s) searched within the page */
44
+ lineSearched?: number | number[];
45
+ /** Search scope: specific line, entire page, or whole document */
46
+ searchScope?: "line" | "page" | "document";
47
+ /** Where we expected to find the match */
48
+ expectedLocation?: {
49
+ page: number;
50
+ line?: number;
51
+ };
52
+ /** Where we actually found the match (if success: true) */
53
+ foundLocation?: {
54
+ page: number;
55
+ line?: number;
56
+ };
57
+ /** Which variation matched + trust level (only if success: true) */
58
+ matchedVariation?: MatchedVariation;
59
+ /** The actual text found in document (may exist even if success: false, for rejected matches) */
60
+ matchedText?: string;
61
+ /** e.g., "not found on expected page (2)" */
62
+ note?: string;
30
63
  durationMs?: number;
31
64
  }
32
65
 
@@ -56,12 +89,18 @@ interface Verification {
56
89
  endTime?: string;
57
90
  } | null;
58
91
  verifiedFullPhrase?: string | null;
59
- verifiedKeySpan?: string | null;
92
+ verifiedAnchorText?: string | null;
60
93
  verifiedMatchSnippet?: string | null;
61
94
  hitIndexWithinPage?: number | null;
62
95
  phraseMatchDeepItem?: DeepTextItem;
63
- keySpanMatchDeepItem?: DeepTextItem;
96
+ /** Multiple boxes for anchorText highlighting when the anchorText spans multiple PDF items/words */
97
+ anchorTextMatchDeepItems?: DeepTextItem[];
64
98
  verificationImageBase64?: string | null;
99
+ /** Dimensions of the verification image (for coordinate mapping) */
100
+ verificationImageDimensions?: {
101
+ width: number;
102
+ height: number;
103
+ } | null;
65
104
  verifiedAt?: Date;
66
105
  /** The URL that was verified (from Citation.url when type: "url") */
67
106
  verifiedUrl?: string | null;
@@ -138,7 +177,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
138
177
  *
139
178
  * Common fields (used by both types):
140
179
  * - `fullPhrase`: The full context/excerpt containing the cited information
141
- * - `keySpan`: The specific key phrase being cited (must be substring of fullPhrase)
180
+ * - `anchorText`: The specific key phrase being cited (must be substring of fullPhrase)
142
181
  * - `citationNumber`: Citation number for display (e.g., [1], [2])
143
182
  *
144
183
  * @example Document citation
@@ -149,7 +188,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
149
188
  * pageNumber: 5,
150
189
  * lineIds: [12, 13],
151
190
  * fullPhrase: "Revenue increased by 15% in Q4.",
152
- * keySpan: "increased by 15%",
191
+ * anchorText: "increased by 15%",
153
192
  * citationNumber: 1,
154
193
  * };
155
194
  * ```
@@ -162,7 +201,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
162
201
  * domain: "example.com",
163
202
  * title: "Q4 Financial Report",
164
203
  * fullPhrase: "The TGU transitions require control, not brute strength.",
165
- * keySpan: "require control, not brute strength",
204
+ * anchorText: "require control, not brute strength",
166
205
  * citationNumber: 1,
167
206
  * };
168
207
  * ```
@@ -177,7 +216,7 @@ interface Citation {
177
216
  /** The full context/excerpt containing the cited information */
178
217
  fullPhrase?: string | null;
179
218
  /** The specific key phrase being cited (should be substring of fullPhrase) */
180
- keySpan?: string | null;
219
+ anchorText?: string | null;
181
220
  /** Citation number for display (e.g., [1], [2], [3]) */
182
221
  citationNumber?: number;
183
222
  /** Reasoning for why this citation was included */
@@ -190,8 +229,8 @@ interface Citation {
190
229
  pageNumber?: number | null;
191
230
  /** Line IDs within the page */
192
231
  lineIds?: number[] | null;
193
- /** Start page key for multi-page citations */
194
- startPageKey?: string | null;
232
+ /** Start page ID for multi-page citations */
233
+ startPageId?: string | null;
195
234
  /** Selection box coordinates in the document */
196
235
  selection?: ScreenBox | null;
197
236
  /** The source URL */
@@ -259,4 +298,4 @@ interface SourceMeta {
259
298
  accessedAt?: Date | string;
260
299
  }
261
300
 
262
- export { BLANK_VERIFICATION as B, type Citation as C, DEFAULT_OUTPUT_IMAGE_FORMAT as D, type IVertex as I, NOT_FOUND_VERIFICATION_INDEX as N, type OutputImageFormat as O, PENDING_VERIFICATION_INDEX as P, type SourceType as S, type UrlAccessStatus as U, type Verification as V, type CitationStatus as a, type CitationType as b, type VerifyCitationRequest as c, type VerifyCitationResponse as d, type SourceMeta as e, type ContentMatchStatus as f, type SearchStatus as g, type SearchMethod as h, type SearchAttempt as i, type ScreenBox as j, type DeepTextItem as k };
301
+ export { BLANK_VERIFICATION as B, type Citation as C, DEFAULT_OUTPUT_IMAGE_FORMAT as D, type IVertex as I, type MatchedVariation as M, NOT_FOUND_VERIFICATION_INDEX as N, type OutputImageFormat as O, PENDING_VERIFICATION_INDEX as P, type ScreenBox as S, type UrlAccessStatus as U, type Verification as V, type CitationStatus as a, type CitationType as b, type ContentMatchStatus as c, type DeepTextItem as d, type SearchAttempt as e, type SearchMethod as f, type SearchStatus as g, type SourceMeta as h, type SourceType as i, type VerifyCitationRequest as j, type VerifyCitationResponse as k };
package/lib/index.cjs CHANGED
@@ -1 +1,2 @@
1
- 'use strict';var chunkD2TKEF6D_cjs=require('./chunk-D2TKEF6D.cjs'),chunkDHVODVIA_cjs=require('./chunk-DHVODVIA.cjs'),chunkUUR2SQKU_cjs=require('./chunk-UUR2SQKU.cjs'),chunk2PRW5PVT_cjs=require('./chunk-2PRW5PVT.cjs');require('./chunk-F2MMVEVC.cjs');var z=t=>{if(!t)return false;let e=t.trim();if(e.length<64)return false;let a=e?.[0];for(let r=1;r<e.length;r++)if(e[r]!==a)return false;return true};function J(t){t=t.trim();let e=2,a=10,r=/[.?!](?=\s+|$)/g,n=[],c;for(;(c=r.exec(t))!==null;)n.push(c.index);if(n.length<2)return t;let I=n[n.length-1],f=n[n.length-2],o=t.substring(f+1,I+1),p=o.length;if(o.trim().slice(0,-1).length<a||p<=0||t.length<p*e)return t;let m=0,s=I+1;t.endsWith(o)&&(s=t.length);let T=-1;for(;;){let i=s-p;if(i<0)break;if(t.substring(i,s)===o)m++,T=i,s=i;else break}return m>=e?t.substring(0,T)+o:t}Object.defineProperty(exports,"DeepCitation",{enumerable:true,get:function(){return chunkD2TKEF6D_cjs.a}});Object.defineProperty(exports,"AV_CITATION_MARKDOWN_SYNTAX_PROMPT",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.b}});Object.defineProperty(exports,"CITATION_AV_BASED_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.h}});Object.defineProperty(exports,"CITATION_AV_REMINDER",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.d}});Object.defineProperty(exports,"CITATION_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.g}});Object.defineProperty(exports,"CITATION_MARKDOWN_SYNTAX_PROMPT",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.a}});Object.defineProperty(exports,"CITATION_REMINDER",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.c}});Object.defineProperty(exports,"compressPromptIds",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.i}});Object.defineProperty(exports,"decompressPromptIds",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.j}});Object.defineProperty(exports,"wrapCitationPrompt",{enumerable:true,get:function(){return 
chunkDHVODVIA_cjs.f}});Object.defineProperty(exports,"wrapSystemCitationPrompt",{enumerable:true,get:function(){return chunkDHVODVIA_cjs.e}});Object.defineProperty(exports,"BLANK_VERIFICATION",{enumerable:true,get:function(){return chunkUUR2SQKU_cjs.d}});Object.defineProperty(exports,"DEFAULT_OUTPUT_IMAGE_FORMAT",{enumerable:true,get:function(){return chunkUUR2SQKU_cjs.a}});Object.defineProperty(exports,"NOT_FOUND_VERIFICATION_INDEX",{enumerable:true,get:function(){return chunkUUR2SQKU_cjs.b}});Object.defineProperty(exports,"PENDING_VERIFICATION_INDEX",{enumerable:true,get:function(){return chunkUUR2SQKU_cjs.c}});Object.defineProperty(exports,"CITATION_X_PADDING",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.k}});Object.defineProperty(exports,"CITATION_Y_PADDING",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.l}});Object.defineProperty(exports,"generateCitationInstanceId",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.f}});Object.defineProperty(exports,"generateCitationKey",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.d}});Object.defineProperty(exports,"generateVerificationKey",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.e}});Object.defineProperty(exports,"getAllCitationsFromLlmOutput",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.v}});Object.defineProperty(exports,"getCitationPageNumber",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.r}});Object.defineProperty(exports,"getCitationStatus",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.t}});Object.defineProperty(exports,"getVerificationTextIndicator",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.m}});Object.defineProperty(exports,"groupCitationsByAttachmentId",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.w}});Object.defineProperty(exports,"groupCitationsByAttachmentIdObject",{enumerable:true,get:function(){return 
chunk2PRW5PVT_cjs.x}});Object.defineProperty(exports,"normalizeCitations",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.s}});Object.defineProperty(exports,"parseCitation",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.u}});Object.defineProperty(exports,"removeCitations",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.o}});Object.defineProperty(exports,"removeLineIdMetadata",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.q}});Object.defineProperty(exports,"removePageNumberMetadata",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.p}});Object.defineProperty(exports,"replaceCitations",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.n}});Object.defineProperty(exports,"sha1Hash",{enumerable:true,get:function(){return chunk2PRW5PVT_cjs.a}});exports.cleanRepeatingLastSentence=J;exports.isGeminiGarbage=z;
1
+ 'use strict';var chunk7TORYXU4_cjs=require('./chunk-7TORYXU4.cjs'),chunkDS6SOU4L_cjs=require('./chunk-DS6SOU4L.cjs'),chunkHL3AXCDL_cjs=require('./chunk-HL3AXCDL.cjs'),chunk4UWAUWYL_cjs=require('./chunk-4UWAUWYL.cjs'),chunkWS4CQVDI_cjs=require('./chunk-WS4CQVDI.cjs');require('./chunk-BYLIBOAU.cjs');var rt=t=>{if(!t)return false;let e=t.trim();if(e.length<64)return false;let a=e?.[0];for(let n=1;n<e.length;n++)if(e[n]!==a)return false;return true};function nt(t){t=t.trim();let e=2,a=10,n=/[.?!](?=\s+|$)/g,r=[],I;for(;(I=n.exec(t))!==null;)r.push(I.index);if(r.length<2)return t;let c=r[r.length-1],C=r[r.length-2],i=t.substring(C+1,c+1),p=i.length;if(i.trim().slice(0,-1).length<a||p<=0||t.length<p*e)return t;let m=0,s=c+1;t.endsWith(i)&&(s=t.length);let T=-1;for(;;){let o=s-p;if(o<0)break;if(t.substring(o,s)===i)m++,T=o,s=o;else break}return m>=e?t.substring(0,T)+i:t}Object.defineProperty(exports,"DeepCitation",{enumerable:true,get:function(){return chunk7TORYXU4_cjs.a}});Object.defineProperty(exports,"compressPromptIds",{enumerable:true,get:function(){return chunkDS6SOU4L_cjs.a}});Object.defineProperty(exports,"decompressPromptIds",{enumerable:true,get:function(){return chunkDS6SOU4L_cjs.b}});Object.defineProperty(exports,"BLANK_VERIFICATION",{enumerable:true,get:function(){return chunkHL3AXCDL_cjs.d}});Object.defineProperty(exports,"DEFAULT_OUTPUT_IMAGE_FORMAT",{enumerable:true,get:function(){return chunkHL3AXCDL_cjs.a}});Object.defineProperty(exports,"NOT_FOUND_VERIFICATION_INDEX",{enumerable:true,get:function(){return chunkHL3AXCDL_cjs.b}});Object.defineProperty(exports,"PENDING_VERIFICATION_INDEX",{enumerable:true,get:function(){return chunkHL3AXCDL_cjs.c}});Object.defineProperty(exports,"CITATION_X_PADDING",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.k}});Object.defineProperty(exports,"CITATION_Y_PADDING",{enumerable:true,get:function(){return 
chunk4UWAUWYL_cjs.l}});Object.defineProperty(exports,"deferredCitationToCitation",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.t}});Object.defineProperty(exports,"extractVisibleText",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.w}});Object.defineProperty(exports,"generateCitationInstanceId",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.f}});Object.defineProperty(exports,"generateCitationKey",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.d}});Object.defineProperty(exports,"generateVerificationKey",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.e}});Object.defineProperty(exports,"getAllCitationsFromDeferredResponse",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.u}});Object.defineProperty(exports,"getAllCitationsFromLlmOutput",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.B}});Object.defineProperty(exports,"getCitationMarkerIds",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.y}});Object.defineProperty(exports,"getCitationPageNumber",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.q}});Object.defineProperty(exports,"getCitationStatus",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.z}});Object.defineProperty(exports,"getVerificationTextIndicator",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.m}});Object.defineProperty(exports,"groupCitationsByAttachmentId",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.C}});Object.defineProperty(exports,"groupCitationsByAttachmentIdObject",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.D}});Object.defineProperty(exports,"hasDeferredCitations",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.v}});Object.defineProperty(exports,"normalizeCitations",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.r}});Object.defineProperty(exports,"parseCitation",{enumerable:true,get:function(){return 
chunk4UWAUWYL_cjs.A}});Object.defineProperty(exports,"parseDeferredCitationResponse",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.s}});Object.defineProperty(exports,"removeLineIdMetadata",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.p}});Object.defineProperty(exports,"removePageNumberMetadata",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.o}});Object.defineProperty(exports,"replaceCitations",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.n}});Object.defineProperty(exports,"replaceDeferredMarkers",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.x}});Object.defineProperty(exports,"sha1Hash",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.a}});Object.defineProperty(exports,"AV_CITATION_PROMPT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.d}});Object.defineProperty(exports,"CITATION_AV_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.j}});Object.defineProperty(exports,"CITATION_AV_REMINDER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.f}});Object.defineProperty(exports,"CITATION_DATA_END_DELIMITER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.b}});Object.defineProperty(exports,"CITATION_DATA_START_DELIMITER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.a}});Object.defineProperty(exports,"CITATION_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.i}});Object.defineProperty(exports,"CITATION_PROMPT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.c}});Object.defineProperty(exports,"CITATION_REMINDER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.e}});Object.defineProperty(exports,"wrapCitationPrompt",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.h}});Object.defineProperty(exports,"wrapSystemCitationPrompt",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.g}});exports.cleanRepeatingLastSentence=nt;exports.isGeminiGarbage=rt;//# sourceMappingURL=index.cjs.map
2
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/parsing/parseWorkAround.ts"],"names":["isGeminiGarbage","content","trimmedContent","firstCharacter","i","cleanRepeatingLastSentence","text","MIN_REPETITIONS","MIN_SENTENCE_CONTENT_LENGTH","sentenceEndRegex","sentenceEndIndices","match","lastTerminatorIndex","secondLastTerminatorIndex","repeatingUnit","unitLength","repetitionsFound","currentCheckEndIndex","firstRepetitionStartIndex","checkStartIndex"],"mappings":"2SAEO,IAAMA,GAAmBC,CAAAA,EAAoB,CAClD,GAAI,CAACA,EAAS,OAAO,MAAA,CACrB,IAAMC,CAAAA,CAAiBD,EAAQ,IAAA,EAAK,CACpC,GAAIC,CAAAA,CAAe,MAAA,CAAS,GAAuC,OAAO,MAAA,CAE1E,IAAMC,CAAAA,CAAiBD,IAAiB,CAAC,CAAA,CAEzC,QAASE,CAAAA,CAAI,CAAA,CAAGA,EAAIF,CAAAA,CAAe,MAAA,CAAQE,CAAAA,EAAAA,CACzC,GAAIF,EAAeE,CAAC,CAAA,GAAMD,EAAgB,OAAO,MAAA,CAEnD,OAAO,KACT,EAGO,SAASE,EAAAA,CAA2BC,EAAsB,CAC/DA,CAAAA,CAAOA,CAAAA,CAAK,IAAA,GACZ,IAAMC,CAAAA,CAAkB,CAAA,CAClBC,CAAAA,CAA8B,GAE9BC,CAAAA,CAAmB,iBAAA,CACnBC,EAA+B,EAAC,CAClCC,EACJ,KAAA,CAAQA,CAAAA,CAAQF,CAAAA,CAAiB,IAAA,CAAKH,CAAI,CAAA,IAAO,IAAA,EAC/CI,EAAmB,IAAA,CAAKC,CAAAA,CAAM,KAAK,CAAA,CAGrC,GAAID,CAAAA,CAAmB,MAAA,CAAS,EAC9B,OAAOJ,CAAAA,CAGT,IAAMM,CAAAA,CAAsBF,CAAAA,CAAmBA,EAAmB,MAAA,CAAS,CAAC,CAAA,CACtEG,CAAAA,CAA4BH,EAAmBA,CAAAA,CAAmB,MAAA,CAAS,CAAC,CAAA,CAE5EI,EAAgBR,CAAAA,CAAK,SAAA,CAAUO,CAAAA,CAA4B,CAAA,CAAGD,EAAsB,CAAC,CAAA,CACrFG,EAAaD,CAAAA,CAAc,MAAA,CAUjC,GARwBA,CAAAA,CAAc,IAAA,EAAK,CAAE,KAAA,CAAM,EAAG,EAAE,CAAA,CACpC,OAASN,CAAAA,EAGzBO,CAAAA,EAAc,GAIdT,CAAAA,CAAK,MAAA,CAASS,CAAAA,CAAaR,CAAAA,CAC7B,OAAOD,CAAAA,CAGT,IAAIU,EAAmB,CAAA,CACnBC,CAAAA,CAAuBL,EAAsB,CAAA,CAC7CN,CAAAA,CAAK,QAAA,CAASQ,CAAa,IAC7BG,CAAAA,CAAuBX,CAAAA,CAAK,QAG9B,IAAIY,CAAAA,CAA4B,GAEhC,OAAa,CACX,IAAMC,CAAAA,CAAkBF,EAAuBF,CAAAA,CAE/C,GAAII,EAAkB,CAAA,CACpB,MAKF,GAFcb,CAAAA,CAAK,SAAA,CAAUa,CAAAA,CAAiBF,CAAoB,IAEpDH,CAAAA,CACZE,CAAAA,EAAAA,CACAE,EAA4BC,CAAAA,CAC5BF,CAAAA,CAAuBE,OAEvB,KAEJ,CAEA,OAAIH,CAAAA,EAAoBT,EACQD,CAAAA,CAAK,SAAA,CAAU,EAAGY,CAAyB,CAAA,CAClCJ,EAGhCR,CAEX","file":"index.cjs","sourcesContent":["//flash and flash 
lite get super confused if we ask for a MD table and infinite loop\nconst MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE = 64;\nexport const isGeminiGarbage = (content: string) => {\n if (!content) return false;\n const trimmedContent = content.trim();\n if (trimmedContent.length < MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE) return false;\n\n const firstCharacter = trimmedContent?.[0];\n\n for (let i = 1; i < trimmedContent.length; i++) {\n if (trimmedContent[i] !== firstCharacter) return false;\n }\n return true;\n};\n\n// helps clean up infinite rambling bug output from gemini\nexport function cleanRepeatingLastSentence(text: string): string {\n text = text.trim();\n const MIN_REPETITIONS = 2;\n const MIN_SENTENCE_CONTENT_LENGTH = 10;\n\n const sentenceEndRegex = /[.?!](?=\\s+|$)/g;\n const sentenceEndIndices: number[] = [];\n let match;\n while ((match = sentenceEndRegex.exec(text)) !== null) {\n sentenceEndIndices.push(match.index);\n }\n\n if (sentenceEndIndices.length < 2) {\n return text;\n }\n\n const lastTerminatorIndex = sentenceEndIndices[sentenceEndIndices.length - 1];\n const secondLastTerminatorIndex = sentenceEndIndices[sentenceEndIndices.length - 2];\n\n const repeatingUnit = text.substring(secondLastTerminatorIndex + 1, lastTerminatorIndex + 1);\n const unitLength = repeatingUnit.length;\n\n const sentenceContent = repeatingUnit.trim().slice(0, -1);\n if (sentenceContent.length < MIN_SENTENCE_CONTENT_LENGTH) {\n return text;\n }\n if (unitLength <= 0) {\n return text;\n }\n\n if (text.length < unitLength * MIN_REPETITIONS) {\n return text;\n }\n\n let repetitionsFound = 0;\n let currentCheckEndIndex = lastTerminatorIndex + 1;\n if (text.endsWith(repeatingUnit)) {\n currentCheckEndIndex = text.length;\n }\n\n let firstRepetitionStartIndex = -1;\n\n while (true) {\n const checkStartIndex = currentCheckEndIndex - unitLength;\n\n if (checkStartIndex < 0) {\n break;\n }\n\n const chunk = text.substring(checkStartIndex, currentCheckEndIndex);\n\n if (chunk === 
repeatingUnit) {\n repetitionsFound++;\n firstRepetitionStartIndex = checkStartIndex;\n currentCheckEndIndex = checkStartIndex;\n } else {\n break;\n }\n }\n\n if (repetitionsFound >= MIN_REPETITIONS) {\n const textBeforeRepetitions = text.substring(0, firstRepetitionStartIndex);\n const result = textBeforeRepetitions + repeatingUnit;\n return result;\n } else {\n return text;\n }\n}\n"]}
package/lib/index.d.cts CHANGED
@@ -1,8 +1,9 @@
1
- export { CitationInput, DeepCitation, DeepCitationConfig, FileDataPart, FileInput, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsOptions, VerifyCitationsResponse, verifyAll } from './client/index.cjs';
2
- import { C as Citation, V as Verification, a as CitationStatus } from './index-fvVBZYVK.cjs';
3
- export { B as BLANK_VERIFICATION, b as CitationType, f as ContentMatchStatus, D as DEFAULT_OUTPUT_IMAGE_FORMAT, k as DeepTextItem, I as IVertex, N as NOT_FOUND_VERIFICATION_INDEX, O as OutputImageFormat, P as PENDING_VERIFICATION_INDEX, j as ScreenBox, i as SearchAttempt, h as SearchMethod, g as SearchStatus, e as SourceMeta, S as SourceType, U as UrlAccessStatus, c as VerifyCitationRequest, d as VerifyCitationResponse } from './index-fvVBZYVK.cjs';
4
- export { C as CITATION_X_PADDING, c as CITATION_Y_PADDING, b as generateCitationInstanceId, g as generateCitationKey, a as generateVerificationKey } from './utils-q6anRKO_.cjs';
5
- export { AV_CITATION_MARKDOWN_SYNTAX_PROMPT, CITATION_AV_BASED_JSON_OUTPUT_FORMAT, CITATION_AV_REMINDER, CITATION_JSON_OUTPUT_FORMAT, CITATION_MARKDOWN_SYNTAX_PROMPT, CITATION_REMINDER, CompressedResult, WrapCitationPromptOptions, WrapCitationPromptResult, WrapSystemPromptOptions, compressPromptIds, decompressPromptIds, wrapCitationPrompt, wrapSystemCitationPrompt } from './prompts/index.cjs';
1
+ export { CitationInput, DeepCitation, DeepCitationConfig, FileDataPart, FileInput, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsOptions, VerifyCitationsResponse } from './client/index.cjs';
2
+ import { C as Citation, V as Verification, a as CitationStatus } from './index-BHjI8Bh1.cjs';
3
+ export { B as BLANK_VERIFICATION, b as CitationType, c as ContentMatchStatus, D as DEFAULT_OUTPUT_IMAGE_FORMAT, d as DeepTextItem, I as IVertex, N as NOT_FOUND_VERIFICATION_INDEX, O as OutputImageFormat, P as PENDING_VERIFICATION_INDEX, S as ScreenBox, e as SearchAttempt, f as SearchMethod, g as SearchStatus, h as SourceMeta, i as SourceType, U as UrlAccessStatus, j as VerifyCitationRequest, k as VerifyCitationResponse } from './index-BHjI8Bh1.cjs';
4
+ import { CitationData, ParsedCitationResponse } from './prompts/index.cjs';
5
+ export { AV_CITATION_PROMPT, CITATION_AV_JSON_OUTPUT_FORMAT, CITATION_AV_REMINDER, CITATION_DATA_END_DELIMITER, CITATION_DATA_START_DELIMITER, CITATION_JSON_OUTPUT_FORMAT, CITATION_PROMPT, CITATION_REMINDER, CompressedResult, WrapCitationPromptOptions, WrapCitationPromptResult, WrapSystemPromptOptions, compressPromptIds, decompressPromptIds, wrapCitationPrompt, wrapSystemCitationPrompt } from './prompts/index.cjs';
6
+ export { C as CITATION_X_PADDING, a as CITATION_Y_PADDING, g as generateCitationInstanceId, b as generateCitationKey, c as generateVerificationKey } from './utils-CCi9_JTv.cjs';
6
7
 
7
8
  /**
8
9
  * Calculates the verification status of a citation based on the found highlight and search state.
@@ -82,12 +83,136 @@ declare function groupCitationsByAttachmentIdObject(citations: Citation[] | {
82
83
  };
83
84
  };
84
85
 
86
+ /**
87
+ * Citation Parser
88
+ *
89
+ * Implements the "Split & Parse" strategy for the deferred JSON citation pattern.
90
+ * This parser extracts citations from LLM responses that use [N] markers in text
91
+ * and include a JSON data block at the end.
92
+ *
93
+ * Algorithm:
94
+ * 1. Detection: Look for the start delimiter <<<CITATION_DATA>>>
95
+ * 2. Splitting: Separate visible content from the citation data block
96
+ * 3. Data Extraction: Extract the JSON string between delimiters
97
+ * 4. Sanitization: Parse with JSON.parse, with fallback repair for common issues
98
+ * 5. Hydration: Map the JSON objects to a usable format
99
+ */
100
+
101
+ /**
102
+ * Parses a citation response from an LLM.
103
+ *
104
+ * This function:
105
+ * 1. Finds the <<<CITATION_DATA>>> delimiter in the response
106
+ * 2. Splits the response into visible text and citation data
107
+ * 3. Parses the JSON citation data
108
+ * 4. Returns a structured result with both
109
+ *
110
+ * @param llmResponse - The full LLM response text
111
+ * @returns ParsedCitationResponse with visible text and parsed citations
112
+ *
113
+ * @example
114
+ * ```typescript
115
+ * const response = `
116
+ * The company grew 45% [1].
117
+ *
118
+ * <<<CITATION_DATA>>>
119
+ * [{"id": 1, "attachment_id": "abc", "full_phrase": "grew 45%", "anchor_text": "45%"}]
120
+ * <<<END_CITATION_DATA>>>
121
+ * `;
122
+ *
123
+ * const parsed = parseDeferredCitationResponse(response);
124
+ * console.log(parsed.visibleText); // "The company grew 45% [1]."
125
+ * console.log(parsed.citations); // [{id: 1, attachment_id: "abc", ...}]
126
+ * ```
127
+ */
128
+ declare function parseDeferredCitationResponse(llmResponse: string): ParsedCitationResponse;
129
+ /**
130
+ * Converts a CitationData object to the standard Citation format.
131
+ *
132
+ * @param data - The citation data
133
+ * @param citationNumber - Optional override for citation number (defaults to data.id)
134
+ * @returns Standard Citation object
135
+ */
136
+ declare function deferredCitationToCitation(data: CitationData, citationNumber?: number): Citation;
137
+ /**
138
+ * Extracts all citations from a citation response and returns them
139
+ * in the standard dictionary format used by the verification API.
140
+ *
141
+ * This function parses the response, converts each citation to the standard
142
+ * Citation format, and generates deterministic keys for each.
143
+ *
144
+ * @param llmResponse - The full LLM response with citation block
145
+ * @returns Dictionary of parsed Citation objects keyed by citation key
146
+ *
147
+ * @example
148
+ * ```typescript
149
+ * const citations = getAllCitationsFromDeferredResponse(llmOutput);
150
+ * // Returns: { "abc123...": { attachmentId: "...", fullPhrase: "...", ... }, ... }
151
+ * ```
152
+ */
153
+ declare function getAllCitationsFromDeferredResponse(llmResponse: string): {
154
+ [key: string]: Citation;
155
+ };
156
+ /**
157
+ * Checks if a response contains a citation data block.
158
+ *
159
+ * @param response - The LLM response to check
160
+ * @returns True if the response contains the citation data delimiter
161
+ */
162
+ declare function hasDeferredCitations(response: string): boolean;
163
+ /**
164
+ * Extracts just the visible text from a response,
165
+ * removing the citation data block.
166
+ *
167
+ * @param llmResponse - The full LLM response
168
+ * @returns The visible text portion only
169
+ */
170
+ declare function extractVisibleText(llmResponse: string): string;
171
+ /**
172
+ * Replaces [N] citation markers in text with optional content.
173
+ *
174
+ * @param text - The text containing [N] markers
175
+ * @param options - Configuration for replacement
176
+ * @returns The text with markers replaced
177
+ *
178
+ * @example
179
+ * ```typescript
180
+ * const text = "Revenue grew 45% [1] in Q4 [2].";
181
+ *
182
+ * // Remove markers entirely
183
+ * replaceDeferredMarkers(text);
184
+ * // Returns: "Revenue grew 45% in Q4."
185
+ *
186
+ * // Replace with anchor texts
187
+ * replaceDeferredMarkers(text, {
188
+ * citationMap: new Map([[1, { anchor_text: "45%" }], [2, { anchor_text: "Q4" }]]),
189
+ * showAnchorText: true,
190
+ * });
191
+ * // Returns: "Revenue grew 45% 45% in Q4 Q4."
192
+ * ```
193
+ */
194
+ declare function replaceDeferredMarkers(text: string, options?: {
195
+ /** Map of citation IDs to their data */
196
+ citationMap?: Map<number, CitationData>;
197
+ /** Whether to show the anchor text after the marker */
198
+ showAnchorText?: boolean;
199
+ /** Custom replacement function */
200
+ replacer?: (id: number, data?: CitationData) => string;
201
+ }): string;
202
+ /**
203
+ * Gets all citation marker IDs found in a text.
204
+ *
205
+ * @param text - The text to scan for [N] markers
206
+ * @returns Array of citation IDs in order of appearance
207
+ */
208
+ declare function getCitationMarkerIds(text: string): number[];
209
+
85
210
  interface ReplaceCitationsOptions {
86
211
  /**
87
- * If true, leaves the key_span text behind when removing citations.
212
+ * If true, leaves the anchor_text behind when removing citations.
88
213
  * @default false
89
214
  */
90
- leaveKeySpanBehind?: boolean;
215
+ leaveAnchorTextBehind?: boolean;
91
216
  /**
92
217
  * Map of citation keys to verification results.
93
218
  * Used to determine verification status for each citation.
@@ -120,12 +245,12 @@ declare const getVerificationTextIndicator: (verification: Verification | null |
120
245
  * // Remove all citations
121
246
  * const clean = replaceCitations(llmOutput);
122
247
  *
123
- * // Leave key_span text behind
124
- * const withKeySpans = replaceCitations(llmOutput, { leaveKeySpanBehind: true });
248
+ * // Leave anchor_text behind
249
+ * const withAnchorTexts = replaceCitations(llmOutput, { leaveAnchorTextBehind: true });
125
250
  *
126
251
  * // Show verification status indicators
127
252
  * const withStatus = replaceCitations(llmOutput, {
128
- * leaveKeySpanBehind: true,
253
+ * leaveAnchorTextBehind: true,
129
254
  * verifications: verificationMap,
130
255
  * showVerificationStatus: true,
131
256
  * });
@@ -133,13 +258,9 @@ declare const getVerificationTextIndicator: (verification: Verification | null |
133
258
  * ```
134
259
  */
135
260
  declare const replaceCitations: (markdownWithCitations: string, options?: ReplaceCitationsOptions) => string;
136
- /**
137
- * @deprecated Use `replaceCitations` instead. This function is kept for backward compatibility.
138
- */
139
- declare const removeCitations: (markdownWithCitations: string, leaveKeySpanBehind?: boolean) => string;
140
261
  declare const removePageNumberMetadata: (pageText: string) => string;
141
262
  declare const removeLineIdMetadata: (pageText: string) => string;
142
- declare const getCitationPageNumber: (startPageKey?: string | null) => number | null;
263
+ declare const getCitationPageNumber: (startPageId?: string | null) => number | null;
143
264
  declare const normalizeCitations: (response: string) => string;
144
265
 
145
266
  declare const isGeminiGarbage: (content: string) => boolean;
@@ -156,4 +277,4 @@ declare function cleanRepeatingLastSentence(text: string): string;
156
277
  */
157
278
  declare function sha1Hash(data: string | any): string;
158
279
 
159
- export { Citation, CitationStatus, type ReplaceCitationsOptions, Verification, cleanRepeatingLastSentence, getAllCitationsFromLlmOutput, getCitationPageNumber, getCitationStatus, getVerificationTextIndicator, groupCitationsByAttachmentId, groupCitationsByAttachmentIdObject, isGeminiGarbage, normalizeCitations, parseCitation, removeCitations, removeLineIdMetadata, removePageNumberMetadata, replaceCitations, sha1Hash };
280
+ export { Citation, CitationData, CitationStatus, ParsedCitationResponse, type ReplaceCitationsOptions, Verification, cleanRepeatingLastSentence, deferredCitationToCitation, extractVisibleText, getAllCitationsFromDeferredResponse, getAllCitationsFromLlmOutput, getCitationMarkerIds, getCitationPageNumber, getCitationStatus, getVerificationTextIndicator, groupCitationsByAttachmentId, groupCitationsByAttachmentIdObject, hasDeferredCitations, isGeminiGarbage, normalizeCitations, parseCitation, parseDeferredCitationResponse, removeLineIdMetadata, removePageNumberMetadata, replaceCitations, replaceDeferredMarkers, sha1Hash };