@deepcitation/deepcitation-js 1.1.49 → 1.1.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -157
- package/lib/chunk-2HINOG74.js +3 -0
- package/lib/chunk-2HINOG74.js.map +1 -0
- package/lib/chunk-4UWAUWYL.cjs +3 -0
- package/lib/chunk-4UWAUWYL.cjs.map +1 -0
- package/lib/chunk-5XGN7UAV.js +2 -0
- package/lib/chunk-5XGN7UAV.js.map +1 -0
- package/lib/chunk-7TORYXU4.cjs +3 -0
- package/lib/chunk-7TORYXU4.cjs.map +1 -0
- package/lib/chunk-BDSA6VGC.js +125 -0
- package/lib/chunk-BDSA6VGC.js.map +1 -0
- package/lib/chunk-BEYJEW3Y.js +2 -0
- package/lib/chunk-BEYJEW3Y.js.map +1 -0
- package/lib/chunk-BWQLFMTV.js +2 -0
- package/lib/chunk-BWQLFMTV.js.map +1 -0
- package/lib/{chunk-F2MMVEVC.cjs → chunk-BYLIBOAU.cjs} +2 -1
- package/lib/chunk-BYLIBOAU.cjs.map +1 -0
- package/lib/chunk-DS6SOU4L.cjs +2 -0
- package/lib/chunk-DS6SOU4L.cjs.map +1 -0
- package/lib/{chunk-UUR2SQKU.cjs → chunk-HL3AXCDL.cjs} +2 -1
- package/lib/chunk-HL3AXCDL.cjs.map +1 -0
- package/lib/chunk-N7FTXSGM.js +3 -0
- package/lib/chunk-N7FTXSGM.js.map +1 -0
- package/lib/chunk-WS4CQVDI.cjs +125 -0
- package/lib/chunk-WS4CQVDI.cjs.map +1 -0
- package/lib/client/index.cjs +2 -1
- package/lib/client/index.cjs.map +1 -0
- package/lib/client/index.d.cts +58 -13
- package/lib/client/index.d.ts +58 -13
- package/lib/client/index.js +2 -1
- package/lib/client/index.js.map +1 -0
- package/lib/{index-fvVBZYVK.d.ts → index-BHjI8Bh1.d.cts} +61 -22
- package/lib/{index-fvVBZYVK.d.cts → index-BHjI8Bh1.d.ts} +61 -22
- package/lib/index.cjs +2 -1
- package/lib/index.cjs.map +1 -0
- package/lib/index.d.cts +137 -16
- package/lib/index.d.ts +137 -16
- package/lib/index.js +2 -1
- package/lib/index.js.map +1 -0
- package/lib/prompts/index.cjs +2 -1
- package/lib/prompts/index.cjs.map +1 -0
- package/lib/prompts/index.d.cts +177 -55
- package/lib/prompts/index.d.ts +177 -55
- package/lib/prompts/index.js +2 -1
- package/lib/prompts/index.js.map +1 -0
- package/lib/react/index.cjs +8 -5
- package/lib/react/index.cjs.map +1 -0
- package/lib/react/index.d.cts +348 -21
- package/lib/react/index.d.ts +348 -21
- package/lib/react/index.js +8 -5
- package/lib/react/index.js.map +1 -0
- package/lib/styles.css +1 -1
- package/lib/types/index.cjs +2 -1
- package/lib/types/index.cjs.map +1 -0
- package/lib/types/index.d.cts +1 -1
- package/lib/types/index.d.ts +1 -1
- package/lib/types/index.js +2 -1
- package/lib/types/index.js.map +1 -0
- package/lib/{utils-q6anRKO_.d.cts → utils-CCi9_JTv.d.cts} +5 -5
- package/lib/{utils-DuacFTtu.d.ts → utils-CoSP-i76.d.ts} +5 -5
- package/package.json +165 -152
- package/src/tailwind.css +5 -5
- package/lib/chunk-2PRW5PVT.cjs +0 -2
- package/lib/chunk-3XSZLKJW.js +0 -2
- package/lib/chunk-D2TKEF6D.cjs +0 -2
- package/lib/chunk-DHVODVIA.cjs +0 -71
- package/lib/chunk-HRCAI3NV.js +0 -1
- package/lib/chunk-ND6LFDGK.js +0 -71
- package/lib/chunk-O2XFH626.js +0 -1
- package/lib/chunk-PKXMJNRX.js +0 -2
package/lib/client/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as Citation, V as Verification } from '../index-
|
|
1
|
+
import { C as Citation, V as Verification } from '../index-BHjI8Bh1.js';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Configuration options for the DeepCitation client
|
|
@@ -47,6 +47,30 @@ interface UploadFileOptions {
|
|
|
47
47
|
/** Optional custom filename (uses File.name if not provided) */
|
|
48
48
|
filename?: string;
|
|
49
49
|
}
|
|
50
|
+
/**
|
|
51
|
+
* Options for preparing a URL for citation verification.
|
|
52
|
+
* URLs and Office files take ~30s to process vs. <1s for images/PDFs.
|
|
53
|
+
*/
|
|
54
|
+
interface PrepareUrlOptions {
|
|
55
|
+
/** The URL to convert and prepare for citation verification */
|
|
56
|
+
url: string;
|
|
57
|
+
/** Optional custom attachment ID to use instead of auto-generated one */
|
|
58
|
+
attachmentId?: string;
|
|
59
|
+
/** Optional custom filename for the converted document */
|
|
60
|
+
filename?: string;
|
|
61
|
+
/**
|
|
62
|
+
* UNSAFE: Skip PDF conversion and extract text directly from HTML.
|
|
63
|
+
*
|
|
64
|
+
* This is much faster (<1s vs ~30s) but VULNERABLE to:
|
|
65
|
+
* - Hidden text (CSS display:none, tiny fonts, etc.)
|
|
66
|
+
* - Fine print that users can't see
|
|
67
|
+
* - Prompt injection attacks embedded in the page
|
|
68
|
+
*
|
|
69
|
+
* Only use this for trusted URLs where you control the content.
|
|
70
|
+
* Default: false (uses safe PDF conversion)
|
|
71
|
+
*/
|
|
72
|
+
unsafeFastUrlOutput?: boolean;
|
|
73
|
+
}
|
|
50
74
|
/**
|
|
51
75
|
* Response from verifying citations
|
|
52
76
|
*/
|
|
@@ -93,12 +117,6 @@ interface FileDataPart {
|
|
|
93
117
|
interface PrepareFilesResult {
|
|
94
118
|
/** Array of file references for verification (includes deepTextPromptPortion for each file) */
|
|
95
119
|
fileDataParts: FileDataPart[];
|
|
96
|
-
/**
|
|
97
|
-
* Array of formatted text content for LLM prompts (with page markers and line IDs).
|
|
98
|
-
* @deprecated Use fileDataParts[].deepTextPromptPortion instead for single source of truth.
|
|
99
|
-
* This is kept for backwards compatibility but will be removed in a future version.
|
|
100
|
-
*/
|
|
101
|
-
deepTextPromptPortion: string[];
|
|
102
120
|
}
|
|
103
121
|
/**
|
|
104
122
|
* Input for verify method
|
|
@@ -111,10 +129,6 @@ interface VerifyInput {
|
|
|
111
129
|
/** Output image format for verification screenshots */
|
|
112
130
|
outputImageFormat?: "jpeg" | "png" | "avif";
|
|
113
131
|
}
|
|
114
|
-
/**
|
|
115
|
-
* @deprecated Use VerifyInput instead. This alias is kept for backwards compatibility.
|
|
116
|
-
*/
|
|
117
|
-
type verifyAll = VerifyInput;
|
|
118
132
|
/**
|
|
119
133
|
* Input for convertFile - convert URL or Office file to PDF
|
|
120
134
|
*/
|
|
@@ -271,6 +285,37 @@ declare class DeepCitation {
|
|
|
271
285
|
* ```
|
|
272
286
|
*/
|
|
273
287
|
prepareConvertedFile(options: PrepareConvertedFileOptions): Promise<UploadFileResponse>;
|
|
288
|
+
/**
|
|
289
|
+
* Prepare a URL for citation verification.
|
|
290
|
+
*
|
|
291
|
+
* This is a convenience method that handles URL conversion and text extraction
|
|
292
|
+
* in a single call. The API will convert the URL to PDF and extract text content
|
|
293
|
+
* for citation verification.
|
|
294
|
+
*
|
|
295
|
+
* Note: URLs and Office files take ~30s to process vs. <1s for images/PDFs.
|
|
296
|
+
*
|
|
297
|
+
* @param options - URL and optional settings
|
|
298
|
+
* @returns Upload response with attachmentId and extracted text for LLM prompts
|
|
299
|
+
*
|
|
300
|
+
* @example
|
|
301
|
+
* ```typescript
|
|
302
|
+
* // Prepare a URL for citation verification
|
|
303
|
+
* const { attachmentId, deepTextPromptPortion } = await deepcitation.prepareUrl({
|
|
304
|
+
* url: "https://example.com/article"
|
|
305
|
+
* });
|
|
306
|
+
*
|
|
307
|
+
* // Use deepTextPromptPortion in your LLM prompt
|
|
308
|
+
* const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
|
|
309
|
+
* systemPrompt,
|
|
310
|
+
* userPrompt: question,
|
|
311
|
+
* deepTextPromptPortion,
|
|
312
|
+
* });
|
|
313
|
+
*
|
|
314
|
+
* // Verify citations
|
|
315
|
+
* const verified = await deepcitation.verifyAttachment(attachmentId, citations);
|
|
316
|
+
* ```
|
|
317
|
+
*/
|
|
318
|
+
prepareUrl(options: PrepareUrlOptions): Promise<UploadFileResponse>;
|
|
274
319
|
/**
|
|
275
320
|
* Upload multiple files for citation verification and get structured content.
|
|
276
321
|
* This is the recommended way to prepare files for LLM prompts.
|
|
@@ -293,7 +338,7 @@ declare class DeepCitation {
|
|
|
293
338
|
* });
|
|
294
339
|
*
|
|
295
340
|
* // Use fileDataParts later for verification
|
|
296
|
-
* const result = await deepcitation.
|
|
341
|
+
* const result = await deepcitation.verify({ llmOutput, fileDataParts });
|
|
297
342
|
* ```
|
|
298
343
|
*/
|
|
299
344
|
prepareFiles(files: FileInput[]): Promise<PrepareFilesResult>;
|
|
@@ -355,4 +400,4 @@ declare class DeepCitation {
|
|
|
355
400
|
}): Promise<VerifyCitationsResponse>;
|
|
356
401
|
}
|
|
357
402
|
|
|
358
|
-
export { type CitationInput, type ConvertFileInput, type ConvertFileResponse, DeepCitation, type DeepCitationConfig, type FileDataPart, type FileInput, type PrepareConvertedFileOptions, type PrepareFilesResult, type UploadFileOptions, type UploadFileResponse, type VerifyCitationsOptions, type VerifyCitationsResponse, type VerifyInput
|
|
403
|
+
export { type CitationInput, type ConvertFileInput, type ConvertFileResponse, DeepCitation, type DeepCitationConfig, type FileDataPart, type FileInput, type PrepareConvertedFileOptions, type PrepareFilesResult, type PrepareUrlOptions, type UploadFileOptions, type UploadFileResponse, type VerifyCitationsOptions, type VerifyCitationsResponse, type VerifyInput };
|
package/lib/client/index.js
CHANGED
|
@@ -1 +1,2 @@
|
|
|
1
|
-
export{a as DeepCitation}from'../chunk-
|
|
1
|
+
export{a as DeepCitation}from'../chunk-2HINOG74.js';import'../chunk-N7FTXSGM.js';import'../chunk-BDSA6VGC.js';import'../chunk-5XGN7UAV.js';//# sourceMappingURL=index.js.map
|
|
2
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"names":[],"mappings":"","file":"index.js"}
|
|
@@ -10,23 +10,56 @@ interface ScreenBox extends IVertex {
|
|
|
10
10
|
height: number;
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
type SearchStatus = "loading" | "pending" | "not_found" | "partial_text_found" | "found" | "
|
|
14
|
-
type SearchMethod = "exact_line_match" | "line_with_buffer" | "current_page" | "
|
|
13
|
+
type SearchStatus = "loading" | "pending" | "not_found" | "partial_text_found" | "found" | "found_anchor_text_only" | "found_phrase_missed_anchor_text" | "found_on_other_page" | "found_on_other_line" | "first_word_found" | "timestamp_wip" | "skipped";
|
|
14
|
+
type SearchMethod = "exact_line_match" | "line_with_buffer" | "current_page" | "anchor_text_fallback" | "adjacent_pages" | "expanded_window" | "regex_search" | "first_word_fallback";
|
|
15
|
+
/**
|
|
16
|
+
* Indicates which variation of the citation was matched.
|
|
17
|
+
* Trust decreases as we fall back from fullPhrase to anchorText to partial matches.
|
|
18
|
+
*
|
|
19
|
+
* HIGH TRUST (green indicator):
|
|
20
|
+
* - exact_full_phrase: Exact match on the full phrase
|
|
21
|
+
* - normalized_full_phrase: Full phrase matched with whitespace/case normalization
|
|
22
|
+
*
|
|
23
|
+
* MEDIUM TRUST (green indicator, shows context in popover):
|
|
24
|
+
* - exact_anchor_text: anchorText matched exactly, but fullPhrase was not found
|
|
25
|
+
* - normalized_anchor_text: anchorText matched with normalization
|
|
26
|
+
*
|
|
27
|
+
* LOW TRUST (amber indicator):
|
|
28
|
+
* - partial_full_phrase: Only part of fullPhrase matched (tables, columns, line breaks)
|
|
29
|
+
* - partial_anchor_text: Only part of anchorText matched
|
|
30
|
+
* - first_word_only: Only first word matched (lowest trust)
|
|
31
|
+
*/
|
|
32
|
+
type MatchedVariation = "exact_full_phrase" | "normalized_full_phrase" | "exact_anchor_text" | "normalized_anchor_text" | "partial_full_phrase" | "partial_anchor_text" | "first_word_only";
|
|
15
33
|
interface SearchAttempt {
|
|
16
34
|
method: SearchMethod;
|
|
17
35
|
success: boolean;
|
|
18
|
-
|
|
36
|
+
/** The primary phrase searched for */
|
|
37
|
+
searchPhrase: string;
|
|
38
|
+
/** Additional variations tried (e.g., ["$4.89", "4.89"]) */
|
|
39
|
+
searchVariations?: string[];
|
|
40
|
+
/** What searchPhrase contains: "full_phrase" or "anchor_text" */
|
|
41
|
+
searchPhraseType?: "full_phrase" | "anchor_text";
|
|
19
42
|
pageSearched?: number;
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
43
|
+
/** Line ID(s) searched within the page */
|
|
44
|
+
lineSearched?: number | number[];
|
|
45
|
+
/** Search scope: specific line, entire page, or whole document */
|
|
46
|
+
searchScope?: "line" | "page" | "document";
|
|
47
|
+
/** Where we expected to find the match */
|
|
48
|
+
expectedLocation?: {
|
|
49
|
+
page: number;
|
|
50
|
+
line?: number;
|
|
51
|
+
};
|
|
52
|
+
/** Where we actually found the match (if success: true) */
|
|
53
|
+
foundLocation?: {
|
|
54
|
+
page: number;
|
|
55
|
+
line?: number;
|
|
56
|
+
};
|
|
57
|
+
/** Which variation matched + trust level (only if success: true) */
|
|
58
|
+
matchedVariation?: MatchedVariation;
|
|
59
|
+
/** The actual text found in document (may exist even if success: false, for rejected matches) */
|
|
60
|
+
matchedText?: string;
|
|
61
|
+
/** e.g., "not found on expected page (2)" */
|
|
62
|
+
note?: string;
|
|
30
63
|
durationMs?: number;
|
|
31
64
|
}
|
|
32
65
|
|
|
@@ -56,12 +89,18 @@ interface Verification {
|
|
|
56
89
|
endTime?: string;
|
|
57
90
|
} | null;
|
|
58
91
|
verifiedFullPhrase?: string | null;
|
|
59
|
-
|
|
92
|
+
verifiedAnchorText?: string | null;
|
|
60
93
|
verifiedMatchSnippet?: string | null;
|
|
61
94
|
hitIndexWithinPage?: number | null;
|
|
62
95
|
phraseMatchDeepItem?: DeepTextItem;
|
|
63
|
-
|
|
96
|
+
/** Multiple boxes for anchorText highlighting when the anchorText spans multiple PDF items/words */
|
|
97
|
+
anchorTextMatchDeepItems?: DeepTextItem[];
|
|
64
98
|
verificationImageBase64?: string | null;
|
|
99
|
+
/** Dimensions of the verification image (for coordinate mapping) */
|
|
100
|
+
verificationImageDimensions?: {
|
|
101
|
+
width: number;
|
|
102
|
+
height: number;
|
|
103
|
+
} | null;
|
|
65
104
|
verifiedAt?: Date;
|
|
66
105
|
/** The URL that was verified (from Citation.url when type: "url") */
|
|
67
106
|
verifiedUrl?: string | null;
|
|
@@ -138,7 +177,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
|
|
|
138
177
|
*
|
|
139
178
|
* Common fields (used by both types):
|
|
140
179
|
* - `fullPhrase`: The full context/excerpt containing the cited information
|
|
141
|
-
* - `
|
|
180
|
+
* - `anchorText`: The specific key phrase being cited (must be substring of fullPhrase)
|
|
142
181
|
* - `citationNumber`: Citation number for display (e.g., [1], [2])
|
|
143
182
|
*
|
|
144
183
|
* @example Document citation
|
|
@@ -149,7 +188,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
|
|
|
149
188
|
* pageNumber: 5,
|
|
150
189
|
* lineIds: [12, 13],
|
|
151
190
|
* fullPhrase: "Revenue increased by 15% in Q4.",
|
|
152
|
-
*
|
|
191
|
+
* anchorText: "increased by 15%",
|
|
153
192
|
* citationNumber: 1,
|
|
154
193
|
* };
|
|
155
194
|
* ```
|
|
@@ -162,7 +201,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
|
|
|
162
201
|
* domain: "example.com",
|
|
163
202
|
* title: "Q4 Financial Report",
|
|
164
203
|
* fullPhrase: "The TGU transitions require control, not brute strength.",
|
|
165
|
-
*
|
|
204
|
+
* anchorText: "require control, not brute strength",
|
|
166
205
|
* citationNumber: 1,
|
|
167
206
|
* };
|
|
168
207
|
* ```
|
|
@@ -177,7 +216,7 @@ interface Citation {
|
|
|
177
216
|
/** The full context/excerpt containing the cited information */
|
|
178
217
|
fullPhrase?: string | null;
|
|
179
218
|
/** The specific key phrase being cited (should be substring of fullPhrase) */
|
|
180
|
-
|
|
219
|
+
anchorText?: string | null;
|
|
181
220
|
/** Citation number for display (e.g., [1], [2], [3]) */
|
|
182
221
|
citationNumber?: number;
|
|
183
222
|
/** Reasoning for why this citation was included */
|
|
@@ -190,8 +229,8 @@ interface Citation {
|
|
|
190
229
|
pageNumber?: number | null;
|
|
191
230
|
/** Line IDs within the page */
|
|
192
231
|
lineIds?: number[] | null;
|
|
193
|
-
/** Start page
|
|
194
|
-
|
|
232
|
+
/** Start page ID for multi-page citations */
|
|
233
|
+
startPageId?: string | null;
|
|
195
234
|
/** Selection box coordinates in the document */
|
|
196
235
|
selection?: ScreenBox | null;
|
|
197
236
|
/** The source URL */
|
|
@@ -259,4 +298,4 @@ interface SourceMeta {
|
|
|
259
298
|
accessedAt?: Date | string;
|
|
260
299
|
}
|
|
261
300
|
|
|
262
|
-
export { BLANK_VERIFICATION as B, type Citation as C, DEFAULT_OUTPUT_IMAGE_FORMAT as D, type IVertex as I, NOT_FOUND_VERIFICATION_INDEX as N, type OutputImageFormat as O, PENDING_VERIFICATION_INDEX as P, type
|
|
301
|
+
export { BLANK_VERIFICATION as B, type Citation as C, DEFAULT_OUTPUT_IMAGE_FORMAT as D, type IVertex as I, type MatchedVariation as M, NOT_FOUND_VERIFICATION_INDEX as N, type OutputImageFormat as O, PENDING_VERIFICATION_INDEX as P, type ScreenBox as S, type UrlAccessStatus as U, type Verification as V, type CitationStatus as a, type CitationType as b, type ContentMatchStatus as c, type DeepTextItem as d, type SearchAttempt as e, type SearchMethod as f, type SearchStatus as g, type SourceMeta as h, type SourceType as i, type VerifyCitationRequest as j, type VerifyCitationResponse as k };
|
|
@@ -10,23 +10,56 @@ interface ScreenBox extends IVertex {
|
|
|
10
10
|
height: number;
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
type SearchStatus = "loading" | "pending" | "not_found" | "partial_text_found" | "found" | "
|
|
14
|
-
type SearchMethod = "exact_line_match" | "line_with_buffer" | "current_page" | "
|
|
13
|
+
type SearchStatus = "loading" | "pending" | "not_found" | "partial_text_found" | "found" | "found_anchor_text_only" | "found_phrase_missed_anchor_text" | "found_on_other_page" | "found_on_other_line" | "first_word_found" | "timestamp_wip" | "skipped";
|
|
14
|
+
type SearchMethod = "exact_line_match" | "line_with_buffer" | "current_page" | "anchor_text_fallback" | "adjacent_pages" | "expanded_window" | "regex_search" | "first_word_fallback";
|
|
15
|
+
/**
|
|
16
|
+
* Indicates which variation of the citation was matched.
|
|
17
|
+
* Trust decreases as we fall back from fullPhrase to anchorText to partial matches.
|
|
18
|
+
*
|
|
19
|
+
* HIGH TRUST (green indicator):
|
|
20
|
+
* - exact_full_phrase: Exact match on the full phrase
|
|
21
|
+
* - normalized_full_phrase: Full phrase matched with whitespace/case normalization
|
|
22
|
+
*
|
|
23
|
+
* MEDIUM TRUST (green indicator, shows context in popover):
|
|
24
|
+
* - exact_anchor_text: anchorText matched exactly, but fullPhrase was not found
|
|
25
|
+
* - normalized_anchor_text: anchorText matched with normalization
|
|
26
|
+
*
|
|
27
|
+
* LOW TRUST (amber indicator):
|
|
28
|
+
* - partial_full_phrase: Only part of fullPhrase matched (tables, columns, line breaks)
|
|
29
|
+
* - partial_anchor_text: Only part of anchorText matched
|
|
30
|
+
* - first_word_only: Only first word matched (lowest trust)
|
|
31
|
+
*/
|
|
32
|
+
type MatchedVariation = "exact_full_phrase" | "normalized_full_phrase" | "exact_anchor_text" | "normalized_anchor_text" | "partial_full_phrase" | "partial_anchor_text" | "first_word_only";
|
|
15
33
|
interface SearchAttempt {
|
|
16
34
|
method: SearchMethod;
|
|
17
35
|
success: boolean;
|
|
18
|
-
|
|
36
|
+
/** The primary phrase searched for */
|
|
37
|
+
searchPhrase: string;
|
|
38
|
+
/** Additional variations tried (e.g., ["$4.89", "4.89"]) */
|
|
39
|
+
searchVariations?: string[];
|
|
40
|
+
/** What searchPhrase contains: "full_phrase" or "anchor_text" */
|
|
41
|
+
searchPhraseType?: "full_phrase" | "anchor_text";
|
|
19
42
|
pageSearched?: number;
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
43
|
+
/** Line ID(s) searched within the page */
|
|
44
|
+
lineSearched?: number | number[];
|
|
45
|
+
/** Search scope: specific line, entire page, or whole document */
|
|
46
|
+
searchScope?: "line" | "page" | "document";
|
|
47
|
+
/** Where we expected to find the match */
|
|
48
|
+
expectedLocation?: {
|
|
49
|
+
page: number;
|
|
50
|
+
line?: number;
|
|
51
|
+
};
|
|
52
|
+
/** Where we actually found the match (if success: true) */
|
|
53
|
+
foundLocation?: {
|
|
54
|
+
page: number;
|
|
55
|
+
line?: number;
|
|
56
|
+
};
|
|
57
|
+
/** Which variation matched + trust level (only if success: true) */
|
|
58
|
+
matchedVariation?: MatchedVariation;
|
|
59
|
+
/** The actual text found in document (may exist even if success: false, for rejected matches) */
|
|
60
|
+
matchedText?: string;
|
|
61
|
+
/** e.g., "not found on expected page (2)" */
|
|
62
|
+
note?: string;
|
|
30
63
|
durationMs?: number;
|
|
31
64
|
}
|
|
32
65
|
|
|
@@ -56,12 +89,18 @@ interface Verification {
|
|
|
56
89
|
endTime?: string;
|
|
57
90
|
} | null;
|
|
58
91
|
verifiedFullPhrase?: string | null;
|
|
59
|
-
|
|
92
|
+
verifiedAnchorText?: string | null;
|
|
60
93
|
verifiedMatchSnippet?: string | null;
|
|
61
94
|
hitIndexWithinPage?: number | null;
|
|
62
95
|
phraseMatchDeepItem?: DeepTextItem;
|
|
63
|
-
|
|
96
|
+
/** Multiple boxes for anchorText highlighting when the anchorText spans multiple PDF items/words */
|
|
97
|
+
anchorTextMatchDeepItems?: DeepTextItem[];
|
|
64
98
|
verificationImageBase64?: string | null;
|
|
99
|
+
/** Dimensions of the verification image (for coordinate mapping) */
|
|
100
|
+
verificationImageDimensions?: {
|
|
101
|
+
width: number;
|
|
102
|
+
height: number;
|
|
103
|
+
} | null;
|
|
65
104
|
verifiedAt?: Date;
|
|
66
105
|
/** The URL that was verified (from Citation.url when type: "url") */
|
|
67
106
|
verifiedUrl?: string | null;
|
|
@@ -138,7 +177,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
|
|
|
138
177
|
*
|
|
139
178
|
* Common fields (used by both types):
|
|
140
179
|
* - `fullPhrase`: The full context/excerpt containing the cited information
|
|
141
|
-
* - `
|
|
180
|
+
* - `anchorText`: The specific key phrase being cited (must be substring of fullPhrase)
|
|
142
181
|
* - `citationNumber`: Citation number for display (e.g., [1], [2])
|
|
143
182
|
*
|
|
144
183
|
* @example Document citation
|
|
@@ -149,7 +188,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
|
|
|
149
188
|
* pageNumber: 5,
|
|
150
189
|
* lineIds: [12, 13],
|
|
151
190
|
* fullPhrase: "Revenue increased by 15% in Q4.",
|
|
152
|
-
*
|
|
191
|
+
* anchorText: "increased by 15%",
|
|
153
192
|
* citationNumber: 1,
|
|
154
193
|
* };
|
|
155
194
|
* ```
|
|
@@ -162,7 +201,7 @@ type SourceType = "web" | "pdf" | "document" | "social" | "video" | "news" | "ac
|
|
|
162
201
|
* domain: "example.com",
|
|
163
202
|
* title: "Q4 Financial Report",
|
|
164
203
|
* fullPhrase: "The TGU transitions require control, not brute strength.",
|
|
165
|
-
*
|
|
204
|
+
* anchorText: "require control, not brute strength",
|
|
166
205
|
* citationNumber: 1,
|
|
167
206
|
* };
|
|
168
207
|
* ```
|
|
@@ -177,7 +216,7 @@ interface Citation {
|
|
|
177
216
|
/** The full context/excerpt containing the cited information */
|
|
178
217
|
fullPhrase?: string | null;
|
|
179
218
|
/** The specific key phrase being cited (should be substring of fullPhrase) */
|
|
180
|
-
|
|
219
|
+
anchorText?: string | null;
|
|
181
220
|
/** Citation number for display (e.g., [1], [2], [3]) */
|
|
182
221
|
citationNumber?: number;
|
|
183
222
|
/** Reasoning for why this citation was included */
|
|
@@ -190,8 +229,8 @@ interface Citation {
|
|
|
190
229
|
pageNumber?: number | null;
|
|
191
230
|
/** Line IDs within the page */
|
|
192
231
|
lineIds?: number[] | null;
|
|
193
|
-
/** Start page
|
|
194
|
-
|
|
232
|
+
/** Start page ID for multi-page citations */
|
|
233
|
+
startPageId?: string | null;
|
|
195
234
|
/** Selection box coordinates in the document */
|
|
196
235
|
selection?: ScreenBox | null;
|
|
197
236
|
/** The source URL */
|
|
@@ -259,4 +298,4 @@ interface SourceMeta {
|
|
|
259
298
|
accessedAt?: Date | string;
|
|
260
299
|
}
|
|
261
300
|
|
|
262
|
-
export { BLANK_VERIFICATION as B, type Citation as C, DEFAULT_OUTPUT_IMAGE_FORMAT as D, type IVertex as I, NOT_FOUND_VERIFICATION_INDEX as N, type OutputImageFormat as O, PENDING_VERIFICATION_INDEX as P, type
|
|
301
|
+
export { BLANK_VERIFICATION as B, type Citation as C, DEFAULT_OUTPUT_IMAGE_FORMAT as D, type IVertex as I, type MatchedVariation as M, NOT_FOUND_VERIFICATION_INDEX as N, type OutputImageFormat as O, PENDING_VERIFICATION_INDEX as P, type ScreenBox as S, type UrlAccessStatus as U, type Verification as V, type CitationStatus as a, type CitationType as b, type ContentMatchStatus as c, type DeepTextItem as d, type SearchAttempt as e, type SearchMethod as f, type SearchStatus as g, type SourceMeta as h, type SourceType as i, type VerifyCitationRequest as j, type VerifyCitationResponse as k };
|
package/lib/index.cjs
CHANGED
|
@@ -1 +1,2 @@
|
|
|
1
|
-
'use strict';var
|
|
1
|
+
'use strict';var chunk7TORYXU4_cjs=require('./chunk-7TORYXU4.cjs'),chunkDS6SOU4L_cjs=require('./chunk-DS6SOU4L.cjs'),chunkHL3AXCDL_cjs=require('./chunk-HL3AXCDL.cjs'),chunk4UWAUWYL_cjs=require('./chunk-4UWAUWYL.cjs'),chunkWS4CQVDI_cjs=require('./chunk-WS4CQVDI.cjs');require('./chunk-BYLIBOAU.cjs');var rt=t=>{if(!t)return false;let e=t.trim();if(e.length<64)return false;let a=e?.[0];for(let n=1;n<e.length;n++)if(e[n]!==a)return false;return true};function nt(t){t=t.trim();let e=2,a=10,n=/[.?!](?=\s+|$)/g,r=[],I;for(;(I=n.exec(t))!==null;)r.push(I.index);if(r.length<2)return t;let c=r[r.length-1],C=r[r.length-2],i=t.substring(C+1,c+1),p=i.length;if(i.trim().slice(0,-1).length<a||p<=0||t.length<p*e)return t;let m=0,s=c+1;t.endsWith(i)&&(s=t.length);let T=-1;for(;;){let o=s-p;if(o<0)break;if(t.substring(o,s)===i)m++,T=o,s=o;else break}return m>=e?t.substring(0,T)+i:t}Object.defineProperty(exports,"DeepCitation",{enumerable:true,get:function(){return chunk7TORYXU4_cjs.a}});Object.defineProperty(exports,"compressPromptIds",{enumerable:true,get:function(){return chunkDS6SOU4L_cjs.a}});Object.defineProperty(exports,"decompressPromptIds",{enumerable:true,get:function(){return chunkDS6SOU4L_cjs.b}});Object.defineProperty(exports,"BLANK_VERIFICATION",{enumerable:true,get:function(){return chunkHL3AXCDL_cjs.d}});Object.defineProperty(exports,"DEFAULT_OUTPUT_IMAGE_FORMAT",{enumerable:true,get:function(){return chunkHL3AXCDL_cjs.a}});Object.defineProperty(exports,"NOT_FOUND_VERIFICATION_INDEX",{enumerable:true,get:function(){return chunkHL3AXCDL_cjs.b}});Object.defineProperty(exports,"PENDING_VERIFICATION_INDEX",{enumerable:true,get:function(){return chunkHL3AXCDL_cjs.c}});Object.defineProperty(exports,"CITATION_X_PADDING",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.k}});Object.defineProperty(exports,"CITATION_Y_PADDING",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.l}});Object.defineProperty(exports,"deferredCitationToCitation",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.t}});Object.defineProperty(exports,"extractVisibleText",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.w}});Object.defineProperty(exports,"generateCitationInstanceId",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.f}});Object.defineProperty(exports,"generateCitationKey",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.d}});Object.defineProperty(exports,"generateVerificationKey",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.e}});Object.defineProperty(exports,"getAllCitationsFromDeferredResponse",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.u}});Object.defineProperty(exports,"getAllCitationsFromLlmOutput",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.B}});Object.defineProperty(exports,"getCitationMarkerIds",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.y}});Object.defineProperty(exports,"getCitationPageNumber",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.q}});Object.defineProperty(exports,"getCitationStatus",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.z}});Object.defineProperty(exports,"getVerificationTextIndicator",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.m}});Object.defineProperty(exports,"groupCitationsByAttachmentId",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.C}});Object.defineProperty(exports,"groupCitationsByAttachmentIdObject",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.D}});Object.defineProperty(exports,"hasDeferredCitations",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.v}});Object.defineProperty(exports,"normalizeCitations",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.r}});Object.defineProperty(exports,"parseCitation",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.A}});Object.defineProperty(exports,"parseDeferredCitationResponse",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.s}});Object.defineProperty(exports,"removeLineIdMetadata",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.p}});Object.defineProperty(exports,"removePageNumberMetadata",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.o}});Object.defineProperty(exports,"replaceCitations",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.n}});Object.defineProperty(exports,"replaceDeferredMarkers",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.x}});Object.defineProperty(exports,"sha1Hash",{enumerable:true,get:function(){return chunk4UWAUWYL_cjs.a}});Object.defineProperty(exports,"AV_CITATION_PROMPT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.d}});Object.defineProperty(exports,"CITATION_AV_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.j}});Object.defineProperty(exports,"CITATION_AV_REMINDER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.f}});Object.defineProperty(exports,"CITATION_DATA_END_DELIMITER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.b}});Object.defineProperty(exports,"CITATION_DATA_START_DELIMITER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.a}});Object.defineProperty(exports,"CITATION_JSON_OUTPUT_FORMAT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.i}});Object.defineProperty(exports,"CITATION_PROMPT",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.c}});Object.defineProperty(exports,"CITATION_REMINDER",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.e}});Object.defineProperty(exports,"wrapCitationPrompt",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.h}});Object.defineProperty(exports,"wrapSystemCitationPrompt",{enumerable:true,get:function(){return chunkWS4CQVDI_cjs.g}});exports.cleanRepeatingLastSentence=nt;exports.isGeminiGarbage=rt;//# sourceMappingURL=index.cjs.map
|
|
2
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/parsing/parseWorkAround.ts"],"names":["isGeminiGarbage","content","trimmedContent","firstCharacter","i","cleanRepeatingLastSentence","text","MIN_REPETITIONS","MIN_SENTENCE_CONTENT_LENGTH","sentenceEndRegex","sentenceEndIndices","match","lastTerminatorIndex","secondLastTerminatorIndex","repeatingUnit","unitLength","repetitionsFound","currentCheckEndIndex","firstRepetitionStartIndex","checkStartIndex"],"mappings":"2SAEO,IAAMA,GAAmBC,CAAAA,EAAoB,CAClD,GAAI,CAACA,EAAS,OAAO,MAAA,CACrB,IAAMC,CAAAA,CAAiBD,EAAQ,IAAA,EAAK,CACpC,GAAIC,CAAAA,CAAe,MAAA,CAAS,GAAuC,OAAO,MAAA,CAE1E,IAAMC,CAAAA,CAAiBD,IAAiB,CAAC,CAAA,CAEzC,QAASE,CAAAA,CAAI,CAAA,CAAGA,EAAIF,CAAAA,CAAe,MAAA,CAAQE,CAAAA,EAAAA,CACzC,GAAIF,EAAeE,CAAC,CAAA,GAAMD,EAAgB,OAAO,MAAA,CAEnD,OAAO,KACT,EAGO,SAASE,EAAAA,CAA2BC,EAAsB,CAC/DA,CAAAA,CAAOA,CAAAA,CAAK,IAAA,GACZ,IAAMC,CAAAA,CAAkB,CAAA,CAClBC,CAAAA,CAA8B,GAE9BC,CAAAA,CAAmB,iBAAA,CACnBC,EAA+B,EAAC,CAClCC,EACJ,KAAA,CAAQA,CAAAA,CAAQF,CAAAA,CAAiB,IAAA,CAAKH,CAAI,CAAA,IAAO,IAAA,EAC/CI,EAAmB,IAAA,CAAKC,CAAAA,CAAM,KAAK,CAAA,CAGrC,GAAID,CAAAA,CAAmB,MAAA,CAAS,EAC9B,OAAOJ,CAAAA,CAGT,IAAMM,CAAAA,CAAsBF,CAAAA,CAAmBA,EAAmB,MAAA,CAAS,CAAC,CAAA,CACtEG,CAAAA,CAA4BH,EAAmBA,CAAAA,CAAmB,MAAA,CAAS,CAAC,CAAA,CAE5EI,EAAgBR,CAAAA,CAAK,SAAA,CAAUO,CAAAA,CAA4B,CAAA,CAAGD,EAAsB,CAAC,CAAA,CACrFG,EAAaD,CAAAA,CAAc,MAAA,CAUjC,GARwBA,CAAAA,CAAc,IAAA,EAAK,CAAE,KAAA,CAAM,EAAG,EAAE,CAAA,CACpC,OAASN,CAAAA,EAGzBO,CAAAA,EAAc,GAIdT,CAAAA,CAAK,MAAA,CAASS,CAAAA,CAAaR,CAAAA,CAC7B,OAAOD,CAAAA,CAGT,IAAIU,EAAmB,CAAA,CACnBC,CAAAA,CAAuBL,EAAsB,CAAA,CAC7CN,CAAAA,CAAK,QAAA,CAASQ,CAAa,IAC7BG,CAAAA,CAAuBX,CAAAA,CAAK,QAG9B,IAAIY,CAAAA,CAA4B,GAEhC,OAAa,CACX,IAAMC,CAAAA,CAAkBF,EAAuBF,CAAAA,CAE/C,GAAII,EAAkB,CAAA,CACpB,MAKF,GAFcb,CAAAA,CAAK,SAAA,CAAUa,CAAAA,CAAiBF,CAAoB,IAEpDH,CAAAA,CACZE,CAAAA,EAAAA,CACAE,EAA4BC,CAAAA,CAC5BF,CAAAA,CAAuBE,OAEvB,KAEJ,CAEA,OAAIH,CAAAA,EAAoBT,EACQD,CAAAA,CAAK,SAAA,CAAU,EAAGY,CAAyB,CAAA,CAClCJ,EAGhCR,CAEX","file":"index.cjs","sourcesContent":["//flash and flash lite get super confused if we ask for a MD table and infinite loop\nconst MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE = 64;\nexport const isGeminiGarbage = (content: string) => {\n if (!content) return false;\n const trimmedContent = content.trim();\n if (trimmedContent.length < MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE) return false;\n\n const firstCharacter = trimmedContent?.[0];\n\n for (let i = 1; i < trimmedContent.length; i++) {\n if (trimmedContent[i] !== firstCharacter) return false;\n }\n return true;\n};\n\n// helps clean up infinite rambling bug output from gemini\nexport function cleanRepeatingLastSentence(text: string): string {\n text = text.trim();\n const MIN_REPETITIONS = 2;\n const MIN_SENTENCE_CONTENT_LENGTH = 10;\n\n const sentenceEndRegex = /[.?!](?=\\s+|$)/g;\n const sentenceEndIndices: number[] = [];\n let match;\n while ((match = sentenceEndRegex.exec(text)) !== null) {\n sentenceEndIndices.push(match.index);\n }\n\n if (sentenceEndIndices.length < 2) {\n return text;\n }\n\n const lastTerminatorIndex = sentenceEndIndices[sentenceEndIndices.length - 1];\n const secondLastTerminatorIndex = sentenceEndIndices[sentenceEndIndices.length - 2];\n\n const repeatingUnit = text.substring(secondLastTerminatorIndex + 1, lastTerminatorIndex + 1);\n const unitLength = repeatingUnit.length;\n\n const sentenceContent = repeatingUnit.trim().slice(0, -1);\n if (sentenceContent.length < MIN_SENTENCE_CONTENT_LENGTH) {\n return text;\n }\n if (unitLength <= 0) {\n return text;\n }\n\n if (text.length < unitLength * MIN_REPETITIONS) {\n return text;\n }\n\n let repetitionsFound = 0;\n let currentCheckEndIndex = lastTerminatorIndex + 1;\n if (text.endsWith(repeatingUnit)) {\n currentCheckEndIndex = text.length;\n }\n\n let firstRepetitionStartIndex = -1;\n\n while (true) {\n const checkStartIndex = currentCheckEndIndex - unitLength;\n\n if (checkStartIndex < 0) {\n break;\n }\n\n const chunk = text.substring(checkStartIndex, currentCheckEndIndex);\n\n if (chunk === repeatingUnit) {\n repetitionsFound++;\n firstRepetitionStartIndex = checkStartIndex;\n currentCheckEndIndex = checkStartIndex;\n } else {\n break;\n }\n }\n\n if (repetitionsFound >= MIN_REPETITIONS) {\n const textBeforeRepetitions = text.substring(0, firstRepetitionStartIndex);\n const result = textBeforeRepetitions + repeatingUnit;\n return result;\n } else {\n return text;\n }\n}\n"]}
|
package/lib/index.d.cts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
export { CitationInput, DeepCitation, DeepCitationConfig, FileDataPart, FileInput, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsOptions, VerifyCitationsResponse
|
|
2
|
-
import { C as Citation, V as Verification, a as CitationStatus } from './index-
|
|
3
|
-
export { B as BLANK_VERIFICATION, b as CitationType,
|
|
4
|
-
|
|
5
|
-
export {
|
|
1
|
+
export { CitationInput, DeepCitation, DeepCitationConfig, FileDataPart, FileInput, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsOptions, VerifyCitationsResponse } from './client/index.cjs';
|
|
2
|
+
import { C as Citation, V as Verification, a as CitationStatus } from './index-BHjI8Bh1.cjs';
|
|
3
|
+
export { B as BLANK_VERIFICATION, b as CitationType, c as ContentMatchStatus, D as DEFAULT_OUTPUT_IMAGE_FORMAT, d as DeepTextItem, I as IVertex, N as NOT_FOUND_VERIFICATION_INDEX, O as OutputImageFormat, P as PENDING_VERIFICATION_INDEX, S as ScreenBox, e as SearchAttempt, f as SearchMethod, g as SearchStatus, h as SourceMeta, i as SourceType, U as UrlAccessStatus, j as VerifyCitationRequest, k as VerifyCitationResponse } from './index-BHjI8Bh1.cjs';
|
|
4
|
+
import { CitationData, ParsedCitationResponse } from './prompts/index.cjs';
|
|
5
|
+
export { AV_CITATION_PROMPT, CITATION_AV_JSON_OUTPUT_FORMAT, CITATION_AV_REMINDER, CITATION_DATA_END_DELIMITER, CITATION_DATA_START_DELIMITER, CITATION_JSON_OUTPUT_FORMAT, CITATION_PROMPT, CITATION_REMINDER, CompressedResult, WrapCitationPromptOptions, WrapCitationPromptResult, WrapSystemPromptOptions, compressPromptIds, decompressPromptIds, wrapCitationPrompt, wrapSystemCitationPrompt } from './prompts/index.cjs';
|
|
6
|
+
export { C as CITATION_X_PADDING, a as CITATION_Y_PADDING, g as generateCitationInstanceId, b as generateCitationKey, c as generateVerificationKey } from './utils-CCi9_JTv.cjs';
|
|
6
7
|
|
|
7
8
|
/**
|
|
8
9
|
* Calculates the verification status of a citation based on the found highlight and search state.
|
|
@@ -82,12 +83,136 @@ declare function groupCitationsByAttachmentIdObject(citations: Citation[] | {
|
|
|
82
83
|
};
|
|
83
84
|
};
|
|
84
85
|
|
|
86
|
+
/**
|
|
87
|
+
* Citation Parser
|
|
88
|
+
*
|
|
89
|
+
* Implements the "Split & Parse" strategy for the deferred JSON citation pattern.
|
|
90
|
+
* This parser extracts citations from LLM responses that use [N] markers in text
|
|
91
|
+
* and include a JSON data block at the end.
|
|
92
|
+
*
|
|
93
|
+
* Algorithm:
|
|
94
|
+
* 1. Detection: Look for the start delimiter <<<CITATION_DATA>>>
|
|
95
|
+
* 2. Splitting: Separate visible content from the citation data block
|
|
96
|
+
* 3. Data Extraction: Extract the JSON string between delimiters
|
|
97
|
+
* 4. Sanitization: Parse with JSON.parse, with fallback repair for common issues
|
|
98
|
+
* 5. Hydration: Map the JSON objects to a usable format
|
|
99
|
+
*/
|
|
100
|
+
|
|
101
|
+
/**
|
|
102
|
+
* Parses a citation response from an LLM.
|
|
103
|
+
*
|
|
104
|
+
* This function:
|
|
105
|
+
* 1. Finds the <<<CITATION_DATA>>> delimiter in the response
|
|
106
|
+
* 2. Splits the response into visible text and citation data
|
|
107
|
+
* 3. Parses the JSON citation data
|
|
108
|
+
* 4. Returns a structured result with both
|
|
109
|
+
*
|
|
110
|
+
* @param llmResponse - The full LLM response text
|
|
111
|
+
* @returns ParsedCitationResponse with visible text and parsed citations
|
|
112
|
+
*
|
|
113
|
+
* @example
|
|
114
|
+
* ```typescript
|
|
115
|
+
* const response = `
|
|
116
|
+
* The company grew 45% [1].
|
|
117
|
+
*
|
|
118
|
+
* <<<CITATION_DATA>>>
|
|
119
|
+
* [{"id": 1, "attachment_id": "abc", "full_phrase": "grew 45%", "anchor_text": "45%"}]
|
|
120
|
+
* <<<END_CITATION_DATA>>>
|
|
121
|
+
* `;
|
|
122
|
+
*
|
|
123
|
+
* const parsed = parseDeferredCitationResponse(response);
|
|
124
|
+
* console.log(parsed.visibleText); // "The company grew 45% [1]."
|
|
125
|
+
* console.log(parsed.citations); // [{id: 1, attachment_id: "abc", ...}]
|
|
126
|
+
* ```
|
|
127
|
+
*/
|
|
128
|
+
declare function parseDeferredCitationResponse(llmResponse: string): ParsedCitationResponse;
|
|
129
|
+
/**
|
|
130
|
+
* Converts a CitationData object to the standard Citation format.
|
|
131
|
+
*
|
|
132
|
+
* @param data - The citation data
|
|
133
|
+
* @param citationNumber - Optional override for citation number (defaults to data.id)
|
|
134
|
+
* @returns Standard Citation object
|
|
135
|
+
*/
|
|
136
|
+
declare function deferredCitationToCitation(data: CitationData, citationNumber?: number): Citation;
|
|
137
|
+
/**
|
|
138
|
+
* Extracts all citations from a citation response and returns them
|
|
139
|
+
* in the standard dictionary format used by the verification API.
|
|
140
|
+
*
|
|
141
|
+
* This function parses the response, converts each citation to the standard
|
|
142
|
+
* Citation format, and generates deterministic keys for each.
|
|
143
|
+
*
|
|
144
|
+
* @param llmResponse - The full LLM response with citation block
|
|
145
|
+
* @returns Dictionary of parsed Citation objects keyed by citation key
|
|
146
|
+
*
|
|
147
|
+
* @example
|
|
148
|
+
* ```typescript
|
|
149
|
+
* const citations = getAllCitationsFromDeferredResponse(llmOutput);
|
|
150
|
+
* // Returns: { "abc123...": { attachmentId: "...", fullPhrase: "...", ... }, ... }
|
|
151
|
+
* ```
|
|
152
|
+
*/
|
|
153
|
+
declare function getAllCitationsFromDeferredResponse(llmResponse: string): {
|
|
154
|
+
[key: string]: Citation;
|
|
155
|
+
};
|
|
156
|
+
/**
|
|
157
|
+
* Checks if a response contains citation markers.
|
|
158
|
+
*
|
|
159
|
+
* @param response - The LLM response to check
|
|
160
|
+
* @returns True if the response contains the citation data delimiter
|
|
161
|
+
*/
|
|
162
|
+
declare function hasDeferredCitations(response: string): boolean;
|
|
163
|
+
/**
|
|
164
|
+
* Extracts just the visible text from a response,
|
|
165
|
+
* removing the citation data block.
|
|
166
|
+
*
|
|
167
|
+
* @param llmResponse - The full LLM response
|
|
168
|
+
* @returns The visible text portion only
|
|
169
|
+
*/
|
|
170
|
+
declare function extractVisibleText(llmResponse: string): string;
|
|
171
|
+
/**
|
|
172
|
+
* Replaces [N] citation markers in text with optional content.
|
|
173
|
+
*
|
|
174
|
+
* @param text - The text containing [N] markers
|
|
175
|
+
* @param options - Configuration for replacement
|
|
176
|
+
* @returns The text with markers replaced
|
|
177
|
+
*
|
|
178
|
+
* @example
|
|
179
|
+
* ```typescript
|
|
180
|
+
* const text = "Revenue grew 45% [1] in Q4 [2].";
|
|
181
|
+
*
|
|
182
|
+
* // Remove markers entirely
|
|
183
|
+
* replaceDeferredMarkers(text);
|
|
184
|
+
* // Returns: "Revenue grew 45% in Q4."
|
|
185
|
+
*
|
|
186
|
+
* // Replace with anchor texts
|
|
187
|
+
* replaceDeferredMarkers(text, {
|
|
188
|
+
* citationMap: new Map([[1, { anchor_text: "45%" }], [2, { anchor_text: "Q4" }]]),
|
|
189
|
+
* showAnchorText: true,
|
|
190
|
+
* });
|
|
191
|
+
* // Returns: "Revenue grew 45% 45% in Q4 Q4."
|
|
192
|
+
* ```
|
|
193
|
+
*/
|
|
194
|
+
declare function replaceDeferredMarkers(text: string, options?: {
|
|
195
|
+
/** Map of citation IDs to their data */
|
|
196
|
+
citationMap?: Map<number, CitationData>;
|
|
197
|
+
/** Whether to show the anchor text after the marker */
|
|
198
|
+
showAnchorText?: boolean;
|
|
199
|
+
/** Custom replacement function */
|
|
200
|
+
replacer?: (id: number, data?: CitationData) => string;
|
|
201
|
+
}): string;
|
|
202
|
+
/**
|
|
203
|
+
* Gets all citation marker IDs found in a text.
|
|
204
|
+
*
|
|
205
|
+
* @param text - The text to scan for [N] markers
|
|
206
|
+
* @returns Array of citation IDs in order of appearance
|
|
207
|
+
*/
|
|
208
|
+
declare function getCitationMarkerIds(text: string): number[];
|
|
209
|
+
|
|
85
210
|
interface ReplaceCitationsOptions {
|
|
86
211
|
/**
|
|
87
|
-
* If true, leaves the
|
|
212
|
+
* If true, leaves the anchor_text text behind when removing citations.
|
|
88
213
|
* @default false
|
|
89
214
|
*/
|
|
90
|
-
|
|
215
|
+
leaveAnchorTextBehind?: boolean;
|
|
91
216
|
/**
|
|
92
217
|
* Map of citation keys to verification results.
|
|
93
218
|
* Used to determine verification status for each citation.
|
|
@@ -120,12 +245,12 @@ declare const getVerificationTextIndicator: (verification: Verification | null |
|
|
|
120
245
|
* // Remove all citations
|
|
121
246
|
* const clean = replaceCitations(llmOutput);
|
|
122
247
|
*
|
|
123
|
-
* // Leave
|
|
124
|
-
* const
|
|
248
|
+
* // Leave anchor_text text behind
|
|
249
|
+
* const withAnchorTexts = replaceCitations(llmOutput, { leaveAnchorTextBehind: true });
|
|
125
250
|
*
|
|
126
251
|
* // Show verification status indicators
|
|
127
252
|
* const withStatus = replaceCitations(llmOutput, {
|
|
128
|
-
*
|
|
253
|
+
* leaveAnchorTextBehind: true,
|
|
129
254
|
* verifications: verificationMap,
|
|
130
255
|
* showVerificationStatus: true,
|
|
131
256
|
* });
|
|
@@ -133,13 +258,9 @@ declare const getVerificationTextIndicator: (verification: Verification | null |
|
|
|
133
258
|
* ```
|
|
134
259
|
*/
|
|
135
260
|
declare const replaceCitations: (markdownWithCitations: string, options?: ReplaceCitationsOptions) => string;
|
|
136
|
-
/**
|
|
137
|
-
* @deprecated Use `replaceCitations` instead. This function is kept for backward compatibility.
|
|
138
|
-
*/
|
|
139
|
-
declare const removeCitations: (markdownWithCitations: string, leaveKeySpanBehind?: boolean) => string;
|
|
140
261
|
declare const removePageNumberMetadata: (pageText: string) => string;
|
|
141
262
|
declare const removeLineIdMetadata: (pageText: string) => string;
|
|
142
|
-
declare const getCitationPageNumber: (
|
|
263
|
+
declare const getCitationPageNumber: (startPageId?: string | null) => number | null;
|
|
143
264
|
declare const normalizeCitations: (response: string) => string;
|
|
144
265
|
|
|
145
266
|
declare const isGeminiGarbage: (content: string) => boolean;
|
|
@@ -156,4 +277,4 @@ declare function cleanRepeatingLastSentence(text: string): string;
|
|
|
156
277
|
*/
|
|
157
278
|
declare function sha1Hash(data: string | any): string;
|
|
158
279
|
|
|
159
|
-
export { Citation, CitationStatus, type ReplaceCitationsOptions, Verification, cleanRepeatingLastSentence, getAllCitationsFromLlmOutput, getCitationPageNumber, getCitationStatus, getVerificationTextIndicator, groupCitationsByAttachmentId, groupCitationsByAttachmentIdObject, isGeminiGarbage, normalizeCitations, parseCitation,
|
|
280
|
+
export { Citation, CitationData, CitationStatus, ParsedCitationResponse, type ReplaceCitationsOptions, Verification, cleanRepeatingLastSentence, deferredCitationToCitation, extractVisibleText, getAllCitationsFromDeferredResponse, getAllCitationsFromLlmOutput, getCitationMarkerIds, getCitationPageNumber, getCitationStatus, getVerificationTextIndicator, groupCitationsByAttachmentId, groupCitationsByAttachmentIdObject, hasDeferredCitations, isGeminiGarbage, normalizeCitations, parseCitation, parseDeferredCitationResponse, removeLineIdMetadata, removePageNumberMetadata, replaceCitations, replaceDeferredMarkers, sha1Hash };
|