@deepcitation/deepcitation-js 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +71 -1197
  2. package/lib/client/DeepCitation.d.ts +204 -0
  3. package/lib/client/DeepCitation.js +473 -0
  4. package/lib/client/index.d.ts +2 -0
  5. package/lib/client/index.js +1 -0
  6. package/lib/client/types.d.ts +157 -0
  7. package/lib/client/types.js +1 -0
  8. package/lib/index.d.ts +25 -0
  9. package/lib/index.js +22 -0
  10. package/lib/parsing/normalizeCitation.d.ts +5 -0
  11. package/lib/parsing/normalizeCitation.js +182 -0
  12. package/lib/parsing/parseCitation.d.ts +79 -0
  13. package/lib/parsing/parseCitation.js +371 -0
  14. package/lib/parsing/parseWorkAround.d.ts +2 -0
  15. package/lib/parsing/parseWorkAround.js +73 -0
  16. package/lib/prompts/citationPrompts.d.ts +133 -0
  17. package/lib/prompts/citationPrompts.js +152 -0
  18. package/lib/prompts/index.d.ts +3 -0
  19. package/lib/prompts/index.js +3 -0
  20. package/lib/prompts/promptCompression.d.ts +14 -0
  21. package/lib/prompts/promptCompression.js +109 -0
  22. package/lib/prompts/types.d.ts +4 -0
  23. package/lib/prompts/types.js +1 -0
  24. package/lib/react/CitationComponent.d.ts +134 -0
  25. package/lib/react/CitationComponent.js +376 -0
  26. package/lib/react/CitationVariants.d.ts +135 -0
  27. package/lib/react/CitationVariants.js +283 -0
  28. package/lib/react/DiffDisplay.d.ts +10 -0
  29. package/lib/react/DiffDisplay.js +33 -0
  30. package/lib/react/UrlCitationComponent.d.ts +83 -0
  31. package/lib/react/UrlCitationComponent.js +224 -0
  32. package/lib/react/VerificationTabs.d.ts +10 -0
  33. package/lib/react/VerificationTabs.js +36 -0
  34. package/lib/react/icons.d.ts +8 -0
  35. package/lib/react/icons.js +9 -0
  36. package/lib/react/index.d.ts +16 -0
  37. package/lib/react/index.js +18 -0
  38. package/lib/react/primitives.d.ts +104 -0
  39. package/lib/react/primitives.js +190 -0
  40. package/lib/react/types.d.ts +192 -0
  41. package/lib/react/types.js +1 -0
  42. package/lib/react/useSmartDiff.d.ts +16 -0
  43. package/lib/react/useSmartDiff.js +64 -0
  44. package/lib/react/utils.d.ts +34 -0
  45. package/lib/react/utils.js +59 -0
  46. package/lib/types/boxes.d.ts +11 -0
  47. package/lib/types/boxes.js +1 -0
  48. package/lib/types/citation.d.ts +44 -0
  49. package/lib/types/citation.js +2 -0
  50. package/lib/types/foundHighlight.d.ts +23 -0
  51. package/lib/types/foundHighlight.js +22 -0
  52. package/lib/types/index.d.ts +11 -0
  53. package/lib/types/index.js +7 -0
  54. package/lib/types/search.d.ts +30 -0
  55. package/lib/types/search.js +1 -0
  56. package/lib/utils/sha.d.ts +10 -0
  57. package/lib/utils/sha.js +108 -0
  58. package/package.json +5 -2
@@ -0,0 +1,371 @@
1
+ import { sha1Hash } from "../utils/sha.js";
2
+ import { normalizeCitations } from "./normalizeCitation.js";
3
+ import { generateCitationKey } from "../react/utils.js";
4
/**
 * Derives the verification flags of a citation from its found highlight.
 *
 * Only `foundHighlight.searchState.status` is inspected. A missing highlight
 * or a missing search state is reported as pending.
 *
 * @param foundHighlight - The found highlight location, or null/undefined if not found
 * @returns `{ isVerified, isMiss, isPartialMatch, isPending }` boolean flags
 */
export function getCitationStatus(foundHighlight) {
    // Statuses that count as a partial (but still verified) match.
    const PARTIAL_STATUSES = [
        "partial_text_found",
        "found_on_other_page",
        "found_on_other_line",
        "first_word_found",
    ];
    // Non-partial statuses that also count as verified.
    const OTHER_VERIFIED_STATUSES = ["found", "found_value_only", "found_phrase_missed_value"];
    const PENDING_STATUSES = ["pending", "loading"];
    const searchState = foundHighlight?.searchState;
    const status = searchState?.status;
    const isPartialMatch = PARTIAL_STATUSES.includes(status);
    return {
        isVerified: isPartialMatch || OTHER_VERIFIED_STATUSES.includes(status),
        isMiss: status === "not_found",
        isPartialMatch,
        isPending: !searchState || PENDING_STATUSES.includes(status),
    };
}
23
/**
 * Parses the `<cite ... />` tag of a text fragment into a citation record.
 *
 * The tag is taken to be the first `<cite` in the fragment together with the
 * first `/>` that FOLLOWS it, and is matched against two layouts:
 * - document: `file_id`, `start_page_key`, `full_phrase`, `line_ids`, plus an
 *   optional `value` or `reasoning` attribute
 * - audio/video: `file_id`, `full_phrase`, `timestamps`, plus an optional
 *   `value` or `reasoning` attribute
 * When the audio/video layout matches, its file id, phrase and optional
 * attribute take precedence over the document layout.
 *
 * @param fragment - Text that may contain a `<cite ... />` tag
 * @param mdAttachmentId - Fallback file id, used unless the tag's own file id is exactly 20 characters long
 * @param citationCounterRef - Optional `{ current }` counter; a truthy `current` becomes the citation number and is then incremented (a missing or falsy counter yields `undefined`)
 * @param isVerbose - When true, errors raised while parsing line ids are logged via `console.error`
 * @returns `{ beforeCite, afterCite, citation }`: the text before the tag, the text after it, and the parsed fields (left `undefined` where nothing matched)
 */
export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVerbose) => {
    // Helper: Remove wrapper quotes and unescape internal single quotes (e.g. It\'s -> It's)
    const cleanAndUnescape = (str) => {
        if (!str)
            return undefined;
        // Remove surrounding quotes if present (the regexes normally exclude them already)
        const trimmed = str.replace(/^['"]|['"]$/g, "");
        // Replace escaped single quotes with actual single quotes
        return trimmed.replace(/\\'/g, "'");
    };
    // Picks the file id to report: a 20-character id from the tag wins, otherwise the fallback.
    const resolveAttachmentId = (rawFileId) => rawFileId?.length === 20 ? rawFileId : mdAttachmentId || rawFileId;
    const citationNumber = citationCounterRef?.current ? citationCounterRef.current++ : undefined;
    const citeStart = fragment.indexOf("<cite");
    // Look for the terminator only after the opening "<cite": a self-closing tag
    // earlier in the fragment (e.g. "<br/>") must not be mistaken for the end of
    // the citation.
    const citeClose = fragment.indexOf("/>", Math.max(citeStart, 0));
    const beforeCite = fragment.substring(0, citeStart);
    const afterCite = citeClose >= 0 ? fragment.slice(citeClose + 2) : "";
    const middleCite = citeStart >= 0 && citeClose >= 0 ? fragment.substring(citeStart, citeClose + 2) : "";
    // GROUPS:
    // 1: fileId
    // 2: start_page number
    // 3: index number
    // 4: full_phrase content (escaped)
    // 5: line_ids content
    // 6: Optional Key (value|reasoning)
    // 7: Optional Value content (escaped)
    const citationRegex = /<cite\s+file(?:_id|Id)='(\w{0,25})'\s+start_page[\_a-zA-Z]*='page[\_a-zA-Z]*(\d+)_index_(\d+)'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+line(?:_ids|Ids)='([^']+)'(?:\s+(value|reasoning)='((?:[^'\\]|\\.)*)')?\s*\/>/;
    const match = citationRegex.exec(middleCite);
    const rawCitationMd = match?.[0];
    const pageNumber = match?.[2] ? parseInt(match[2], 10) : undefined;
    let fileId = match?.[1];
    let attachmentId = resolveAttachmentId(fileId);
    // Use helper to handle escaped quotes inside the phrase
    let fullPhrase = cleanAndUnescape(match?.[4]);
    // Handle the optional attribute (value or reasoning)
    let value;
    let reasoning;
    const optionalKey = match?.[6]; // "value" or "reasoning"
    const optionalContent = cleanAndUnescape(match?.[7]);
    if (optionalKey === "value") {
        value = optionalContent;
    }
    else if (optionalKey === "reasoning") {
        reasoning = optionalContent;
    }
    let lineIds;
    try {
        // match[5] is line_ids; strip letters, underscores, brackets, braces and colons first
        const lineIdsString = match?.[5]?.replace(/[A-Za-z_[\](){}:]/g, "");
        lineIds = lineIdsString
            ? lineIdsString
                .split(",")
                .map((id) => parseInt(id, 10))
                .filter((id) => !isNaN(id))
                .sort((a, b) => a - b)
            : undefined;
    }
    catch (e) {
        if (isVerbose)
            console.error("Error parsing lineIds", e);
    }
    // GROUPS for AV:
    // 1: fileId
    // 2: full_phrase content (escaped)
    // 3: timestamps content
    // 4: Optional Key (value|reasoning)
    // 5: Optional Value content (escaped)
    const avCitationRegex = /<cite\s+file(?:_id|Id)='(\w{0,25})'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+timestamps='([^']+)'(?:\s+(value|reasoning)='((?:[^'\\]|\\.)*)')?\s*\/>/;
    const avMatch = avCitationRegex.exec(middleCite);
    let timestamps;
    if (avMatch) {
        fileId = avMatch[1];
        attachmentId = resolveAttachmentId(fileId);
        fullPhrase = cleanAndUnescape(avMatch[2]);
        const timestampsString = avMatch[3]?.replace(/timestamps=['"]|['"]/g, "");
        // Expected shape is "start-end"; anything after a second "-" is ignored.
        const [startTime, endTime] = timestampsString?.split("-") || [];
        const avOptionalKey = avMatch[4];
        const avOptionalContent = cleanAndUnescape(avMatch[5]);
        if (avOptionalKey === "value") {
            value = avOptionalContent;
        }
        else if (avOptionalKey === "reasoning") {
            reasoning = avOptionalContent;
        }
        timestamps = { startTime, endTime };
    }
    // First 8 characters of the fragment's SHA-1, stored with the citation as `fragmentContext`.
    const fragmentContext = sha1Hash(fragment).toString().slice(0, 8);
    const citation = {
        fragmentContext,
        fileId: attachmentId,
        pageNumber,
        fullPhrase,
        citationNumber,
        lineIds,
        rawCitationMd,
        beforeCite,
        value,
        timestamps,
        reasoning,
    };
    return {
        beforeCite,
        afterCite,
        citation,
    };
};
127
/**
 * Parses a JSON-based citation object into a Citation.
 * Supports both camelCase and snake_case property names.
 *
 * Input is treated as untrusted model output: a `startPageKey` that is not a
 * string and `lineIds` that are not an array are ignored instead of throwing
 * or producing garbage.
 *
 * @param jsonCitation - The JSON citation object (can have camelCase or snake_case properties)
 * @param citationNumber - Optional citation number for ordering
 * @returns Parsed Citation object, or `null` when the input is empty or has no full phrase
 */
const parseJsonCitation = (jsonCitation, citationNumber) => {
    if (!jsonCitation) {
        return null;
    }
    // Support both camelCase and snake_case property names
    const fullPhrase = jsonCitation.fullPhrase ?? jsonCitation.full_phrase;
    if (!fullPhrase) {
        return null;
    }
    const startPageKey = jsonCitation.startPageKey ?? jsonCitation.start_page_key;
    const rawLineIds = jsonCitation.lineIds ?? jsonCitation.line_ids;
    const fileId = jsonCitation.fileId ?? jsonCitation.file_id;
    // Parse startPageKey format: "page_number_PAGE_index_INDEX"
    let pageNumber;
    if (typeof startPageKey === "string") {
        const pageMatch = /page[_a-zA-Z]*(\d+)_index_(\d+)/i.exec(startPageKey);
        if (pageMatch) {
            pageNumber = parseInt(pageMatch[1], 10);
        }
    }
    // Sort a copy of the line ids; anything that is not a non-empty array is dropped
    const lineIds = Array.isArray(rawLineIds) && rawLineIds.length > 0
        ? [...rawLineIds].sort((a, b) => a - b)
        : undefined;
    return {
        fileId,
        pageNumber,
        fullPhrase,
        citationNumber,
        lineIds,
        reasoning: jsonCitation.reasoning,
        value: jsonCitation.value,
    };
};
170
/**
 * Tells whether a value is a non-null object carrying at least one
 * citation-style key, in either camelCase or snake_case spelling.
 */
const hasCitationProperties = (item) => {
    if (typeof item !== "object" || item === null) {
        return false;
    }
    const citationKeys = [
        "fullPhrase",
        "full_phrase",
        "startPageKey",
        "start_page_key",
        "lineIds",
        "line_ids",
    ];
    return citationKeys.some((key) => key in item);
};
181
/**
 * Decides whether a value looks like JSON-based citations: either an array in
 * which at least one entry has citation-like properties, or a single object
 * that has them (camelCase and snake_case are both recognised).
 */
const isJsonCitationFormat = (data) => {
    if (typeof data !== "object" || data === null) {
        return false;
    }
    // An empty array never qualifies because `some` is false for it.
    return Array.isArray(data)
        ? data.some((entry) => hasCitationProperties(entry))
        : hasCitationProperties(data);
};
194
/**
 * Builds a citation dictionary from JSON input (an array or a single object).
 * Entries are numbered by their 1-based position; entries that do not parse
 * or have no full phrase are left out but still consume their number.
 */
const extractJsonCitations = (data) => {
    const items = Array.isArray(data) ? data : [data];
    const citations = {};
    items.forEach((item, position) => {
        const parsed = parseJsonCitation(item, position + 1);
        if (parsed && parsed.fullPhrase) {
            citations[generateCitationKey(parsed)] = parsed;
        }
    });
    return citations;
};
210
/**
 * Walks an object tree and appends to `found` every value stored under a
 * `citation` or `citations` property that matches the JSON citation format.
 *
 * Values under those two keys are collected but not descended into; every
 * other property and every array element is visited recursively. There is no
 * protection against cyclic references.
 */
const findJsonCitationsInObject = (obj, found) => {
    if (!obj || typeof obj !== "object") {
        return;
    }
    const citationKeys = ["citation", "citations"];
    // Collect matches held directly on this node
    for (const prop of citationKeys) {
        const candidate = obj[prop];
        if (candidate && isJsonCitationFormat(candidate)) {
            found.push(...(Array.isArray(candidate) ? candidate : [candidate]));
        }
    }
    // Descend into array elements, or into every property except the citation keys
    const children = Array.isArray(obj)
        ? obj
        : Object.keys(obj)
            .filter((key) => !citationKeys.includes(key))
            .map((key) => obj[key]);
    for (const child of children) {
        findJsonCitationsInObject(child, found);
    }
};
240
/**
 * Collects every `<cite ... />` tag found in the normalized text and parses
 * each one into a citation, numbering them from 1 in order of appearance.
 * Tags that yield no full phrase are skipped.
 */
const extractXmlCitations = (text) => {
    // Find all <cite ... /> tags
    const citeTags = normalizeCitations(text).match(/<cite\s+[^>]*\/>/g) ?? [];
    const counter = { current: 1 };
    const citations = {};
    for (const tag of citeTags) {
        const parsed = parseCitation(tag, undefined, counter).citation;
        if (parsed && parsed.fullPhrase) {
            citations[generateCitationKey(parsed)] = parsed;
        }
    }
    return citations;
};
261
/**
 * Extracts all citations from LLM output.
 * Handles XML `<cite ... />` tags embedded in text as well as JSON-based citation objects.
 *
 * For object input:
 * - if the root value itself is in JSON citation format it is parsed directly;
 *   otherwise the object is traversed for `citation` / `citations` properties
 * - the object is additionally stringified and scanned for embedded XML citations;
 *   on a key collision the XML result overwrites the JSON one
 *
 * Falsy input, and input that is neither a string nor an object, yields `{}`.
 *
 * @param llmOutput - The LLM output (string or object)
 * @returns Dictionary of parsed Citation objects keyed by citation key
 */
export const getAllCitationsFromLlmOutput = (llmOutput) => {
    if (!llmOutput) {
        return {};
    }
    if (typeof llmOutput === "string") {
        // String input - parse for XML citations
        return Object.assign({}, extractXmlCitations(llmOutput));
    }
    if (typeof llmOutput !== "object") {
        return {};
    }
    let jsonCitations = {};
    if (isJsonCitationFormat(llmOutput)) {
        // The root object itself is JSON citation format
        jsonCitations = extractJsonCitations(llmOutput);
    }
    else {
        // Traverse object for nested citation/citations properties
        const nested = [];
        findJsonCitationsInObject(llmOutput, nested);
        if (nested.length > 0) {
            jsonCitations = extractJsonCitations(nested);
        }
    }
    // NOTE(review): JSON.stringify escapes backslashes and double quotes, so a
    // <cite /> attribute containing \' or " reaches the XML parser in escaped
    // form. Confirm that normalizeCitations compensates for this.
    const xmlCitations = extractXmlCitations(JSON.stringify(llmOutput));
    return Object.assign({}, jsonCitations, xmlCitations);
};
303
/**
 * Buckets citations by `fileId` so each source file can be verified separately.
 * Citations without a `fileId` are collected under the empty-string key.
 *
 * @param citations - Array of Citation objects or a dictionary of citations
 * @returns Map of fileId to dictionary of citations from that file
 *
 * @example
 * ```typescript
 * const citations = getAllCitationsFromLlmOutput(response.content);
 * const citationsByFile = groupCitationsByFileId(citations);
 *
 * // Verify citations for each file
 * for (const [fileId, fileCitations] of citationsByFile) {
 *   const verified = await dc.verifyCitations(fileId, fileCitations);
 *   // Process verification results...
 * }
 * ```
 */
export function groupCitationsByFileId(citations) {
    // Array input gets a generated key per citation, falling back to its 1-based position
    const entries = Array.isArray(citations)
        ? citations.map((c, idx) => [generateCitationKey(c) || String(idx + 1), c])
        : Object.entries(citations);
    const grouped = new Map();
    entries.forEach(([key, citation]) => {
        const fileId = citation.fileId || "";
        const bucket = grouped.get(fileId) ?? {};
        bucket[key] = citation;
        grouped.set(fileId, bucket);
    });
    return grouped;
}
338
+ /**
339
+ * Groups citations by their fileId and returns as a plain object.
340
+ * Alternative to groupCitationsByFileId that returns a plain object instead of a Map.
341
+ *
342
+ * @param citations - Array of Citation objects or a dictionary of citations
343
+ * @returns Object with fileId keys mapping to citation dictionaries
344
+ *
345
+ * @example
346
+ * ```typescript
347
+ * const citations = getAllCitationsFromLlmOutput(response.content);
348
+ * const citationsByFile = groupCitationsByFileIdObject(citations);
349
+ *
350
+ * // Verify citations for each file using Promise.all
351
+ * const verificationPromises = Object.entries(citationsByFile).map(
352
+ * ([fileId, fileCitations]) => dc.verifyCitations(fileId, fileCitations)
353
+ * );
354
+ * const results = await Promise.all(verificationPromises);
355
+ * ```
356
+ */
357
+ export function groupCitationsByFileIdObject(citations) {
358
+ const grouped = {};
359
+ // Normalize input to entries
360
+ const entries = Array.isArray(citations)
361
+ ? citations.map((c, idx) => [generateCitationKey(c) || String(idx + 1), c])
362
+ : Object.entries(citations);
363
+ for (const [key, citation] of entries) {
364
+ const fileId = citation.fileId || "";
365
+ if (!grouped[fileId]) {
366
+ grouped[fileId] = {};
367
+ }
368
+ grouped[fileId][key] = citation;
369
+ }
370
+ return grouped;
371
+ }
@@ -0,0 +1,2 @@
1
/** Reports whether `content`, once trimmed, is at least 64 characters long and made up of one single repeated character. Empty input yields `false`. */
+ export declare const isGeminiGarbage: (content: string) => boolean;
2
/** Trims `text` and, when its final sentence (at least 10 characters of content) is repeated back-to-back two or more times at the end, collapses that run to a single occurrence; otherwise returns the trimmed text unchanged. */
+ export declare function cleanRepeatingLastSentence(text: string): string;
@@ -0,0 +1,73 @@
1
// Flash and flash lite get very confused when asked for a Markdown table and
// can loop forever; output shorter than this is never treated as garbage.
const MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE = 64;
/**
 * Detects degenerate model output: text that, after trimming, is at least
 * MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE long and consists of one character
 * repeated throughout. Falsy content is never garbage.
 */
export const isGeminiGarbage = (content) => {
    const trimmed = content ? content.trim() : "";
    if (trimmed.length < MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE) {
        return false;
    }
    const first = trimmed[0];
    // split("") compares UTF-16 code units, exactly like indexed access does
    return trimmed.split("").every((unit) => unit === first);
};
16
/**
 * Cleans up the "infinite rambling" failure mode of Gemini, where the last
 * sentence is emitted over and over.
 *
 * The text is trimmed and its last sentence, the span between the final two
 * sentence terminators (`.`, `?` or `!` followed by whitespace or the end), is
 * taken as the repeating unit. If that unit occurs at least twice
 * back-to-back at the end of the text, the run is replaced by one copy and
 * anything after the last terminator is dropped. Units with fewer than 10
 * characters of content are never collapsed.
 *
 * @param text - Raw model output
 * @returns The trimmed text, with a trailing run of the repeated sentence collapsed
 */
export function cleanRepeatingLastSentence(text) {
    const MIN_REPETITIONS = 2;
    const MIN_SENTENCE_CONTENT_LENGTH = 10;
    const source = text.trim();
    const terminatorPositions = Array.from(source.matchAll(/[.?!](?=\s+|$)/g), (hit) => hit.index);
    if (terminatorPositions.length < 2) {
        return source;
    }
    const [previousEnd, lastEnd] = terminatorPositions.slice(-2);
    // The unit keeps its leading whitespace and its closing terminator.
    const unit = source.substring(previousEnd + 1, lastEnd + 1);
    const unitSize = unit.length;
    const contentSize = unit.trim().slice(0, -1).length;
    const cannotRepeat = contentSize < MIN_SENTENCE_CONTENT_LENGTH ||
        unitSize <= 0 ||
        source.length < unitSize * MIN_REPETITIONS;
    if (cannotRepeat) {
        return source;
    }
    // Walk backwards one unit at a time for as long as the chunk equals the unit.
    let windowEnd = source.endsWith(unit) ? source.length : lastEnd + 1;
    let windowStart = windowEnd - unitSize;
    let repetitions = 0;
    let runStart = -1;
    while (windowStart >= 0 && source.substring(windowStart, windowEnd) === unit) {
        repetitions += 1;
        runStart = windowStart;
        windowEnd = windowStart;
        windowStart = windowEnd - unitSize;
    }
    return repetitions >= MIN_REPETITIONS ? source.substring(0, runStart) + unit : source;
}
@@ -0,0 +1,133 @@
1
+ export declare const CITATION_MARKDOWN_SYNTAX_PROMPT = "\nCitation syntax to use within Markdown:\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text /> (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />\n\n\u2022 Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.\n\u2022 start_page_key, full_phrase, and line_ids are required for each citation.\n\u2022 Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.\n\u2022 Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.\n\u2022 These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.\n\u2022 The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.\n\u2022 We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.\n";
2
+ export declare const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = "\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' full_phrase='the verbatim text of the phrase (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />\n\u2022 These citations are displayed in-line or in the relevant list item, and are not grouped at the end of the document.\n";
3
/** Options accepted by `wrapSystemCitationPrompt`. */
+ export interface WrapSystemPromptOptions {
4
+ /** The original system prompt to wrap with citation instructions */
5
+ systemPrompt: string;
6
+ /** Whether to use audio/video citation format (with timestamps) instead of text-based (with line IDs) */
7
+ isAudioVideo?: boolean;
8
/** NOTE(review): presumably selects whether the citation instructions are placed before the system prompt rather than after it; the implementation is not visible here, so confirm in citationPrompts.js. */
+ prependCitationInstructions?: boolean;
9
+ }
10
+ export interface WrapCitationPromptOptions {
11
+ /** The original system prompt to wrap with citation instructions */
12
+ systemPrompt: string;
13
+ /** The original user prompt */
14
+ userPrompt: string;
15
+ /** The extracted file text with metadata (from uploadFile response). Can be a single string or array for multiple files. */
16
+ fileDeepText?: string | string[];
17
+ /** Whether to use audio/video citation format (with timestamps) instead of text-based (with line IDs) */
18
+ isAudioVideo?: boolean;
19
+ }
20
+ export interface WrapCitationPromptResult {
21
+ /** Enhanced system prompt with citation instructions */
22
+ enhancedSystemPrompt: string;
23
+ /** Enhanced user prompt (currently passed through unchanged) */
24
+ enhancedUserPrompt: string;
25
+ }
26
+ /**
27
+ * Wraps your existing system prompt with DeepCitation's citation syntax instructions.
28
+ * This enables LLMs to output verifiable citations that can be checked against source documents.
29
+ *
30
+ * @example
31
+ * ```typescript
32
+ * import { wrapSystemCitationPrompt } from '@deepcitation/deepcitation-js';
33
+ *
34
+ * const systemPrompt = "You are a helpful assistant that analyzes documents.";
35
+ * const enhanced = wrapSystemCitationPrompt({ systemPrompt });
36
+ *
37
+ * // Use enhanced prompt with your LLM
38
+ * const response = await openai.chat.completions.create({
39
+ * messages: [{ role: "system", content: enhanced }],
40
+ * // ...
41
+ * });
42
+ * ```
43
+ */
44
+ export declare function wrapSystemCitationPrompt(options: WrapSystemPromptOptions): string;
45
+ /**
46
+ * Wraps both system and user prompts with DeepCitation's citation syntax instructions.
47
+ * This is the recommended way to prepare prompts for citation verification.
48
+ *
49
+ * @example
50
+ * ```typescript
51
+ * import { wrapCitationPrompt } from '@deepcitation/deepcitation-js';
52
+ *
53
+ * // Single file
54
+ * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
55
+ * systemPrompt: "You are a helpful assistant.",
56
+ * userPrompt: "Analyze this document and summarize it.",
57
+ * fileDeepText, // from uploadFile response
58
+ * });
59
+ *
60
+ * // Multiple files
61
+ * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
62
+ * systemPrompt: "You are a helpful assistant.",
63
+ * userPrompt: "Compare these documents.",
64
+ * fileDeepText: [fileDeepText1, fileDeepText2], // array of file texts
65
+ * });
66
+ *
67
+ * // Use enhanced prompts with your LLM
68
+ * const response = await llm.chat({
69
+ * messages: [
70
+ * { role: "system", content: enhancedSystemPrompt },
71
+ * { role: "user", content: enhancedUserPrompt },
72
+ * ],
73
+ * });
74
+ * ```
75
+ */
76
+ export declare function wrapCitationPrompt(options: WrapCitationPromptOptions): WrapCitationPromptResult;
77
/**
 * Declared shape of the exported JSON citation output format: a `type`, a
 * `properties` map with entries for fileId, startPageKey, reasoning,
 * fullPhrase and lineIds (the latter with an `items` sub-entry), and a
 * top-level `required` list. Only the structure is declared here; the
 * concrete values live in the implementation file.
 */
+ export declare const CITATION_JSON_OUTPUT_FORMAT: {
78
+ type: string;
79
+ properties: {
80
+ fileId: {
81
+ type: string;
82
+ };
83
+ startPageKey: {
84
+ type: string;
85
+ description: string;
86
+ };
87
+ reasoning: {
88
+ type: string;
89
+ description: string;
90
+ };
91
+ fullPhrase: {
92
+ type: string;
93
+ description: string;
94
+ };
95
+ lineIds: {
96
+ type: string;
97
+ items: {
98
+ type: string;
99
+ };
100
+ description: string;
101
+ };
102
+ };
103
+ required: string[];
104
+ };
105
/**
 * Declared shape of the exported audio/video JSON citation output format: a
 * `type` and a `properties` map with entries for fileId, startPageKey,
 * fullPhrase and timestamps, where timestamps is itself an object entry with
 * startTime / endTime properties and its own `required` list. No top-level
 * `required` list is declared for this format. Only the structure is declared
 * here; the concrete values live in the implementation file.
 */
+ export declare const CITATION_AV_BASED_JSON_OUTPUT_FORMAT: {
106
+ type: string;
107
+ properties: {
108
+ fileId: {
109
+ type: string;
110
+ };
111
+ startPageKey: {
112
+ type: string;
113
+ description: string;
114
+ };
115
+ fullPhrase: {
116
+ type: string;
117
+ description: string;
118
+ };
119
+ timestamps: {
120
+ type: string;
121
+ properties: {
122
+ startTime: {
123
+ type: string;
124
+ };
125
+ endTime: {
126
+ type: string;
127
+ };
128
+ };
129
+ required: string[];
130
+ description: string;
131
+ };
132
+ };
133
+ };