@deepcitation/deepcitation-js 1.1.27 → 1.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +253 -253
  3. package/lib/chunk-2IZXUOQR.js +66 -0
  4. package/lib/chunk-4FGOHQFP.cjs +66 -0
  5. package/lib/chunk-CFXDRAJL.cjs +1 -0
  6. package/lib/chunk-DEUSSEFH.js +2 -0
  7. package/lib/chunk-F2MMVEVC.cjs +1 -0
  8. package/lib/chunk-J7U6YFOI.cjs +2 -0
  9. package/lib/chunk-O2XFH626.js +1 -0
  10. package/lib/chunk-RQPZSRID.js +1 -0
  11. package/lib/client/index.cjs +1 -0
  12. package/lib/client/{DeepCitation.d.ts → index.d.cts} +159 -3
  13. package/lib/client/index.d.ts +342 -2
  14. package/lib/client/index.js +1 -1
  15. package/lib/index.cjs +1 -0
  16. package/lib/index.d.cts +127 -0
  17. package/lib/index.d.ts +126 -22
  18. package/lib/index.js +1 -20
  19. package/lib/prompts/index.cjs +1 -0
  20. package/lib/prompts/index.d.cts +196 -0
  21. package/lib/prompts/index.d.ts +196 -3
  22. package/lib/prompts/index.js +1 -3
  23. package/lib/react/index.cjs +4 -0
  24. package/lib/react/index.js +4 -20
  25. package/lib/types/index.cjs +1 -0
  26. package/lib/types/index.d.cts +96 -0
  27. package/lib/types/index.d.ts +96 -11
  28. package/lib/types/index.js +1 -7
  29. package/package.json +46 -11
  30. package/lib/client/DeepCitation.js +0 -374
  31. package/lib/client/types.d.ts +0 -154
  32. package/lib/client/types.js +0 -1
  33. package/lib/parsing/normalizeCitation.d.ts +0 -5
  34. package/lib/parsing/normalizeCitation.js +0 -198
  35. package/lib/parsing/parseCitation.d.ts +0 -79
  36. package/lib/parsing/parseCitation.js +0 -431
  37. package/lib/parsing/parseWorkAround.d.ts +0 -2
  38. package/lib/parsing/parseWorkAround.js +0 -73
  39. package/lib/prompts/citationPrompts.d.ts +0 -138
  40. package/lib/prompts/citationPrompts.js +0 -168
  41. package/lib/prompts/promptCompression.d.ts +0 -14
  42. package/lib/prompts/promptCompression.js +0 -127
  43. package/lib/prompts/types.d.ts +0 -4
  44. package/lib/prompts/types.js +0 -1
  45. package/lib/react/CitationComponent.d.ts +0 -106
  46. package/lib/react/CitationComponent.js +0 -419
  47. package/lib/react/CitationVariants.d.ts +0 -132
  48. package/lib/react/CitationVariants.js +0 -277
  49. package/lib/react/DiffDisplay.d.ts +0 -10
  50. package/lib/react/DiffDisplay.js +0 -33
  51. package/lib/react/Popover.d.ts +0 -15
  52. package/lib/react/Popover.js +0 -20
  53. package/lib/react/UrlCitationComponent.d.ts +0 -83
  54. package/lib/react/UrlCitationComponent.js +0 -224
  55. package/lib/react/VerificationTabs.d.ts +0 -10
  56. package/lib/react/VerificationTabs.js +0 -36
  57. package/lib/react/icons.d.ts +0 -22
  58. package/lib/react/icons.js +0 -16
  59. package/lib/react/index.d.ts +0 -17
  60. package/lib/react/primitives.d.ts +0 -99
  61. package/lib/react/primitives.js +0 -187
  62. package/lib/react/types.d.ts +0 -315
  63. package/lib/react/types.js +0 -1
  64. package/lib/react/useSmartDiff.d.ts +0 -16
  65. package/lib/react/useSmartDiff.js +0 -64
  66. package/lib/react/utils.d.ts +0 -44
  67. package/lib/react/utils.js +0 -88
  68. package/lib/types/boxes.d.ts +0 -11
  69. package/lib/types/boxes.js +0 -1
  70. package/lib/types/citation.d.ts +0 -39
  71. package/lib/types/citation.js +0 -1
  72. package/lib/types/search.d.ts +0 -19
  73. package/lib/types/search.js +0 -1
  74. package/lib/types/verification.d.ts +0 -27
  75. package/lib/types/verification.js +0 -11
  76. package/lib/utils/diff.d.ts +0 -60
  77. package/lib/utils/diff.js +0 -414
  78. package/lib/utils/sha.d.ts +0 -10
  79. package/lib/utils/sha.js +0 -108
@@ -1,431 +0,0 @@
1
- import { normalizeCitations } from "./normalizeCitation.js";
2
- import { generateCitationKey } from "../react/utils.js";
/**
 * Parses a line_ids string that may contain individual numbers, ranges, or both.
 * Examples: "1,2,3", "5-10", "1,5-7,10", "20-20"
 *
 * Parts that cannot be parsed are skipped. A range whose end is missing, invalid,
 * or smaller than its start contributes only its start value. A range that would
 * expand to more than MAX_RANGE_SPAN ids is not expanded: only its two endpoints
 * are kept, so a bogus value such as "1-999999999" cannot exhaust memory.
 *
 * @param lineIdsString - The raw line_ids string (e.g., "1,5-7,10")
 * @returns Sorted array of unique line IDs, or undefined if empty/invalid
 */
function parseLineIds(lineIdsString) {
    // Largest number of ids a single "start-end" part may expand into.
    const MAX_RANGE_SPAN = 10000;
    if (!lineIdsString)
        return undefined;
    // A Set de-duplicates as we go, so overlapping ranges stay cheap.
    const lineIds = new Set();
    for (const part of lineIdsString.split(",")) {
        const trimmed = part.trim();
        if (!trimmed)
            continue;
        if (!trimmed.includes("-")) {
            // Single number
            const num = parseInt(trimmed, 10);
            if (!isNaN(num)) {
                lineIds.add(num);
            }
            continue;
        }
        // Range (e.g., "5-10"); only the first two dash-separated pieces are considered.
        const [startStr, endStr] = trimmed.split("-");
        const start = parseInt(startStr, 10);
        const end = parseInt(endStr, 10);
        if (isNaN(start))
            continue;
        if (isNaN(end) || end < start) {
            // No usable end: fall back to the start value alone.
            lineIds.add(start);
            continue;
        }
        if (end - start >= MAX_RANGE_SPAN) {
            // Implausibly large range: keep the endpoints instead of expanding it.
            lineIds.add(start);
            lineIds.add(end);
            continue;
        }
        for (let i = start; i <= end; i++) {
            lineIds.add(i);
        }
    }
    if (lineIds.size === 0)
        return undefined;
    return [...lineIds].sort((a, b) => a - b);
}
/**
 * Derives boolean status flags from a verification result.
 *
 * Partial matches and the two "one half matched" statuses
 * (found_key_span_only, found_phrase_missed_value) all count as verified.
 * A missing verification or missing status counts as pending.
 *
 * @param verification - The verification result, or null/undefined if not available yet
 * @returns An object with the flags isVerified, isMiss, isPartialMatch and isPending
 */
export function getCitationStatus(verification) {
    const status = verification?.status;
    const partialStatuses = [
        "partial_text_found",
        "found_on_other_page",
        "found_on_other_line",
        "first_word_found",
    ];
    const verifiedStatuses = [
        "found",
        "found_key_span_only",
        "found_phrase_missed_value",
        ...partialStatuses,
    ];
    return {
        isVerified: verifiedStatuses.includes(status),
        isMiss: status === "not_found",
        isPartialMatch: partialStatuses.includes(status),
        isPending: !status || status === "pending" || status === "loading",
    };
}
/**
 * Parses the first `<cite ... />` tag in a text fragment into a citation object.
 *
 * Two tag layouts are recognised: the document layout (start_page, full_phrase,
 * key_span, line_ids, optional value/reasoning) and the audio/video layout
 * (full_phrase, timestamps, optional value/reasoning). When the tag matches
 * neither layout, the citation is still returned with its parsed fields undefined.
 *
 * The closing "/>" is searched for from the start of the tag, so a self-closing
 * element that precedes the tag (e.g. "<br/>") no longer truncates the result.
 * A "/>" inside an attribute value is still taken as the end of the tag.
 *
 * @param fragment - Text containing a `<cite ... />` tag
 * @param mdAttachmentId - Attachment id used unless the tag carries a 20-character id
 * @param citationCounterRef - Optional `{ current }` counter; its value becomes the
 *   citation number and is then incremented. A value of 0 yields no number.
 * @param isVerbose - When truthy, errors raised while parsing line ids are logged
 * @returns `{ beforeCite, afterCite, citation }`: the text around the tag plus the parsed citation
 */
export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVerbose) => {
    // Helper: remove one wrapping quote per end and unescape internal single quotes (It\'s -> It's)
    const cleanAndUnescape = (str) => {
        if (!str)
            return undefined;
        return str.replace(/^['"]|['"]$/g, "").replace(/\\'/g, "'");
    };
    // Helper: a 20-character id from the tag wins; otherwise prefer mdAttachmentId.
    const resolveAttachmentId = (rawId) => rawId?.length === 20 ? rawId : mdAttachmentId || rawId;
    const citationNumber = citationCounterRef?.current
        ? citationCounterRef.current++
        : undefined;
    const citeStart = fragment.indexOf("<cite");
    // Only a "/>" at or after the tag start can close the tag.
    const citeClose = fragment.indexOf("/>", Math.max(citeStart, 0));
    const beforeCite = citeStart === -1 ? "" : fragment.substring(0, citeStart);
    const afterCite = citeClose === -1 ? "" : fragment.slice(citeClose + 2);
    const middleCite = citeStart !== -1 && citeClose !== -1
        ? fragment.substring(citeStart, citeClose + 2)
        : "";
    // GROUPS:
    // 1: attachmentId
    // 2: start_page number
    // 3: index number
    // 4: full_phrase content (escaped)
    // 5: key_span content (escaped)
    // 6: line_ids content
    // 7: Optional Key (value|reasoning)
    // 8: Optional Value content (escaped)
    const citationRegex = /<cite\s+(?:attachment_id|attachmentId|file_id|fileId)='(\w{0,25})'\s+start_page[\_a-zA-Z]*='page[\_a-zA-Z]*(\d+)_index_(\d+)'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+key_span='((?:[^'\\]|\\.)*)'\s+line(?:_ids|Ids)='([^']+)'(?:\s+(value|reasoning)='((?:[^'\\]|\\.)*)')?\s*\/>/;
    const match = middleCite.match(citationRegex);
    const pageNumber = match?.[2] ? parseInt(match[2], 10) : undefined;
    const pageIndex = match?.[3] ? parseInt(match[3], 10) : undefined;
    let attachmentId = resolveAttachmentId(match?.[1]);
    let fullPhrase = cleanAndUnescape(match?.[4]);
    const keySpan = cleanAndUnescape(match?.[5]);
    // Handle the optional attribute (value or reasoning)
    let value;
    let reasoning;
    const optionalContent = cleanAndUnescape(match?.[8]);
    if (match?.[7] === "value") {
        value = optionalContent;
    }
    else if (match?.[7] === "reasoning") {
        reasoning = optionalContent;
    }
    let lineIds;
    try {
        // Strip letters and bracket characters so only digits, commas and dashes reach the parser.
        const lineIdsString = match?.[6]?.replace(/[A-Za-z_[\](){}:]/g, "");
        lineIds = lineIdsString ? parseLineIds(lineIdsString) : undefined;
    }
    catch (e) {
        if (isVerbose)
            console.error("Error parsing lineIds", e);
    }
    // GROUPS for AV:
    // 1: attachmentId
    // 2: full_phrase content (escaped)
    // 3: timestamps content
    // 4: Optional Key (value|reasoning)
    // 5: Optional Value content (escaped)
    const avCitationRegex = /<cite\s+(?:attachment_id|attachmentId|file_id|fileId)='(\w{0,25})'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+timestamps='([^']+)'(?:\s+(value|reasoning)='((?:[^'\\]|\\.)*)')?\s*\/>/;
    const avMatch = middleCite.match(avCitationRegex);
    let timestamps;
    if (avMatch) {
        // An audio/video match overrides the id and phrase taken from the document layout.
        attachmentId = resolveAttachmentId(avMatch[1]);
        fullPhrase = cleanAndUnescape(avMatch[2]);
        const timestampsString = avMatch[3]?.replace(/timestamps=['"]|['"]/g, "");
        const [startTime, endTime] = timestampsString?.split("-") || [];
        const avOptionalContent = cleanAndUnescape(avMatch[5]);
        if (avMatch[4] === "value") {
            value = avOptionalContent;
        }
        else if (avMatch[4] === "reasoning") {
            reasoning = avOptionalContent;
        }
        timestamps = { startTime, endTime };
    }
    const citation = {
        attachmentId: attachmentId,
        pageNumber,
        // Defaults to page 1 / index 0 when the tag carried no page information.
        startPageKey: `page_number_${pageNumber || 1}_index_${pageIndex || 0}`,
        fullPhrase,
        // The optional value attribute stands in for a missing key span.
        keySpan: keySpan || value,
        citationNumber,
        lineIds,
        beforeCite,
        timestamps,
        reasoning,
    };
    return {
        beforeCite,
        afterCite,
        citation,
    };
};
/**
 * Parses a JSON-based citation object into a Citation.
 * Supports both camelCase and snake_case property names.
 *
 * Malformed fields are tolerated rather than thrown on: a start page key that is
 * not a string yields no page number, and line ids that are not an array (or
 * entries that are not finite numbers / numeric strings) are ignored.
 *
 * @param jsonCitation - The JSON citation object (can have camelCase or snake_case properties)
 * @param citationNumber - Optional citation number for ordering
 * @returns Parsed Citation object, or null when the input is empty or has no full phrase
 */
const parseJsonCitation = (jsonCitation, citationNumber) => {
    if (!jsonCitation) {
        return null;
    }
    // Support both camelCase and snake_case property names
    const fullPhrase = jsonCitation.fullPhrase ?? jsonCitation.full_phrase;
    if (!fullPhrase) {
        return null;
    }
    const startPageKey = jsonCitation.startPageKey ?? jsonCitation.start_page_key;
    const keySpan = jsonCitation.keySpan ?? jsonCitation.key_span;
    const rawLineIds = jsonCitation.lineIds ?? jsonCitation.line_ids;
    const attachmentId = jsonCitation.attachmentId ?? jsonCitation.attachment_id ?? jsonCitation.fileId ?? jsonCitation.file_id;
    const reasoning = jsonCitation.reasoning;
    const value = jsonCitation.value;
    // Page number: full format "page_number_5_index_2" first, then the simple "5_4" (page 5, index 4).
    let pageNumber;
    if (typeof startPageKey === "string") {
        const pageMatch = startPageKey.match(/page[_a-zA-Z]*(\d+)_index_(\d+)/i) ??
            startPageKey.match(/^(\d+)_(\d+)$/);
        if (pageMatch) {
            pageNumber = parseInt(pageMatch[1], 10);
        }
    }
    // Keep numbers and numeric strings only; anything else becomes NaN and is filtered out.
    const toLineId = (entry) => {
        if (typeof entry === "number")
            return entry;
        if (typeof entry === "string" && entry.trim() !== "")
            return Number(entry);
        return NaN;
    };
    const numericLineIds = Array.isArray(rawLineIds)
        ? rawLineIds.map(toLineId).filter((id) => Number.isFinite(id))
        : [];
    const lineIds = numericLineIds.length
        ? numericLineIds.sort((a, b) => a - b)
        : undefined;
    return {
        attachmentId,
        pageNumber,
        fullPhrase,
        citationNumber,
        lineIds,
        // The optional value stands in for a missing key span.
        keySpan: keySpan || value,
        reasoning,
    };
};
/**
 * Tells whether a value is a non-null object exposing at least one
 * citation field name, in either camelCase or snake_case.
 */
const hasCitationProperties = (item) => {
    if (typeof item !== "object" || item === null) {
        return false;
    }
    const citationFieldNames = [
        "fullPhrase",
        "full_phrase",
        "startPageKey",
        "start_page_key",
        "keySpan",
        "key_span",
        "lineIds",
        "line_ids",
    ];
    return citationFieldNames.some((fieldName) => fieldName in item);
};
/**
 * Decides whether the input looks like JSON-based citations: either an array
 * in which at least one element has citation-like properties, or a single
 * object that has them (camelCase or snake_case).
 */
const isJsonCitationFormat = (data) => {
    if (Array.isArray(data)) {
        // An empty array has no matching element, so it is never citation data.
        return data.some((entry) => hasCitationProperties(entry));
    }
    const isPlainObject = typeof data === "object" && data !== null;
    return isPlainObject ? hasCitationProperties(data) : false;
};
/**
 * Builds a citation dictionary from JSON input (array or single object).
 * Items are numbered from 1 by their position; items that do not parse
 * into a citation with a full phrase are left out.
 */
const extractJsonCitations = (data) => {
    const items = Array.isArray(data) ? data : [data];
    return items.reduce((citations, item, position) => {
        const citation = parseJsonCitation(item, position + 1);
        if (citation?.fullPhrase) {
            citations[generateCitationKey(citation)] = citation;
        }
        return citations;
    }, {});
};
/**
 * Walks an object tree and appends to `found` every value held under a
 * `citation` or `citations` property that matches the JSON citation format.
 * Those two properties are collected but not descended into.
 */
const findJsonCitationsInObject = (obj, found) => {
    if (!obj || typeof obj !== "object")
        return;
    // Collect from the citation/citations properties of this node.
    for (const property of ["citation", "citations"]) {
        const candidate = obj[property];
        if (candidate && isJsonCitationFormat(candidate)) {
            found.push(...(Array.isArray(candidate) ? candidate : [candidate]));
        }
    }
    // Descend into array elements, or into every other property of a plain object.
    const childKeys = Array.isArray(obj)
        ? null
        : Object.keys(obj).filter((key) => key !== "citation" && key !== "citations");
    if (childKeys === null) {
        for (const element of obj) {
            findJsonCitationsInObject(element, found);
        }
        return;
    }
    for (const key of childKeys) {
        findJsonCitationsInObject(obj[key], found);
    }
};
/**
 * Collects the self-closing cite tags found in a text (after normalization)
 * into a citation dictionary. Tags are numbered from 1 in order of appearance;
 * tags that yield no full phrase are skipped.
 */
const extractXmlCitations = (text) => {
    const citeTags = normalizeCitations(text).match(/<cite\s+[^>]*\/>/g) ?? [];
    const counter = { current: 1 };
    const citations = {};
    for (const tag of citeTags) {
        const parsed = parseCitation(tag, undefined, counter).citation;
        if (parsed && parsed.fullPhrase) {
            citations[generateCitationKey(parsed)] = parsed;
        }
    }
    return citations;
};
/**
 * Extracts every citation from LLM output.
 * Handles XML cite tags embedded in strings/markdown as well as JSON-based citations.
 *
 * For object input:
 * - if the object itself is in JSON citation format it is parsed directly;
 *   otherwise nested `citation` / `citations` properties are searched
 * - the object is also stringified and scanned for embedded XML cite tags,
 *   whose entries are merged last
 *
 * @param llmOutput - The LLM output (string or object)
 * @returns Dictionary of parsed Citation objects keyed by citation key
 */
export const getAllCitationsFromLlmOutput = (llmOutput) => {
    const citations = {};
    if (!llmOutput)
        return citations;
    if (typeof llmOutput === "string") {
        // String input: only XML citations are possible.
        return Object.assign(citations, extractXmlCitations(llmOutput));
    }
    if (typeof llmOutput !== "object")
        return citations;
    if (isJsonCitationFormat(llmOutput)) {
        Object.assign(citations, extractJsonCitations(llmOutput));
    }
    else {
        const nestedCitations = [];
        findJsonCitationsInObject(llmOutput, nestedCitations);
        if (nestedCitations.length > 0) {
            Object.assign(citations, extractJsonCitations(nestedCitations));
        }
    }
    // XML citations embedded in string values surface once the object is serialized.
    return Object.assign(citations, extractXmlCitations(JSON.stringify(llmOutput)));
};
/**
 * Buckets citations by attachmentId for multi-file verification, so each
 * file's citations can be verified against its own source document.
 * Citations without an attachmentId are bucketed under the empty string.
 *
 * @param citations - Array of Citation objects or a dictionary of citations
 * @returns Map of attachmentId to dictionary of citations from that file
 *
 * @example
 * ```typescript
 * const citations = getAllCitationsFromLlmOutput(response.content);
 * const citationsByAttachment = groupCitationsByAttachmentId(citations);
 *
 * // Verify citations for each file
 * for (const [attachmentId, fileCitations] of citationsByAttachment) {
 *   const verified = await dc.verifyCitations(attachmentId, fileCitations);
 *   // Process verification results...
 * }
 * ```
 */
export function groupCitationsByAttachmentId(citations) {
    // Array input gets generated keys, falling back to the 1-based position.
    const keyedCitations = Array.isArray(citations)
        ? citations.map((c, idx) => [generateCitationKey(c) || String(idx + 1), c])
        : Object.entries(citations);
    return keyedCitations.reduce((grouped, [key, citation]) => {
        const attachmentId = citation.attachmentId || "";
        const bucket = grouped.get(attachmentId) ?? {};
        bucket[key] = citation;
        return grouped.set(attachmentId, bucket);
    }, new Map());
}
398
- /**
399
- * Groups citations by their attachmentId and returns as a plain object.
400
- * Alternative to groupCitationsByAttachmentId that returns a plain object instead of a Map.
401
- *
402
- * @param citations - Array of Citation objects or a dictionary of citations
403
- * @returns Object with attachmentId keys mapping to citation dictionaries
404
- *
405
- * @example
406
- * ```typescript
407
- * const citations = getAllCitationsFromLlmOutput(response.content);
408
- * const citationsByAttachment = groupCitationsByAttachmentIdObject(citations);
409
- *
410
- * // Verify citations for each file using Promise.all
411
- * const verificationPromises = Object.entries(citationsByAttachment).map(
412
- * ([attachmentId, fileCitations]) => dc.verifyCitations(attachmentId, fileCitations)
413
- * );
414
- * const results = await Promise.all(verificationPromises);
415
- * ```
416
- */
417
- export function groupCitationsByAttachmentIdObject(citations) {
418
- const grouped = {};
419
- // Normalize input to entries
420
- const entries = Array.isArray(citations)
421
- ? citations.map((c, idx) => [generateCitationKey(c) || String(idx + 1), c])
422
- : Object.entries(citations);
423
- for (const [key, citation] of entries) {
424
- const attachmentId = citation.attachmentId || "";
425
- if (!grouped[attachmentId]) {
426
- grouped[attachmentId] = {};
427
- }
428
- grouped[attachmentId][key] = citation;
429
- }
430
- return grouped;
431
- }
@@ -1,2 +0,0 @@
/**
 * Detects degenerate output made of one repeated character.
 *
 * @param content - Text to inspect
 * @returns true when the trimmed content is at least 64 characters long and
 *   every character equals the first one; false otherwise (including empty input)
 */
export declare const isGeminiGarbage: (content: string) => boolean;
/**
 * Collapses a run of consecutively repeated final sentences.
 *
 * @param text - Text to clean; it is trimmed before processing
 * @returns The trimmed text, with the repeated last sentence reduced to a single
 *   occurrence when it appears at least twice in a row at the end
 */
export declare function cleanRepeatingLastSentence(text: string): string;
@@ -1,73 +0,0 @@
// flash and flash lite get super confused if we ask for a MD table and infinite loop
// Content shorter than this (after trimming) is never classified as garbage.
const MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE = 64;
/**
 * Detects degenerate output that consists of a single character repeated
 * over and over.
 *
 * Characters are compared by Unicode code point rather than by UTF-16 code
 * unit, so a run of one repeated astral character (for example an emoji,
 * which occupies two code units) is detected as well.
 *
 * @param content - Text to inspect
 * @returns true when the trimmed content has at least
 *   MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE UTF-16 code units and every character
 *   equals the first one; false otherwise (including empty input)
 */
export const isGeminiGarbage = (content) => {
    if (!content)
        return false;
    const trimmedContent = content.trim();
    if (trimmedContent.length < MIN_CONTENT_LENGTH_FOR_GEMINI_GARBAGE)
        return false;
    const firstCharacter = String.fromCodePoint(trimmedContent.codePointAt(0));
    // Iterating a string yields whole code points, never half of a surrogate pair.
    for (const character of trimmedContent) {
        if (character !== firstCharacter)
            return false;
    }
    return true;
};
/**
 * Cleans up the "infinite rambling" output pattern (seen with Gemini) in
 * which the final sentence is emitted again and again.
 *
 * The last complete sentence, together with the whitespace preceding it, is
 * taken as the repeating unit. If that unit occurs at least twice in a row
 * ending at the last sentence terminator, the run is collapsed to a single
 * occurrence and anything after the run is dropped. Units whose sentence
 * content is shorter than 10 characters are left alone.
 *
 * @param text - Text to clean
 * @returns The trimmed text, with a trailing run of repetitions collapsed when found
 */
export function cleanRepeatingLastSentence(text) {
    const MIN_REPETITIONS = 2;
    const MIN_SENTENCE_CONTENT_LENGTH = 10;
    const source = text.trim();
    // Positions of sentence terminators that are followed by whitespace or the end of the text.
    const terminatorPositions = Array.from(source.matchAll(/[.?!](?=\s+|$)/g), (found) => found.index);
    if (terminatorPositions.length < 2) {
        return source;
    }
    const [previousTerminator, lastTerminator] = terminatorPositions.slice(-2);
    const unit = source.substring(previousTerminator + 1, lastTerminator + 1);
    // Sentence content is the unit without surrounding whitespace and without its terminator.
    const sentenceContent = unit.trim().slice(0, -1);
    const tooShortToJudge = sentenceContent.length < MIN_SENTENCE_CONTENT_LENGTH ||
        unit.length <= 0 ||
        source.length < unit.length * MIN_REPETITIONS;
    if (tooShortToJudge) {
        return source;
    }
    // Walk backwards one unit at a time for as long as the text keeps repeating.
    let windowEnd = source.endsWith(unit) ? source.length : lastTerminator + 1;
    let repetitions = 0;
    let runStart = -1;
    for (let windowStart = windowEnd - unit.length; windowStart >= 0; windowStart = windowEnd - unit.length) {
        if (source.substring(windowStart, windowEnd) !== unit) {
            break;
        }
        repetitions += 1;
        runStart = windowStart;
        windowEnd = windowStart;
    }
    return repetitions >= MIN_REPETITIONS
        ? source.substring(0, runStart) + unit
        : source;
}
@@ -1,138 +0,0 @@
/**
 * Prompt text describing the citation syntax for text documents inside Markdown.
 * It tells the model to emit self-closing cite tags carrying attachment_id,
 * start_page_key, full_phrase, key_span, line_ids and reasoning attributes,
 * and lists the rules for page keys, line id ranges and tag placement.
 */
export declare const CITATION_MARKDOWN_SYNTAX_PROMPT = "\nCitation syntax to use within Markdown:\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite attachment_id='attachment_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <attachment_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim 1-3 words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />\n\n\u2022 Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.\n\u2022 start_page_key, full_phrase, and line_ids are required for each citation.\n\u2022 Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.\n\u2022 Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.\n\u2022 These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.\n\u2022 The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.\n\u2022 We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.\n";
/**
 * Prompt text describing the citation syntax for audio/video sources.
 * It tells the model to emit self-closing cite tags carrying attachment_id,
 * full_phrase, timestamps (HH:MM:SS.SSS-HH:MM:SS.SSS) and reasoning attributes.
 */
export declare const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = "\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite attachment_id='attachment_id' full_phrase='the verbatim text of the phrase; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />\n\u2022 These citations are displayed in-line or in the relevant list item, and are not grouped at the end of the document.\n";
/** Options accepted by wrapSystemCitationPrompt. */
export interface WrapSystemPromptOptions {
    /** The original system prompt to wrap with citation instructions */
    systemPrompt: string;
    /** Whether to use audio/video citation format (with timestamps) instead of text-based (with line IDs) */
    isAudioVideo?: boolean;
    /**
     * NOTE(review): undocumented in this declaration file. Presumably selects whether
     * the citation instructions are placed before the system prompt instead of after
     * it; confirm against the implementation.
     */
    prependCitationInstructions?: boolean;
}
/** Options accepted by wrapCitationPrompt. */
export interface WrapCitationPromptOptions {
    /** The original system prompt to wrap with citation instructions */
    systemPrompt: string;
    /** The original user prompt */
    userPrompt: string;
    /** The extracted file text with metadata (from uploadFile response). Can be a single string or array for multiple files. */
    deepTextPromptPortion?: string | string[];
    /** Whether to use audio/video citation format (with timestamps) instead of text-based (with line IDs) */
    isAudioVideo?: boolean;
}
/** Pair of prompts returned by wrapCitationPrompt. */
export interface WrapCitationPromptResult {
    /** Enhanced system prompt with citation instructions */
    enhancedSystemPrompt: string;
    /** Enhanced user prompt (currently passed through unchanged) */
    enhancedUserPrompt: string;
}
/**
 * Wraps your existing system prompt with DeepCitation's citation syntax instructions.
 * This enables LLMs to output verifiable citations that can be checked against source documents.
 *
 * @param options - The system prompt to wrap plus optional format flags (see WrapSystemPromptOptions)
 * @returns The system prompt combined with the citation instructions
 *
 * @example
 * ```typescript
 * import { wrapSystemCitationPrompt } from '@deepcitation/deepcitation-js';
 *
 * const systemPrompt = "You are a helpful assistant that analyzes documents.";
 * const enhanced = wrapSystemCitationPrompt({ systemPrompt });
 *
 * // Use enhanced prompt with your LLM
 * const response = await openai.chat.completions.create({
 *   messages: [{ role: "system", content: enhanced }],
 *   // ...
 * });
 * ```
 */
export declare function wrapSystemCitationPrompt(options: WrapSystemPromptOptions): string;
/**
 * Wraps both system and user prompts with DeepCitation's citation syntax instructions.
 * This is the recommended way to prepare prompts for citation verification.
 *
 * @param options - The system prompt, user prompt, optional extracted file text
 *   (one string or one per file) and optional audio/video flag (see WrapCitationPromptOptions)
 * @returns The enhanced system and user prompts
 *
 * @example
 * ```typescript
 * import { wrapCitationPrompt } from '@deepcitation/deepcitation-js';
 *
 * // Single file
 * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
 *   systemPrompt: "You are a helpful assistant.",
 *   userPrompt: "Analyze this document and summarize it.",
 *   deepTextPromptPortion, // from uploadFile response
 * });
 *
 * // Multiple files
 * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
 *   systemPrompt: "You are a helpful assistant.",
 *   userPrompt: "Compare these documents.",
 *   deepTextPromptPortion: [deepTextPromptPortion1, deepTextPromptPortion2], // array of file texts
 * });
 *
 * // Use enhanced prompts with your LLM
 * const response = await llm.chat({
 *   messages: [
 *     { role: "system", content: enhancedSystemPrompt },
 *     { role: "user", content: enhancedUserPrompt },
 *   ],
 * });
 * ```
 */
export declare function wrapCitationPrompt(options: WrapCitationPromptOptions): WrapCitationPromptResult;
/**
 * Output-format descriptor for text-based citations. It declares the fields
 * attachmentId, startPageKey, reasoning, fullPhrase, keySpan and lineIds
 * (an array type with an `items` entry), plus a list of required field names.
 *
 * NOTE(review): the type/properties/required layout looks like a JSON Schema
 * object meant for structured LLM output; confirm against the implementation.
 */
export declare const CITATION_JSON_OUTPUT_FORMAT: {
    type: string;
    properties: {
        attachmentId: {
            type: string;
        };
        startPageKey: {
            type: string;
            description: string;
        };
        reasoning: {
            type: string;
            description: string;
        };
        fullPhrase: {
            type: string;
            description: string;
        };
        keySpan: {
            type: string;
            description: string;
        };
        lineIds: {
            type: string;
            items: {
                type: string;
            };
            description: string;
        };
    };
    required: string[];
};
/**
 * Output-format descriptor for audio/video citations. It declares the fields
 * attachmentId, startPageKey, fullPhrase and timestamps (an object with
 * startTime and endTime and its own required list), plus a list of required
 * field names.
 *
 * NOTE(review): the type/properties/required layout looks like a JSON Schema
 * object meant for structured LLM output; confirm against the implementation.
 */
export declare const CITATION_AV_BASED_JSON_OUTPUT_FORMAT: {
    type: string;
    properties: {
        attachmentId: {
            type: string;
        };
        startPageKey: {
            type: string;
            description: string;
        };
        fullPhrase: {
            type: string;
            description: string;
        };
        timestamps: {
            type: string;
            properties: {
                startTime: {
                    type: string;
                };
                endTime: {
                    type: string;
                };
            };
            required: string[];
            description: string;
        };
    };
    required: string[];
};