@deepcitation/deepcitation-js 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +71 -1197
  2. package/lib/client/DeepCitation.d.ts +204 -0
  3. package/lib/client/DeepCitation.js +473 -0
  4. package/lib/client/index.d.ts +2 -0
  5. package/lib/client/index.js +1 -0
  6. package/lib/client/types.d.ts +157 -0
  7. package/lib/client/types.js +1 -0
  8. package/lib/index.d.ts +25 -0
  9. package/lib/index.js +22 -0
  10. package/lib/parsing/normalizeCitation.d.ts +5 -0
  11. package/lib/parsing/normalizeCitation.js +182 -0
  12. package/lib/parsing/parseCitation.d.ts +79 -0
  13. package/lib/parsing/parseCitation.js +371 -0
  14. package/lib/parsing/parseWorkAround.d.ts +2 -0
  15. package/lib/parsing/parseWorkAround.js +73 -0
  16. package/lib/prompts/citationPrompts.d.ts +133 -0
  17. package/lib/prompts/citationPrompts.js +152 -0
  18. package/lib/prompts/index.d.ts +3 -0
  19. package/lib/prompts/index.js +3 -0
  20. package/lib/prompts/promptCompression.d.ts +14 -0
  21. package/lib/prompts/promptCompression.js +109 -0
  22. package/lib/prompts/types.d.ts +4 -0
  23. package/lib/prompts/types.js +1 -0
  24. package/lib/react/CitationComponent.d.ts +134 -0
  25. package/lib/react/CitationComponent.js +376 -0
  26. package/lib/react/CitationVariants.d.ts +135 -0
  27. package/lib/react/CitationVariants.js +283 -0
  28. package/lib/react/DiffDisplay.d.ts +10 -0
  29. package/lib/react/DiffDisplay.js +33 -0
  30. package/lib/react/UrlCitationComponent.d.ts +83 -0
  31. package/lib/react/UrlCitationComponent.js +224 -0
  32. package/lib/react/VerificationTabs.d.ts +10 -0
  33. package/lib/react/VerificationTabs.js +36 -0
  34. package/lib/react/icons.d.ts +8 -0
  35. package/lib/react/icons.js +9 -0
  36. package/lib/react/index.d.ts +16 -0
  37. package/lib/react/index.js +18 -0
  38. package/lib/react/primitives.d.ts +104 -0
  39. package/lib/react/primitives.js +190 -0
  40. package/lib/react/types.d.ts +192 -0
  41. package/lib/react/types.js +1 -0
  42. package/lib/react/useSmartDiff.d.ts +16 -0
  43. package/lib/react/useSmartDiff.js +64 -0
  44. package/lib/react/utils.d.ts +34 -0
  45. package/lib/react/utils.js +59 -0
  46. package/lib/types/boxes.d.ts +11 -0
  47. package/lib/types/boxes.js +1 -0
  48. package/lib/types/citation.d.ts +44 -0
  49. package/lib/types/citation.js +2 -0
  50. package/lib/types/foundHighlight.d.ts +23 -0
  51. package/lib/types/foundHighlight.js +22 -0
  52. package/lib/types/index.d.ts +11 -0
  53. package/lib/types/index.js +7 -0
  54. package/lib/types/search.d.ts +30 -0
  55. package/lib/types/search.js +1 -0
  56. package/lib/utils/sha.d.ts +10 -0
  57. package/lib/utils/sha.js +108 -0
  58. package/package.json +11 -23
@@ -0,0 +1,152 @@
1
/**
 * Prompt fragment that tells an LLM how to emit inline cite tags
 * (file_id, start_page_key, full_phrase, line_ids, reasoning) for
 * page/line-addressed documents. wrapSystemCitationPrompt selects this
 * constant when isAudioVideo is false.
 *
 * NOTE(review): the bullet starting "Use refer to line_ids inclusively" reads
 * like a typo for "Refer to line_ids inclusively"; it is left untouched here
 * because the text is sent to the model verbatim.
 */
+ export const CITATION_MARKDOWN_SYNTAX_PROMPT = `
2
+ Citation syntax to use within Markdown:
3
+ • To support any ideas or information that requires a citation from the provided content, use the following citation syntax:
4
+ <cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text /> (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />
5
+
6
+ • Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.
7
+ • start_page_key, full_phrase, and line_ids are required for each citation.
8
+ • Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.
9
+ • Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.
10
+ • These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.
11
+ • The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.
12
+ • We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.
13
+ `;
14
/**
 * Audio/video counterpart of the citation-syntax prompt: the cite tag carries a
 * timestamps attribute (HH:MM:SS.SSS-HH:MM:SS.SSS) instead of page keys and
 * line ids. wrapSystemCitationPrompt selects this constant when isAudioVideo
 * is true.
 */
+ export const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = `
15
+ • To support any ideas or information that requires a citation from the provided content, use the following citation syntax:
16
+ <cite file_id='file_id' full_phrase='the verbatim text of the phrase (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />
17
+ • These citations are displayed in-line or in the relevant list item, and are not grouped at the end of the document.
18
+ `;
19
/**
 * Wraps your existing system prompt with DeepCitation's citation syntax instructions.
 * This enables LLMs to output verifiable citations that can be checked against source documents.
 *
 * Both parts are trimmed and joined with a single blank line. If the system
 * prompt is missing or blank, only the citation instructions are returned, so
 * the result never starts or ends with stray blank lines.
 *
 * @param options - Wrapping options.
 * @param options.systemPrompt - The caller's own system prompt.
 * @param options.isAudioVideo - When true, use the timestamp-based (audio/video)
 *   instructions instead of the page/line-based ones. Defaults to false.
 * @param options.prependCitationInstructions - When true, place the citation
 *   instructions before the system prompt instead of after it. Defaults to false.
 * @returns The combined system prompt.
 *
 * @example
 * ```typescript
 * import { wrapSystemCitationPrompt } from '@deepcitation/deepcitation-js';
 *
 * const systemPrompt = "You are a helpful assistant that analyzes documents.";
 * const enhanced = wrapSystemCitationPrompt({ systemPrompt });
 *
 * // Use enhanced prompt with your LLM
 * const response = await openai.chat.completions.create({
 *   messages: [{ role: "system", content: enhanced }],
 *   // ...
 * });
 * ```
 */
export function wrapSystemCitationPrompt(options) {
  const { systemPrompt, isAudioVideo = false, prependCitationInstructions = false } = options;
  const citationPrompt = (isAudioVideo
    ? AV_CITATION_MARKDOWN_SYNTAX_PROMPT
    : CITATION_MARKDOWN_SYNTAX_PROMPT).trim();
  // Tolerate a missing prompt instead of throwing on `.trim()` of undefined.
  const basePrompt = typeof systemPrompt === "string" ? systemPrompt.trim() : "";
  if (basePrompt === "") {
    // Nothing to wrap: avoid emitting a dangling blank-line separator.
    return citationPrompt;
  }
  const parts = prependCitationInstructions
    ? [citationPrompt, basePrompt]
    : [basePrompt, citationPrompt];
  return parts.join("\n\n");
}
50
/**
 * Wraps both system and user prompts with DeepCitation's citation syntax instructions.
 * This is the recommended way to prepare prompts for citation verification.
 *
 * The system prompt is passed through `wrapSystemCitationPrompt` (citation
 * instructions are appended). When file text is supplied, each file is wrapped
 * in a `<file_text>` element and placed before the user prompt; with more than
 * one file, each element carries a 1-based `file_index` attribute.
 *
 * @param options - Wrapping options.
 * @param options.systemPrompt - The caller's own system prompt.
 * @param options.userPrompt - The caller's user prompt.
 * @param options.fileDeepText - One file text or an array of file texts. When
 *   omitted, empty, or an empty array, the user prompt is returned unchanged.
 * @param options.isAudioVideo - Forwarded to `wrapSystemCitationPrompt`.
 *   Defaults to false.
 * @returns An object with `enhancedSystemPrompt` and `enhancedUserPrompt`.
 *
 * @example
 * ```typescript
 * import { wrapCitationPrompt } from '@deepcitation/deepcitation-js';
 *
 * // Single file
 * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
 *   systemPrompt: "You are a helpful assistant.",
 *   userPrompt: "Analyze this document and summarize it.",
 *   fileDeepText, // from uploadFile response
 * });
 *
 * // Multiple files
 * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
 *   systemPrompt: "You are a helpful assistant.",
 *   userPrompt: "Compare these documents.",
 *   fileDeepText: [fileDeepText1, fileDeepText2], // array of file texts
 * });
 *
 * // Use enhanced prompts with your LLM
 * const response = await llm.chat({
 *   messages: [
 *     { role: "system", content: enhancedSystemPrompt },
 *     { role: "user", content: enhancedUserPrompt },
 *   ],
 * });
 * ```
 */
export function wrapCitationPrompt(options) {
  const { systemPrompt, userPrompt, fileDeepText, isAudioVideo = false } = options;
  const enhancedSystemPrompt = wrapSystemCitationPrompt({ systemPrompt, isAudioVideo });

  // Normalize to an array; a falsy value (undefined, null, "") means "no files".
  let fileTexts = [];
  if (fileDeepText) {
    fileTexts = Array.isArray(fileDeepText) ? fileDeepText : [fileDeepText];
  }
  // An empty array is truthy, so check the length explicitly: with no files
  // there is nothing to prepend and the user prompt must stay untouched.
  if (fileTexts.length === 0) {
    return { enhancedSystemPrompt, enhancedUserPrompt: userPrompt };
  }

  const isSingleFile = fileTexts.length === 1;
  const fileContent = fileTexts
    .map((text, index) => {
      // Only multi-file prompts label each block with its 1-based position.
      const openTag = isSingleFile ? "<file_text>" : `<file_text file_index="${index + 1}">`;
      return `${openTag}\n${text}\n</file_text>`;
    })
    .join("\n\n");

  // Do not interpolate a missing user prompt as the literal text "undefined".
  const enhancedUserPrompt = userPrompt == null ? fileContent : `${fileContent}\n\n${userPrompt}`;
  return { enhancedSystemPrompt, enhancedUserPrompt };
}
106
/**
 * JSON-schema-style description of one page/line-based citation object:
 * fileId, startPageKey, reasoning, fullPhrase and lineIds (an array of
 * numbers), all of which are listed as required.
 */
+ export const CITATION_JSON_OUTPUT_FORMAT = {
107
+ type: "object",
108
+ properties: {
109
+ fileId: { type: "string" },
110
+ startPageKey: {
111
+ type: "string",
112
+ description: 'Only return a result like "page_number_PAGE_index_INDEX" from the provided page keys (e.g. <page_number_1_index_0>) and never from the contents inside the page.',
113
+ },
114
+ reasoning: {
115
+ type: "string",
116
+ description: "The logic connecting the form section requirements to the supporting source citation",
117
+ },
118
+ fullPhrase: {
119
+ type: "string",
120
+ description: "The verbatim text of the terse phrase inside <file_text /> to support the value description (if there is a detected OCR correction, use the corrected text)",
121
+ },
122
+ lineIds: {
123
+ type: "array",
124
+ items: { type: "number" },
125
+ description: "Infer lineIds, as we only provide the first, last, and every 5th line. Provide inclusive lineIds for the fullPhrase.",
126
+ },
127
+ },
128
+ required: ["fileId", "startPageKey", "reasoning", "fullPhrase", "lineIds"],
129
+ };
130
/**
 * JSON-schema-style description of one audio/video citation object: fileId,
 * startPageKey, fullPhrase and a timestamps object whose startTime and endTime
 * are both required.
 *
 * NOTE(review): unlike CITATION_JSON_OUTPUT_FORMAT, this schema has no
 * top-level `required` list and no `reasoning` property, and it still declares
 * the page-oriented `startPageKey` — confirm whether that is intentional.
 */
+ export const CITATION_AV_BASED_JSON_OUTPUT_FORMAT = {
131
+ type: "object",
132
+ properties: {
133
+ fileId: { type: "string" },
134
+ startPageKey: {
135
+ type: "string",
136
+ description: 'Only return a result like "page_number_PAGE_index_INDEX" from the provided page keys (e.g. <page_number_1_index_0>) and never from the contents inside the page.',
137
+ },
138
+ fullPhrase: {
139
+ type: "string",
140
+ description: "The exact verbatim text of the phrase or paragraph from the source document to support the value description (if there is a detected OCR correction, use the verbatim corrected text)",
141
+ },
142
+ timestamps: {
143
+ type: "object",
144
+ properties: {
145
+ startTime: { type: "string" },
146
+ endTime: { type: "string" },
147
+ },
148
+ required: ["startTime", "endTime"],
149
+ description: "The timestamp of the audio or video frame including milliseconds formatted as: HH:MM:SS.SSS",
150
+ },
151
+ },
152
+ };
@@ -0,0 +1,3 @@
1
+ export * from "./promptCompression";
2
+ export * from "./citationPrompts";
3
+ export * from "./types";
@@ -0,0 +1,3 @@
1
+ export * from "./promptCompression";
2
+ export * from "./citationPrompts";
3
+ export * from "./types";
@@ -0,0 +1,14 @@
1
+ import { CompressedResult } from "./types";
2
+ /**
3
+ * Compress all occurrences of `ids` inside `obj`, returning a new object
4
+ * plus the `prefixMap` needed to decompress.
+ *
+ * @param obj - Value to compress. It is serialized with JSON.stringify, rewritten, and parsed back.
+ * @param ids - Full IDs to shorten. When undefined or empty, `obj` is returned as-is with an empty `prefixMap`.
+ * @returns The compressed value together with a map from each short prefix to its full ID.
+ * @throws Error when no safe unique prefix can be found for one of the IDs.
5
+ */
6
+ export declare function compressPromptIds<T>(obj: T, ids: string[] | undefined): CompressedResult<T>;
7
+ /**
8
+ * Decompress all minimal prefixes back into their full IDs,
9
+ * using the `prefixMap` returned from `compressPromptIds`.
10
+ *
11
+ * If you pass in a string, it will return a string.
12
+ * If you pass in an object, it will JSON‑serialize and parse it back.
+ *
+ * @param compressed - Compressed string or object (for example LLM output that contains prefixes).
+ * @param prefixMap - Map from prefix to full ID. When missing or empty, `compressed` is returned unchanged.
+ * @returns The input with every prefix expanded to its full ID.
+ * @throws Error when the expanded text ends up shorter than the input text.
13
+ */
14
+ export declare function decompressPromptIds<T>(compressed: T | string, prefixMap: Record<string, string>): T | string;
@@ -0,0 +1,109 @@
1
/** Shortest prefix length that is ever tried for an ID. */
const MIN_PREFIX_LENGTH = 4;
/** Minimum number of ASCII letters a prefix needs when it also contains a digit. */
const MIN_CHARACTERS_PER_PREFIX_WITH_AT_LEAST_ONE_DIGIT = 3;
/** Minimum number of ASCII letters a prefix needs when it contains no digit. */
const MIN_CHARACTERS_PER_PREFIX_WITH_NO_DIGITS = 5;

/**
 * Counts non-overlapping, left-to-right occurrences of the literal `needle`
 * in `haystack` (no regular-expression semantics involved).
 */
function countOccurrences(haystack, needle) {
  return haystack.split(needle).length - 1;
}

/**
 * Build a map from each ID's minimal unique prefix to the full ID,
 * such that the prefix only ever appears in the prompt where the full ID appears.
 *
 * @param ids - Distinct full IDs.
 * @param prompt - Serialized text in which the IDs will be replaced.
 * @returns Object mapping each chosen prefix to its full ID.
 * @throws Error when an ID has no prefix that satisfies every requirement
 *   (this includes IDs shorter than MIN_PREFIX_LENGTH).
 */
function buildSafePrefixMap(ids, prompt) {
  const map = {};
  for (const id of ids) {
    let fullCount; // computed lazily, once per ID
    let found = false;
    for (let len = MIN_PREFIX_LENGTH; len <= id.length; len++) {
      const prefix = id.slice(0, len);
      // Minimum "shape" requirements: enough letters for the prefix to be distinctive.
      const digitCount = (prefix.match(/\d/g) || []).length;
      const letterCount = (prefix.match(/[a-zA-Z]/g) || []).length;
      const minLetters = digitCount > 0
        ? MIN_CHARACTERS_PER_PREFIX_WITH_AT_LEAST_ONE_DIGIT
        : MIN_CHARACTERS_PER_PREFIX_WITH_NO_DIGITS;
      if (letterCount < minLetters) {
        continue;
      }
      // 1) Unique among IDs
      if (ids.some(other => other !== id && other.startsWith(prefix))) {
        continue;
      }
      // 2) Only appears in prompt as part of the full ID
      if (fullCount === undefined) {
        fullCount = countOccurrences(prompt, id);
      }
      if (countOccurrences(prompt, prefix) !== fullCount) {
        continue;
      }
      map[prefix] = id;
      found = true;
      break;
    }
    if (!found) {
      throw new Error(`Cannot find a safe unique prefix for ID "${id}" that meets the minimum requirements (length: ${MIN_PREFIX_LENGTH})`);
    }
  }
  return map;
}

/**
 * Compress all occurrences of `ids` inside `obj`, returning a new object
 * plus the `prefixMap` needed to decompress.
 *
 * @param obj - Value to compress; it is JSON-serialized, rewritten and parsed back.
 * @param ids - Full IDs to shorten. When undefined or empty, `obj` is returned
 *   unchanged with an empty `prefixMap`.
 * @returns `{ compressed, prefixMap }`, where `prefixMap` maps prefix to full ID.
 * @throws Error when no safe unique prefix exists for one of the IDs.
 */
export function compressPromptIds(obj, ids) {
  if (!ids || ids.length === 0) {
    return { compressed: obj, prefixMap: {} };
  }
  const text = JSON.stringify(obj);
  if (text === undefined) {
    // JSON.stringify yields undefined for undefined, functions and symbols:
    // there is no text to compress, so hand the value back untouched.
    return { compressed: obj, prefixMap: {} };
  }
  const uniqueIds = Array.from(new Set(ids));
  const prefixMap = buildSafePrefixMap(uniqueIds, text);
  // Sort prefixes by descending length to avoid partial matches
  const prefixes = Object.keys(prefixMap).sort((a, b) => b.length - a.length);
  let compressedText = text;
  for (const prefix of prefixes) {
    // Literal split/join instead of String.replace with a replacement string:
    // sequences such as "$&" or "$$" inside an ID must not be interpreted as
    // replacement patterns.
    compressedText = compressedText.split(prefixMap[prefix]).join(prefix);
  }
  return {
    compressed: JSON.parse(compressedText),
    prefixMap,
  };
}
65
/**
 * Decompress all minimal prefixes back into their full IDs,
 * using the `prefixMap` returned from `compressPromptIds`.
 *
 * If you pass in a string, it will return a string.
 * If you pass in an object, it will JSON-serialize and parse it back.
 *
 * When the map holds exactly one entry, every `file_id='…'` / `file_id="…"`
 * attribute in the text (or, failing that, every `fileId='…'` / `fileId="…"`)
 * is additionally rewritten to carry that single full ID, single-quoted.
 *
 * @param compressed - Compressed string or object.
 * @param prefixMap - Map from prefix to full ID. When missing or empty,
 *   `compressed` is returned unchanged.
 * @returns The input with prefixes expanded (same kind as the input: string or object).
 * @throws Error when the resulting text is shorter than the input text.
 */
export function decompressPromptIds(compressed, prefixMap) {
  if (!prefixMap || Object.keys(prefixMap).length === 0) {
    return compressed;
  }
  // Decide whether we're working on a string or an object
  const shouldParseBack = typeof compressed !== "string";
  let text = shouldParseBack ? JSON.stringify(compressed) : compressed;
  if (text === undefined) {
    // JSON.stringify yields undefined for undefined, functions and symbols:
    // there is nothing to expand.
    return compressed;
  }
  // Prepare sorted [prefix, full] entries (longest prefix first)
  const entries = Object.entries(prefixMap).sort((a, b) => b[0].length - a[0].length);
  const originalLength = text.length;
  // Perform all prefix → full-ID replacements. Literal split/join is used so
  // that "$&", "$$" etc. inside an ID are never treated as replacement patterns.
  for (const [prefix, full] of entries) {
    if (prefix === "") {
      continue; // an empty prefix would match between every character
    }
    text = text.split(prefix).join(full);
  }
  // this is for citation fileId or file_id
  if (entries.length === 1) {
    const fullId = entries[0][1];
    // Function replacers keep `$` sequences in the ID literal.
    if (text.includes("file_id='") || text.includes('file_id="')) {
      text = text.replace(/file_id='[^']*'|file_id="[^"]*"/g, () => `file_id='${fullId}'`);
    }
    else if (text.includes("fileId='") || text.includes('fileId="')) {
      text = text.replace(/fileId='[^']*'|fileId="[^"]*"/g, () => `fileId='${fullId}'`);
    }
  }
  const newLength = text.length;
  const diff = originalLength - newLength;
  if (diff > 0) {
    // Decompression is expected to grow (or keep) the text, never shrink it.
    throw new Error(`[decompressedPromptIds] diff ${diff} originalLength ${originalLength} newLength ${newLength}`);
  }
  return shouldParseBack ? JSON.parse(text) : text;
}
@@ -0,0 +1,4 @@
1
/**
 * Result shape returned by compressPromptIds (see its declaration, which
 * returns CompressedResult<T>).
 */
+ export interface CompressedResult<T> {
2
/** The value with IDs shortened; it has the same type as the input value. */
+ compressed: T;
3
/** String-to-string map used to decompress; presumably prefix -> full ID — confirm against compressPromptIds. */
+ prefixMap: Record<string, string>;
4
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,134 @@
1
+ import React, { type ReactNode } from "react";
2
+ import { type CitationStatus } from "../types/citation.js";
3
+ import type { FoundHighlightLocation } from "../types/foundHighlight.js";
4
+ import type { BaseCitationProps, CitationEventHandlers, CitationRenderProps, CitationVariant } from "./types.js";
5
+ import "./styles.css";
6
+ export type { CitationVariant } from "./types.js";
7
+ /**
8
+ * Props for the CitationComponent.
9
+ *
10
+ * @example Brackets variant (default) - shows value/number in brackets with blue styling
11
+ * ```tsx
12
+ * <CitationComponent
13
+ * citation={{ citationNumber: 1, fullPhrase: "Revenue grew by 25%" }}
14
+ * foundCitation={verificationResult}
15
+ * />
16
+ * // Renders: [1✓] with blue text
17
+ * ```
18
+ *
19
+ * @example Numeric variant - shows just the citation number with indicator
20
+ * ```tsx
21
+ * <CitationComponent
22
+ * citation={{ citationNumber: 1, value: "25% growth" }}
23
+ * foundCitation={verificationResult}
24
+ * variant="numeric"
25
+ * />
26
+ * // Renders: 1✓
27
+ * ```
28
+ *
29
+ * @example Text variant - shows the value without styling
30
+ * ```tsx
31
+ * <CitationComponent
32
+ * citation={{ citationNumber: 1, value: "25% growth" }}
33
+ * foundCitation={verificationResult}
34
+ * variant="text"
35
+ * />
36
+ * // Renders: 25% growth✓
37
+ * ```
38
+ *
39
+ * @example Minimal variant - no brackets, just text and indicator
40
+ * ```tsx
41
+ * <CitationComponent
42
+ * citation={citation}
43
+ * foundCitation={verificationResult}
44
+ * variant="minimal"
45
+ * />
46
+ * // Renders: Revenue grew...✓
47
+ * ```
48
+ *
49
+ * @example Indicator-only variant
50
+ * ```tsx
51
+ * <CitationComponent
52
+ * citation={citation}
53
+ * foundCitation={verificationResult}
54
+ * variant="indicator"
55
+ * />
56
+ * // Renders: ✓
57
+ * ```
58
+ *
59
+ * @example Hidden popover
60
+ * ```tsx
61
+ * <CitationComponent
62
+ * citation={citation}
63
+ * foundCitation={verificationResult}
64
+ * popoverPosition="hidden"
65
+ * />
66
+ * ```
67
+ */
68
+ export interface CitationComponentProps extends BaseCitationProps {
69
+ /**
70
+ * Verification result from the DeepCitation API.
71
+ * Contains match snippet, page number, and verification image.
72
+ */
73
+ foundCitation?: FoundHighlightLocation | null;
74
+ /**
75
+ * Display variant for the citation.
76
+ * - `brackets`: Shows value/number in brackets, blue text styling (default)
77
+ * - `numeric`: Shows citation number with indicator, no brackets
78
+ * - `text`: Shows the value, no text styling, no truncate, shows indicator
79
+ * - `minimal`: No brackets, just display text with indicator
80
+ * - `indicator`: Only the status indicator (checkmark/warning), no text
+ * @default "brackets"
81
+ */
82
+ variant?: CitationVariant;
83
+ /**
84
+ * Event handlers for citation interactions.
85
+ */
86
+ eventHandlers?: CitationEventHandlers;
87
+ /**
88
+ * Enable mobile touch handlers.
89
+ * @default false
90
+ */
91
+ isMobile?: boolean;
92
+ /**
93
+ * Custom render function for the status indicator.
94
+ * Receives the citation status and should return a ReactNode.
95
+ */
96
+ renderIndicator?: (status: CitationStatus) => ReactNode;
97
+ /**
98
+ * Custom render function for the entire citation content.
99
+ * When provided, takes full control of rendering (ignores format, showBrackets).
+ * NOTE(review): `format` and `showBrackets` are not declared in this interface;
+ * they may come from BaseCitationProps or be stale names for `variant` — confirm.
100
+ */
101
+ renderContent?: (props: CitationRenderProps) => ReactNode;
102
+ /**
103
+ * Position of the verification popover.
104
+ * Use "hidden" to disable the popover entirely.
105
+ * @default "top"
106
+ */
107
+ popoverPosition?: "top" | "bottom" | "hidden";
108
+ /**
109
+ * Custom render function for popover content.
+ * Receives the citation, the verification result (or null) and the
+ * citation status, and returns the ReactNode to show inside the popover.
110
+ */
111
+ renderPopoverContent?: (props: {
112
+ citation: BaseCitationProps["citation"];
113
+ foundCitation: FoundHighlightLocation | null;
114
+ status: CitationStatus;
115
+ }) => ReactNode;
116
+ }
117
+ /**
118
+ * CitationComponent displays a citation with verification status.
119
+ *
120
+ * The component separates two concepts:
121
+ * 1. **Found status** (text styling) - whether the citation was found in the document
122
+ * - Verified & Partial both use "found" styling (blue text)
123
+ * - Miss uses "not found" styling (gray/strikethrough)
124
+ *
125
+ * 2. **Match quality** (indicator styling) - how well the citation matched
126
+ * - Exact match: green checkmark
127
+ * - Partial match: orange checkmark
128
+ * - Miss: no indicator
129
+ *
130
+ * This means partial matches have blue text (because they were found) but
131
+ * an orange indicator (because they didn't match exactly).
+ *
+ * Accepts CitationComponentProps; the component is a forwardRef component
+ * whose ref is typed as an HTMLSpanElement.
132
+ */
133
+ export declare const CitationComponent: React.ForwardRefExoticComponent<CitationComponentProps & React.RefAttributes<HTMLSpanElement>>;
134
/**
 * Variant of the citation component that takes the same props and ref type as
 * CitationComponent. Presumably a React.memo wrapper around it (the declared
 * type is React.NamedExoticComponent) — confirm in CitationComponent.js.
 */
+ export declare const MemoizedCitationComponent: React.NamedExoticComponent<CitationComponentProps & React.RefAttributes<HTMLSpanElement>>;