@deepcitation/deepcitation-js 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -1197
- package/lib/client/DeepCitation.d.ts +204 -0
- package/lib/client/DeepCitation.js +473 -0
- package/lib/client/index.d.ts +2 -0
- package/lib/client/index.js +1 -0
- package/lib/client/types.d.ts +157 -0
- package/lib/client/types.js +1 -0
- package/lib/index.d.ts +25 -0
- package/lib/index.js +22 -0
- package/lib/parsing/normalizeCitation.d.ts +5 -0
- package/lib/parsing/normalizeCitation.js +182 -0
- package/lib/parsing/parseCitation.d.ts +79 -0
- package/lib/parsing/parseCitation.js +371 -0
- package/lib/parsing/parseWorkAround.d.ts +2 -0
- package/lib/parsing/parseWorkAround.js +73 -0
- package/lib/prompts/citationPrompts.d.ts +133 -0
- package/lib/prompts/citationPrompts.js +152 -0
- package/lib/prompts/index.d.ts +3 -0
- package/lib/prompts/index.js +3 -0
- package/lib/prompts/promptCompression.d.ts +14 -0
- package/lib/prompts/promptCompression.js +109 -0
- package/lib/prompts/types.d.ts +4 -0
- package/lib/prompts/types.js +1 -0
- package/lib/react/CitationComponent.d.ts +134 -0
- package/lib/react/CitationComponent.js +376 -0
- package/lib/react/CitationVariants.d.ts +135 -0
- package/lib/react/CitationVariants.js +283 -0
- package/lib/react/DiffDisplay.d.ts +10 -0
- package/lib/react/DiffDisplay.js +33 -0
- package/lib/react/UrlCitationComponent.d.ts +83 -0
- package/lib/react/UrlCitationComponent.js +224 -0
- package/lib/react/VerificationTabs.d.ts +10 -0
- package/lib/react/VerificationTabs.js +36 -0
- package/lib/react/icons.d.ts +8 -0
- package/lib/react/icons.js +9 -0
- package/lib/react/index.d.ts +16 -0
- package/lib/react/index.js +18 -0
- package/lib/react/primitives.d.ts +104 -0
- package/lib/react/primitives.js +190 -0
- package/lib/react/types.d.ts +192 -0
- package/lib/react/types.js +1 -0
- package/lib/react/useSmartDiff.d.ts +16 -0
- package/lib/react/useSmartDiff.js +64 -0
- package/lib/react/utils.d.ts +34 -0
- package/lib/react/utils.js +59 -0
- package/lib/types/boxes.d.ts +11 -0
- package/lib/types/boxes.js +1 -0
- package/lib/types/citation.d.ts +44 -0
- package/lib/types/citation.js +2 -0
- package/lib/types/foundHighlight.d.ts +23 -0
- package/lib/types/foundHighlight.js +22 -0
- package/lib/types/index.d.ts +11 -0
- package/lib/types/index.js +7 -0
- package/lib/types/search.d.ts +30 -0
- package/lib/types/search.js +1 -0
- package/lib/utils/sha.d.ts +10 -0
- package/lib/utils/sha.js +108 -0
- package/package.json +11 -23
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import type { Citation } from "../types/index";
|
|
2
|
+
import type { CitationInput, ConvertFileInput, ConvertFileResponse, DeepCitationConfig, FileInput, PrepareConvertedFileOptions, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsFromLlmOutputInput, VerifyCitationsOptions, VerifyCitationsResponse } from "./types";
|
|
3
|
+
/**
 * DeepCitation client for file upload and citation verification.
 *
 * The client keeps an in-memory map from public `fileId` to the internal
 * attachment ID returned by the API. Verification calls resolve files through
 * that map, so a file must be uploaded, converted, or registered on the same
 * client instance before its citations can be verified.
 *
 * @example
 * ```typescript
 * import { DeepCitation } from '@deepcitation/deepcitation-js';
 *
 * const dc = new DeepCitation({ apiKey: process.env.DEEPCITATION_API_KEY });
 *
 * // Upload a file
 * const { fileId, promptContent } = await dc.uploadFile(file);
 *
 * // Include promptContent in your LLM messages
 * const response = await llm.chat({
 *   messages: [
 *     { role: "system", content: wrapSystemCitationPrompt({ systemPrompt }) },
 *     { role: "user", content: userMessage + "\n\n" + promptContent },
 *   ]
 * });
 *
 * // Verify citations in the LLM output
 * const citations = getAllCitationsFromLlmOutput(response);
 * const verified = await dc.verifyCitations(fileId, citations);
 * ```
 */
export declare class DeepCitation {
    private readonly apiKey;
    private readonly apiUrl;
    /**
     * Stores mapping of user-provided fileId to internal attachmentId.
     * This allows users to reference files by their own IDs.
     */
    private fileIdMap;
    /**
     * Create a new DeepCitation client instance.
     *
     * @param config - Configuration options
     * @throws Error if apiKey is not provided
     */
    constructor(config: DeepCitationConfig);
    /**
     * Upload a file for citation verification.
     *
     * Supported file types:
     * - PDF documents
     * - Images (PNG, JPEG, WebP, AVIF, HEIC)
     * - Coming soon: DOCX, XLSX, plain text
     *
     * @param file - The file to upload (File, Blob, or Buffer)
     * @param options - Optional upload options
     * @returns Upload response with fileId and extracted text
     * @throws Error if `file` is not a File, Blob, or Buffer, or if the API
     *   responds with a non-2xx status
     *
     * @example
     * ```typescript
     * // Browser with File object
     * const file = document.querySelector('input[type="file"]').files[0];
     * const result = await dc.uploadFile(file);
     *
     * // Node.js with Buffer
     * const buffer = fs.readFileSync('document.pdf');
     * const result = await dc.uploadFile(buffer, { filename: 'document.pdf' });
     * ```
     */
    uploadFile(file: File | Blob | Buffer, options?: UploadFileOptions): Promise<UploadFileResponse>;
    /**
     * Convert a URL or Office file to PDF for citation verification.
     * The converted file can then be processed with prepareConvertedFile().
     *
     * Supported Office formats:
     * - Microsoft Word (.doc, .docx)
     * - Microsoft Excel (.xls, .xlsx)
     * - Microsoft PowerPoint (.ppt, .pptx)
     * - OpenDocument (.odt, .ods, .odp)
     * - Rich Text Format (.rtf)
     * - CSV (.csv)
     *
     * @param input - URL string or object with URL/file options. When both
     *   `url` and `file` are given, the URL is converted.
     * @returns Conversion result. Pass its `fileId` to prepareConvertedFile();
     *   the internal attachment ID is kept inside the client and is not part
     *   of the returned object.
     * @throws Error if neither `url` nor `file` is provided, if `file` is not a
     *   File, Blob, or Buffer, or if the API responds with a non-2xx status
     *
     * @example
     * ```typescript
     * // Convert a URL to PDF
     * const result = await dc.convertToPdf({ url: "https://example.com/article" });
     *
     * // Convert an Office document
     * const result = await dc.convertToPdf({
     *   file: docxBuffer,
     *   filename: "report.docx"
     * });
     *
     * // Then prepare the file for verification
     * const { fileDeepText, fileId } = await dc.prepareConvertedFile({
     *   fileId: result.fileId
     * });
     * ```
     */
    convertToPdf(input: ConvertFileInput | string): Promise<ConvertFileResponse>;
    /**
     * Prepare a previously converted file for citation verification.
     * Use this after calling convertToPdf() to extract text and get fileDeepText.
     *
     * @param options - Options carrying the fileId returned by convertToPdf()
     * @returns Upload response with fileId and extracted text
     * @throws Error if the fileId is not known to this client instance, or if
     *   the API responds with a non-2xx status
     *
     * @example
     * ```typescript
     * // First convert the file
     * const converted = await dc.convertToPdf({ url: "https://example.com/article" });
     *
     * // Then prepare it for verification
     * const { fileDeepText, fileId } = await dc.prepareConvertedFile({
     *   fileId: converted.fileId
     * });
     *
     * // Use fileDeepText in your LLM prompt...
     * ```
     */
    prepareConvertedFile(options: PrepareConvertedFileOptions): Promise<UploadFileResponse>;
    /**
     * Upload multiple files for citation verification and get structured content.
     * This is the recommended way to prepare files for LLM prompts.
     * Files are uploaded in parallel; an empty array resolves to empty results
     * without contacting the API.
     *
     * @param files - Array of files to upload with optional filenames and fileIds
     * @returns Object containing fileDataParts for verification and fileDeepTexts for LLM
     *
     * @example
     * ```typescript
     * const { fileDataParts, fileDeepTexts } = await dc.prepareFiles([
     *   { file: pdfBuffer, filename: "report.pdf" },
     *   { file: invoiceBuffer, filename: "invoice.pdf" },
     * ]);
     *
     * // Use fileDeepTexts in wrapCitationPrompt
     * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
     *   systemPrompt,
     *   userPrompt,
     *   fileDeepText: fileDeepTexts
     * });
     *
     * // Use fileDataParts later for verification
     * const result = await dc.verifyCitationsFromLlmOutput({ llmOutput, fileDataParts });
     * ```
     */
    prepareFiles(files: FileInput[]): Promise<PrepareFilesResult>;
    /**
     * Verify citations against a previously uploaded file.
     *
     * @param fileId - The file ID returned from uploadFile
     * @param citations - Citations to verify (from getAllCitationsFromLlmOutput):
     *   a single citation, an array of citations, or a map keyed by citation key
     * @param options - Optional verification options
     * @returns Verification results with status and proof images
     * @throws Error if `fileId` is not known to this client instance, if
     *   `citations` is not an object or array, or if the API responds with a
     *   non-2xx status
     *
     * @example
     * ```typescript
     * import { getAllCitationsFromLlmOutput } from '@deepcitation/deepcitation-js';
     *
     * const citations = getAllCitationsFromLlmOutput(llmResponse);
     * const verified = await dc.verifyCitations(fileId, citations);
     *
     * for (const [key, result] of Object.entries(verified.foundHighlights)) {
     *   console.log(key, result.searchState?.status);
     *   // "found", "partial_text_found", "not_found", etc.
     * }
     * ```
     */
    verifyCitations(fileId: string, citations: CitationInput, options?: VerifyCitationsOptions): Promise<VerifyCitationsResponse>;
    /**
     * Verify citations from LLM output with automatic parsing.
     * This is the recommended way to verify citations for new integrations.
     *
     * Citations are grouped by their `fileId` and each group is verified
     * against the matching file. Citations whose `fileId` has not been
     * uploaded or registered on this client instance are skipped and do not
     * appear in the result.
     *
     * @param input - Object containing llmOutput and optional fileDataParts
     * @param citations - Optional pre-parsed citation map. When omitted, the
     *   citations are parsed from `input.llmOutput`.
     * @returns Verification results with status and proof images; an empty
     *   `foundHighlights` object when there are no citations
     *
     * @example
     * ```typescript
     * const result = await dc.verifyCitationsFromLlmOutput({
     *   llmOutput: response.content,
     *   fileDataParts, // From prepareFiles()
     * });
     *
     * for (const [key, highlight] of Object.entries(result.foundHighlights)) {
     *   console.log(key, highlight.searchState?.status);
     * }
     * ```
     */
    verifyCitationsFromLlmOutput(input: VerifyCitationsFromLlmOutputInput, citations?: {
        [key: string]: Citation;
    }): Promise<VerifyCitationsResponse>;
    /**
     * Register a file that was uploaded separately (e.g., via direct API call).
     * This allows you to use verifyCitations with files not uploaded via uploadFile().
     *
     * @param fileId - Your file ID
     * @param attachmentId - The internal attachment ID
     */
    registerFile(fileId: string, attachmentId: string): void;
    /**
     * Clear the internal file ID mapping.
     * Useful for cleanup or when working with many files. After clearing,
     * previously uploaded files must be registered again before verification.
     */
    clearFileMap(): void;
}
|
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
import { getAllCitationsFromLlmOutput } from "../parsing/parseCitation";
|
|
2
|
+
import { generateCitationKey } from "../react/utils";
|
|
3
|
+
const DEFAULT_API_URL = "https://api.deepcitation.com";
|
|
4
|
+
/**
|
|
5
|
+
* DeepCitation client for file upload and citation verification.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* import { DeepCitation } from '@deepcitation/deepcitation-js';
|
|
10
|
+
*
|
|
11
|
+
* const dc = new DeepCitation({ apiKey: process.env.DEEPCITATION_API_KEY });
|
|
12
|
+
*
|
|
13
|
+
* // Upload a file
|
|
14
|
+
* const { fileId, promptContent } = await dc.uploadFile(file);
|
|
15
|
+
*
|
|
16
|
+
* // Include promptContent in your LLM messages
|
|
17
|
+
* const response = await llm.chat({
|
|
18
|
+
* messages: [
|
|
19
|
+
* { role: "system", content: wrapSystemCitationPrompt({ systemPrompt }) },
|
|
20
|
+
* { role: "user", content: userMessage + "\n\n" + promptContent },
|
|
21
|
+
* ]
|
|
22
|
+
* });
|
|
23
|
+
*
|
|
24
|
+
* // Verify citations in the LLM output
|
|
25
|
+
* const citations = getAllCitationsFromLlmOutput(response);
|
|
26
|
+
* const verified = await dc.verifyCitations(fileId, citations);
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
export class DeepCitation {
|
|
30
|
+
apiKey;
|
|
31
|
+
apiUrl;
|
|
32
|
+
/**
|
|
33
|
+
* Stores mapping of user-provided fileId to internal attachmentId
|
|
34
|
+
* This allows users to reference files by their own IDs
|
|
35
|
+
*/
|
|
36
|
+
fileIdMap = new Map();
|
|
37
|
+
/**
|
|
38
|
+
* Create a new DeepCitation client instance.
|
|
39
|
+
*
|
|
40
|
+
* @param config - Configuration options
|
|
41
|
+
* @throws Error if apiKey is not provided
|
|
42
|
+
*/
|
|
43
|
+
constructor(config) {
|
|
44
|
+
if (!config.apiKey) {
|
|
45
|
+
throw new Error("DeepCitation API key is required. Get one at https://deepcitation.com/dashboard");
|
|
46
|
+
}
|
|
47
|
+
this.apiKey = config.apiKey;
|
|
48
|
+
this.apiUrl = config.apiUrl?.replace(/\/$/, "") || DEFAULT_API_URL;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Upload a file for citation verification.
|
|
52
|
+
*
|
|
53
|
+
* Supported file types:
|
|
54
|
+
* - PDF documents
|
|
55
|
+
* - Images (PNG, JPEG, WebP, AVIF, HEIC)
|
|
56
|
+
* - Coming soon: DOCX, XLSX, plain text
|
|
57
|
+
*
|
|
58
|
+
* @param file - The file to upload (File, Blob, or Buffer)
|
|
59
|
+
* @param options - Optional upload options
|
|
60
|
+
* @returns Upload response with fileId and extracted text
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```typescript
|
|
64
|
+
* // Browser with File object
|
|
65
|
+
* const file = document.querySelector('input[type="file"]').files[0];
|
|
66
|
+
* const result = await dc.uploadFile(file);
|
|
67
|
+
*
|
|
68
|
+
* // Node.js with Buffer
|
|
69
|
+
* const buffer = fs.readFileSync('document.pdf');
|
|
70
|
+
* const result = await dc.uploadFile(buffer, { filename: 'document.pdf' });
|
|
71
|
+
* ```
|
|
72
|
+
*/
|
|
73
|
+
async uploadFile(file, options) {
|
|
74
|
+
const formData = new FormData();
|
|
75
|
+
// Handle different input types
|
|
76
|
+
if (typeof Buffer !== "undefined" && Buffer.isBuffer(file)) {
|
|
77
|
+
// Node.js Buffer - copy to a new ArrayBuffer for Blob compatibility
|
|
78
|
+
const filename = options?.filename || "document";
|
|
79
|
+
// Use Uint8Array.from to create a copy that's definitely backed by ArrayBuffer (not SharedArrayBuffer)
|
|
80
|
+
const uint8 = Uint8Array.from(file);
|
|
81
|
+
const blob = new Blob([uint8]);
|
|
82
|
+
formData.append("file", blob, filename);
|
|
83
|
+
}
|
|
84
|
+
else if (file instanceof Blob) {
|
|
85
|
+
// File or Blob
|
|
86
|
+
const filename = options?.filename || (file instanceof File ? file.name : "document");
|
|
87
|
+
formData.append("file", file, filename);
|
|
88
|
+
}
|
|
89
|
+
else {
|
|
90
|
+
throw new Error("Invalid file type. Expected File, Blob, or Buffer.");
|
|
91
|
+
}
|
|
92
|
+
// Add optional fields
|
|
93
|
+
if (options?.fileId) {
|
|
94
|
+
formData.append("fileId", options.fileId);
|
|
95
|
+
}
|
|
96
|
+
if (options?.filename) {
|
|
97
|
+
formData.append("filename", options.filename);
|
|
98
|
+
}
|
|
99
|
+
const response = await fetch(`${this.apiUrl}/prepareFile`, {
|
|
100
|
+
method: "POST",
|
|
101
|
+
headers: {
|
|
102
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
103
|
+
},
|
|
104
|
+
body: formData,
|
|
105
|
+
});
|
|
106
|
+
if (!response.ok) {
|
|
107
|
+
const error = await response.json().catch(() => ({}));
|
|
108
|
+
throw new Error(error?.error?.message || `Upload failed with status ${response.status}`);
|
|
109
|
+
}
|
|
110
|
+
// Internal response includes attachmentId which we need for verification
|
|
111
|
+
const apiResponse = (await response.json());
|
|
112
|
+
// Store the mapping for later verification calls
|
|
113
|
+
this.fileIdMap.set(apiResponse.fileId, {
|
|
114
|
+
attachmentId: apiResponse.attachmentId,
|
|
115
|
+
});
|
|
116
|
+
// Return public response without internal fields
|
|
117
|
+
const { attachmentId: _attachmentId, ...publicResponse } = apiResponse;
|
|
118
|
+
return publicResponse;
|
|
119
|
+
}
|
|
120
|
+
/**
|
|
121
|
+
* Convert a URL or Office file to PDF for citation verification.
|
|
122
|
+
* The converted file can then be processed with prepareConvertedFile().
|
|
123
|
+
*
|
|
124
|
+
* Supported Office formats:
|
|
125
|
+
* - Microsoft Word (.doc, .docx)
|
|
126
|
+
* - Microsoft Excel (.xls, .xlsx)
|
|
127
|
+
* - Microsoft PowerPoint (.ppt, .pptx)
|
|
128
|
+
* - OpenDocument (.odt, .ods, .odp)
|
|
129
|
+
* - Rich Text Format (.rtf)
|
|
130
|
+
* - CSV (.csv)
|
|
131
|
+
*
|
|
132
|
+
* @param input - URL string or object with URL/file options
|
|
133
|
+
* @returns Conversion result with attachmentId for prepareConvertedFile
|
|
134
|
+
*
|
|
135
|
+
* @example
|
|
136
|
+
* ```typescript
|
|
137
|
+
* // Convert a URL to PDF
|
|
138
|
+
* const result = await dc.convertToPdf({ url: "https://example.com/article" });
|
|
139
|
+
*
|
|
140
|
+
* // Convert an Office document
|
|
141
|
+
* const result = await dc.convertToPdf({
|
|
142
|
+
* file: docxBuffer,
|
|
143
|
+
* filename: "report.docx"
|
|
144
|
+
* });
|
|
145
|
+
*
|
|
146
|
+
* // Then prepare the file for verification
|
|
147
|
+
* const { fileDeepText, fileId } = await dc.prepareConvertedFile({
|
|
148
|
+
* fileId: result.fileId
|
|
149
|
+
* });
|
|
150
|
+
* ```
|
|
151
|
+
*/
|
|
152
|
+
async convertToPdf(input) {
|
|
153
|
+
// Handle string URL shorthand
|
|
154
|
+
const inputObj = typeof input === "string" ? { url: input } : input;
|
|
155
|
+
const { url, file, filename, fileId, singlePage } = inputObj;
|
|
156
|
+
if (!url && !file) {
|
|
157
|
+
throw new Error("Either url or file must be provided");
|
|
158
|
+
}
|
|
159
|
+
let response;
|
|
160
|
+
if (url) {
|
|
161
|
+
// URL conversion - send as JSON
|
|
162
|
+
response = await fetch(`${this.apiUrl}/convertFile`, {
|
|
163
|
+
method: "POST",
|
|
164
|
+
headers: {
|
|
165
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
166
|
+
"Content-Type": "application/json",
|
|
167
|
+
},
|
|
168
|
+
body: JSON.stringify({
|
|
169
|
+
url,
|
|
170
|
+
filename,
|
|
171
|
+
fileId,
|
|
172
|
+
singlePage,
|
|
173
|
+
}),
|
|
174
|
+
});
|
|
175
|
+
}
|
|
176
|
+
else if (file) {
|
|
177
|
+
// Office file conversion - send as multipart
|
|
178
|
+
const formData = new FormData();
|
|
179
|
+
if (typeof Buffer !== "undefined" && Buffer.isBuffer(file)) {
|
|
180
|
+
const fname = filename || "document";
|
|
181
|
+
const uint8 = Uint8Array.from(file);
|
|
182
|
+
const blob = new Blob([uint8]);
|
|
183
|
+
formData.append("file", blob, fname);
|
|
184
|
+
}
|
|
185
|
+
else if (file instanceof Blob) {
|
|
186
|
+
const fname = filename || (file instanceof File ? file.name : "document");
|
|
187
|
+
formData.append("file", file, fname);
|
|
188
|
+
}
|
|
189
|
+
else {
|
|
190
|
+
throw new Error("Invalid file type. Expected File, Blob, or Buffer.");
|
|
191
|
+
}
|
|
192
|
+
if (fileId) {
|
|
193
|
+
formData.append("fileId", fileId);
|
|
194
|
+
}
|
|
195
|
+
if (filename) {
|
|
196
|
+
formData.append("filename", filename);
|
|
197
|
+
}
|
|
198
|
+
response = await fetch(`${this.apiUrl}/convertFile`, {
|
|
199
|
+
method: "POST",
|
|
200
|
+
headers: {
|
|
201
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
202
|
+
},
|
|
203
|
+
body: formData,
|
|
204
|
+
});
|
|
205
|
+
}
|
|
206
|
+
else {
|
|
207
|
+
throw new Error("Either url or file must be provided");
|
|
208
|
+
}
|
|
209
|
+
if (!response.ok) {
|
|
210
|
+
const error = await response.json().catch(() => ({}));
|
|
211
|
+
throw new Error(error?.error?.message || `Conversion failed with status ${response.status}`);
|
|
212
|
+
}
|
|
213
|
+
// Internal response includes attachmentId which we need for the two-step flow
|
|
214
|
+
const apiResponse = (await response.json());
|
|
215
|
+
// Store the mapping for later verification and prepareConvertedFile calls
|
|
216
|
+
this.fileIdMap.set(apiResponse.fileId, {
|
|
217
|
+
attachmentId: apiResponse.attachmentId,
|
|
218
|
+
});
|
|
219
|
+
// Return public response without internal fields
|
|
220
|
+
const { attachmentId: _attachmentId, ...publicResponse } = apiResponse;
|
|
221
|
+
return publicResponse;
|
|
222
|
+
}
|
|
223
|
+
/**
|
|
224
|
+
* Prepare a previously converted file for citation verification.
|
|
225
|
+
* Use this after calling convertToPdf() to extract text and get fileDeepText.
|
|
226
|
+
*
|
|
227
|
+
* @param options - Options with fileId from convertFile
|
|
228
|
+
* @returns Upload response with fileId and extracted text
|
|
229
|
+
*
|
|
230
|
+
* @example
|
|
231
|
+
* ```typescript
|
|
232
|
+
* // First convert the file
|
|
233
|
+
* const converted = await dc.convertToPdf({ url: "https://example.com/article" });
|
|
234
|
+
*
|
|
235
|
+
* // Then prepare it for verification
|
|
236
|
+
* const { fileDeepText, fileId } = await dc.prepareConvertedFile({
|
|
237
|
+
* fileId: converted.fileId
|
|
238
|
+
* });
|
|
239
|
+
*
|
|
240
|
+
* // Use fileDeepText in your LLM prompt...
|
|
241
|
+
* ```
|
|
242
|
+
*/
|
|
243
|
+
async prepareConvertedFile(options) {
|
|
244
|
+
// Look up the internal attachmentId from the fileId
|
|
245
|
+
const fileInfo = this.fileIdMap.get(options.fileId);
|
|
246
|
+
if (!fileInfo) {
|
|
247
|
+
throw new Error(`File ID "${options.fileId}" not found. Make sure to call convertToPdf() first.`);
|
|
248
|
+
}
|
|
249
|
+
const response = await fetch(`${this.apiUrl}/prepareFile`, {
|
|
250
|
+
method: "POST",
|
|
251
|
+
headers: {
|
|
252
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
253
|
+
"Content-Type": "application/json",
|
|
254
|
+
},
|
|
255
|
+
body: JSON.stringify({
|
|
256
|
+
attachmentId: fileInfo.attachmentId,
|
|
257
|
+
fileId: options.fileId,
|
|
258
|
+
}),
|
|
259
|
+
});
|
|
260
|
+
if (!response.ok) {
|
|
261
|
+
const error = await response.json().catch(() => ({}));
|
|
262
|
+
throw new Error(error?.error?.message || `Prepare failed with status ${response.status}`);
|
|
263
|
+
}
|
|
264
|
+
// Internal response includes attachmentId
|
|
265
|
+
const apiResponse = (await response.json());
|
|
266
|
+
// Update the mapping (attachmentId should remain the same)
|
|
267
|
+
this.fileIdMap.set(apiResponse.fileId, {
|
|
268
|
+
attachmentId: apiResponse.attachmentId,
|
|
269
|
+
});
|
|
270
|
+
// Return public response without internal fields
|
|
271
|
+
const { attachmentId: _attachmentId, ...publicResponse } = apiResponse;
|
|
272
|
+
return publicResponse;
|
|
273
|
+
}
|
|
274
|
+
/**
|
|
275
|
+
* Upload multiple files for citation verification and get structured content.
|
|
276
|
+
* This is the recommended way to prepare files for LLM prompts.
|
|
277
|
+
*
|
|
278
|
+
* @param files - Array of files to upload with optional filenames and fileIds
|
|
279
|
+
* @returns Object containing fileDataParts for verification and fileDeepTexts for LLM
|
|
280
|
+
*
|
|
281
|
+
* @example
|
|
282
|
+
* ```typescript
|
|
283
|
+
* const { fileDataParts, fileDeepTexts } = await dc.prepareFiles([
|
|
284
|
+
* { file: pdfBuffer, filename: "report.pdf" },
|
|
285
|
+
* { file: invoiceBuffer, filename: "invoice.pdf" },
|
|
286
|
+
* ]);
|
|
287
|
+
*
|
|
288
|
+
* // Use fileDeepTexts in wrapCitationPrompt
|
|
289
|
+
* const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
|
|
290
|
+
* systemPrompt,
|
|
291
|
+
* userPrompt,
|
|
292
|
+
* fileDeepText: fileDeepTexts
|
|
293
|
+
* });
|
|
294
|
+
*
|
|
295
|
+
* // Use fileDataParts later for verification
|
|
296
|
+
* const result = await dc.verifyCitationsFromLlmOutput({ llmOutput, fileDataParts });
|
|
297
|
+
* ```
|
|
298
|
+
*/
|
|
299
|
+
async prepareFiles(files) {
|
|
300
|
+
if (files.length === 0) {
|
|
301
|
+
return { fileDataParts: [], fileDeepTexts: [] };
|
|
302
|
+
}
|
|
303
|
+
// Upload all files in parallel
|
|
304
|
+
const uploadPromises = files.map(({ file, filename, fileId }) => this.uploadFile(file, { filename, fileId }));
|
|
305
|
+
const results = await Promise.all(uploadPromises);
|
|
306
|
+
// Extract file data parts and file deep texts
|
|
307
|
+
const fileDataParts = results.map(result => ({
|
|
308
|
+
fileId: result.fileId,
|
|
309
|
+
}));
|
|
310
|
+
const fileDeepTexts = results.map(result => result.fileDeepText);
|
|
311
|
+
return { fileDataParts, fileDeepTexts };
|
|
312
|
+
}
|
|
313
|
+
/**
|
|
314
|
+
* Verify citations against a previously uploaded file.
|
|
315
|
+
*
|
|
316
|
+
* @param fileId - The file ID returned from uploadFile
|
|
317
|
+
* @param citations - Citations to verify (from getAllCitationsFromLlmOutput)
|
|
318
|
+
* @param options - Optional verification options
|
|
319
|
+
* @returns Verification results with status and proof images
|
|
320
|
+
*
|
|
321
|
+
* @example
|
|
322
|
+
* ```typescript
|
|
323
|
+
* import { getAllCitationsFromLlmOutput } from '@deepcitation/deepcitation-js';
|
|
324
|
+
*
|
|
325
|
+
* const citations = getAllCitationsFromLlmOutput(llmResponse);
|
|
326
|
+
* const verified = await dc.verifyCitations(fileId, citations);
|
|
327
|
+
*
|
|
328
|
+
* for (const [key, result] of Object.entries(verified.foundHighlights)) {
|
|
329
|
+
* console.log(key, result.searchState?.status);
|
|
330
|
+
* // "found", "partial_text_found", "not_found", etc.
|
|
331
|
+
* }
|
|
332
|
+
* ```
|
|
333
|
+
*/
|
|
334
|
+
async verifyCitations(fileId, citations, options) {
|
|
335
|
+
// Look up the internal IDs from our map
|
|
336
|
+
const fileInfo = this.fileIdMap.get(fileId);
|
|
337
|
+
if (!fileInfo) {
|
|
338
|
+
throw new Error(`File ID "${fileId}" not found. Make sure to upload the file first with uploadFile().`);
|
|
339
|
+
}
|
|
340
|
+
// Normalize citations to a map with citation keys
|
|
341
|
+
const citationMap = {};
|
|
342
|
+
if (Array.isArray(citations)) {
|
|
343
|
+
// Array of citations - generate keys
|
|
344
|
+
for (const citation of citations) {
|
|
345
|
+
const key = generateCitationKey(citation);
|
|
346
|
+
citationMap[key] = citation;
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
else if (typeof citations === "object" && citations !== null) {
|
|
350
|
+
// Check if it's a single citation or a map
|
|
351
|
+
if ("fullPhrase" in citations || "value" in citations) {
|
|
352
|
+
// Single citation
|
|
353
|
+
const key = generateCitationKey(citations);
|
|
354
|
+
citationMap[key] = citations;
|
|
355
|
+
}
|
|
356
|
+
else {
|
|
357
|
+
// Already a map
|
|
358
|
+
Object.assign(citationMap, citations);
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
else {
|
|
362
|
+
throw new Error("Invalid citations format");
|
|
363
|
+
}
|
|
364
|
+
const response = await fetch(`${this.apiUrl}/verifyCitation`, {
|
|
365
|
+
method: "POST",
|
|
366
|
+
headers: {
|
|
367
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
368
|
+
"Content-Type": "application/json",
|
|
369
|
+
},
|
|
370
|
+
body: JSON.stringify({
|
|
371
|
+
data: {
|
|
372
|
+
attachmentId: fileInfo.attachmentId,
|
|
373
|
+
citations: citationMap,
|
|
374
|
+
outputImageFormat: options?.outputImageFormat || "avif",
|
|
375
|
+
},
|
|
376
|
+
}),
|
|
377
|
+
});
|
|
378
|
+
if (!response.ok) {
|
|
379
|
+
const error = await response.json().catch(() => ({}));
|
|
380
|
+
throw new Error(error?.error?.message || `Verification failed with status ${response.status}`);
|
|
381
|
+
}
|
|
382
|
+
return (await response.json());
|
|
383
|
+
}
|
|
384
|
+
/**
|
|
385
|
+
* Verify citations from LLM output with automatic parsing.
|
|
386
|
+
* This is the recommended way to verify citations for new integrations.
|
|
387
|
+
*
|
|
388
|
+
* @param input - Object containing llmOutput and optional fileDataParts
|
|
389
|
+
* @returns Verification results with status and proof images
|
|
390
|
+
*
|
|
391
|
+
* @example
|
|
392
|
+
* ```typescript
|
|
393
|
+
* const result = await dc.verifyCitationsFromLlmOutput({
|
|
394
|
+
* llmOutput: response.content,
|
|
395
|
+
* fileDataParts, // From prepareFiles()
|
|
396
|
+
* });
|
|
397
|
+
*
|
|
398
|
+
* for (const [key, result] of Object.entries(result.foundHighlights)) {
|
|
399
|
+
* console.log(key, result.searchState?.status);
|
|
400
|
+
* }
|
|
401
|
+
* ```
|
|
402
|
+
*/
|
|
403
|
+
async verifyCitationsFromLlmOutput(input, citations) {
|
|
404
|
+
const { llmOutput, outputImageFormat = "avif" } = input;
|
|
405
|
+
// Parse citations from LLM output
|
|
406
|
+
if (!citations)
|
|
407
|
+
citations = getAllCitationsFromLlmOutput(llmOutput);
|
|
408
|
+
// If no citations found, return empty result
|
|
409
|
+
if (Object.keys(citations).length === 0) {
|
|
410
|
+
return { foundHighlights: {} };
|
|
411
|
+
}
|
|
412
|
+
// Note: fileDataParts is now only used to identify which files to verify
|
|
413
|
+
// The mapping from fileId to attachmentId must be registered via uploadFile() or prepareFiles()
|
|
414
|
+
// in the same session. For Zero Data Retention scenarios, use verifyCitations() directly.
|
|
415
|
+
// Group citations by fileId and verify each group
|
|
416
|
+
const citationsByFile = new Map();
|
|
417
|
+
for (const [key, citation] of Object.entries(citations)) {
|
|
418
|
+
const fileId = citation.fileId || "";
|
|
419
|
+
if (!citationsByFile.has(fileId)) {
|
|
420
|
+
citationsByFile.set(fileId, {});
|
|
421
|
+
}
|
|
422
|
+
citationsByFile.get(fileId)[key] = citation;
|
|
423
|
+
}
|
|
424
|
+
// Verify citations for each file
|
|
425
|
+
const allHighlights = {};
|
|
426
|
+
for (const [fileId, fileCitations] of citationsByFile) {
|
|
427
|
+
// Check if we have the file registered
|
|
428
|
+
const fileInfo = this.fileIdMap.get(fileId);
|
|
429
|
+
if (!fileInfo) {
|
|
430
|
+
// Skip citations for unregistered files
|
|
431
|
+
continue;
|
|
432
|
+
}
|
|
433
|
+
const response = await fetch(`${this.apiUrl}/verifyCitation`, {
|
|
434
|
+
method: "POST",
|
|
435
|
+
headers: {
|
|
436
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
437
|
+
"Content-Type": "application/json",
|
|
438
|
+
},
|
|
439
|
+
body: JSON.stringify({
|
|
440
|
+
data: {
|
|
441
|
+
attachmentId: fileInfo.attachmentId,
|
|
442
|
+
citations: fileCitations,
|
|
443
|
+
outputImageFormat,
|
|
444
|
+
},
|
|
445
|
+
}),
|
|
446
|
+
});
|
|
447
|
+
if (!response.ok) {
|
|
448
|
+
const error = await response.json().catch(() => ({}));
|
|
449
|
+
throw new Error(error?.error?.message || `Verification failed with status ${response.status}`);
|
|
450
|
+
}
|
|
451
|
+
const result = (await response.json());
|
|
452
|
+
Object.assign(allHighlights, result.foundHighlights);
|
|
453
|
+
}
|
|
454
|
+
return { foundHighlights: allHighlights };
|
|
455
|
+
}
|
|
456
|
+
/**
|
|
457
|
+
* Register a file that was uploaded separately (e.g., via direct API call).
|
|
458
|
+
* This allows you to use verifyCitations with files not uploaded via uploadFile().
|
|
459
|
+
*
|
|
460
|
+
* @param fileId - Your file ID
|
|
461
|
+
* @param attachmentId - The internal attachment ID
|
|
462
|
+
*/
|
|
463
|
+
registerFile(fileId, attachmentId) {
|
|
464
|
+
this.fileIdMap.set(fileId, { attachmentId });
|
|
465
|
+
}
|
|
466
|
+
/**
|
|
467
|
+
* Clear the internal file ID mapping.
|
|
468
|
+
* Useful for cleanup or when working with many files.
|
|
469
|
+
*/
|
|
470
|
+
clearFileMap() {
|
|
471
|
+
this.fileIdMap.clear();
|
|
472
|
+
}
|
|
473
|
+
}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export { DeepCitation } from "./DeepCitation";
|
|
2
|
+
export type { DeepCitationConfig, UploadFileResponse, UploadFileOptions, VerifyCitationsResponse, VerifyCitationsOptions, CitationInput, FileInput, FileDataPart, PrepareFilesResult, VerifyCitationsFromLlmOutputInput, ConvertFileInput, ConvertFileResponse, PrepareConvertedFileOptions, } from "./types";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { DeepCitation } from "./DeepCitation";
|