@deepcitation/deepcitation-js 1.0.3 → 1.0.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -60,7 +60,7 @@ import { DeepCitation, wrapCitationPrompt } from "@deepcitation/deepcitation-js"
60
60
  const dc = new DeepCitation({ apiKey: process.env.DEEPCITATION_API_KEY });
61
61
 
62
62
  // Upload source files
63
- const { fileDataParts, fileDeepTexts } = await dc.prepareFiles([
63
+ const { fileDataParts, deepTextPromptPortion } = await dc.prepareFiles([
64
64
  { file: pdfBuffer, filename: "report.pdf" },
65
65
  ]);
66
66
 
@@ -68,7 +68,7 @@ const { fileDataParts, fileDeepTexts } = await dc.prepareFiles([
68
68
  const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
69
69
  systemPrompt: "You are a helpful assistant...",
70
70
  userPrompt: "Analyze this document",
71
- fileDeepText: fileDeepTexts,
71
+ deepTextPromptPortion,
72
72
  });
73
73
 
74
74
  // Call your LLM
@@ -91,7 +91,7 @@ export declare class DeepCitation {
91
91
  * });
92
92
  *
93
93
  * // Then prepare the file for verification
94
- * const { fileDeepText, fileId } = await dc.prepareConvertedFile({
94
+ * const { deepTextPromptPortion, fileId } = await dc.prepareConvertedFile({
95
95
  * fileId: result.fileId
96
96
  * });
97
97
  * ```
@@ -99,7 +99,7 @@ export declare class DeepCitation {
99
99
  convertToPdf(input: ConvertFileInput | string): Promise<ConvertFileResponse>;
100
100
  /**
101
101
  * Prepare a previously converted file for citation verification.
102
- * Use this after calling convertToPdf() to extract text and get fileDeepText.
102
+ * Use this after calling convertToPdf() to extract text and get deepTextPromptPortion.
103
103
  *
104
104
  * @param options - Options with fileId from convertFile
105
105
  * @returns Upload response with fileId and extracted text
@@ -110,11 +110,11 @@ export declare class DeepCitation {
110
110
  * const converted = await dc.convertToPdf({ url: "https://example.com/article" });
111
111
  *
112
112
  * // Then prepare it for verification
113
- * const { fileDeepText, fileId } = await dc.prepareConvertedFile({
113
+ * const { deepTextPromptPortion, fileId } = await dc.prepareConvertedFile({
114
114
  * fileId: converted.fileId
115
115
  * });
116
116
  *
117
- * // Use fileDeepText in your LLM prompt...
117
+ * // Use deepTextPromptPortion in your LLM prompt...
118
118
  * ```
119
119
  */
120
120
  prepareConvertedFile(options: PrepareConvertedFileOptions): Promise<UploadFileResponse>;
@@ -123,20 +123,20 @@ export declare class DeepCitation {
123
123
  * This is the recommended way to prepare files for LLM prompts.
124
124
  *
125
125
  * @param files - Array of files to upload with optional filenames and fileIds
126
- * @returns Object containing fileDataParts for verification and fileDeepTexts for LLM
126
+ * @returns Object containing fileDataParts for verification and deepTextPromptPortion for LLM
127
127
  *
128
128
  * @example
129
129
  * ```typescript
130
- * const { fileDataParts, fileDeepTexts } = await dc.prepareFiles([
130
+ * const { fileDataParts, deepTextPromptPortion } = await dc.prepareFiles([
131
131
  * { file: pdfBuffer, filename: "report.pdf" },
132
132
  * { file: invoiceBuffer, filename: "invoice.pdf" },
133
133
  * ]);
134
134
  *
135
- * // Use fileDeepTexts in wrapCitationPrompt
135
+ * // Use deepTextPromptPortion in wrapCitationPrompt
136
136
  * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
137
137
  * systemPrompt,
138
138
  * userPrompt,
139
- * fileDeepText: fileDeepTexts
139
+ * deepTextPromptPortion
140
140
  * });
141
141
  *
142
142
  * // Use fileDataParts later for verification
@@ -144,7 +144,7 @@ export class DeepCitation {
144
144
  * });
145
145
  *
146
146
  * // Then prepare the file for verification
147
- * const { fileDeepText, fileId } = await dc.prepareConvertedFile({
147
+ * const { deepTextPromptPortion, fileId } = await dc.prepareConvertedFile({
148
148
  * fileId: result.fileId
149
149
  * });
150
150
  * ```
@@ -208,7 +208,8 @@ export class DeepCitation {
208
208
  }
209
209
  if (!response.ok) {
210
210
  const error = await response.json().catch(() => ({}));
211
- throw new Error(error?.error?.message || `Conversion failed with status ${response.status}`);
211
+ throw new Error(error?.error?.message ||
212
+ `Conversion failed with status ${response.status}`);
212
213
  }
213
214
  // Internal response includes attachmentId which we need for the two-step flow
214
215
  const apiResponse = (await response.json());
@@ -222,7 +223,7 @@ export class DeepCitation {
222
223
  }
223
224
  /**
224
225
  * Prepare a previously converted file for citation verification.
225
- * Use this after calling convertToPdf() to extract text and get fileDeepText.
226
+ * Use this after calling convertToPdf() to extract text and get deepTextPromptPortion.
226
227
  *
227
228
  * @param options - Options with fileId from convertFile
228
229
  * @returns Upload response with fileId and extracted text
@@ -233,11 +234,11 @@ export class DeepCitation {
233
234
  * const converted = await dc.convertToPdf({ url: "https://example.com/article" });
234
235
  *
235
236
  * // Then prepare it for verification
236
- * const { fileDeepText, fileId } = await dc.prepareConvertedFile({
237
+ * const { deepTextPromptPortion, fileId } = await dc.prepareConvertedFile({
237
238
  * fileId: converted.fileId
238
239
  * });
239
240
  *
240
- * // Use fileDeepText in your LLM prompt...
241
+ * // Use deepTextPromptPortion in your LLM prompt...
241
242
  * ```
242
243
  */
243
244
  async prepareConvertedFile(options) {
@@ -276,20 +277,20 @@ export class DeepCitation {
276
277
  * This is the recommended way to prepare files for LLM prompts.
277
278
  *
278
279
  * @param files - Array of files to upload with optional filenames and fileIds
279
- * @returns Object containing fileDataParts for verification and fileDeepTexts for LLM
280
+ * @returns Object containing fileDataParts for verification and deepTextPromptPortion for LLM
280
281
  *
281
282
  * @example
282
283
  * ```typescript
283
- * const { fileDataParts, fileDeepTexts } = await dc.prepareFiles([
284
+ * const { fileDataParts, deepTextPromptPortion } = await dc.prepareFiles([
284
285
  * { file: pdfBuffer, filename: "report.pdf" },
285
286
  * { file: invoiceBuffer, filename: "invoice.pdf" },
286
287
  * ]);
287
288
  *
288
- * // Use fileDeepTexts in wrapCitationPrompt
289
+ * // Use deepTextPromptPortion in wrapCitationPrompt
289
290
  * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
290
291
  * systemPrompt,
291
292
  * userPrompt,
292
- * fileDeepText: fileDeepTexts
293
+ * deepTextPromptPortion
293
294
  * });
294
295
  *
295
296
  * // Use fileDataParts later for verification
@@ -298,17 +299,17 @@ export class DeepCitation {
298
299
  */
299
300
  async prepareFiles(files) {
300
301
  if (files.length === 0) {
301
- return { fileDataParts: [], fileDeepTexts: [] };
302
+ return { fileDataParts: [], deepTextPromptPortion: [] };
302
303
  }
303
304
  // Upload all files in parallel
304
305
  const uploadPromises = files.map(({ file, filename, fileId }) => this.uploadFile(file, { filename, fileId }));
305
306
  const results = await Promise.all(uploadPromises);
306
307
  // Extract file data parts and file deep texts
307
- const fileDataParts = results.map(result => ({
308
+ const fileDataParts = results.map((result) => ({
308
309
  fileId: result.fileId,
309
310
  }));
310
- const fileDeepTexts = results.map(result => result.fileDeepText);
311
- return { fileDataParts, fileDeepTexts };
311
+ const deepTextPromptPortion = results.map((result) => result.deepTextPromptPortion);
312
+ return { fileDataParts, deepTextPromptPortion };
312
313
  }
313
314
  /**
314
315
  * Verify citations against a previously uploaded file.
@@ -377,7 +378,8 @@ export class DeepCitation {
377
378
  });
378
379
  if (!response.ok) {
379
380
  const error = await response.json().catch(() => ({}));
380
- throw new Error(error?.error?.message || `Verification failed with status ${response.status}`);
381
+ throw new Error(error?.error?.message ||
382
+ `Verification failed with status ${response.status}`);
381
383
  }
382
384
  return (await response.json());
383
385
  }
@@ -446,7 +448,8 @@ export class DeepCitation {
446
448
  });
447
449
  if (!response.ok) {
448
450
  const error = await response.json().catch(() => ({}));
449
- throw new Error(error?.error?.message || `Verification failed with status ${response.status}`);
451
+ throw new Error(error?.error?.message ||
452
+ `Verification failed with status ${response.status}`);
450
453
  }
451
454
  const result = (await response.json());
452
455
  Object.assign(allHighlights, result.foundHighlights);
@@ -15,7 +15,7 @@ export interface UploadFileResponse {
15
15
  /** The file ID assigned by DeepCitation (custom or auto-generated) */
16
16
  fileId: string;
17
17
  /** The full text content formatted for LLM prompts with page markers and line IDs. Use this in your user prompts. */
18
- fileDeepText: string;
18
+ deepTextPromptPortion: string;
19
19
  /** Form fields extracted from PDF forms */
20
20
  formFields?: Array<{
21
21
  name: string;
@@ -89,7 +89,7 @@ export interface PrepareFilesResult {
89
89
  /** Array of file references for verification */
90
90
  fileDataParts: FileDataPart[];
91
91
  /** Array of formatted text content for LLM prompts (with page markers and line IDs) */
92
- fileDeepTexts: string[];
92
+ deepTextPromptPortion: string[];
93
93
  }
94
94
  /**
95
95
  * Input for verifyCitationsFromLlmOutput
package/lib/index.d.ts CHANGED
@@ -5,13 +5,13 @@
5
5
  export { DeepCitation } from "./client/index.js";
6
6
  export type { DeepCitationConfig, UploadFileResponse, UploadFileOptions, VerifyCitationsResponse, VerifyCitationsOptions, CitationInput, FileInput, FileDataPart, PrepareFilesResult, VerifyCitationsFromLlmOutputInput, } from "./client/index.js";
7
7
  export { parseCitation, getCitationStatus, getAllCitationsFromLlmOutput, groupCitationsByFileId, groupCitationsByFileIdObject, } from "./parsing/parseCitation.js";
8
- export { normalizeCitations, getCitationPageNumber } from "./parsing/normalizeCitation.js";
9
- export { isGeminiGarbage, cleanRepeatingLastSentence } from "./parsing/parseWorkAround.js";
8
+ export { normalizeCitations, getCitationPageNumber, } from "./parsing/normalizeCitation.js";
9
+ export { isGeminiGarbage, cleanRepeatingLastSentence, } from "./parsing/parseWorkAround.js";
10
10
  export type { Citation, CitationStatus, VerifyCitationRequest, VerifyCitationResponse, OutputImageFormat, } from "./types/citation.js";
11
- export { VERIFICATION_VERSION_NUMBER, DEFAULT_OUTPUT_IMAGE_FORMAT } from "./types/citation.js";
11
+ export { DEFAULT_OUTPUT_IMAGE_FORMAT } from "./types/citation.js";
12
12
  export type { FoundHighlightLocation } from "./types/foundHighlight.js";
13
13
  export { NOT_FOUND_HIGHLIGHT_INDEX, PENDING_HIGHLIGHT_INDEX, BLANK_HIGHLIGHT_LOCATION, deterministicIdFromHighlightLocation, } from "./types/foundHighlight.js";
14
- export type { SearchState, SearchStatus, SearchMethod, SearchAttempt } from "./types/search.js";
14
+ export type { SearchState, SearchStatus, SearchMethod, SearchAttempt, } from "./types/search.js";
15
15
  export type { ScreenBox, PdfSpaceItem, IVertex } from "./types/boxes.js";
16
16
  export { sha1Hash } from "./utils/sha.js";
17
17
  export { generateCitationKey } from "./react/utils.js";
@@ -19,7 +19,6 @@ export { generateCitationInstanceId } from "./react/utils.js";
19
19
  export { CITATION_X_PADDING, CITATION_Y_PADDING } from "./react/utils.js";
20
20
  export { CITATION_JSON_OUTPUT_FORMAT, CITATION_MARKDOWN_SYNTAX_PROMPT, AV_CITATION_MARKDOWN_SYNTAX_PROMPT, CITATION_AV_BASED_JSON_OUTPUT_FORMAT, wrapSystemCitationPrompt, wrapCitationPrompt, } from "./prompts/citationPrompts.js";
21
21
  export type { WrapSystemPromptOptions, WrapCitationPromptOptions, WrapCitationPromptResult, } from "./prompts/citationPrompts.js";
22
- export { removeLineIdMetadata, removePageNumberMetadata, removeCitations } from "./parsing/normalizeCitation.js";
23
- export { compressPromptIds, decompressPromptIds } from "./prompts/promptCompression.js";
22
+ export { removeLineIdMetadata, removePageNumberMetadata, removeCitations, } from "./parsing/normalizeCitation.js";
23
+ export { compressPromptIds, decompressPromptIds, } from "./prompts/promptCompression.js";
24
24
  export type { CompressedResult } from "./prompts/types.js";
25
- export { CitationComponent } from "./react/CitationComponent.js";
package/lib/index.js CHANGED
@@ -6,9 +6,9 @@
6
6
  export { DeepCitation } from "./client/index.js";
7
7
  // Parsing
8
8
  export { parseCitation, getCitationStatus, getAllCitationsFromLlmOutput, groupCitationsByFileId, groupCitationsByFileIdObject, } from "./parsing/parseCitation.js";
9
- export { normalizeCitations, getCitationPageNumber } from "./parsing/normalizeCitation.js";
10
- export { isGeminiGarbage, cleanRepeatingLastSentence } from "./parsing/parseWorkAround.js";
11
- export { VERIFICATION_VERSION_NUMBER, DEFAULT_OUTPUT_IMAGE_FORMAT } from "./types/citation.js";
9
+ export { normalizeCitations, getCitationPageNumber, } from "./parsing/normalizeCitation.js";
10
+ export { isGeminiGarbage, cleanRepeatingLastSentence, } from "./parsing/parseWorkAround.js";
11
+ export { DEFAULT_OUTPUT_IMAGE_FORMAT } from "./types/citation.js";
12
12
  export { NOT_FOUND_HIGHLIGHT_INDEX, PENDING_HIGHLIGHT_INDEX, BLANK_HIGHLIGHT_LOCATION, deterministicIdFromHighlightLocation, } from "./types/foundHighlight.js";
13
13
  // Utilities
14
14
  export { sha1Hash } from "./utils/sha.js";
@@ -17,6 +17,5 @@ export { generateCitationInstanceId } from "./react/utils.js";
17
17
  export { CITATION_X_PADDING, CITATION_Y_PADDING } from "./react/utils.js";
18
18
  // Prompts
19
19
  export { CITATION_JSON_OUTPUT_FORMAT, CITATION_MARKDOWN_SYNTAX_PROMPT, AV_CITATION_MARKDOWN_SYNTAX_PROMPT, CITATION_AV_BASED_JSON_OUTPUT_FORMAT, wrapSystemCitationPrompt, wrapCitationPrompt, } from "./prompts/citationPrompts.js";
20
- export { removeLineIdMetadata, removePageNumberMetadata, removeCitations } from "./parsing/normalizeCitation.js";
21
- export { compressPromptIds, decompressPromptIds } from "./prompts/promptCompression.js";
22
- export { CitationComponent } from "./react/CitationComponent.js";
20
+ export { removeLineIdMetadata, removePageNumberMetadata, removeCitations, } from "./parsing/normalizeCitation.js";
21
+ export { compressPromptIds, decompressPromptIds, } from "./prompts/promptCompression.js";
@@ -35,7 +35,7 @@ export const normalizeCitations = (response) => {
35
35
  return normalizeCitationContent(trimmedResponse);
36
36
  }
37
37
  trimmedResponse = citationParts
38
- .map(part => (part.startsWith("<cite") ? normalizeCitationContent(part) : part))
38
+ .map((part) => part.startsWith("<cite") ? normalizeCitationContent(part) : part)
39
39
  .join("");
40
40
  return trimmedResponse;
41
41
  };
@@ -49,10 +49,14 @@ const normalizeCitationContent = (input) => {
49
49
  return "full_phrase";
50
50
  if (key === "lineIds" || key === "line_ids")
51
51
  return "line_ids";
52
- if (key === "startPageKey" || key === "start_pageKey" || key === "start_page_key")
52
+ if (key === "startPageKey" ||
53
+ key === "start_pageKey" ||
54
+ key === "start_page_key")
53
55
  return "start_page_key";
54
56
  if (key === "fileID" || key === "fileId" || key === "file_id")
55
57
  return "file_id";
58
+ if (key === "keySpan" || key === "key_span")
59
+ return "key_span";
56
60
  return key;
57
61
  };
58
62
  // Helper to decode HTML entities (simple implementation, expand if needed)
@@ -67,7 +71,7 @@ const normalizeCitationContent = (input) => {
67
71
  // 2. ROBUST TEXT ATTRIBUTE PARSING (reasoning, value, full_phrase)
68
72
  // This regex matches: Key = Quote -> Content (lazy) -> Lookahead for (Next Attribute OR End of Tag)
69
73
  // It effectively ignores quotes inside the content during the initial capture.
70
- const textAttributeRegex = /(fullPhrase|full_phrase|reasoning|value)\s*=\s*(['"])([\s\S]*?)(?=\s+(?:line_ids|lineIds|timestamps|fileId|file_id|start_page_key|start_pageKey|startPageKey|reasoning|value|full_phrase)|\s*\/?>)/gm;
74
+ const textAttributeRegex = /(fullPhrase|full_phrase|keySpan|key_span|reasoning|value)\s*=\s*(['"])([\s\S]*?)(?=\s+(?:line_ids|lineIds|timestamps|fileId|file_id|start_page_key|start_pageKey|startPageKey|keySpan|key_span|reasoning|value|full_phrase)|\s*\/?>)/gm;
71
75
  normalized = normalized.replace(textAttributeRegex, (_match, key, openQuote, rawContent) => {
72
76
  let content = rawContent;
73
77
  // The lazy match usually captures the closing quote because the lookahead
@@ -139,7 +143,7 @@ const normalizeCitationContent = (input) => {
139
143
  if (keys.length === 0)
140
144
  return tag;
141
145
  const hasTimestamps = typeof attrs.timestamps === "string" && attrs.timestamps.length > 0;
142
- const startPageKeys = keys.filter(k => k.startsWith("start_page"));
146
+ const startPageKeys = keys.filter((k) => k.startsWith("start_page"));
143
147
  const ordered = [];
144
148
  // Shared first
145
149
  if (attrs.file_id)
@@ -151,15 +155,17 @@ const normalizeCitationContent = (input) => {
151
155
  ordered.push("timestamps");
152
156
  }
153
157
  else {
154
- // Document citations: fileId, start_page*, full_phrase, line_ids, (optional reasoning/value), then any extras
158
+ // Document citations: fileId, start_page*, full_phrase, key_span, line_ids, (optional reasoning/value), then any extras
155
159
  if (startPageKeys.includes("start_page_key"))
156
160
  ordered.push("start_page_key");
157
161
  startPageKeys
158
- .filter(k => k !== "start_page_key")
162
+ .filter((k) => k !== "start_page_key")
159
163
  .sort()
160
- .forEach(k => ordered.push(k));
164
+ .forEach((k) => ordered.push(k));
161
165
  if (attrs.full_phrase)
162
166
  ordered.push("full_phrase");
167
+ if (attrs.key_span)
168
+ ordered.push("key_span");
163
169
  if (attrs.line_ids)
164
170
  ordered.push("line_ids");
165
171
  }
@@ -171,12 +177,12 @@ const normalizeCitationContent = (input) => {
171
177
  // Any remaining attributes, stable + deterministic (alpha)
172
178
  const used = new Set(ordered);
173
179
  keys
174
- .filter(k => !used.has(k))
180
+ .filter((k) => !used.has(k))
175
181
  .sort()
176
- .forEach(k => ordered.push(k));
177
- const rebuiltAttrs = ordered.map(k => `${k}='${attrs[k]}'`).join(" ");
182
+ .forEach((k) => ordered.push(k));
183
+ const rebuiltAttrs = ordered.map((k) => `${k}='${attrs[k]}'`).join(" ");
178
184
  return `<cite ${rebuiltAttrs} />`;
179
185
  };
180
- normalized = normalized.replace(/<cite\b[\s\S]*?\/>/gm, tag => reorderCiteTagAttributes(tag));
186
+ normalized = normalized.replace(/<cite\b[\s\S]*?\/>/gm, (tag) => reorderCiteTagAttributes(tag));
181
187
  return normalized;
182
188
  };
@@ -16,8 +16,13 @@ export function getCitationStatus(foundHighlight) {
16
16
  searchState?.status === "found_on_other_page" ||
17
17
  searchState?.status === "found_on_other_line" ||
18
18
  searchState?.status === "first_word_found";
19
- const isVerified = searchState?.status === "found" || isFoundValueMissedFullMatch || isPartialMatch || isFullMatchWithMissedValue;
20
- const isPending = searchState?.status === "pending" || searchState?.status === "loading" || !searchState;
19
+ const isVerified = searchState?.status === "found" ||
20
+ isFoundValueMissedFullMatch ||
21
+ isPartialMatch ||
22
+ isFullMatchWithMissedValue;
23
+ const isPending = searchState?.status === "pending" ||
24
+ searchState?.status === "loading" ||
25
+ !searchState;
21
26
  return { isVerified, isMiss, isPartialMatch, isPending };
22
27
  }
23
28
  export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVerbose) => {
@@ -30,19 +35,24 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
30
35
  // Replace escaped single quotes with actual single quotes
31
36
  return trimmed.replace(/\\'/g, "'");
32
37
  };
33
- const citationNumber = citationCounterRef?.current ? citationCounterRef.current++ : undefined;
38
+ const citationNumber = citationCounterRef?.current
39
+ ? citationCounterRef.current++
40
+ : undefined;
34
41
  const beforeCite = fragment.substring(0, fragment.indexOf("<cite"));
35
- const afterCite = fragment.includes("/>") ? fragment.slice(fragment.indexOf("/>") + 2) : "";
42
+ const afterCite = fragment.includes("/>")
43
+ ? fragment.slice(fragment.indexOf("/>") + 2)
44
+ : "";
36
45
  const middleCite = fragment.substring(fragment.indexOf("<cite"), fragment.indexOf("/>") + 2);
37
46
  // GROUPS:
38
47
  // 1: fileId
39
48
  // 2: start_page number
40
49
  // 3: index number
41
50
  // 4: full_phrase content (escaped)
42
- // 5: line_ids content
51
+ // 5: key_span content (escaped)
52
+ // 6: line_ids content
43
53
  // 6: Optional Key (value|reasoning)
44
54
  // 7: Optional Value content (escaped)
45
- const citationRegex = /<cite\s+file(?:_id|Id)='(\w{0,25})'\s+start_page[\_a-zA-Z]*='page[\_a-zA-Z]*(\d+)_index_(\d+)'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+line(?:_ids|Ids)='([^']+)'(?:\s+(value|reasoning)='((?:[^'\\]|\\.)*)')?\s*\/>/g;
55
+ const citationRegex = /<cite\s+file(?:_id|Id)='(\w{0,25})'\s+start_page[\_a-zA-Z]*='page[\_a-zA-Z]*(\d+)_index_(\d+)'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+key_span='((?:[^'\\]|\\.)*)'\s+line(?:_ids|Ids)='([^']+)'(?:\s+(value|reasoning)='((?:[^'\\]|\\.)*)')?\s*\/>/g;
46
56
  const citationMatches = [...middleCite.matchAll(citationRegex)];
47
57
  const match = citationMatches?.[0];
48
58
  const rawCitationMd = match?.[0];
@@ -51,11 +61,12 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
51
61
  let attachmentId = fileId?.length === 20 ? fileId : mdAttachmentId || match?.[1];
52
62
  // Use helper to handle escaped quotes inside the phrase
53
63
  let fullPhrase = cleanAndUnescape(match?.[4]);
64
+ let keySpan = cleanAndUnescape(match?.[5]);
54
65
  // Handle the optional attribute (value or reasoning)
55
66
  let value;
56
67
  let reasoning;
57
- const optionalKey = match?.[6]; // "value" or "reasoning"
58
- const optionalContent = cleanAndUnescape(match?.[7]);
68
+ const optionalKey = match?.[7]; // "value" or "reasoning"
69
+ const optionalContent = cleanAndUnescape(match?.[8]);
59
70
  if (optionalKey === "value") {
60
71
  value = optionalContent;
61
72
  }
@@ -65,12 +76,12 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
65
76
  let lineIds;
66
77
  try {
67
78
  // match[5] is line_ids
68
- const lineIdsString = match?.[5]?.replace(/[A-Za-z_[\](){}:]/g, "");
79
+ const lineIdsString = match?.[6]?.replace(/[A-Za-z_[\](){}:]/g, "");
69
80
  lineIds = lineIdsString
70
81
  ? lineIdsString
71
82
  .split(",")
72
- .map(id => (isNaN(parseInt(id)) ? undefined : parseInt(id)))
73
- .filter(id => id !== undefined)
83
+ .map((id) => (isNaN(parseInt(id)) ? undefined : parseInt(id)))
84
+ .filter((id) => id !== undefined)
74
85
  .sort((a, b) => a - b)
75
86
  : undefined;
76
87
  }
@@ -90,7 +101,8 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
90
101
  let timestamps;
91
102
  if (avMatch) {
92
103
  fileId = avMatch?.[1];
93
- attachmentId = fileId?.length === 20 ? fileId : mdAttachmentId || avMatch?.[1];
104
+ attachmentId =
105
+ fileId?.length === 20 ? fileId : mdAttachmentId || avMatch?.[1];
94
106
  fullPhrase = cleanAndUnescape(avMatch?.[2]);
95
107
  const timestampsString = avMatch?.[3]?.replace(/timestamps=['"]|['"]/g, "");
96
108
  const [startTime, endTime] = timestampsString?.split("-") || [];
@@ -110,6 +122,7 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
110
122
  fileId: attachmentId,
111
123
  pageNumber,
112
124
  fullPhrase,
125
+ keySpan,
113
126
  citationNumber,
114
127
  lineIds,
115
128
  rawCitationMd,
@@ -139,6 +152,7 @@ const parseJsonCitation = (jsonCitation, citationNumber) => {
139
152
  // Support both camelCase and snake_case property names
140
153
  const fullPhrase = jsonCitation.fullPhrase ?? jsonCitation.full_phrase;
141
154
  const startPageKey = jsonCitation.startPageKey ?? jsonCitation.start_page_key;
155
+ const keySpan = jsonCitation.keySpan ?? jsonCitation.key_span;
142
156
  const rawLineIds = jsonCitation.lineIds ?? jsonCitation.line_ids;
143
157
  const fileId = jsonCitation.fileId ?? jsonCitation.file_id;
144
158
  const reasoning = jsonCitation.reasoning;
@@ -155,13 +169,16 @@ const parseJsonCitation = (jsonCitation, citationNumber) => {
155
169
  }
156
170
  }
157
171
  // Sort lineIds if present
158
- const lineIds = rawLineIds?.length ? [...rawLineIds].sort((a, b) => a - b) : undefined;
172
+ const lineIds = rawLineIds?.length
173
+ ? [...rawLineIds].sort((a, b) => a - b)
174
+ : undefined;
159
175
  const citation = {
160
176
  fileId,
161
177
  pageNumber,
162
178
  fullPhrase,
163
179
  citationNumber,
164
180
  lineIds,
181
+ keySpan,
165
182
  reasoning,
166
183
  value,
167
184
  };
@@ -176,6 +193,8 @@ const hasCitationProperties = (item) => typeof item === "object" &&
176
193
  "full_phrase" in item ||
177
194
  "startPageKey" in item ||
178
195
  "start_page_key" in item ||
196
+ "keySpan" in item ||
197
+ "key_span" in item ||
179
198
  "lineIds" in item ||
180
199
  "line_ids" in item);
181
200
  /**
@@ -220,7 +239,9 @@ const findJsonCitationsInObject = (obj, found) => {
220
239
  found.push(...items);
221
240
  }
222
241
  if (obj.citations && isJsonCitationFormat(obj.citations)) {
223
- const items = Array.isArray(obj.citations) ? obj.citations : [obj.citations];
242
+ const items = Array.isArray(obj.citations)
243
+ ? obj.citations
244
+ : [obj.citations];
224
245
  found.push(...items);
225
246
  }
226
247
  // Recurse into object properties
@@ -1,5 +1,5 @@
1
- export declare const CITATION_MARKDOWN_SYNTAX_PROMPT = "\nCitation syntax to use within Markdown:\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text /> (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />\n\n\u2022 Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.\n\u2022 start_page_key, full_phrase, and line_ids are required for each citation.\n\u2022 Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.\n\u2022 Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.\n\u2022 These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.\n\u2022 The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.\n\u2022 We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.\n";
2
- export declare const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = "\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' full_phrase='the verbatim text of the phrase (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />\n\u2022 These citations are displayed in-line or in the relevant list item, and are not grouped at the end of the document.\n";
1
+ export declare const CITATION_MARKDOWN_SYNTAX_PROMPT = "\nCitation syntax to use within Markdown:\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />\n\n\u2022 Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.\n\u2022 start_page_key, full_phrase, and line_ids are required for each citation.\n\u2022 Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.\n\u2022 Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.\n\u2022 These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.\n\u2022 The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.\n\u2022 We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.\n";
2
+ export declare const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = "\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' full_phrase='the verbatim text of the phrase; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />\n\u2022 These citations are displayed in-line or in the relevant list item, and are not grouped at the end of the document.\n";
3
3
  export interface WrapSystemPromptOptions {
4
4
  /** The original system prompt to wrap with citation instructions */
5
5
  systemPrompt: string;
@@ -13,7 +13,7 @@ export interface WrapCitationPromptOptions {
13
13
  /** The original user prompt */
14
14
  userPrompt: string;
15
15
  /** The extracted file text with metadata (from uploadFile response). Can be a single string or array for multiple files. */
16
- fileDeepText?: string | string[];
16
+ deepTextPromptPortion?: string | string[];
17
17
  /** Whether to use audio/video citation format (with timestamps) instead of text-based (with line IDs) */
18
18
  isAudioVideo?: boolean;
19
19
  }
@@ -54,14 +54,14 @@ export declare function wrapSystemCitationPrompt(options: WrapSystemPromptOption
54
54
  * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
55
55
  * systemPrompt: "You are a helpful assistant.",
56
56
  * userPrompt: "Analyze this document and summarize it.",
57
- * fileDeepText, // from uploadFile response
57
+ * deepTextPromptPortion, // from uploadFile response
58
58
  * });
59
59
  *
60
60
  * // Multiple files
61
61
  * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
62
62
  * systemPrompt: "You are a helpful assistant.",
63
63
  * userPrompt: "Compare these documents.",
64
- * fileDeepText: [fileDeepText1, fileDeepText2], // array of file texts
64
+ * deepTextPromptPortion: [deepTextPromptPortion1, deepTextPromptPortion2], // array of file texts
65
65
  * });
66
66
  *
67
67
  * // Use enhanced prompts with your LLM
@@ -92,6 +92,10 @@ export declare const CITATION_JSON_OUTPUT_FORMAT: {
92
92
  type: string;
93
93
  description: string;
94
94
  };
95
+ keySpan: {
96
+ type: string;
97
+ description: string;
98
+ };
95
99
  lineIds: {
96
100
  type: string;
97
101
  items: {
@@ -1,7 +1,7 @@
1
1
  export const CITATION_MARKDOWN_SYNTAX_PROMPT = `
2
2
  Citation syntax to use within Markdown:
3
3
  • To support any ideas or information that requires a citation from the provided content, use the following citation syntax:
4
- <cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text /> (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />
4
+ <cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />
5
5
 
6
6
  • Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.
7
7
  • start_page_key, full_phrase, and line_ids are required for each citation.
@@ -13,7 +13,7 @@ Citation syntax to use within Markdown:
13
13
  `;
14
14
  export const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = `
15
15
  • To support any ideas or information that requires a citation from the provided content, use the following citation syntax:
16
- <cite file_id='file_id' full_phrase='the verbatim text of the phrase (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />
16
+ <cite file_id='file_id' full_phrase='the verbatim text of the phrase; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />
17
17
  • These citations are displayed in-line or in the relevant list item, and are not grouped at the end of the document.
18
18
  `;
19
19
  /**
@@ -35,8 +35,10 @@ export const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = `
35
35
  * ```
36
36
  */
37
37
  export function wrapSystemCitationPrompt(options) {
38
- const { systemPrompt, isAudioVideo = false, prependCitationInstructions = false } = options;
39
- const citationPrompt = isAudioVideo ? AV_CITATION_MARKDOWN_SYNTAX_PROMPT : CITATION_MARKDOWN_SYNTAX_PROMPT;
38
+ const { systemPrompt, isAudioVideo = false, prependCitationInstructions = false, } = options;
39
+ const citationPrompt = isAudioVideo
40
+ ? AV_CITATION_MARKDOWN_SYNTAX_PROMPT
41
+ : CITATION_MARKDOWN_SYNTAX_PROMPT;
40
42
  if (prependCitationInstructions) {
41
43
  return `${citationPrompt.trim()}
42
44
 
@@ -59,14 +61,14 @@ ${citationPrompt.trim()}`;
59
61
  * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
60
62
  * systemPrompt: "You are a helpful assistant.",
61
63
  * userPrompt: "Analyze this document and summarize it.",
62
- * fileDeepText, // from uploadFile response
64
+ * deepTextPromptPortion, // from uploadFile response
63
65
  * });
64
66
  *
65
67
  * // Multiple files
66
68
  * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
67
69
  * systemPrompt: "You are a helpful assistant.",
68
70
  * userPrompt: "Compare these documents.",
69
- * fileDeepText: [fileDeepText1, fileDeepText2], // array of file texts
71
+ * deepTextPromptPortion: [deepTextPromptPortion1, deepTextPromptPortion2], // array of file texts
70
72
  * });
71
73
  *
72
74
  * // Use enhanced prompts with your LLM
@@ -79,21 +81,23 @@ ${citationPrompt.trim()}`;
79
81
  * ```
80
82
  */
81
83
  export function wrapCitationPrompt(options) {
82
- const { systemPrompt, userPrompt, fileDeepText, isAudioVideo = false } = options;
84
+ const { systemPrompt, userPrompt, deepTextPromptPortion, isAudioVideo = false, } = options;
83
85
  const enhancedSystemPrompt = wrapSystemCitationPrompt({
84
86
  systemPrompt,
85
87
  isAudioVideo,
86
88
  });
87
89
  // Build enhanced user prompt with file content if provided
88
90
  let enhancedUserPrompt = userPrompt;
89
- if (fileDeepText) {
90
- const fileTexts = Array.isArray(fileDeepText) ? fileDeepText : [fileDeepText];
91
+ if (deepTextPromptPortion) {
92
+ const fileTexts = Array.isArray(deepTextPromptPortion)
93
+ ? deepTextPromptPortion
94
+ : [deepTextPromptPortion];
91
95
  const fileContent = fileTexts
92
96
  .map((text, index) => {
93
97
  if (fileTexts.length === 1) {
94
- return `<file_text>\n${text}\n</file_text>`;
98
+ return `\n${text}`;
95
99
  }
96
- return `<file_text file_index="${index + 1}">\n${text}\n</file_text>`;
100
+ return `\n${text}`;
97
101
  })
98
102
  .join("\n\n");
99
103
  enhancedUserPrompt = `${fileContent}\n\n${userPrompt}`;
@@ -119,13 +123,24 @@ export const CITATION_JSON_OUTPUT_FORMAT = {
119
123
  type: "string",
120
124
  description: "The verbatim text of the terse phrase inside <file_text /> to support the value description (if there is a detected OCR correction, use the corrected text)",
121
125
  },
126
+ keySpan: {
127
+ type: "string",
128
+ description: "the verbatim value or words within fullPhrase that best support the citation",
129
+ },
122
130
  lineIds: {
123
131
  type: "array",
124
132
  items: { type: "number" },
125
133
  description: "Infer lineIds, as we only provide the first, last, and every 5th line. Provide inclusive lineIds for the fullPhrase.",
126
134
  },
127
135
  },
128
- required: ["fileId", "startPageKey", "reasoning", "fullPhrase", "lineIds"],
136
+ required: [
137
+ "fileId",
138
+ "startPageKey",
139
+ "reasoning",
140
+ "fullPhrase",
141
+ "keySpan",
142
+ "lineIds",
143
+ ],
129
144
  };
130
145
  export const CITATION_AV_BASED_JSON_OUTPUT_FORMAT = {
131
146
  type: "object",
@@ -1,6 +1,5 @@
1
1
  import { type ScreenBox } from "./boxes";
2
2
  import { type FoundHighlightLocation } from "./foundHighlight";
3
- export declare const VERIFICATION_VERSION_NUMBER = "0.4.37";
4
3
  export type OutputImageFormat = "jpeg" | "png" | "avif" | undefined | null;
5
4
  export declare const DEFAULT_OUTPUT_IMAGE_FORMAT: "avif";
6
5
  export interface VerifyCitationResponse {
@@ -19,6 +18,7 @@ export interface VerifyCitationRequest {
19
18
  export interface Citation {
20
19
  fileId?: string;
21
20
  fullPhrase?: string | null;
21
+ keySpan?: string | null;
22
22
  value?: string | null;
23
23
  startPageKey?: string | null;
24
24
  pageNumber?: number | null;
@@ -33,8 +33,6 @@ export interface Citation {
33
33
  fragmentContext?: string | null;
34
34
  rawCitationMd?: string;
35
35
  beforeCite?: string;
36
- formFieldName?: string | null;
37
- formFieldValue?: string | null;
38
36
  }
39
37
  export interface CitationStatus {
40
38
  isVerified: boolean;
@@ -1,2 +1 @@
1
- export const VERIFICATION_VERSION_NUMBER = "0.4.37";
2
1
  export const DEFAULT_OUTPUT_IMAGE_FORMAT = "avif";
@@ -1,4 +1,4 @@
1
- import { VERIFICATION_VERSION_NUMBER, type Citation } from "./citation";
1
+ import { type Citation } from "./citation";
2
2
  import { type SearchState } from "./search";
3
3
  import { type PdfSpaceItem } from "./boxes";
4
4
  export declare const NOT_FOUND_HIGHLIGHT_INDEX = -1;
@@ -18,6 +18,6 @@ export interface FoundHighlightLocation {
18
18
  matchSnippet?: string | null;
19
19
  pdfSpaceItem?: PdfSpaceItem;
20
20
  verificationImageBase64?: string | null;
21
- source?: typeof VERIFICATION_VERSION_NUMBER | string | null;
21
+ source?: string | null;
22
22
  verifiedAt?: Date;
23
23
  }
@@ -4,7 +4,7 @@
4
4
  * @packageDocumentation
5
5
  */
6
6
  export type { Citation, CitationStatus, VerifyCitationRequest, VerifyCitationResponse, OutputImageFormat, } from "./citation.js";
7
- export { VERIFICATION_VERSION_NUMBER, DEFAULT_OUTPUT_IMAGE_FORMAT } from "./citation.js";
7
+ export { DEFAULT_OUTPUT_IMAGE_FORMAT } from "./citation.js";
8
8
  export type { FoundHighlightLocation } from "./foundHighlight.js";
9
9
  export { NOT_FOUND_HIGHLIGHT_INDEX, PENDING_HIGHLIGHT_INDEX, BLANK_HIGHLIGHT_LOCATION, deterministicIdFromHighlightLocation, } from "./foundHighlight.js";
10
10
  export type { SearchState, SearchStatus } from "./search.js";
@@ -3,5 +3,5 @@
3
3
  *
4
4
  * @packageDocumentation
5
5
  */
6
- export { VERIFICATION_VERSION_NUMBER, DEFAULT_OUTPUT_IMAGE_FORMAT } from "./citation.js";
6
+ export { DEFAULT_OUTPUT_IMAGE_FORMAT } from "./citation.js";
7
7
  export { NOT_FOUND_HIGHLIGHT_INDEX, PENDING_HIGHLIGHT_INDEX, BLANK_HIGHLIGHT_LOCATION, deterministicIdFromHighlightLocation, } from "./foundHighlight.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@deepcitation/deepcitation-js",
3
- "version": "1.0.3",
3
+ "version": "1.0.5",
4
4
  "description": "DeepCitation JavaScript SDK for deterministic AI citation verification",
5
5
  "type": "module",
6
6
  "private": false,
@@ -14,16 +14,14 @@
14
14
  "main": "./lib/index.js",
15
15
  "types": "./lib/index.d.ts",
16
16
  "sideEffects": [
17
- "*.css",
18
- "src/react/styles.css"
17
+ "*.css"
19
18
  ],
20
19
  "files": [
21
20
  "lib",
22
- "src/react/styles.css",
23
21
  "LICENSE"
24
22
  ],
25
23
  "scripts": {
26
- "build": "rimraf lib && tsc",
24
+ "build": "rimraf lib && tsc && cp src/react/styles.css lib/react/styles.css",
27
25
  "build:watch": "rimraf lib && tsc --watch",
28
26
  "test": "bun test ./src/__tests__/*.test.ts ./src/__tests__/*.test.tsx",
29
27
  "test:jest": "jest",
@@ -79,7 +77,7 @@
79
77
  "import": "./lib/react/index.js",
80
78
  "require": "./lib/react/index.js"
81
79
  },
82
- "./react/styles.css": "./src/react/styles.css"
80
+ "./react/styles.css": "./lib/react/styles.css"
83
81
  },
84
82
  "keywords": [
85
83
  "citation",
File without changes