@deepcitation/deepcitation-js 1.1.27 → 1.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +253 -253
- package/lib/chunk-2IZXUOQR.js +66 -0
- package/lib/chunk-4FGOHQFP.cjs +66 -0
- package/lib/chunk-CFXDRAJL.cjs +1 -0
- package/lib/chunk-DEUSSEFH.js +2 -0
- package/lib/chunk-F2MMVEVC.cjs +1 -0
- package/lib/chunk-J7U6YFOI.cjs +2 -0
- package/lib/chunk-O2XFH626.js +1 -0
- package/lib/chunk-RQPZSRID.js +1 -0
- package/lib/client/index.cjs +1 -0
- package/lib/client/{DeepCitation.d.ts → index.d.cts} +159 -3
- package/lib/client/index.d.ts +342 -2
- package/lib/client/index.js +1 -1
- package/lib/index.cjs +1 -0
- package/lib/index.d.cts +127 -0
- package/lib/index.d.ts +126 -22
- package/lib/index.js +1 -20
- package/lib/prompts/index.cjs +1 -0
- package/lib/prompts/index.d.cts +196 -0
- package/lib/prompts/index.d.ts +196 -3
- package/lib/prompts/index.js +1 -3
- package/lib/react/index.cjs +4 -0
- package/lib/react/index.js +4 -20
- package/lib/types/index.cjs +1 -0
- package/lib/types/index.d.cts +96 -0
- package/lib/types/index.d.ts +96 -11
- package/lib/types/index.js +1 -7
- package/package.json +46 -11
- package/lib/client/DeepCitation.js +0 -374
- package/lib/client/types.d.ts +0 -154
- package/lib/client/types.js +0 -1
- package/lib/parsing/normalizeCitation.d.ts +0 -5
- package/lib/parsing/normalizeCitation.js +0 -198
- package/lib/parsing/parseCitation.d.ts +0 -79
- package/lib/parsing/parseCitation.js +0 -431
- package/lib/parsing/parseWorkAround.d.ts +0 -2
- package/lib/parsing/parseWorkAround.js +0 -73
- package/lib/prompts/citationPrompts.d.ts +0 -138
- package/lib/prompts/citationPrompts.js +0 -168
- package/lib/prompts/promptCompression.d.ts +0 -14
- package/lib/prompts/promptCompression.js +0 -127
- package/lib/prompts/types.d.ts +0 -4
- package/lib/prompts/types.js +0 -1
- package/lib/react/CitationComponent.d.ts +0 -106
- package/lib/react/CitationComponent.js +0 -419
- package/lib/react/CitationVariants.d.ts +0 -132
- package/lib/react/CitationVariants.js +0 -277
- package/lib/react/DiffDisplay.d.ts +0 -10
- package/lib/react/DiffDisplay.js +0 -33
- package/lib/react/Popover.d.ts +0 -15
- package/lib/react/Popover.js +0 -20
- package/lib/react/UrlCitationComponent.d.ts +0 -83
- package/lib/react/UrlCitationComponent.js +0 -224
- package/lib/react/VerificationTabs.d.ts +0 -10
- package/lib/react/VerificationTabs.js +0 -36
- package/lib/react/icons.d.ts +0 -22
- package/lib/react/icons.js +0 -16
- package/lib/react/index.d.ts +0 -17
- package/lib/react/primitives.d.ts +0 -99
- package/lib/react/primitives.js +0 -187
- package/lib/react/types.d.ts +0 -315
- package/lib/react/types.js +0 -1
- package/lib/react/useSmartDiff.d.ts +0 -16
- package/lib/react/useSmartDiff.js +0 -64
- package/lib/react/utils.d.ts +0 -44
- package/lib/react/utils.js +0 -88
- package/lib/types/boxes.d.ts +0 -11
- package/lib/types/boxes.js +0 -1
- package/lib/types/citation.d.ts +0 -39
- package/lib/types/citation.js +0 -1
- package/lib/types/search.d.ts +0 -19
- package/lib/types/search.js +0 -1
- package/lib/types/verification.d.ts +0 -27
- package/lib/types/verification.js +0 -11
- package/lib/utils/diff.d.ts +0 -60
- package/lib/utils/diff.js +0 -414
- package/lib/utils/sha.d.ts +0 -10
- package/lib/utils/sha.js +0 -108
|
@@ -1,198 +0,0 @@
|
|
|
1
|
-
/**
 * Removes `<cite ... />` tags from a page of text.
 *
 * The tag must carry its attributes in the canonical order
 * (id, start_page*, full_phrase, key_span, line ids, then optional
 * `reasoning` / `value` attributes) and use single-quoted values.
 * Both the camelCase (`fileId`, `attachmentId`, `lineIds`) and the snake_case
 * (`file_id`, `attachment_id`, `line_ids`) spellings are accepted, so tags
 * produced by `normalizeCitations` are matched as well.
 *
 * @param pageText - Text that may contain citation tags.
 * @param leaveKeySpanBehind - When truthy, each tag is replaced by the text of
 *   its `key_span` attribute (with backslash-escaped quotes unescaped);
 *   otherwise the tag is replaced by the empty string.
 * @returns The text with every matching citation tag replaced.
 */
export const removeCitations = (pageText, leaveKeySpanBehind) => {
    // Capture groups: 1 = attachment id, 2 = page number, 3 = page index,
    // 4 = full_phrase, 5 = key_span, 6 = line ids.
    const citationRegex = /<cite\s+(?:fileId|file_id|attachmentId|attachment_id)='(\w{0,25})'\s+start_page[_a-zA-Z]*='page[_a-zA-Z]*(\d+)_index_(\d+)'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+key_span='((?:[^'\\]|\\.)*)'\s+line(?:_ids|Ids)='([^']+)'(?:\s+(?:value|reasoning)='(?:[^'\\]|\\.)*')*\s*\/>/g;
    return pageText.replace(citationRegex, (_match, _attachmentId, _pageNumber, _index, _fullPhrase, keySpan) => {
        if (!leaveKeySpanBehind) {
            return "";
        }
        // Attribute values escape quotes as \' and \"; turn them back into plain
        // quotes so no stray backslashes are left in the surrounding prose.
        return keySpan?.replace(/\\(['"])/g, "$1") || "";
    });
};
|
|
13
|
-
/**
 * Strips `<page_number_N_index_M>` opening markers and their matching
 * `</page_number_N_index_M>` closing markers from the text, then trims
 * leading and trailing whitespace from the result.
 *
 * @param pageText - Page text that may contain page-number marker tags.
 * @returns The trimmed text without the marker tags.
 */
export const removePageNumberMetadata = (pageText) => {
    const withoutOpeningTags = pageText.replace(/<page_number_\d+_index_\d+>/g, "");
    const withoutClosingTags = withoutOpeningTags.replace(/<\/page_number_\d+_index_\d+>/g, "");
    return withoutClosingTags.trim();
};
|
|
19
|
-
/**
 * Removes `<line id="...">` opening tags and `</line>` closing tags from the
 * text, leaving the content between them untouched.
 *
 * @param pageText - Page text that may contain line marker tags.
 * @returns The text without line marker tags (whitespace is not trimmed).
 */
export const removeLineIdMetadata = (pageText) => pageText.replace(/<line id="[^"]*">|<\/line>/g, "");
|
|
23
|
-
/**
 * Extracts the page number from a page key.
 *
 * Expected key shapes are `page_number_{page_number}_index_{page_index}`,
 * `page_number_{page_number}` and `page_key_{page_number}_index_{page_index}`;
 * in every case the first run of digits in the key is the page number.
 *
 * @param startPageKey - The page key, or a falsy value when none is available.
 * @returns The first run of digits parsed as a base-10 integer, or `null` when
 *   the key is falsy or contains no digits.
 */
export const getCitationPageNumber = (startPageKey) => {
    if (!startPageKey) {
        return null;
    }
    const pageNumber = startPageKey.match(/\d+/)?.[0];
    // Always pass the radix so the digits are read as decimal.
    return pageNumber ? parseInt(pageNumber, 10) : null;
};
|
|
31
|
-
/**
 * Normalizes every `<cite ...>` tag found in a response.
 *
 * The trimmed response is split around citation tags (either self-closing
 * `<cite ... />` or terminated by `</cite>`). Segments that start with
 * `<cite` are passed through `normalizeCitationContent`; all other segments
 * are kept verbatim. When the split finds no citation tag at all, the whole
 * trimmed response is handed to `normalizeCitationContent` instead.
 *
 * @param response - Raw response text; `null`/`undefined`/empty is treated as "".
 * @returns The response with its citation tags normalized.
 */
export const normalizeCitations = (response) => {
    const text = response?.trim() || "";
    const segments = text.split(/(<cite[\s\S]*?(?:\/>|<\/cite>))/gm);
    if (segments.length > 1) {
        let rebuilt = "";
        for (const segment of segments) {
            rebuilt += segment.startsWith("<cite") ? normalizeCitationContent(segment) : segment;
        }
        return rebuilt;
    }
    return normalizeCitationContent(text);
};
|
|
42
|
-
/**
 * Maps a cite attribute name, in any of its accepted spellings
 * (camelCase, snake_case, mixed), to its canonical snake_case name.
 * Unknown names are returned lower-cased.
 */
const canonicalizeCiteAttributeKey = (key) => {
    const lowerKey = key.toLowerCase();
    switch (lowerKey) {
        case "fullphrase":
        case "full_phrase":
            return "full_phrase";
        case "lineids":
        case "line_ids":
            return "line_ids";
        case "startpagekey":
        case "start_pagekey":
        case "start_page_key":
            return "start_page_key";
        case "fileid":
        case "file_id":
        case "attachmentid":
        case "attachment_id":
            return "attachment_id";
        case "keyspan":
        case "key_span":
            return "key_span";
        case "timestamp":
        case "timestamps":
            return "timestamps";
        default:
            // Includes "reasoning" and "value", which are already canonical.
            return lowerKey;
    }
};

/**
 * Decodes the five predefined XML/HTML named entities.
 * `&amp;` is decoded last so that e.g. `&amp;lt;` becomes `&lt;`, not `<`.
 */
const decodeHtmlEntities = (str) => str
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&amp;/g, "&");

// Matches: text attribute name, `=`, opening quote, then content (lazy) up to
// a lookahead for either the next known attribute *assignment* or the end of
// the tag. Requiring `=` after the attribute name keeps ordinary words inside
// the content (e.g. "the value of x") from being mistaken for the next
// attribute. Quotes inside the content are ignored during this capture.
const TEXT_ATTRIBUTE_REGEX = /(fullPhrase|full_phrase|keySpan|key_span|reasoning|value)\s*=\s*(['"])([\s\S]*?)(?=\s+(?:line_ids|lineIds|timestamps|fileId|file_id|attachmentId|attachment_id|start_page_key|start_pageKey|startPageKey|keySpan|key_span|reasoning|value|fullPhrase|full_phrase)\s*=|\s*\/?>)/g;

// Matches line_ids / timestamps with an unquoted, single-quoted or
// double-quoted value, followed by whitespace or the end of the tag.
const LINE_IDS_REGEX = /(line_ids|lineIds|timestamps)=['"]?([\[\]\(\){}A-Za-z0-9_\-, ]+)['"]?(\s*\/?>|\s+)/g;

// Matches one `name='value'` or `name="value"` attribute; backslash escapes
// are allowed inside the value.
const CITE_ATTRIBUTE_REGEX = /([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(['"])((?:[^'"\\\n]|\\.)*)(?:\2)/g;

/**
 * Cleans a raw line_ids / timestamps value: drops letters and brackets,
 * expands ascending numeric ranges ("1-3" -> "1,2,3") and tidies commas.
 */
const cleanLineIdsValue = (rawValue) => {
    const digitsAndSeparators = rawValue.replace(/[A-Za-z\[\]\(\){}]/g, "");
    // NOTE(review): the expansion is unbounded; a very large range allocates
    // a proportionally large list.
    const expanded = digitsAndSeparators.replace(/(\d+)-(\d+)/g, (_rangeMatch, start, end) => {
        const startNum = parseInt(start, 10);
        const endNum = parseInt(end, 10);
        if (startNum > endNum) {
            // Descending range: keep only the start value.
            return String(startNum);
        }
        const range = [];
        for (let i = startNum; i <= endNum; i++) {
            range.push(i);
        }
        return range.join(",");
    });
    // Collapse repeated commas and strip a leading/trailing comma.
    return expanded.replace(/,+/g, ",").replace(/^,|,$/g, "");
};

/**
 * Rebuilds a `<cite ... />` tag with canonical attribute names, single-quoted
 * values and a fixed attribute order. A tag without any parsable attribute is
 * returned unchanged.
 */
const reorderCiteTagAttributes = (tag) => {
    // A Map keeps insertion order and cannot collide with Object.prototype
    // members; a repeated attribute keeps its first position, last value.
    const attrs = new Map();
    for (const match of tag.matchAll(CITE_ATTRIBUTE_REGEX)) {
        // match[1] = name, match[2] = quote character, match[3] = value
        attrs.set(canonicalizeCiteAttributeKey(match[1]), match[3]);
    }
    if (attrs.size === 0) {
        return tag;
    }
    const keys = [...attrs.keys()];
    const hasValue = (name) => Boolean(attrs.get(name));
    const ordered = [];
    // Shared first
    if (hasValue("attachment_id")) {
        ordered.push("attachment_id");
    }
    if (hasValue("timestamps")) {
        // AV citations: attachment_id, full_phrase, timestamps
        if (hasValue("full_phrase")) {
            ordered.push("full_phrase");
        }
        ordered.push("timestamps");
    }
    else {
        // Document citations: attachment_id, start_page*, full_phrase, key_span, line_ids
        const startPageKeys = keys.filter((k) => k.startsWith("start_page"));
        if (startPageKeys.includes("start_page_key")) {
            ordered.push("start_page_key");
        }
        ordered.push(...startPageKeys.filter((k) => k !== "start_page_key").sort());
        for (const name of ["full_phrase", "key_span", "line_ids"]) {
            if (hasValue(name)) {
                ordered.push(name);
            }
        }
    }
    // Optional attributes
    for (const name of ["reasoning", "value"]) {
        if (hasValue(name)) {
            ordered.push(name);
        }
    }
    // Any remaining attributes (including known ones with an empty value),
    // in deterministic alphabetical order.
    const used = new Set(ordered);
    ordered.push(...keys.filter((k) => !used.has(k)).sort());
    const rebuiltAttrs = ordered.map((k) => `${k}='${attrs.get(k)}'`).join(" ");
    return `<cite ${rebuiltAttrs} />`;
};

/**
 * Normalizes the citation markup inside a piece of text:
 *
 * 1. `></cite>` is rewritten to the self-closing form `/>`.
 * 2. Text attributes (full_phrase, key_span, reasoning, value) get canonical
 *    names and single quotes; their content has newlines flattened to spaces,
 *    named HTML entities decoded, runs of `*`/`_` and any remaining `*`
 *    removed, and all quotes backslash-escaped exactly once.
 * 3. line_ids / timestamps values are cleaned and numeric ranges expanded.
 * 4. Every `<cite ... />` tag is rebuilt with its attributes in canonical
 *    order, which the citation parser's regexes rely on.
 *
 * @param input - Text containing zero or more cite tags.
 * @returns The text with its citation markup normalized.
 */
const normalizeCitationContent = (input) => {
    // 1. Standardize self-closing tags
    let normalized = input.replace(/><\/cite>/g, "/>");
    // 2. Text attributes
    normalized = normalized.replace(TEXT_ATTRIBUTE_REGEX, (_match, key, openQuote, rawContent) => {
        // The lazy match usually captures the closing quote because the
        // lookahead starts at the whitespace *after* the attribute; strip it.
        let content = rawContent.endsWith(openQuote) ? rawContent.slice(0, -1) : rawContent;
        // Flatten newlines to spaces
        content = content.replace(/(\r?\n)+/g, " ");
        // Decode entities to get raw text (e.g. &apos; -> ')
        content = decodeHtmlEntities(content);
        // Remove Markdown bold/italic markers (runs of two or more * or _)
        content = content.replace(/(\*|_){2,}/g, "");
        // Unescape already-escaped quotes first so they are not escaped twice,
        // then escape every quote exactly once.
        content = content.replace(/\\\\'/g, "'");
        content = content.replace(/\\'/g, "'");
        content = content.replace(/'/g, "\\'");
        content = content.replace(/\\\\"/g, '"');
        content = content.replace(/\\"/g, '"');
        content = content.replace(/"/g, '\\"');
        // Remove any remaining * (Markdown list bullets break the attribute)
        content = content.replace(/\*/g, "");
        return `${canonicalizeCiteAttributeKey(key)}='${content}'`;
    });
    // 3. line_ids / timestamps, wherever they appear in the tag
    normalized = normalized.replace(LINE_IDS_REGEX, (_match, key, rawValue, trailingChars) => {
        // Keep the trailing characters (whitespace or tag end) as they were.
        return `${canonicalizeCiteAttributeKey(key)}='${cleanLineIdsValue(rawValue)}'${trailingChars}`;
    });
    // 4. Canonical attribute order
    return normalized.replace(/<cite\b[\s\S]*?\/>/g, (tag) => reorderCiteTagAttributes(tag));
};
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
import { type Verification } from "../types/verification.js";
|
|
2
|
-
import { type Citation, type CitationStatus } from "../types/citation.js";
|
|
3
|
-
/**
 * Calculates the verification status of a citation based on the found highlight and search state.
 *
 * @param verification - The `Verification` result for the citation (the found highlight
 *   location); `null` and `undefined` are accepted when nothing was found.
 * @returns A `CitationStatus` object containing boolean flags for verification status.
 */
export declare function getCitationStatus(verification: Verification | null | undefined): CitationStatus;
|
|
10
|
-
/**
 * Parses a text fragment into a `Citation` plus the text surrounding it.
 *
 * NOTE(review): only the declaration is visible here; the parameter notes below are
 * inferred from names and types and should be confirmed against the implementation.
 *
 * @param fragment - The text fragment to parse; presumably contains one `<cite ... />` tag.
 * @param mdAttachmentId - Optional attachment id (may be `null`); presumably a fallback
 *   for fragments whose tag carries no attachment id. TODO confirm.
 * @param citationCounterRef - Optional, untyped reference (may be `null`); presumably a
 *   mutable counter used to number citations. TODO confirm.
 * @param isVerbose - Optional flag; presumably enables additional diagnostic output. TODO confirm.
 * @returns An object with `beforeCite` and `afterCite` strings and the parsed `citation`.
 */
export declare const parseCitation: (fragment: string, mdAttachmentId?: string | null, citationCounterRef?: any | null, isVerbose?: boolean) => {
    beforeCite: string;
    afterCite: string;
    citation: Citation;
};
|
|
15
|
-
/**
 * Extracts all citations from LLM output.
 * Supports both XML <cite ... /> tags (embedded in strings/markdown) and JSON-based citation formats.
 *
 * For object input:
 * - Traverses the object looking for `citation` or `citations` properties matching JSON format
 * - Also stringifies the object to find embedded XML citations in markdown content
 *
 * @param llmOutput - The LLM output (string or object); the parameter is untyped (`any`)
 * @returns Dictionary of parsed Citation objects keyed by citation key
 */
export declare const getAllCitationsFromLlmOutput: (llmOutput: any) => {
    [key: string]: Citation;
};
|
|
29
|
-
/**
 * Groups citations by their attachmentId for multi-file verification scenarios.
 * This is useful when you have citations from multiple files and need to
 * verify them against their respective source documents.
 *
 * Returns a `Map`; use `groupCitationsByAttachmentIdObject` for a plain object instead.
 *
 * @param citations - Array of Citation objects or a dictionary of citations
 * @returns Map of attachmentId to dictionary of citations from that file
 *
 * @example
 * ```typescript
 * const citations = getAllCitationsFromLlmOutput(response.content);
 * const citationsByAttachment = groupCitationsByAttachmentId(citations);
 *
 * // Verify citations for each file
 * for (const [attachmentId, fileCitations] of citationsByAttachment) {
 *   const verified = await dc.verifyCitations(attachmentId, fileCitations);
 *   // Process verification results...
 * }
 * ```
 */
export declare function groupCitationsByAttachmentId(citations: Citation[] | {
    [key: string]: Citation;
}): Map<string, {
    [key: string]: Citation;
}>;
|
|
54
|
-
/**
 * Groups citations by their attachmentId and returns as a plain object.
 * Alternative to groupCitationsByAttachmentId that returns a plain object instead of a Map,
 * so the result can be iterated with `Object.entries`.
 *
 * @param citations - Array of Citation objects or a dictionary of citations
 * @returns Object with attachmentId keys mapping to citation dictionaries
 *
 * @example
 * ```typescript
 * const citations = getAllCitationsFromLlmOutput(response.content);
 * const citationsByAttachment = groupCitationsByAttachmentIdObject(citations);
 *
 * // Verify citations for each file using Promise.all
 * const verificationPromises = Object.entries(citationsByAttachment).map(
 *   ([attachmentId, fileCitations]) => dc.verifyCitations(attachmentId, fileCitations)
 * );
 * const results = await Promise.all(verificationPromises);
 * ```
 */
export declare function groupCitationsByAttachmentIdObject(citations: Citation[] | {
    [key: string]: Citation;
}): {
    [attachmentId: string]: {
        [key: string]: Citation;
    };
};
|