@deepcitation/deepcitation-js 1.1.26 → 1.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +253 -253
  3. package/lib/chunk-2IZXUOQR.js +66 -0
  4. package/lib/chunk-4FGOHQFP.cjs +66 -0
  5. package/lib/chunk-CFXDRAJL.cjs +1 -0
  6. package/lib/chunk-DEUSSEFH.js +2 -0
  7. package/lib/chunk-F2MMVEVC.cjs +1 -0
  8. package/lib/chunk-J7U6YFOI.cjs +2 -0
  9. package/lib/chunk-O2XFH626.js +1 -0
  10. package/lib/chunk-RQPZSRID.js +1 -0
  11. package/lib/client/index.cjs +1 -0
  12. package/lib/client/{DeepCitation.d.ts → index.d.cts} +159 -3
  13. package/lib/client/index.d.ts +342 -2
  14. package/lib/client/index.js +1 -1
  15. package/lib/index.cjs +1 -0
  16. package/lib/index.d.cts +127 -0
  17. package/lib/index.d.ts +126 -23
  18. package/lib/index.js +1 -22
  19. package/lib/prompts/index.cjs +1 -0
  20. package/lib/prompts/index.d.cts +196 -0
  21. package/lib/prompts/index.d.ts +196 -3
  22. package/lib/prompts/index.js +1 -3
  23. package/lib/react/index.cjs +4 -0
  24. package/lib/react/index.js +4 -18
  25. package/lib/types/index.cjs +1 -0
  26. package/lib/types/index.d.cts +96 -0
  27. package/lib/types/index.d.ts +96 -11
  28. package/lib/types/index.js +1 -7
  29. package/package.json +62 -10
  30. package/lib/client/DeepCitation.js +0 -374
  31. package/lib/client/types.d.ts +0 -154
  32. package/lib/client/types.js +0 -1
  33. package/lib/parsing/normalizeCitation.d.ts +0 -5
  34. package/lib/parsing/normalizeCitation.js +0 -198
  35. package/lib/parsing/parseCitation.d.ts +0 -79
  36. package/lib/parsing/parseCitation.js +0 -431
  37. package/lib/parsing/parseWorkAround.d.ts +0 -2
  38. package/lib/parsing/parseWorkAround.js +0 -73
  39. package/lib/prompts/citationPrompts.d.ts +0 -138
  40. package/lib/prompts/citationPrompts.js +0 -168
  41. package/lib/prompts/promptCompression.d.ts +0 -14
  42. package/lib/prompts/promptCompression.js +0 -127
  43. package/lib/prompts/types.d.ts +0 -4
  44. package/lib/prompts/types.js +0 -1
  45. package/lib/react/CitationComponent.d.ts +0 -93
  46. package/lib/react/CitationComponent.js +0 -371
  47. package/lib/react/CitationVariants.d.ts +0 -132
  48. package/lib/react/CitationVariants.js +0 -284
  49. package/lib/react/DiffDisplay.d.ts +0 -10
  50. package/lib/react/DiffDisplay.js +0 -33
  51. package/lib/react/Popover.d.ts +0 -15
  52. package/lib/react/Popover.js +0 -20
  53. package/lib/react/UrlCitationComponent.d.ts +0 -83
  54. package/lib/react/UrlCitationComponent.js +0 -224
  55. package/lib/react/VerificationTabs.d.ts +0 -10
  56. package/lib/react/VerificationTabs.js +0 -36
  57. package/lib/react/icons.d.ts +0 -18
  58. package/lib/react/icons.js +0 -16
  59. package/lib/react/index.d.ts +0 -16
  60. package/lib/react/primitives.d.ts +0 -101
  61. package/lib/react/primitives.js +0 -193
  62. package/lib/react/types.d.ts +0 -283
  63. package/lib/react/types.js +0 -1
  64. package/lib/react/useSmartDiff.d.ts +0 -16
  65. package/lib/react/useSmartDiff.js +0 -64
  66. package/lib/react/utils.d.ts +0 -43
  67. package/lib/react/utils.js +0 -89
  68. package/lib/types/boxes.d.ts +0 -11
  69. package/lib/types/boxes.js +0 -1
  70. package/lib/types/citation.d.ts +0 -39
  71. package/lib/types/citation.js +0 -1
  72. package/lib/types/search.d.ts +0 -19
  73. package/lib/types/search.js +0 -1
  74. package/lib/types/verification.d.ts +0 -27
  75. package/lib/types/verification.js +0 -11
  76. package/lib/utils/diff.d.ts +0 -60
  77. package/lib/utils/diff.js +0 -414
  78. package/lib/utils/sha.d.ts +0 -10
  79. package/lib/utils/sha.js +0 -108
@@ -1,198 +0,0 @@
1
/**
 * Strips `<cite ... />` tags from a page of text.
 *
 * Both the legacy camelCase id attributes (`fileId`, `attachmentId`) and the
 * snake_case spellings (`file_id`, `attachment_id`) are recognised, so tags that
 * use the canonical `attachment_id` spelling are removed as well.
 *
 * @param pageText - Text that may contain `<cite ... />` tags.
 * @param leaveKeySpanBehind - When truthy, each tag is replaced by the text of its
 *   `key_span` attribute (with backslash-escaped quotes restored to plain quotes);
 *   otherwise the tag is removed entirely.
 * @returns The text with every matching citation tag replaced.
 */
export const removeCitations = (pageText, leaveKeySpanBehind) => {
    // Capture groups: 1 attachment id, 2 page number, 3 page index, 4 full_phrase,
    // 5 key_span, 6 line ids, 7 optional attribute name (value|reasoning), 8 its content.
    const citationRegex = /<cite\s+(?:fileId|file_id|attachmentId|attachment_id)='(\w{0,25})'\s+start_page[\_a-zA-Z]*='page[\_a-zA-Z]*(\d+)_index_(\d+)'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+key_span='((?:[^'\\]|\\.)*)'\s+line(?:_ids|Ids)='([^']+)'(?:\s+(value|reasoning)='((?:[^'\\]|\\.)*)')?\s*\/>/g;
    return pageText.replace(citationRegex, (_match, _attachmentId, _pageNumber, _index, _fullPhrase, keySpan) => {
        if (!leaveKeySpanBehind) {
            return "";
        }
        // The captured key span is the raw attribute content, so the only clean-up
        // needed is turning \' and \" back into the quote characters they stand for.
        return keySpan ? keySpan.replace(/\\(['"])/g, "$1") : "";
    });
};
13
/**
 * Removes the `<page_number_N_index_M>` / `</page_number_N_index_M>` wrapper tags
 * from a page of text and trims surrounding whitespace from the result.
 *
 * @param pageText - Page text that may contain page-number wrapper tags.
 * @returns The trimmed text without the opening or closing wrapper tags.
 */
export const removePageNumberMetadata = (pageText) => {
    const withoutOpeningTags = pageText.replace(/<page_number_\d+_index_\d+>/g, "");
    const withoutClosingTags = withoutOpeningTags.replace(/<\/page_number_\d+_index_\d+>/g, "");
    return withoutClosingTags.trim();
};
19
/**
 * Removes `<line id="...">` opening tags and `</line>` closing tags, keeping the
 * text they wrapped.
 *
 * @param pageText - Text that may contain line wrapper tags.
 * @returns The text with all line wrapper tags deleted.
 */
export const removeLineIdMetadata = (pageText) => pageText.replace(/<line id="[^"]*">|<\/line>/g, "");
23
/**
 * Extracts the page number from a start-page key such as
 * `page_number_{page_number}_index_{page_index}`, `page_number_{page_number}` or
 * `page_key_{page_number}_index_{page_index}`.
 *
 * The page number is taken to be the first run of digits in the key.
 *
 * @param startPageKey - The start-page key; may be empty, null or undefined.
 * @returns The page number, or `null` when the key is falsy or contains no digits.
 */
export const getCitationPageNumber = (startPageKey) => {
    if (!startPageKey) {
        return null;
    }
    const firstDigitRun = startPageKey.match(/\d+/)?.[0];
    // Always parse as base 10; the radix is stated explicitly rather than inferred.
    return firstDigitRun ? Number.parseInt(firstDigitRun, 10) : null;
};
31
/**
 * Normalizes every `<cite ...>` tag found in an LLM response.
 *
 * The trimmed response is split around cite tags (self-closing or closed with
 * `</cite>`). Pieces that start with `<cite` are passed through
 * `normalizeCitationContent`; all other pieces are kept verbatim. When the split
 * finds no tag at all, the whole trimmed response is passed through
 * `normalizeCitationContent` instead.
 *
 * @param response - Raw response text; null/undefined is treated as an empty string.
 * @returns The response with its citation tags normalized.
 */
export const normalizeCitations = (response) => {
    const text = response?.trim() || "";
    const pieces = text.split(/(<cite[\s\S]*?(?:\/>|<\/cite>))/gm);
    if (pieces.length <= 1) {
        return normalizeCitationContent(text);
    }
    let rebuilt = "";
    for (const piece of pieces) {
        rebuilt += piece.startsWith("<cite") ? normalizeCitationContent(piece) : piece;
    }
    return rebuilt;
};
42
/**
 * Largest number of ids a single `start-end` range is expanded into. Wider ranges
 * are collapsed to their two endpoints so that a malformed value such as
 * `1-999999999` cannot allocate an enormous list.
 */
const MAX_EXPANDED_RANGE_SIZE = 1000;

/** Every accepted (lower-cased) spelling of a cite attribute name mapped to its canonical form. */
const CITE_ATTRIBUTE_ALIASES = new Map([
    ["fullphrase", "full_phrase"],
    ["full_phrase", "full_phrase"],
    ["lineids", "line_ids"],
    ["line_ids", "line_ids"],
    ["startpagekey", "start_page_key"],
    ["start_pagekey", "start_page_key"],
    ["start_page_key", "start_page_key"],
    ["fileid", "attachment_id"],
    ["file_id", "attachment_id"],
    ["attachmentid", "attachment_id"],
    ["attachment_id", "attachment_id"],
    ["keyspan", "key_span"],
    ["key_span", "key_span"],
    ["timestamp", "timestamps"],
    ["timestamps", "timestamps"],
]);

/**
 * Matches a free-text attribute: key, `=`, opening quote, then lazily captured content
 * up to the next `name=` attribute or the end of the tag. The lookahead requires the
 * `=` after the attribute name so that ordinary words such as "value" or "reasoning"
 * inside the content do not end the capture early.
 */
const TEXT_ATTRIBUTE_REGEX = /(fullPhrase|full_phrase|keySpan|key_span|reasoning|value)\s*=\s*(['"])([\s\S]*?)(?=\s+(?:line_ids|lineIds|timestamps|fileId|file_id|attachmentId|attachment_id|start_page_key|start_pageKey|startPageKey|keySpan|key_span|reasoning|value|fullPhrase|full_phrase)\s*=|\s*\/?>)/gm;

/** Matches a line-id / timestamp attribute whose value is unquoted, single- or double-quoted. */
const ID_LIST_ATTRIBUTE_REGEX = /(line_ids|lineIds|timestamps)=['"]?([\[\]\(\){}A-Za-z0-9_\-, ]+)['"]?(\s*\/?>|\s+)/gm;

/** Matches one `key='value'` or `key="value"` pair inside a tag. */
const CITE_ATTRIBUTE_PAIR_REGEX = /([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(['"])((?:[^'"\\\n]|\\.)*)(?:\2)/g;

/** Returns the canonical snake_case name for a cite attribute key; unknown keys are lower-cased. */
const canonicalizeCiteAttributeKey = (key) => {
    const lowerKey = key.toLowerCase();
    return CITE_ATTRIBUTE_ALIASES.get(lowerKey) ?? lowerKey;
};

/** Decodes the five basic HTML entities; `&amp;` is decoded last to avoid double decoding. */
const decodeHtmlEntities = (str) => {
    return str
        .replace(/&quot;/g, '"')
        .replace(/&apos;/g, "'")
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&amp;/g, "&");
};

/** Cleans the content of one free-text attribute and re-emits it as `key='content'`. */
const rewriteTextAttribute = (_match, key, openQuote, rawContent) => {
    let content = rawContent;
    // The lazy capture normally swallows the closing quote because the lookahead
    // begins at the whitespace after it; drop that quote.
    if (content.endsWith(openQuote)) {
        content = content.slice(0, -1);
    }
    // Flatten newlines to single spaces.
    content = content.replace(/(\r?\n)+/g, " ");
    // Decode entities to get the raw text (e.g. &apos; -> ').
    content = decodeHtmlEntities(content);
    // Remove runs of two or more markdown emphasis markers (* or _).
    content = content.replace(/(\*|_){2,}/g, "");
    // Unescape already-escaped quotes first so the final escaping never doubles up.
    content = content.replace(/\\\\'/g, "'");
    content = content.replace(/\\'/g, "'");
    content = content.replace(/'/g, "\\'");
    content = content.replace(/\\\\"/g, '"');
    content = content.replace(/\\"/g, '"');
    content = content.replace(/"/g, '\\"');
    // Remove any remaining single asterisks (stray markdown list markers).
    content = content.replace(/\*/g, "");
    return `${canonicalizeCiteAttributeKey(key)}='${content}'`;
};

/** Expands `start-end` into `start,start+1,...,end`, with guards for degenerate ranges. */
const expandNumericRange = (_rangeMatch, start, end) => {
    const startNum = parseInt(start, 10);
    const endNum = parseInt(end, 10);
    // Beyond the safe-integer range `i++` stops changing `i`, so the loop below
    // would never terminate; keep the endpoints as written instead.
    if (!Number.isSafeInteger(startNum) || !Number.isSafeInteger(endNum)) {
        return `${start},${end}`;
    }
    // Descending ranges fall back to just the start value.
    if (startNum > endNum) {
        return String(startNum);
    }
    // Refuse to materialise absurdly wide ranges; keep only the endpoints.
    if (endNum - startNum >= MAX_EXPANDED_RANGE_SIZE) {
        return `${startNum},${endNum}`;
    }
    const range = [];
    for (let i = startNum; i <= endNum; i++) {
        range.push(i);
    }
    return range.join(",");
};

/** Normalizes one line-id / timestamp attribute to `key='n,n,n'` plus its trailing characters. */
const rewriteIdListAttribute = (_match, key, rawValue, trailingChars) => {
    // Drop letters and brackets, keeping digits and separators.
    let cleanedValue = rawValue.replace(/[A-Za-z\[\]\(\){}]/g, "");
    cleanedValue = cleanedValue.replace(/(\d+)-(\d+)/g, expandNumericRange);
    // Collapse repeated commas and trim a leading/trailing comma.
    cleanedValue = cleanedValue.replace(/,+/g, ",").replace(/^,|,$/g, "");
    return `${canonicalizeCiteAttributeKey(key)}='${cleanedValue}'${trailingChars}`;
};

/**
 * Rebuilds a `<cite ... />` tag with its attributes in canonical order.
 *
 * Order for tags with a non-empty `timestamps`: attachment_id, full_phrase, timestamps.
 * Order otherwise: attachment_id, start_page_key, other start_page* keys (sorted),
 * full_phrase, key_span, line_ids. Then reasoning, value, and finally any remaining
 * attributes alphabetically. Attributes are placed by presence, so an attribute with
 * an empty value keeps its canonical slot.
 */
const reorderCiteTagAttributes = (tag) => {
    const attrs = new Map();
    for (const match of tag.matchAll(CITE_ATTRIBUTE_PAIR_REGEX)) {
        // match[1] is the raw key, match[2] the quote character, match[3] the value.
        attrs.set(canonicalizeCiteAttributeKey(match[1]), match[3]);
    }
    // If nothing parsable was found, leave the tag untouched.
    if (attrs.size === 0) {
        return tag;
    }
    const keys = [...attrs.keys()];
    const timestamps = attrs.get("timestamps");
    const hasTimestamps = typeof timestamps === "string" && timestamps.length > 0;
    const ordered = [];
    const pushIfPresent = (k) => {
        if (attrs.has(k)) {
            ordered.push(k);
        }
    };
    pushIfPresent("attachment_id");
    if (hasTimestamps) {
        pushIfPresent("full_phrase");
        ordered.push("timestamps");
    }
    else {
        pushIfPresent("start_page_key");
        keys
            .filter((k) => k.startsWith("start_page") && k !== "start_page_key")
            .sort()
            .forEach((k) => ordered.push(k));
        pushIfPresent("full_phrase");
        pushIfPresent("key_span");
        pushIfPresent("line_ids");
    }
    pushIfPresent("reasoning");
    pushIfPresent("value");
    // Remaining attributes go last in alphabetical order so output is deterministic.
    const used = new Set(ordered);
    keys
        .filter((k) => !used.has(k))
        .sort()
        .forEach((k) => ordered.push(k));
    const rebuiltAttrs = ordered.map((k) => `${k}='${attrs.get(k)}'`).join(" ");
    return `<cite ${rebuiltAttrs} />`;
};

/**
 * Normalizes the citation markup inside `input`:
 *  1. rewrites `></cite>` as `/>`;
 *  2. cleans free-text attributes (full_phrase, key_span, reasoning, value) and
 *     re-quotes them with single quotes under their canonical names;
 *  3. normalizes line_ids / timestamps values to comma-separated numbers,
 *     expanding ranges such as `1-3`;
 *  4. rebuilds each `<cite ... />` tag with attributes in canonical order.
 *
 * Known limitation: a literal `>` inside a text attribute still ends that
 * attribute's capture early.
 *
 * @param input - A cite tag, or text containing cite tags.
 * @returns The input with its citation markup normalized.
 */
const normalizeCitationContent = (input) => {
    let normalized = input.replace(/><\/cite>/g, "/>");
    normalized = normalized.replace(TEXT_ATTRIBUTE_REGEX, rewriteTextAttribute);
    normalized = normalized.replace(ID_LIST_ATTRIBUTE_REGEX, rewriteIdListAttribute);
    normalized = normalized.replace(/<cite\b[\s\S]*?\/>/gm, (tag) => reorderCiteTagAttributes(tag));
    return normalized;
};
@@ -1,79 +0,0 @@
1
- import { type Verification } from "../types/verification.js";
2
- import { type Citation, type CitationStatus } from "../types/citation.js";
3
/**
 * Derives a citation's verification status from the found highlight and search state.
 *
 * @param verification - The found highlight location; pass `null` or `undefined`
 *   when nothing was found.
 * @returns A `CitationStatus` object of boolean verification flags.
 */
export declare function getCitationStatus(verification: Verification | null | undefined): CitationStatus;
10
/**
 * Parses the citation contained in `fragment`.
 *
 * NOTE(review): only this declaration is visible here; the parameter notes below
 * are inferred from names and types and should be confirmed against the implementation.
 *
 * @param fragment - Text fragment to parse.
 * @param mdAttachmentId - Optional attachment id (string or null); presumably a
 *   fallback for fragments that carry no id of their own — TODO confirm.
 * @param citationCounterRef - Optional opaque counter reference. Its shape is not
 *   declared, so it is typed `unknown` (which already admits `null`) rather than `any`.
 * @param isVerbose - Optional flag; presumably enables verbose output — TODO confirm.
 * @returns An object holding the `beforeCite` and `afterCite` strings and the
 *   parsed `citation`.
 */
export declare const parseCitation: (fragment: string, mdAttachmentId?: string | null, citationCounterRef?: unknown, isVerbose?: boolean) => {
    beforeCite: string;
    afterCite: string;
    citation: Citation;
};
15
/**
 * Extracts all citations from LLM output.
 * Supports both XML `<cite ... />` tags (embedded in strings/markdown) and
 * JSON-based citation formats.
 *
 * For object input:
 * - Traverses the object looking for `citation` or `citations` properties matching
 *   the JSON format
 * - Also stringifies the object to find embedded XML citations in markdown content
 *
 * @param llmOutput - The LLM output (string or object). Typed `unknown` so callers
 *   may pass any value without opting out of type checking.
 * @returns Dictionary of parsed Citation objects keyed by citation key
 */
export declare const getAllCitationsFromLlmOutput: (llmOutput: unknown) => Record<string, Citation>;
29
/**
 * Buckets citations by `attachmentId`, for multi-file verification where each
 * file's citations must be checked against its own source document.
 *
 * @param citations - Either an array of Citation objects or a dictionary of citations
 * @returns A Map from attachmentId to the dictionary of citations from that file
 *
 * @example
 * ```typescript
 * const citations = getAllCitationsFromLlmOutput(response.content);
 * const citationsByAttachment = groupCitationsByAttachmentId(citations);
 *
 * // Verify citations for each file
 * for (const [attachmentId, fileCitations] of citationsByAttachment) {
 *   const verified = await dc.verifyCitations(attachmentId, fileCitations);
 *   // Process verification results...
 * }
 * ```
 */
export declare function groupCitationsByAttachmentId(citations: Citation[] | Record<string, Citation>): Map<string, Record<string, Citation>>;
54
/**
 * Buckets citations by `attachmentId` and returns a plain object.
 * Same grouping as `groupCitationsByAttachmentId`, but the result is a plain
 * object rather than a Map.
 *
 * @param citations - Either an array of Citation objects or a dictionary of citations
 * @returns An object whose keys are attachmentIds and whose values are citation dictionaries
 *
 * @example
 * ```typescript
 * const citations = getAllCitationsFromLlmOutput(response.content);
 * const citationsByAttachment = groupCitationsByAttachmentIdObject(citations);
 *
 * // Verify citations for each file using Promise.all
 * const verificationPromises = Object.entries(citationsByAttachment).map(
 *   ([attachmentId, fileCitations]) => dc.verifyCitations(attachmentId, fileCitations)
 * );
 * const results = await Promise.all(verificationPromises);
 * ```
 */
export declare function groupCitationsByAttachmentIdObject(citations: Citation[] | Record<string, Citation>): Record<string, Record<string, Citation>>;