@deepcitation/deepcitation-js 1.0.7 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/client/DeepCitation.d.ts +2 -2
- package/lib/client/DeepCitation.js +2 -2
- package/lib/client/index.d.ts +2 -2
- package/lib/client/index.js +1 -1
- package/lib/client/types.d.ts +1 -1
- package/lib/parsing/parseCitation.js +47 -13
- package/lib/prompts/citationPrompts.d.ts +1 -1
- package/lib/prompts/citationPrompts.js +2 -2
- package/lib/prompts/index.d.ts +3 -3
- package/lib/prompts/index.js +3 -3
- package/lib/prompts/promptCompression.d.ts +1 -1
- package/lib/react/utils.js +4 -2
- package/lib/types/citation.d.ts +2 -4
- package/lib/types/foundHighlight.d.ts +3 -3
- package/package.json +1 -1
|
package/lib/client/DeepCitation.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { Citation } from "../types/index";
|
|
2
|
-
import type { CitationInput, ConvertFileInput, ConvertFileResponse, DeepCitationConfig, FileInput, PrepareConvertedFileOptions, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsFromLlmOutput, VerifyCitationsOptions, VerifyCitationsResponse } from "./types";
|
|
1
|
+
import type { Citation } from "../types/index.js";
|
|
2
|
+
import type { CitationInput, ConvertFileInput, ConvertFileResponse, DeepCitationConfig, FileInput, PrepareConvertedFileOptions, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsFromLlmOutput, VerifyCitationsOptions, VerifyCitationsResponse } from "./types.js";
|
|
3
3
|
/**
|
|
4
4
|
* DeepCitation client for file upload and citation verification.
|
|
5
5
|
*
|
|
package/lib/client/DeepCitation.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { getAllCitationsFromLlmOutput } from "../parsing/parseCitation";
|
|
2
|
-
import { generateCitationKey } from "../react/utils";
|
|
1
|
+
import { getAllCitationsFromLlmOutput } from "../parsing/parseCitation.js";
|
|
2
|
+
import { generateCitationKey } from "../react/utils.js";
|
|
3
3
|
const DEFAULT_API_URL = "https://api.deepcitation.com";
|
|
4
4
|
/** Convert File/Blob/Buffer to a Blob suitable for FormData */
|
|
5
5
|
function toBlob(file, filename) {
|
package/lib/client/index.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { DeepCitation } from "./DeepCitation";
|
|
2
|
-
export type { DeepCitationConfig, UploadFileResponse, UploadFileOptions, VerifyCitationsResponse, VerifyCitationsOptions, CitationInput, FileInput, FileDataPart, PrepareFilesResult, VerifyCitationsFromLlmOutput, ConvertFileInput, ConvertFileResponse, PrepareConvertedFileOptions, } from "./types";
|
|
1
|
+
export { DeepCitation } from "./DeepCitation.js";
|
|
2
|
+
export type { DeepCitationConfig, UploadFileResponse, UploadFileOptions, VerifyCitationsResponse, VerifyCitationsOptions, CitationInput, FileInput, FileDataPart, PrepareFilesResult, VerifyCitationsFromLlmOutput, ConvertFileInput, ConvertFileResponse, PrepareConvertedFileOptions, } from "./types.js";
|
package/lib/client/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export { DeepCitation } from "./DeepCitation";
|
|
1
|
+
export { DeepCitation } from "./DeepCitation.js";
|
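package/lib/client/types.d.ts
CHANGED
|
[The +1 -1 hunk for this file is missing from this extract; the hunks that follow belong to parseCitation.js.]
|
package/lib/parsing/parseCitation.js
CHANGED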
|
@@ -1,6 +1,50 @@
|
|
|
1
|
-
import { sha1Hash } from "../utils/sha.js";
|
|
2
1
|
import { normalizeCitations } from "./normalizeCitation.js";
|
|
3
2
|
import { generateCitationKey } from "../react/utils.js";
|
|
3
|
+
/**
 * Parses a line_ids string that may contain individual numbers, ranges, or both.
 * Examples: "1,2,3", "5-10", "1,5-7,10", "20-20"
 *
 * The string is split on commas. Each part is either a single integer or an
 * inclusive "start-end" range; parts that yield no number are skipped.
 * - A range whose end is not a number contributes only its start ("5-" gives 5).
 * - A reversed range is read as the same inclusive range ("10-5" gives 5..10)
 *   instead of silently dropping everything but the first bound.
 * - A range covering more than MAX_RANGE_SPAN IDs is not expanded; only its two
 *   endpoints are kept, so a malformed value such as "1-999999999" cannot force
 *   the allocation of an enormous array.
 *
 * @param lineIdsString - The raw line_ids string (e.g., "1,5-7,10")
 * @returns Sorted array of unique line IDs, or undefined if empty/invalid
 */
function parseLineIds(lineIdsString) {
    // Largest number of IDs a single range is allowed to expand to.
    const MAX_RANGE_SPAN = 10000;
    if (!lineIdsString)
        return undefined;
    const lineIds = [];
    for (const part of lineIdsString.split(",")) {
        const trimmed = part.trim();
        if (!trimmed)
            continue;
        if (!trimmed.includes("-")) {
            // Single number
            const num = parseInt(trimmed, 10);
            if (!isNaN(num)) {
                lineIds.push(num);
            }
            continue;
        }
        // Range (e.g., "5-10"); only the first two dash-separated pieces are used.
        const [startStr, endStr] = trimmed.split("-");
        const start = parseInt(startStr, 10);
        const end = parseInt(endStr, 10);
        if (isNaN(start)) {
            // No usable start (e.g. "-5"): nothing to record for this part.
            continue;
        }
        if (isNaN(end)) {
            // Only the start is valid, so use it on its own.
            lineIds.push(start);
            continue;
        }
        // Normalise the bounds so a reversed range still covers every line in it.
        const low = Math.min(start, end);
        const high = Math.max(start, end);
        if (high - low + 1 > MAX_RANGE_SPAN) {
            // Implausibly large range: keep the endpoints without expanding.
            lineIds.push(low, high);
            continue;
        }
        for (let id = low; id <= high; id++) {
            lineIds.push(id);
        }
    }
    if (lineIds.length === 0)
        return undefined;
    // Sort and deduplicate
    return [...new Set(lineIds)].sort((a, b) => a - b);
}
|
|
4
48
|
/**
|
|
5
49
|
* Calculates the verification status of a citation based on the found highlight and search state.
|
|
6
50
|
*
|
|
@@ -55,7 +99,6 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
|
|
|
55
99
|
const citationRegex = /<cite\s+file(?:_id|Id)='(\w{0,25})'\s+start_page[\_a-zA-Z]*='page[\_a-zA-Z]*(\d+)_index_(\d+)'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+key_span='((?:[^'\\]|\\.)*)'\s+line(?:_ids|Ids)='([^']+)'(?:\s+(value|reasoning)='((?:[^'\\]|\\.)*)')?\s*\/>/g;
|
|
56
100
|
const citationMatches = [...middleCite.matchAll(citationRegex)];
|
|
57
101
|
const match = citationMatches?.[0];
|
|
58
|
-
const rawCitationMd = match?.[0];
|
|
59
102
|
const pageNumber = match?.[2] ? parseInt(match?.[2]) : undefined;
|
|
60
103
|
let fileId = match?.[1];
|
|
61
104
|
let attachmentId = fileId?.length === 20 ? fileId : mdAttachmentId || match?.[1];
|
|
@@ -75,15 +118,9 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
|
|
|
75
118
|
}
|
|
76
119
|
let lineIds;
|
|
77
120
|
try {
|
|
78
|
-
// match[
|
|
121
|
+
// match[6] is line_ids
|
|
79
122
|
const lineIdsString = match?.[6]?.replace(/[A-Za-z_[\](){}:]/g, "");
|
|
80
|
-
lineIds = lineIdsString
|
|
81
|
-
? lineIdsString
|
|
82
|
-
.split(",")
|
|
83
|
-
.map((id) => (isNaN(parseInt(id)) ? undefined : parseInt(id)))
|
|
84
|
-
.filter((id) => id !== undefined)
|
|
85
|
-
.sort((a, b) => a - b)
|
|
86
|
-
: undefined;
|
|
123
|
+
lineIds = lineIdsString ? parseLineIds(lineIdsString) : undefined;
|
|
87
124
|
}
|
|
88
125
|
catch (e) {
|
|
89
126
|
if (isVerbose)
|
|
@@ -116,16 +153,13 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
|
|
|
116
153
|
}
|
|
117
154
|
timestamps = { startTime, endTime };
|
|
118
155
|
}
|
|
119
|
-
const fragmentContext = sha1Hash(fragment).toString().slice(0, 8);
|
|
120
156
|
const citation = {
|
|
121
|
-
fragmentContext,
|
|
122
157
|
fileId: attachmentId,
|
|
123
158
|
pageNumber,
|
|
124
159
|
fullPhrase,
|
|
125
160
|
keySpan,
|
|
126
161
|
citationNumber,
|
|
127
162
|
lineIds,
|
|
128
|
-
rawCitationMd,
|
|
129
163
|
beforeCite,
|
|
130
164
|
value,
|
|
131
165
|
timestamps,
|
|
package/lib/prompts/citationPrompts.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export declare const CITATION_MARKDOWN_SYNTAX_PROMPT = "\nCitation syntax to use within Markdown:\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />\n\n\u2022 Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.\n\u2022 start_page_key, full_phrase, and line_ids are required for each citation.\n\u2022 Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.\n\u2022 Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.\n\u2022 These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.\n\u2022 The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.\n\u2022 We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.\n";
|
|
1
|
+
export declare const CITATION_MARKDOWN_SYNTAX_PROMPT = "\nCitation syntax to use within Markdown:\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or 1-3 words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />\n\n\u2022 Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.\n\u2022 start_page_key, full_phrase, and line_ids are required for each citation.\n\u2022 Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.\n\u2022 Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.\n\u2022 These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.\n\u2022 The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.\n\u2022 We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.\n";
|
|
2
2
|
export declare const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = "\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' full_phrase='the verbatim text of the phrase; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />\n\u2022 These citations are displayed in-line or in the relevant list item, and are not grouped at the end of the document.\n";
|
|
3
3
|
export interface WrapSystemPromptOptions {
|
|
4
4
|
/** The original system prompt to wrap with citation instructions */
|
|
package/lib/prompts/citationPrompts.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export const CITATION_MARKDOWN_SYNTAX_PROMPT = `
|
|
2
2
|
Citation syntax to use within Markdown:
|
|
3
3
|
• To support any ideas or information that requires a citation from the provided content, use the following citation syntax:
|
|
4
|
-
<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />
|
|
4
|
+
<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or 1-3 words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />
|
|
5
5
|
|
|
6
6
|
• Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.
|
|
7
7
|
• start_page_key, full_phrase, and line_ids are required for each citation.
|
|
@@ -125,7 +125,7 @@ export const CITATION_JSON_OUTPUT_FORMAT = {
|
|
|
125
125
|
},
|
|
126
126
|
keySpan: {
|
|
127
127
|
type: "string",
|
|
128
|
-
description: "the verbatim value or words within fullPhrase that best support the citation",
|
|
128
|
+
description: "the verbatim value or 1-3 words within fullPhrase that best support the citation",
|
|
129
129
|
},
|
|
130
130
|
lineIds: {
|
|
131
131
|
type: "array",
|
package/lib/prompts/index.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export * from "./promptCompression";
|
|
2
|
-
export * from "./citationPrompts";
|
|
3
|
-
export * from "./types";
|
|
1
|
+
export * from "./promptCompression.js";
|
|
2
|
+
export * from "./citationPrompts.js";
|
|
3
|
+
export * from "./types.js";
|
package/lib/prompts/index.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export * from "./promptCompression";
|
|
2
|
-
export * from "./citationPrompts";
|
|
3
|
-
export * from "./types";
|
|
1
|
+
export * from "./promptCompression.js";
|
|
2
|
+
export * from "./citationPrompts.js";
|
|
3
|
+
export * from "./types.js";
|
package/lib/react/utils.js
CHANGED
|
@@ -13,7 +13,6 @@ export function generateCitationKey(citation) {
|
|
|
13
13
|
citation.lineIds?.join(",") || "",
|
|
14
14
|
citation.timestamps?.startTime || "",
|
|
15
15
|
citation.timestamps?.endTime || "",
|
|
16
|
-
citation.fragmentContext || "",
|
|
17
16
|
];
|
|
18
17
|
return sha1Hash(keyParts.join("|")).slice(0, 16);
|
|
19
18
|
}
|
|
@@ -31,7 +30,10 @@ export function generateCitationInstanceId(citationKey) {
|
|
|
31
30
|
export function getCitationDisplayText(citation, options = {}) {
|
|
32
31
|
const { displayCitationValue = false, fallbackDisplay } = options;
|
|
33
32
|
if (displayCitationValue) {
|
|
34
|
-
return citation.value ||
|
|
33
|
+
return (citation.value ||
|
|
34
|
+
citation.citationNumber?.toString() ||
|
|
35
|
+
fallbackDisplay ||
|
|
36
|
+
"");
|
|
35
37
|
}
|
|
36
38
|
return citation.citationNumber?.toString() || "";
|
|
37
39
|
}
|
package/lib/types/citation.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { type ScreenBox } from "./boxes";
|
|
2
|
-
import { type FoundHighlightLocation } from "./foundHighlight";
|
|
1
|
+
import { type ScreenBox } from "./boxes.js";
|
|
2
|
+
import { type FoundHighlightLocation } from "./foundHighlight.js";
|
|
3
3
|
export type OutputImageFormat = "jpeg" | "png" | "avif" | undefined | null;
|
|
4
4
|
export declare const DEFAULT_OUTPUT_IMAGE_FORMAT: "avif";
|
|
5
5
|
export interface VerifyCitationResponse {
|
|
@@ -30,8 +30,6 @@ export interface Citation {
|
|
|
30
30
|
endTime?: string;
|
|
31
31
|
startTime?: string;
|
|
32
32
|
};
|
|
33
|
-
fragmentContext?: string | null;
|
|
34
|
-
rawCitationMd?: string;
|
|
35
33
|
beforeCite?: string;
|
|
36
34
|
}
|
|
37
35
|
export interface CitationStatus {
|
|
package/lib/types/foundHighlight.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { type Citation } from "./citation";
|
|
2
|
-
import { type SearchState } from "./search";
|
|
3
|
-
import { type PdfSpaceItem } from "./boxes";
|
|
1
|
+
import { type Citation } from "./citation.js";
|
|
2
|
+
import { type SearchState } from "./search.js";
|
|
3
|
+
import { type PdfSpaceItem } from "./boxes.js";
|
|
4
4
|
export declare const NOT_FOUND_HIGHLIGHT_INDEX = -1;
|
|
5
5
|
export declare const PENDING_HIGHLIGHT_INDEX = -2;
|
|
6
6
|
export declare const BLANK_HIGHLIGHT_LOCATION: FoundHighlightLocation;
|