@deepcitation/deepcitation-js 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- import type { Citation } from "../types/index";
2
- import type { CitationInput, ConvertFileInput, ConvertFileResponse, DeepCitationConfig, FileInput, PrepareConvertedFileOptions, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsFromLlmOutput, VerifyCitationsOptions, VerifyCitationsResponse } from "./types";
1
+ import type { Citation } from "../types/index.js";
2
+ import type { CitationInput, ConvertFileInput, ConvertFileResponse, DeepCitationConfig, FileInput, PrepareConvertedFileOptions, PrepareFilesResult, UploadFileOptions, UploadFileResponse, VerifyCitationsFromLlmOutput, VerifyCitationsOptions, VerifyCitationsResponse } from "./types.js";
3
3
  /**
4
4
  * DeepCitation client for file upload and citation verification.
5
5
  *
@@ -1,5 +1,5 @@
1
- import { getAllCitationsFromLlmOutput } from "../parsing/parseCitation";
2
- import { generateCitationKey } from "../react/utils";
1
+ import { getAllCitationsFromLlmOutput } from "../parsing/parseCitation.js";
2
+ import { generateCitationKey } from "../react/utils.js";
3
3
  const DEFAULT_API_URL = "https://api.deepcitation.com";
4
4
  /** Convert File/Blob/Buffer to a Blob suitable for FormData */
5
5
  function toBlob(file, filename) {
@@ -1,2 +1,2 @@
1
- export { DeepCitation } from "./DeepCitation";
2
- export type { DeepCitationConfig, UploadFileResponse, UploadFileOptions, VerifyCitationsResponse, VerifyCitationsOptions, CitationInput, FileInput, FileDataPart, PrepareFilesResult, VerifyCitationsFromLlmOutput, ConvertFileInput, ConvertFileResponse, PrepareConvertedFileOptions, } from "./types";
1
+ export { DeepCitation } from "./DeepCitation.js";
2
+ export type { DeepCitationConfig, UploadFileResponse, UploadFileOptions, VerifyCitationsResponse, VerifyCitationsOptions, CitationInput, FileInput, FileDataPart, PrepareFilesResult, VerifyCitationsFromLlmOutput, ConvertFileInput, ConvertFileResponse, PrepareConvertedFileOptions, } from "./types.js";
@@ -1 +1 @@
1
- export { DeepCitation } from "./DeepCitation";
1
+ export { DeepCitation } from "./DeepCitation.js";
@@ -1,4 +1,4 @@
1
- import type { Citation, FoundHighlightLocation } from "../types/index";
1
+ import type { Citation, FoundHighlightLocation } from "../types/index.js";
2
2
  /**
3
3
  * Configuration options for the DeepCitation client
4
4
  */
@@ -1,6 +1,50 @@
1
- import { sha1Hash } from "../utils/sha.js";
2
1
  import { normalizeCitations } from "./normalizeCitation.js";
3
2
  import { generateCitationKey } from "../react/utils.js";
3
/**
 * Parses a line_ids string that may contain individual numbers, ranges, or both.
 * Examples: "1,2,3", "5-10", "1,5-7,10", "20-20"
 *
 * Parsing rules:
 * - Parts are separated by commas; surrounding whitespace and empty parts are ignored.
 * - A part containing "-" is treated as an inclusive range "start-end".
 * - A range whose end is missing, unparsable, or smaller than its start
 *   (e.g. "5-", "10-5") contributes only its start value.
 * - A part with no parsable start (e.g. "-5", "abc") is ignored.
 * - A range spanning more than 1000 ids is NOT expanded; only its two endpoints
 *   are kept, so a malformed value such as "1-999999999" cannot exhaust memory.
 *
 * @param lineIdsString - The raw line_ids string (e.g., "1,5-7,10")
 * @returns Sorted array of unique line IDs, or undefined if empty/invalid
 */
function parseLineIds(lineIdsString) {
    if (!lineIdsString)
        return undefined;
    // Upper bound on how many ids a single "start-end" part may expand to.
    const MAX_RANGE_SIZE = 1000;
    const lineIds = [];
    for (const part of lineIdsString.split(",")) {
        const trimmed = part.trim();
        if (!trimmed)
            continue;
        if (!trimmed.includes("-")) {
            // Single number
            const num = parseInt(trimmed, 10);
            if (!Number.isNaN(num)) {
                lineIds.push(num);
            }
            continue;
        }
        // Range (e.g., "5-10"); anything after a second "-" is ignored.
        const [startStr, endStr] = trimmed.split("-");
        const start = parseInt(startStr, 10);
        const end = parseInt(endStr, 10);
        if (Number.isNaN(start))
            continue;
        if (Number.isNaN(end) || start > end) {
            // End is unusable (missing, invalid, or before start): keep the start only.
            lineIds.push(start);
            continue;
        }
        if (end - start + 1 > MAX_RANGE_SIZE) {
            // Refuse to materialize an oversized range; keep just its boundaries.
            lineIds.push(start, end);
            continue;
        }
        // Expand the range
        for (let i = start; i <= end; i++) {
            lineIds.push(i);
        }
    }
    if (lineIds.length === 0)
        return undefined;
    // Sort and deduplicate
    return [...new Set(lineIds)].sort((a, b) => a - b);
}
4
48
  /**
5
49
  * Calculates the verification status of a citation based on the found highlight and search state.
6
50
  *
@@ -55,7 +99,6 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
55
99
  const citationRegex = /<cite\s+file(?:_id|Id)='(\w{0,25})'\s+start_page[\_a-zA-Z]*='page[\_a-zA-Z]*(\d+)_index_(\d+)'\s+full_phrase='((?:[^'\\]|\\.)*)'\s+key_span='((?:[^'\\]|\\.)*)'\s+line(?:_ids|Ids)='([^']+)'(?:\s+(value|reasoning)='((?:[^'\\]|\\.)*)')?\s*\/>/g;
56
100
  const citationMatches = [...middleCite.matchAll(citationRegex)];
57
101
  const match = citationMatches?.[0];
58
- const rawCitationMd = match?.[0];
59
102
  const pageNumber = match?.[2] ? parseInt(match?.[2]) : undefined;
60
103
  let fileId = match?.[1];
61
104
  let attachmentId = fileId?.length === 20 ? fileId : mdAttachmentId || match?.[1];
@@ -75,15 +118,9 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
75
118
  }
76
119
  let lineIds;
77
120
  try {
78
- // match[5] is line_ids
121
+ // match[6] is line_ids
79
122
  const lineIdsString = match?.[6]?.replace(/[A-Za-z_[\](){}:]/g, "");
80
- lineIds = lineIdsString
81
- ? lineIdsString
82
- .split(",")
83
- .map((id) => (isNaN(parseInt(id)) ? undefined : parseInt(id)))
84
- .filter((id) => id !== undefined)
85
- .sort((a, b) => a - b)
86
- : undefined;
123
+ lineIds = lineIdsString ? parseLineIds(lineIdsString) : undefined;
87
124
  }
88
125
  catch (e) {
89
126
  if (isVerbose)
@@ -116,16 +153,13 @@ export const parseCitation = (fragment, mdAttachmentId, citationCounterRef, isVe
116
153
  }
117
154
  timestamps = { startTime, endTime };
118
155
  }
119
- const fragmentContext = sha1Hash(fragment).toString().slice(0, 8);
120
156
  const citation = {
121
- fragmentContext,
122
157
  fileId: attachmentId,
123
158
  pageNumber,
124
159
  fullPhrase,
125
160
  keySpan,
126
161
  citationNumber,
127
162
  lineIds,
128
- rawCitationMd,
129
163
  beforeCite,
130
164
  value,
131
165
  timestamps,
@@ -1,4 +1,4 @@
1
- export declare const CITATION_MARKDOWN_SYNTAX_PROMPT = "\nCitation syntax to use within Markdown:\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />\n\n\u2022 Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.\n\u2022 start_page_key, full_phrase, and line_ids are required for each citation.\n\u2022 Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.\n\u2022 Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.\n\u2022 These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.\n\u2022 The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.\n\u2022 We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.\n";
1
+ export declare const CITATION_MARKDOWN_SYNTAX_PROMPT = "\nCitation syntax to use within Markdown:\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or 1-3 words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />\n\n\u2022 Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.\n\u2022 start_page_key, full_phrase, and line_ids are required for each citation.\n\u2022 Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.\n\u2022 Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.\n\u2022 These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.\n\u2022 The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.\n\u2022 We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.\n";
2
2
  export declare const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = "\n\u2022 To support any ideas or information that requires a citation from the provided content, use the following citation syntax:\n<cite file_id='file_id' full_phrase='the verbatim text of the phrase; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />\n\u2022 These citations are displayed in-line or in the relevant list item, and are not grouped at the end of the document.\n";
3
3
  export interface WrapSystemPromptOptions {
4
4
  /** The original system prompt to wrap with citation instructions */
@@ -1,7 +1,7 @@
1
1
  export const CITATION_MARKDOWN_SYNTAX_PROMPT = `
2
2
  Citation syntax to use within Markdown:
3
3
  • To support any ideas or information that requires a citation from the provided content, use the following citation syntax:
4
- <cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />
4
+ <cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text />; remember to escape quotes and newlines inside the full_phrase to remain as valid JSON' key_span='the verbatim value or 1-3 words within full_phrase that best support the citation' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />
5
5
 
6
6
  • Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.
7
7
  • start_page_key, full_phrase, and line_ids are required for each citation.
@@ -125,7 +125,7 @@ export const CITATION_JSON_OUTPUT_FORMAT = {
125
125
  },
126
126
  keySpan: {
127
127
  type: "string",
128
- description: "the verbatim value or words within fullPhrase that best support the citation",
128
+ description: "the verbatim value or 1-3 words within fullPhrase that best support the citation",
129
129
  },
130
130
  lineIds: {
131
131
  type: "array",
@@ -1,3 +1,3 @@
1
- export * from "./promptCompression";
2
- export * from "./citationPrompts";
3
- export * from "./types";
1
+ export * from "./promptCompression.js";
2
+ export * from "./citationPrompts.js";
3
+ export * from "./types.js";
@@ -1,3 +1,3 @@
1
- export * from "./promptCompression";
2
- export * from "./citationPrompts";
3
- export * from "./types";
1
+ export * from "./promptCompression.js";
2
+ export * from "./citationPrompts.js";
3
+ export * from "./types.js";
@@ -1,4 +1,4 @@
1
- import { CompressedResult } from "./types";
1
+ import { CompressedResult } from "./types.js";
2
2
  /**
3
3
  * Compress all occurrences of `ids` inside `obj`, returning a new object
4
4
  * plus the `prefixMap` needed to decompress.
@@ -13,7 +13,6 @@ export function generateCitationKey(citation) {
13
13
  citation.lineIds?.join(",") || "",
14
14
  citation.timestamps?.startTime || "",
15
15
  citation.timestamps?.endTime || "",
16
- citation.fragmentContext || "",
17
16
  ];
18
17
  return sha1Hash(keyParts.join("|")).slice(0, 16);
19
18
  }
@@ -31,7 +30,10 @@ export function generateCitationInstanceId(citationKey) {
31
30
  export function getCitationDisplayText(citation, options = {}) {
32
31
  const { displayCitationValue = false, fallbackDisplay } = options;
33
32
  if (displayCitationValue) {
34
- return citation.value || citation.citationNumber?.toString() || fallbackDisplay || "";
33
+ return (citation.value ||
34
+ citation.citationNumber?.toString() ||
35
+ fallbackDisplay ||
36
+ "");
35
37
  }
36
38
  return citation.citationNumber?.toString() || "";
37
39
  }
@@ -1,5 +1,5 @@
1
- import { type ScreenBox } from "./boxes";
2
- import { type FoundHighlightLocation } from "./foundHighlight";
1
+ import { type ScreenBox } from "./boxes.js";
2
+ import { type FoundHighlightLocation } from "./foundHighlight.js";
3
3
  export type OutputImageFormat = "jpeg" | "png" | "avif" | undefined | null;
4
4
  export declare const DEFAULT_OUTPUT_IMAGE_FORMAT: "avif";
5
5
  export interface VerifyCitationResponse {
@@ -30,8 +30,6 @@ export interface Citation {
30
30
  endTime?: string;
31
31
  startTime?: string;
32
32
  };
33
- fragmentContext?: string | null;
34
- rawCitationMd?: string;
35
33
  beforeCite?: string;
36
34
  }
37
35
  export interface CitationStatus {
@@ -1,6 +1,6 @@
1
- import { type Citation } from "./citation";
2
- import { type SearchState } from "./search";
3
- import { type PdfSpaceItem } from "./boxes";
1
+ import { type Citation } from "./citation.js";
2
+ import { type SearchState } from "./search.js";
3
+ import { type PdfSpaceItem } from "./boxes.js";
4
4
  export declare const NOT_FOUND_HIGHLIGHT_INDEX = -1;
5
5
  export declare const PENDING_HIGHLIGHT_INDEX = -2;
6
6
  export declare const BLANK_HIGHLIGHT_LOCATION: FoundHighlightLocation;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@deepcitation/deepcitation-js",
3
- "version": "1.0.7",
3
+ "version": "1.0.9",
4
4
  "description": "DeepCitation JavaScript SDK for deterministic AI citation verification",
5
5
  "type": "module",
6
6
  "private": false,