npm - @deepcitation/deepcitation-js - Versions diffs - 1.0.1 → 1.0.3 - Mend

@deepcitation/deepcitation-js 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

package/README.md +71 -1197
package/lib/client/DeepCitation.d.ts +204 -0
package/lib/client/DeepCitation.js +473 -0
package/lib/client/index.d.ts +2 -0
package/lib/client/index.js +1 -0
package/lib/client/types.d.ts +157 -0
package/lib/client/types.js +1 -0
package/lib/index.d.ts +25 -0
package/lib/index.js +22 -0
package/lib/parsing/normalizeCitation.d.ts +5 -0
package/lib/parsing/normalizeCitation.js +182 -0
package/lib/parsing/parseCitation.d.ts +79 -0
package/lib/parsing/parseCitation.js +371 -0
package/lib/parsing/parseWorkAround.d.ts +2 -0
package/lib/parsing/parseWorkAround.js +73 -0
package/lib/prompts/citationPrompts.d.ts +133 -0
package/lib/prompts/citationPrompts.js +152 -0
package/lib/prompts/index.d.ts +3 -0
package/lib/prompts/index.js +3 -0
package/lib/prompts/promptCompression.d.ts +14 -0
package/lib/prompts/promptCompression.js +109 -0
package/lib/prompts/types.d.ts +4 -0
package/lib/prompts/types.js +1 -0
package/lib/react/CitationComponent.d.ts +134 -0
package/lib/react/CitationComponent.js +376 -0
package/lib/react/CitationVariants.d.ts +135 -0
package/lib/react/CitationVariants.js +283 -0
package/lib/react/DiffDisplay.d.ts +10 -0
package/lib/react/DiffDisplay.js +33 -0
package/lib/react/UrlCitationComponent.d.ts +83 -0
package/lib/react/UrlCitationComponent.js +224 -0
package/lib/react/VerificationTabs.d.ts +10 -0
package/lib/react/VerificationTabs.js +36 -0
package/lib/react/icons.d.ts +8 -0
package/lib/react/icons.js +9 -0
package/lib/react/index.d.ts +16 -0
package/lib/react/index.js +18 -0
package/lib/react/primitives.d.ts +104 -0
package/lib/react/primitives.js +190 -0
package/lib/react/types.d.ts +192 -0
package/lib/react/types.js +1 -0
package/lib/react/useSmartDiff.d.ts +16 -0
package/lib/react/useSmartDiff.js +64 -0
package/lib/react/utils.d.ts +34 -0
package/lib/react/utils.js +59 -0
package/lib/types/boxes.d.ts +11 -0
package/lib/types/boxes.js +1 -0
package/lib/types/citation.d.ts +44 -0
package/lib/types/citation.js +2 -0
package/lib/types/foundHighlight.d.ts +23 -0
package/lib/types/foundHighlight.js +22 -0
package/lib/types/index.d.ts +11 -0
package/lib/types/index.js +7 -0
package/lib/types/search.d.ts +30 -0
package/lib/types/search.js +1 -0
package/lib/utils/sha.d.ts +10 -0
package/lib/utils/sha.js +108 -0
package/package.json +11 -23

package/lib/prompts/citationPrompts.js ADDED Viewed

@@ -0,0 +1,152 @@
+/**
+ * Prompt fragment describing the `<cite ... />` syntax for page/line-based sources.
+ * It states that `start_page_key`, `full_phrase`, and `line_ids` are required on every
+ * citation, and it is the fragment `wrapSystemCitationPrompt` selects when
+ * `isAudioVideo` is false.
+ *
+ * NOTE(review): the bullet "Use refer to line_ids inclusively" reads like a typo
+ * ("Refer to ..."); it is left unchanged here because this string is runtime prompt text.
+ */
+export const CITATION_MARKDOWN_SYNTAX_PROMPT = `
+Citation syntax to use within Markdown:
+• To support any ideas or information that requires a citation from the provided content, use the following citation syntax:
+<cite file_id='file_id' start_page_key='page_number_PAGE_index_INDEX' full_phrase='the verbatim text of the terse phrase inside <file_text /> (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' line_ids='2-6' reasoning='the terse logic used to conclude the citation' />
+• Very important: for page numbers, only use the page number and page index info from the page_number_PAGE_index_INDEX format (e.g. <page_number_1_index_0>) and never from the contents inside the page.
+• start_page_key, full_phrase, and line_ids are required for each citation.
+• Infer line_ids, as we only provide the first, last, and every 5th line. When copying a previous <cite />, use the full info from the previous citation without changing the start_page_key, line_ids, or any other <cite /> attributes.
+• Use refer to line_ids inclusively, and use a range (or single) for each citation, split multiple sequential line_ids into multiple citations.
+• These citations will be replaced and displayed in-line as a numeric element (e.g. [1]), the markdown preceding <cite /> should read naturally with only one <cite /> per sentence with rare exceptions for two <cite /> in a sentence. <cite /> often present best at the end of the sentence, and are not grouped at the end of the document.
+• The full_phrase should be the exact verbatim text of the phrase or paragraph from the source document to support the insight or idea.
+• We do NOT put the full_phrase inside <cite ...></cite>; we only use full_phrase inside the full_phrase attribute.
+`;
+/**
+ * Prompt fragment describing the `<cite ... />` syntax for audio/video sources.
+ * Citations carry a `timestamps` range (`HH:MM:SS.SSS-HH:MM:SS.SSS`) instead of the
+ * page key and line ids used by the page-based fragment. `wrapSystemCitationPrompt`
+ * selects this fragment when `isAudioVideo` is true.
+ */
+export const AV_CITATION_MARKDOWN_SYNTAX_PROMPT = `
+• To support any ideas or information that requires a citation from the provided content, use the following citation syntax:
+<cite file_id='file_id' full_phrase='the verbatim text of the phrase (remember to escape quotes and newlines inside the full_phrase to remain as valid JSON)' timestamps='HH:MM:SS.SSS-HH:MM:SS.SSS' reasoning='the logic connecting the form section requirements to the supporting source citation' />
+• These citations are displayed in-line or in the relevant list item, and are not grouped at the end of the document.
+`;
+/**
+ * Wraps your existing system prompt with DeepCitation's citation syntax instructions.
+ * This enables LLMs to output verifiable citations that can be checked against source documents.
+ *
+ * @example
+ * ```typescript
+ * import { wrapSystemCitationPrompt } from '@deepcitation/deepcitation-js';
+ *
+ * const systemPrompt = "You are a helpful assistant that analyzes documents.";
+ * const enhanced = wrapSystemCitationPrompt({ systemPrompt });
+ *
+ * // Use enhanced prompt with your LLM
+ * const response = await openai.chat.completions.create({
+ *   messages: [{ role: "system", content: enhanced }],
+ *   // ...
+ * });
+ * ```
+ */
+export function wrapSystemCitationPrompt(options) {
+    const { systemPrompt, isAudioVideo = false, prependCitationInstructions = false } = options;
+    const citationPrompt = isAudioVideo ? AV_CITATION_MARKDOWN_SYNTAX_PROMPT : CITATION_MARKDOWN_SYNTAX_PROMPT;
+    if (prependCitationInstructions) {
+        return `${citationPrompt.trim()}
+${systemPrompt.trim()}`;
+    }
+    //append
+    return `${systemPrompt.trim()}
+${citationPrompt.trim()}`;
+}
+/**
+ * Wraps both system and user prompts with DeepCitation's citation syntax instructions.
+ * This is the recommended way to prepare prompts for citation verification.
+ *
+ * @example
+ * ```typescript
+ * import { wrapCitationPrompt } from '@deepcitation/deepcitation-js';
+ *
+ * // Single file
+ * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
+ *   systemPrompt: "You are a helpful assistant.",
+ *   userPrompt: "Analyze this document and summarize it.",
+ *   fileDeepText, // from uploadFile response
+ * });
+ *
+ * // Multiple files
+ * const { enhancedSystemPrompt, enhancedUserPrompt } = wrapCitationPrompt({
+ *   systemPrompt: "You are a helpful assistant.",
+ *   userPrompt: "Compare these documents.",
+ *   fileDeepText: [fileDeepText1, fileDeepText2], // array of file texts
+ * });
+ *
+ * // Use enhanced prompts with your LLM
+ * const response = await llm.chat({
+ *   messages: [
+ *     { role: "system", content: enhancedSystemPrompt },
+ *     { role: "user", content: enhancedUserPrompt },
+ *   ],
+ * });
+ * ```
+ */
+export function wrapCitationPrompt(options) {
+    const { systemPrompt, userPrompt, fileDeepText, isAudioVideo = false } = options;
+    const enhancedSystemPrompt = wrapSystemCitationPrompt({
+        systemPrompt,
+        isAudioVideo,
+    });
+    // Build enhanced user prompt with file content if provided
+    let enhancedUserPrompt = userPrompt;
+    if (fileDeepText) {
+        const fileTexts = Array.isArray(fileDeepText) ? fileDeepText : [fileDeepText];
+        const fileContent = fileTexts
+            .map((text, index) => {
+            if (fileTexts.length === 1) {
+                return `<file_text>\n${text}\n</file_text>`;
+            }
+            return `<file_text file_index="${index + 1}">\n${text}\n</file_text>`;
+        })
+            .join("\n\n");
+        enhancedUserPrompt = `${fileContent}\n\n${userPrompt}`;
+    }
+    return {
+        enhancedSystemPrompt,
+        enhancedUserPrompt,
+    };
+}
+/**
+ * JSON-Schema-style description of a single page/line-based citation object.
+ * It declares `fileId`, `startPageKey`, `reasoning`, `fullPhrase`, and `lineIds`
+ * (an array of numbers), and lists all five as required.
+ */
+export const CITATION_JSON_OUTPUT_FORMAT = {
+    type: "object",
+    properties: {
+        fileId: { type: "string" },
+        startPageKey: {
+            type: "string",
+            description: 'Only return a result like "page_number_PAGE_index_INDEX" from the provided page keys (e.g. <page_number_1_index_0>) and never from the contents inside the page.',
+        },
+        reasoning: {
+            type: "string",
+            description: "The logic connecting the form section requirements to the supporting source citation",
+        },
+        fullPhrase: {
+            type: "string",
+            description: "The verbatim text of the terse phrase inside <file_text /> to support the value description (if there is a detected OCR correction, use the corrected text)",
+        },
+        lineIds: {
+            type: "array",
+            items: { type: "number" },
+            description: "Infer lineIds, as we only provide the first, last, and every 5th line. Provide inclusive lineIds for the fullPhrase.",
+        },
+    },
+    required: ["fileId", "startPageKey", "reasoning", "fullPhrase", "lineIds"],
+};
+/**
+ * JSON-Schema-style description of a single audio/video citation object.
+ * It declares `fileId`, `startPageKey`, `fullPhrase`, and a `timestamps` object whose
+ * `startTime` and `endTime` strings are both required.
+ *
+ * NOTE(review): unlike `CITATION_JSON_OUTPUT_FORMAT`, this schema has no top-level
+ * `required` list and no `reasoning` property, and it still carries the page-oriented
+ * `startPageKey` — confirm whether each of these is intentional.
+ */
+export const CITATION_AV_BASED_JSON_OUTPUT_FORMAT = {
+    type: "object",
+    properties: {
+        fileId: { type: "string" },
+        startPageKey: {
+            type: "string",
+            description: 'Only return a result like "page_number_PAGE_index_INDEX" from the provided page keys (e.g. <page_number_1_index_0>) and never from the contents inside the page.',
+        },
+        fullPhrase: {
+            type: "string",
+            description: "The exact verbatim text of the phrase or paragraph from the source document to support the value description (if there is a detected OCR correction, use the verbatim corrected text)",
+        },
+        timestamps: {
+            type: "object",
+            properties: {
+                startTime: { type: "string" },
+                endTime: { type: "string" },
+            },
+            required: ["startTime", "endTime"],
+            description: "The timestamp of the audio or video frame including milliseconds formatted as: HH:MM:SS.SSS",
+        },
+    },
+};

package/lib/prompts/index.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+export * from "./promptCompression";
+export * from "./citationPrompts";
+export * from "./types";

package/lib/prompts/index.js ADDED Viewed

@@ -0,0 +1,3 @@
+export * from "./promptCompression";
+export * from "./citationPrompts";
+export * from "./types";

package/lib/prompts/promptCompression.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+import { CompressedResult } from "./types";
+/**
+ * Compress all occurrences of `ids` inside `obj`, returning a new object
+ * plus the `prefixMap` needed to decompress.
+ *
+ * @param obj - The value whose serialized form is searched for the IDs.
+ * @param ids - Full IDs to shorten; may be `undefined`.
+ * @returns A `CompressedResult<T>` holding the compressed value and its `prefixMap`.
+ */
+export declare function compressPromptIds<T>(obj: T, ids: string[] | undefined): CompressedResult<T>;
+/**
+ * Decompress all minimal prefixes back into their full IDs,
+ * using the `prefixMap` returned from `compressPromptIds`.
+ *
+ * If you pass in a string, it will return a string.
+ * If you pass in an object, it will JSON‑serialize and parse it back.
+ *
+ * @param compressed - Compressed text, or a compressed value of type `T`.
+ * @param prefixMap - Map from each prefix to the full ID it stands for.
+ * @returns The decompressed string or value, mirroring the kind of input given.
+ */
+export declare function decompressPromptIds<T>(compressed: T | string, prefixMap: Record<string, string>): T | string;

package/lib/prompts/promptCompression.js ADDED Viewed

@@ -0,0 +1,109 @@
+const MIN_PREFIX_LENGTH = 4;
+const MIN_CHARACTERS_PER_PREFIX_WITH_AT_LEAST_ONE_DIGIT = 3;
+const MIN_CHARACTERS_PER_PREFIX_WITH_NO_DIGITS = 5;
+/**
+ * Build a map from each ID's minimal unique prefix to the full ID,
+ * such that the prefix only ever appears in the prompt where the full ID appears.
+ */
+function buildSafePrefixMap(ids, prompt) {
+    const map = {};
+    for (const id of ids) {
+        for (let len = MIN_PREFIX_LENGTH; len <= id.length; len++) {
+            const prefix = id.slice(0, len);
+            // Check minimum requirements
+            const digitCount = (prefix.match(/\d/g) || []).length;
+            const letterCount = (prefix.match(/[a-zA-Z]/g) || []).length;
+            if (prefix.length < MIN_PREFIX_LENGTH ||
+                (digitCount > 0 && letterCount < MIN_CHARACTERS_PER_PREFIX_WITH_AT_LEAST_ONE_DIGIT) ||
+                (digitCount === 0 && letterCount < MIN_CHARACTERS_PER_PREFIX_WITH_NO_DIGITS)) {
+                continue;
+            }
+            // 1) Unique among IDs
+            if (ids.some(other => other !== id && other.startsWith(prefix))) {
+                continue;
+            }
+            // 2) Only appears in prompt as part of the full ID
+            const esc = (s) => s.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&");
+            const prefixCount = (prompt.match(new RegExp(esc(prefix), "g")) || []).length;
+            const fullCount = (prompt.match(new RegExp(esc(id), "g")) || []).length;
+            if (prefixCount !== fullCount) {
+                continue;
+            }
+            map[prefix] = id;
+            break;
+        }
+        if (!Object.values(map).includes(id)) {
+            throw new Error(`Cannot find a safe unique prefix for ID "${id}" that meets the minimum requirements (length: ${MIN_PREFIX_LENGTH})`);
+        }
+    }
+    return map;
+}
+/**
+ * Compress all occurrences of `ids` inside `obj`, returning a new object
+ * plus the `prefixMap` needed to decompress.
+ */
+export function compressPromptIds(obj, ids) {
+    if (!ids || ids.length === 0) {
+        return { compressed: obj, prefixMap: {} };
+    }
+    const uniqueIds = Array.from(new Set(ids));
+    const text = JSON.stringify(obj);
+    const prefixMap = buildSafePrefixMap(uniqueIds, text);
+    // Sort prefixes by descending length to avoid partial matches
+    const prefixes = Object.keys(prefixMap).sort((a, b) => b.length - a.length);
+    let compressedText = text;
+    for (const prefix of prefixes) {
+        const full = prefixMap[prefix];
+        const escFull = full.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&");
+        compressedText = compressedText.replace(new RegExp(escFull, "g"), prefix);
+    }
+    return {
+        compressed: JSON.parse(compressedText),
+        prefixMap,
+    };
+}
+/**
+ * Decompress all minimal prefixes back into their full IDs,
+ * using the `prefixMap` returned from `compressPromptIds`.
+ *
+ * If you pass in a string, it will return a string.
+ * If you pass in an object, it will JSON‑serialize and parse it back.
+ */
+export function decompressPromptIds(compressed, prefixMap) {
+    if (!prefixMap || Object.keys(prefixMap).length === 0) {
+        return compressed;
+    }
+    // Prepare sorted [prefix, full] entries (longest prefix first)
+    const entries = Object.entries(prefixMap).sort((a, b) => b[0].length - a[0].length);
+    // Decide whether we're working on a string or an object
+    let text;
+    let shouldParseBack = false;
+    if (typeof compressed === "string") {
+        text = compressed;
+    }
+    else {
+        text = JSON.stringify(compressed);
+        shouldParseBack = true;
+    }
+    const originalLength = text?.length;
+    // Perform all prefix → full-ID replacements
+    for (const [prefix, full] of entries) {
+        const escPrefix = prefix.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&");
+        text = text.replace(new RegExp(escPrefix, "g"), full);
+    }
+    //this is for citation fileId or file_id
+    if (entries.length === 1 && (text.includes("file_id='") || text.includes('file_id="'))) {
+        const fullId = entries[0][1];
+        text = text.replace(/file_id='[^']*'|file_id="[^"]*"/g, `file_id='${fullId}'`);
+    }
+    else if (entries.length === 1 && (text.includes("fileId='") || text.includes('fileId="'))) {
+        const fullId = entries[0][1];
+        text = text.replace(/fileId='[^']*'|fileId="[^"]*"/g, `fileId='${fullId}'`);
+    }
+    const newLength = text?.length;
+    const diff = originalLength - newLength;
+    if (diff > 0) {
+        throw new Error(`[decompressedPromptIds] diff ${diff} originalLength ${originalLength} newLength ${newLength}`);
+    }
+    return shouldParseBack ? JSON.parse(text) : text;
+}

package/lib/prompts/types.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+/**
+ * Result shape shared by the prompt-compression helpers: the compressed value
+ * together with the map required to expand it again.
+ */
+export interface CompressedResult<T> {
+    /** The value with IDs shortened; same type as the input. */
+    compressed: T;
+    /** Maps each shortened prefix to the full ID it replaces. */
+    prefixMap: Record<string, string>;
+}

package/lib/prompts/types.js ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/lib/react/CitationComponent.d.ts ADDED Viewed

@@ -0,0 +1,134 @@
+import React, { type ReactNode } from "react";
+import { type CitationStatus } from "../types/citation.js";
+import type { FoundHighlightLocation } from "../types/foundHighlight.js";
+import type { BaseCitationProps, CitationEventHandlers, CitationRenderProps, CitationVariant } from "./types.js";
+import "./styles.css";
+export type { CitationVariant } from "./types.js";
+/**
+ * Props for the CitationComponent.
+ *
+ * @example Brackets variant (default) - shows value/number in brackets with blue styling
+ * ```tsx
+ * <CitationComponent
+ *   citation={{ citationNumber: 1, fullPhrase: "Revenue grew by 25%" }}
+ *   foundCitation={verificationResult}
+ * />
+ * // Renders: [1✓] with blue text
+ * ```
+ *
+ * @example Numeric variant - shows just the citation number with indicator
+ * ```tsx
+ * <CitationComponent
+ *   citation={{ citationNumber: 1, value: "25% growth" }}
+ *   foundCitation={verificationResult}
+ *   variant="numeric"
+ * />
+ * // Renders: 1✓
+ * ```
+ *
+ * @example Text variant - shows the value without styling
+ * ```tsx
+ * <CitationComponent
+ *   citation={{ citationNumber: 1, value: "25% growth" }}
+ *   foundCitation={verificationResult}
+ *   variant="text"
+ * />
+ * // Renders: 25% growth✓
+ * ```
+ *
+ * @example Minimal variant - no brackets, just text and indicator
+ * ```tsx
+ * <CitationComponent
+ *   citation={citation}
+ *   foundCitation={verificationResult}
+ *   variant="minimal"
+ * />
+ * // Renders: Revenue grew...✓
+ * ```
+ *
+ * @example Indicator-only variant
+ * ```tsx
+ * <CitationComponent
+ *   citation={citation}
+ *   foundCitation={verificationResult}
+ *   variant="indicator"
+ * />
+ * // Renders: ✓
+ * ```
+ *
+ * @example Hidden popover
+ * ```tsx
+ * <CitationComponent
+ *   citation={citation}
+ *   foundCitation={verificationResult}
+ *   popoverPosition="hidden"
+ * />
+ * ```
+ */
+export interface CitationComponentProps extends BaseCitationProps {
+    /**
+     * Verification result from the DeepCitation API.
+     * Contains match snippet, page number, and verification image.
+     * Optional; `null` is also accepted.
+     */
+    foundCitation?: FoundHighlightLocation | null;
+    /**
+     * Display variant for the citation.
+     * - `brackets`: Shows value/number in brackets, blue text styling (default)
+     * - `numeric`: Shows citation number with indicator, no brackets
+     * - `text`: Shows the value, no text styling, no truncate, shows indicator
+     * - `minimal`: No brackets, just display text with indicator
+     * - `indicator`: Only the status indicator (checkmark/warning), no text
+     */
+    variant?: CitationVariant;
+    /**
+     * Event handlers for citation interactions.
+     */
+    eventHandlers?: CitationEventHandlers;
+    /**
+     * Enable mobile touch handlers.
+     * @default false
+     */
+    isMobile?: boolean;
+    /**
+     * Custom render function for the status indicator.
+     * Receives the citation status and should return a ReactNode.
+     */
+    renderIndicator?: (status: CitationStatus) => ReactNode;
+    /**
+     * Custom render function for the entire citation content.
+     * When provided, takes full control of rendering (ignores format, showBrackets).
+     *
+     * NOTE(review): `format` and `showBrackets` are not declared on this interface;
+     * presumably they come from `BaseCitationProps` or the wording is stale — confirm.
+     */
+    renderContent?: (props: CitationRenderProps) => ReactNode;
+    /**
+     * Position of the verification popover.
+     * Use "hidden" to disable the popover entirely.
+     * @default "top"
+     */
+    popoverPosition?: "top" | "bottom" | "hidden";
+    /**
+     * Custom render function for popover content.
+     * Receives the citation, the verification result (`foundCitation`, which may be
+     * `null`), and the citation `status`; should return a ReactNode.
+     */
+    renderPopoverContent?: (props: {
+        citation: BaseCitationProps["citation"];
+        foundCitation: FoundHighlightLocation | null;
+        status: CitationStatus;
+    }) => ReactNode;
+}
+/**
+ * CitationComponent displays a citation with verification status.
+ *
+ * The component separates two concepts:
+ * 1. **Found status** (text styling) - whether the citation was found in the document
+ *    - Verified & Partial both use "found" styling (blue text)
+ *    - Miss uses "not found" styling (gray/strikethrough)
+ *
+ * 2. **Match quality** (indicator styling) - how well the citation matched
+ *    - Exact match: green checkmark
+ *    - Partial match: orange checkmark
+ *    - Miss: no indicator
+ *
+ * This means partial matches have blue text (because they were found) but
+ * an orange indicator (because they didn't match exactly).
+ *
+ * Declared as a ref-forwarding component: it accepts `CitationComponentProps`
+ * plus a `ref` typed for an `HTMLSpanElement`.
+ */
+export declare const CitationComponent: React.ForwardRefExoticComponent<CitationComponentProps & React.RefAttributes<HTMLSpanElement>>;
+/**
+ * Variant of the citation component exposed as a `React.NamedExoticComponent`, accepting
+ * the same `CitationComponentProps` and `HTMLSpanElement` ref as `CitationComponent`.
+ *
+ * NOTE(review): presumably `React.memo(CitationComponent)`; the implementation is not
+ * visible from this declaration file — confirm.
+ */
+export declare const MemoizedCitationComponent: React.NamedExoticComponent<CitationComponentProps & React.RefAttributes<HTMLSpanElement>>;