@deepcitation/deepcitation-js 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +71 -1197
  2. package/lib/client/DeepCitation.d.ts +204 -0
  3. package/lib/client/DeepCitation.js +473 -0
  4. package/lib/client/index.d.ts +2 -0
  5. package/lib/client/index.js +1 -0
  6. package/lib/client/types.d.ts +157 -0
  7. package/lib/client/types.js +1 -0
  8. package/lib/index.d.ts +25 -0
  9. package/lib/index.js +22 -0
  10. package/lib/parsing/normalizeCitation.d.ts +5 -0
  11. package/lib/parsing/normalizeCitation.js +182 -0
  12. package/lib/parsing/parseCitation.d.ts +79 -0
  13. package/lib/parsing/parseCitation.js +371 -0
  14. package/lib/parsing/parseWorkAround.d.ts +2 -0
  15. package/lib/parsing/parseWorkAround.js +73 -0
  16. package/lib/prompts/citationPrompts.d.ts +133 -0
  17. package/lib/prompts/citationPrompts.js +152 -0
  18. package/lib/prompts/index.d.ts +3 -0
  19. package/lib/prompts/index.js +3 -0
  20. package/lib/prompts/promptCompression.d.ts +14 -0
  21. package/lib/prompts/promptCompression.js +109 -0
  22. package/lib/prompts/types.d.ts +4 -0
  23. package/lib/prompts/types.js +1 -0
  24. package/lib/react/CitationComponent.d.ts +134 -0
  25. package/lib/react/CitationComponent.js +376 -0
  26. package/lib/react/CitationVariants.d.ts +135 -0
  27. package/lib/react/CitationVariants.js +283 -0
  28. package/lib/react/DiffDisplay.d.ts +10 -0
  29. package/lib/react/DiffDisplay.js +33 -0
  30. package/lib/react/UrlCitationComponent.d.ts +83 -0
  31. package/lib/react/UrlCitationComponent.js +224 -0
  32. package/lib/react/VerificationTabs.d.ts +10 -0
  33. package/lib/react/VerificationTabs.js +36 -0
  34. package/lib/react/icons.d.ts +8 -0
  35. package/lib/react/icons.js +9 -0
  36. package/lib/react/index.d.ts +16 -0
  37. package/lib/react/index.js +18 -0
  38. package/lib/react/primitives.d.ts +104 -0
  39. package/lib/react/primitives.js +190 -0
  40. package/lib/react/types.d.ts +192 -0
  41. package/lib/react/types.js +1 -0
  42. package/lib/react/useSmartDiff.d.ts +16 -0
  43. package/lib/react/useSmartDiff.js +64 -0
  44. package/lib/react/utils.d.ts +34 -0
  45. package/lib/react/utils.js +59 -0
  46. package/lib/types/boxes.d.ts +11 -0
  47. package/lib/types/boxes.js +1 -0
  48. package/lib/types/citation.d.ts +44 -0
  49. package/lib/types/citation.js +2 -0
  50. package/lib/types/foundHighlight.d.ts +23 -0
  51. package/lib/types/foundHighlight.js +22 -0
  52. package/lib/types/index.d.ts +11 -0
  53. package/lib/types/index.js +7 -0
  54. package/lib/types/search.d.ts +30 -0
  55. package/lib/types/search.js +1 -0
  56. package/lib/utils/sha.d.ts +10 -0
  57. package/lib/utils/sha.js +108 -0
  58. package/package.json +5 -2
@@ -0,0 +1,64 @@
1
+ import * as Diff from "diff";
2
+ import { useMemo } from "react";
3
/**
 * React hook that computes a two-pass diff between `expected` and `actual`.
 *
 * Pass 1 diffs the inputs line by line (`Diff.diffLines`). Pass 2 walks the
 * line chunks: a "removed" chunk immediately followed by an "added" chunk is
 * re-diffed word by word (`Diff.diffWordsWithSpace`) and reported as a single
 * "modified" block; lone added/removed chunks and unchanged chunks are passed
 * through as-is.
 *
 * The result is memoized on `[expected, actual]`.
 *
 * @param {string} [expected=""] - Reference text.
 * @param {string} [actual=""] - Text compared against `expected`.
 * @returns {{
 *   diffResult: Array<{type: "modified"|"added"|"removed"|"unchanged", parts: Array<object>}>,
 *   hasDiff: boolean,
 *   similarity: number,
 *   isHighVariance: boolean
 * }} `similarity` is in [0, 1] (1 = identical); `isHighVariance` is true
 *   when `similarity < 0.6`.
 */
export const useSmartDiff = (expected = "", actual = "") => {
    return useMemo(() => {
        // 1. Sanitize standard noise (surrounding whitespace, CRLF line endings).
        const cleanExpected = (expected || "").trim().replace(/\r\n/g, "\n");
        const cleanActual = (actual || "").trim().replace(/\r\n/g, "\n");
        // 2. First pass: diff by LINES, so an extra line becomes one isolated
        //    "added" chunk instead of confusing the word tokenizer downstream.
        const lineDiffs = Diff.diffLines(cleanExpected, cleanActual);
        // 3. Second pass: classify each chunk and refine modifications.
        const processedDiffs = [];
        let hasDiff = false;
        let totalChange = 0;
        for (let i = 0; i < lineDiffs.length; i++) {
            const part = lineDiffs[i];
            const nextPart = lineDiffs[i + 1];
            if (part.removed && nextPart && nextPart.added) {
                // MODIFICATION: a removed chunk directly followed by an added
                // chunk. Run the word diff ONLY on this pair.
                const wordDiffs = Diff.diffWordsWithSpace(part.value, nextPart.value);
                processedDiffs.push({
                    type: "modified",
                    parts: wordDiffs,
                });
                hasDiff = true;
                // Score the change by the characters that actually differ.
                // Using the difference in chunk lengths would score a
                // same-length rewrite as "no change".
                let removedChars = 0;
                let addedChars = 0;
                for (const wordPart of wordDiffs) {
                    if (wordPart.removed) {
                        removedChars += wordPart.value.length;
                    }
                    else if (wordPart.added) {
                        addedChars += wordPart.value.length;
                    }
                }
                totalChange += Math.max(removedChars, addedChars);
                i++; // Skip the next chunk: it was merged into this block.
            }
            else if (part.added || part.removed) {
                // PURE ADDITION/DELETION (the "extra line" scenario).
                processedDiffs.push({
                    type: part.added ? "added" : "removed",
                    parts: [{ value: part.value, added: part.added, removed: part.removed }],
                });
                hasDiff = true;
                totalChange += part.value.length;
            }
            else {
                // UNCHANGED chunk.
                processedDiffs.push({
                    type: "unchanged",
                    parts: [{ value: part.value }],
                });
            }
        }
        // 4. Similarity score used to pick UI defaults.
        //    1.0 = perfect match, 0.0 = totally different. Removed and added
        //    text together can exceed the longer input, so clamp at 0.
        const maxLength = Math.max(cleanExpected.length, cleanActual.length);
        const similarity = maxLength === 0 ? 1 : Math.max(0, 1 - totalChange / maxLength);
        return {
            diffResult: processedDiffs,
            hasDiff,
            similarity,
            // Below 60% similarity the diff view is likely too noisy to read,
            // so callers can use this flag to default to the "Source" tab.
            isHighVariance: similarity < 0.6,
        };
    }, [expected, actual]);
};
@@ -0,0 +1,34 @@
1
+ import type { Citation } from "../types/citation.js";
2
+ /**
3
+ * Generates a unique, deterministic key for a citation based on its content.
4
+ * Uses a hash of the citation's identifying properties.
5
+ */
6
+ export declare function generateCitationKey(citation: Citation): string;
7
+ /**
8
+ * Generates a unique instance ID for a citation component render.
9
+ * Combines the citation key with a random suffix for uniqueness.
10
+ */
11
+ export declare function generateCitationInstanceId(citationKey: string): string;
12
+ /**
13
+ * Gets the display text for a citation based on configuration.
14
+ */
15
+ export declare function getCitationDisplayText(citation: Citation, options?: {
16
+ displayCitationValue?: boolean; // when true, citation.value is preferred over the citation number; defaults to false in the implementation
17
+ fallbackDisplay?: string | null; // the implementation only consults this when displayCitationValue is true
18
+ }): string;
19
+ /**
20
+ * Gets the value text to display before the citation bracket.
21
+ */
22
+ export declare function getCitationValueText(citation: Citation, options?: {
23
+ displayCitationValue?: boolean; // when true, the implementation returns "" instead of citation.value
24
+ }): string;
25
+ /**
26
+ * Joins class names, filtering out falsy values.
27
+ * This is a minimal implementation for the base component.
28
+ */
29
+ export declare function classNames(...classes: (string | undefined | null | false)[]): string;
30
+ /**
31
+ * Default padding values for citation styling.
32
+ */
33
+ export declare const CITATION_X_PADDING = 4;
34
+ export declare const CITATION_Y_PADDING = 1;
@@ -0,0 +1,59 @@
1
+ import { sha1Hash } from "../utils/sha.js";
2
/**
 * Builds a deterministic key for a citation from its content.
 *
 * The identifying fields are serialized in a fixed order, separated by "|",
 * and hashed with sha1Hash; the first 16 characters of the hash are returned.
 * A missing or falsy field contributes an empty segment.
 */
export function generateCitationKey(citation) {
    const {
        fileId,
        pageNumber,
        fullPhrase,
        value,
        citationNumber,
        lineIds,
        timestamps,
        fragmentContext,
    } = citation;
    const serialized = [
        fileId || "",
        pageNumber?.toString() || "",
        fullPhrase || "",
        value || "",
        citationNumber?.toString() || "",
        lineIds?.join(",") || "",
        timestamps?.startTime || "",
        timestamps?.endTime || "",
        fragmentContext || "",
    ].join("|");
    const digest = sha1Hash(serialized);
    return digest.slice(0, 16);
}
20
/**
 * Generates an instance ID for a citation component render.
 *
 * Appends a random base-36 suffix (at most 9 characters, derived from
 * Math.random()) to the citation key, separated by "-". The suffix is not
 * cryptographically secure.
 *
 * @param {string} citationKey - Key to prefix the ID with.
 * @returns {string} `${citationKey}-${randomSuffix}`.
 */
export function generateCitationInstanceId(citationKey) {
    // slice(2, 11) drops the leading "0." and keeps up to 9 characters; it is
    // the non-deprecated equivalent of String.prototype.substr(2, 9).
    const randomSuffix = Math.random().toString(36).slice(2, 11);
    return `${citationKey}-${randomSuffix}`;
}
28
/**
 * Resolves the text shown for a citation.
 *
 * With `displayCitationValue` off (the default) the citation number is
 * returned, or "" when there is none. With it on, the first truthy of
 * citation.value, the citation number, and `fallbackDisplay` is returned,
 * or "" when all are missing.
 */
export function getCitationDisplayText(citation, options = {}) {
    if (!options.displayCitationValue) {
        const numberOnly = citation.citationNumber?.toString();
        return numberOnly || "";
    }
    const candidates = [
        citation.value,
        citation.citationNumber?.toString(),
        options.fallbackDisplay,
    ];
    return candidates.find(Boolean) || "";
}
38
/**
 * Resolves the value text rendered ahead of the citation bracket.
 *
 * Returns "" when `displayCitationValue` is on; otherwise returns
 * citation.value, or "" when that is missing.
 */
export function getCitationValueText(citation, options = {}) {
    const valueShownElsewhere = options.displayCitationValue;
    return valueShownElsewhere ? "" : (citation.value || "");
}
48
/**
 * Concatenates the truthy entries of `classes` with single spaces.
 * Falsy entries (undefined, null, false, "") are dropped.
 * Deliberately minimal: intended for the base component only.
 */
export function classNames(...classes) {
    const kept = [];
    for (const entry of classes) {
        if (entry) {
            kept.push(entry);
        }
    }
    return kept.join(" ");
}
55
+ /**
56
+ * Default padding values for citation styling.
57
+ */
58
+ export const CITATION_X_PADDING = 4; // presumably horizontal padding; unit is not shown here — TODO confirm
59
+ export const CITATION_Y_PADDING = 1; // presumably vertical padding; unit is not shown here — TODO confirm
@@ -0,0 +1,11 @@
1
+ export interface PdfSpaceItem extends ScreenBox { // A ScreenBox that can also carry an optional text string.
2
+ text?: string;
3
+ }
4
+ export type IVertex = { // A pair of numeric coordinates.
5
+ x: number;
6
+ y: number;
7
+ };
8
+ export interface ScreenBox extends IVertex { // An IVertex extended with a numeric width and height.
9
+ width: number;
10
+ height: number;
11
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,44 @@
1
+ import { type ScreenBox } from "./boxes";
2
+ import { type FoundHighlightLocation } from "./foundHighlight";
3
+ export declare const VERIFICATION_VERSION_NUMBER = "0.4.37"; // Numbered separately from the package's own "version" field.
4
+ export type OutputImageFormat = "jpeg" | "png" | "avif" | undefined | null; // undefined and null are both accepted values.
5
+ export declare const DEFAULT_OUTPUT_IMAGE_FORMAT: "avif";
6
+ export interface VerifyCitationResponse { // Response payload: a string-keyed map of FoundHighlightLocation entries.
7
+ foundHighlights: {
8
+ [key: string]: FoundHighlightLocation; // presumably keyed like VerifyCitationRequest.citations — TODO confirm
9
+ };
10
+ }
11
+ export interface VerifyCitationRequest { // Request payload: fileId and citations are required, the rest optional.
12
+ fileId: string;
13
+ citations: {
14
+ [key: string]: Citation;
15
+ };
16
+ outputImageFormat?: OutputImageFormat;
17
+ apiKey?: string;
18
+ }
19
+ export interface Citation { // Every field is optional; most also accept null.
20
+ fileId?: string;
21
+ fullPhrase?: string | null;
22
+ value?: string | null;
23
+ startPageKey?: string | null;
24
+ pageNumber?: number | null;
25
+ lineIds?: number[] | null;
26
+ reasoning?: string | null;
27
+ selection?: ScreenBox | null;
28
+ citationNumber?: number;
29
+ timestamps?: { // start/end are strings; their format is not specified here
30
+ endTime?: string;
31
+ startTime?: string;
32
+ };
33
+ fragmentContext?: string | null;
34
+ rawCitationMd?: string;
35
+ beforeCite?: string;
36
+ formFieldName?: string | null;
37
+ formFieldValue?: string | null;
38
+ }
39
+ export interface CitationStatus { // Four required boolean flags; whether they are mutually exclusive is not stated here.
40
+ isVerified: boolean;
41
+ isMiss: boolean;
42
+ isPartialMatch: boolean;
43
+ isPending: boolean;
44
+ }
@@ -0,0 +1,2 @@
1
+ export const VERIFICATION_VERSION_NUMBER = "0.4.37";
2
+ export const DEFAULT_OUTPUT_IMAGE_FORMAT = "avif";
@@ -0,0 +1,23 @@
1
+ import { VERIFICATION_VERSION_NUMBER, type Citation } from "./citation";
2
+ import { type SearchState } from "./search";
3
+ import { type PdfSpaceItem } from "./boxes";
4
+ export declare const NOT_FOUND_HIGHLIGHT_INDEX = -1; // Used as the pageNumber of BLANK_HIGHLIGHT_LOCATION.
5
+ export declare const PENDING_HIGHLIGHT_INDEX = -2; // presumably marks a lookup still in progress — TODO confirm
6
+ export declare const BLANK_HIGHLIGHT_LOCATION: FoundHighlightLocation;
7
+ export declare function deterministicIdFromHighlightLocation(highlightLocation: FoundHighlightLocation): string; // Hash of search term, attachment ID, page number, hit index and match snippet.
8
+ export interface FoundHighlightLocation { // lowerCaseSearchTerm is the only required property.
9
+ regex?: RegExp | null;
10
+ lowerCaseSearchTerm: string | null;
11
+ label?: string | null;
12
+ attachmentId?: string | null;
13
+ pageNumber?: number | null;
14
+ timestamp?: number | null;
15
+ citation?: Citation;
16
+ searchState?: SearchState | null;
17
+ hitIndexWithinPage?: number | null;
18
+ matchSnippet?: string | null;
19
+ pdfSpaceItem?: PdfSpaceItem;
20
+ verificationImageBase64?: string | null;
21
+ source?: typeof VERIFICATION_VERSION_NUMBER | string | null; // the literal type is absorbed by `string`, so this is effectively string | null
22
+ verifiedAt?: Date;
23
+ }
@@ -0,0 +1,22 @@
1
+ import { sha1Hash } from "../utils/sha.js";
2
+ export const NOT_FOUND_HIGHLIGHT_INDEX = -1; // Sentinel used below as the page number of the blank location.
3
+ export const PENDING_HIGHLIGHT_INDEX = -2; // Not referenced elsewhere in this module; presumably marks a pending lookup — TODO confirm.
4
+ export const BLANK_HIGHLIGHT_LOCATION = { // Placeholder location: both pageNumber fields hold the not-found sentinel, everything else is null/undefined.
5
+ pageNumber: NOT_FOUND_HIGHLIGHT_INDEX,
6
+ regex: null,
7
+ lowerCaseSearchTerm: null,
8
+ attachmentId: null,
9
+ matchSnippet: null,
10
+ source: null,
11
+ citation: {
12
+ startPageKey: null,
13
+ lineIds: null,
14
+ pageNumber: NOT_FOUND_HIGHLIGHT_INDEX,
15
+ fileId: undefined,
16
+ fullPhrase: null,
17
+ value: null,
18
+ },
19
+ };
20
/**
 * Derives a deterministic ID for a highlight location.
 *
 * Joins, with "-", the location's lowerCaseSearchTerm, attachmentId,
 * pageNumber, hitIndexWithinPage, matchSnippet and hitIndexWithinPage again,
 * then returns sha1Hash of that string.
 *
 * NOTE(review): hitIndexWithinPage is included twice. The repetition is kept
 * on purpose here, because dropping it would change every generated ID.
 */
export function deterministicIdFromHighlightLocation(highlightLocation) {
    const {
        lowerCaseSearchTerm,
        attachmentId,
        pageNumber,
        hitIndexWithinPage,
        matchSnippet,
    } = highlightLocation;
    const fingerprint = `${lowerCaseSearchTerm}-${attachmentId}-${pageNumber}-${hitIndexWithinPage}-${matchSnippet}-${hitIndexWithinPage}`;
    return sha1Hash(fingerprint);
}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Type definitions for DeepCitation
3
+ *
4
+ * @packageDocumentation
5
+ */
6
+ export type { Citation, CitationStatus, VerifyCitationRequest, VerifyCitationResponse, OutputImageFormat, } from "./citation.js";
7
+ export { VERIFICATION_VERSION_NUMBER, DEFAULT_OUTPUT_IMAGE_FORMAT } from "./citation.js";
8
+ export type { FoundHighlightLocation } from "./foundHighlight.js";
9
+ export { NOT_FOUND_HIGHLIGHT_INDEX, PENDING_HIGHLIGHT_INDEX, BLANK_HIGHLIGHT_LOCATION, deterministicIdFromHighlightLocation, } from "./foundHighlight.js";
10
+ export type { SearchState, SearchStatus } from "./search.js";
11
+ export type { ScreenBox, PdfSpaceItem, IVertex } from "./boxes.js";
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Type definitions for DeepCitation
3
+ *
4
+ * @packageDocumentation
5
+ */
6
+ export { VERIFICATION_VERSION_NUMBER, DEFAULT_OUTPUT_IMAGE_FORMAT } from "./citation.js";
7
+ export { NOT_FOUND_HIGHLIGHT_INDEX, PENDING_HIGHLIGHT_INDEX, BLANK_HIGHLIGHT_LOCATION, deterministicIdFromHighlightLocation, } from "./foundHighlight.js";
@@ -0,0 +1,30 @@
1
+ export type SearchStatus = "loading" | "pending" | "not_found" | "partial_text_found" | "found" | "found_value_only" | "found_phrase_missed_value" | "found_on_other_page" | "found_on_other_line" | "first_word_found" | "timestamp_wip"; // Closed set of status strings carried by SearchState.status.
2
+ export type SearchMethod = "exact_line_match" | "line_with_buffer" | "current_page" | "adjacent_pages" | "expanded_window" | "regex_search" | "bm25_search" | "fuzzy_regex" | "first_word_fallback"; // Closed set of method names carried by SearchAttempt.method.
3
+ export interface SearchAttempt { // method, success and searchPhrases are required; all other fields are optional.
4
+ method: SearchMethod;
5
+ success: boolean;
6
+ searchPhrases: string[];
7
+ pageSearched?: number;
8
+ matchScore?: number; // scale/range is not specified here
9
+ matchSnippet?: string;
10
+ notes?: string;
11
+ durationMs?: number;
12
+ startTime?: number; // numeric here, unlike the string timestamps on SearchState; unit not specified — TODO confirm
13
+ endTime?: number;
14
+ }
15
+ export interface SearchState { // status is the only required field.
16
+ status: SearchStatus;
17
+ expectedPage?: number | null;
18
+ actualPage?: number | null;
19
+ expectedLineIds?: number[] | null;
20
+ actualLineIds?: number[] | null;
21
+ actualTimestamps?: {
22
+ startTime?: string;
23
+ endTime?: string;
24
+ };
25
+ expectedTimestamps?: {
26
+ startTime?: string;
27
+ endTime?: string;
28
+ };
29
+ searchAttempts?: SearchAttempt[];
30
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Pure JavaScript SHA-1 implementation.
3
+ * Based on the FIPS 180-4 specification.
4
+ * No external dependencies.
5
+ */
6
+ /**
7
+ * Computes a SHA-1 hash of the provided data.
8
+ * Used internally by generateCitationKey in react/utils.ts
9
+ */
10
+ export declare function sha1Hash(data: string | any): string; // `string | any` collapses to `any`; the implementation returns "" for falsy input or when hashing throws.
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Pure JavaScript SHA-1 implementation.
3
+ * Based on the FIPS 180-4 specification.
4
+ * No external dependencies.
5
+ */
6
/** Encodes `str` as UTF-8 bytes using a fresh TextEncoder. */
function utf8Encode(str) {
    return new TextEncoder().encode(str);
}
10
+ function sha1(message) {
11
+ // Initial hash values
12
+ let h0 = 0x67452301;
13
+ let h1 = 0xefcdab89;
14
+ let h2 = 0x98badcfe;
15
+ let h3 = 0x10325476;
16
+ let h4 = 0xc3d2e1f0;
17
+ // Pre-processing: adding padding bits
18
+ const msgLen = message.length;
19
+ const bitLen = msgLen * 8;
20
+ // Calculate padded length: message + 1 (0x80) + padding + 8 (length)
21
+ // Total must be multiple of 64 bytes (512 bits)
22
+ const totalLen = msgLen + 1 + 8; // minimum: msg + 0x80 + 64-bit length
23
+ const paddedLen = Math.ceil(totalLen / 64) * 64;
24
+ // Create padded buffer
25
+ const padded = new ArrayBuffer(paddedLen);
26
+ const paddedView = new Uint8Array(padded);
27
+ const dataView = new DataView(padded);
28
+ // Copy message
29
+ paddedView.set(message);
30
+ // Append bit '1' (0x80)
31
+ paddedView[msgLen] = 0x80;
32
+ // Append length as 64-bit big-endian (in bits)
33
+ // High 32 bits (for messages > 512MB, which we don't support)
34
+ dataView.setUint32(paddedLen - 8, Math.floor(bitLen / 0x100000000), false);
35
+ // Low 32 bits
36
+ dataView.setUint32(paddedLen - 4, bitLen >>> 0, false);
37
+ // Process each 512-bit (64-byte) chunk
38
+ const w = new Uint32Array(80);
39
+ for (let offset = 0; offset < paddedLen; offset += 64) {
40
+ // Break chunk into sixteen 32-bit big-endian words
41
+ for (let i = 0; i < 16; i++) {
42
+ w[i] = dataView.getUint32(offset + i * 4, false);
43
+ }
44
+ // Extend the sixteen 32-bit words into eighty 32-bit words
45
+ for (let i = 16; i < 80; i++) {
46
+ const val = w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16];
47
+ w[i] = (val << 1) | (val >>> 31);
48
+ }
49
+ // Initialize working variables
50
+ let a = h0;
51
+ let b = h1;
52
+ let c = h2;
53
+ let d = h3;
54
+ let e = h4;
55
+ // Main loop
56
+ for (let i = 0; i < 80; i++) {
57
+ let f;
58
+ let k;
59
+ if (i < 20) {
60
+ f = (b & c) | (~b & d);
61
+ k = 0x5a827999;
62
+ }
63
+ else if (i < 40) {
64
+ f = b ^ c ^ d;
65
+ k = 0x6ed9eba1;
66
+ }
67
+ else if (i < 60) {
68
+ f = (b & c) | (b & d) | (c & d);
69
+ k = 0x8f1bbcdc;
70
+ }
71
+ else {
72
+ f = b ^ c ^ d;
73
+ k = 0xca62c1d6;
74
+ }
75
+ const temp = (((a << 5) | (a >>> 27)) + f + e + k + w[i]) >>> 0;
76
+ e = d;
77
+ d = c;
78
+ c = ((b << 30) | (b >>> 2)) >>> 0;
79
+ b = a;
80
+ a = temp;
81
+ }
82
+ // Add this chunk's hash to result
83
+ h0 = (h0 + a) >>> 0;
84
+ h1 = (h1 + b) >>> 0;
85
+ h2 = (h2 + c) >>> 0;
86
+ h3 = (h3 + d) >>> 0;
87
+ h4 = (h4 + e) >>> 0;
88
+ }
89
+ // Produce the final hash value (160-bit) as hex string
90
+ const hex = (n) => n.toString(16).padStart(8, "0");
91
+ return hex(h0) + hex(h1) + hex(h2) + hex(h3) + hex(h4);
92
+ }
93
/**
 * Returns the SHA-1 hex digest of `data`.
 *
 * Strings are hashed as-is (UTF-8 encoded); any other value is serialized
 * with JSON.stringify first. Falsy input yields "" without hashing. If
 * serialization or hashing throws, the error is logged with console.error
 * and "" is returned.
 * Used internally by generateCitationKey in react/utils.ts
 */
export function sha1Hash(data) {
    if (!data) {
        return "";
    }
    try {
        const text = typeof data === "string" ? data : JSON.stringify(data);
        const bytes = utf8Encode(text);
        return sha1(bytes);
    }
    catch (error) {
        console.error("Error in making the hash:", error);
        return "";
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@deepcitation/deepcitation-js",
3
- "version": "1.0.2",
3
+ "version": "1.0.3",
4
4
  "description": "DeepCitation JavaScript SDK for deterministic AI citation verification",
5
5
  "type": "module",
6
6
  "private": false,
@@ -13,10 +13,13 @@
13
13
  },
14
14
  "main": "./lib/index.js",
15
15
  "types": "./lib/index.d.ts",
16
+ "sideEffects": [
17
+ "*.css",
18
+ "src/react/styles.css"
19
+ ],
16
20
  "files": [
17
21
  "lib",
18
22
  "src/react/styles.css",
19
- "README.md",
20
23
  "LICENSE"
21
24
  ],
22
25
  "scripts": {