@deepcitation/deepcitation-js 1.1.22 → 1.1.24

@@ -0,0 +1,414 @@
+ /**
+  * Custom diff implementation to replace the 'diff' npm package.
+  * This avoids dependency issues in Firebase Functions environments.
+  *
+  * Implements a Myers diff algorithm with optimizations inspired by jsdiff.
+  * @see https://github.com/kpdecker/jsdiff
+  *
+  * ---
+  *
+  * BSD 3-Clause License
+  *
+  * Copyright (c) 2009-2015, Kevin Decker <kpdecker@gmail.com>
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions are met:
+  *
+  * 1. Redistributions of source code must retain the above copyright notice, this
+  *    list of conditions and the following disclaimer.
+  *
+  * 2. Redistributions in binary form must reproduce the above copyright notice,
+  *    this list of conditions and the following disclaimer in the documentation
+  *    and/or other materials provided with the distribution.
+  *
+  * 3. Neither the name of the copyright holder nor the names of its
+  *    contributors may be used to endorse or promote products derived from
+  *    this software without specific prior written permission.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ /**
+  * Myers diff algorithm with diagonal pruning optimization.
+  * This reduces complexity from O(n+d²) to O(n+d) for common cases like appending text.
+  *
+  * @see https://blog.jcoglan.com/2017/02/12/the-myers-diff-algorithm-part-1/
+  */
+ function computeDiff(oldTokens, newTokens, equals = (a, b) => a === b) {
+     const oldLen = oldTokens.length;
+     const newLen = newTokens.length;
+     // Handle edge cases
+     if (oldLen === 0 && newLen === 0) {
+         return [];
+     }
+     // Quick path for completely new content
+     if (oldLen === 0) {
+         return [{ value: newTokens.join(""), added: true, count: newTokens.length }];
+     }
+     // Quick path for completely removed content
+     if (newLen === 0) {
+         return [
+             { value: oldTokens.join(""), removed: true, count: oldTokens.length },
+         ];
+     }
+     // Find common prefix
+     let commonPrefixLen = 0;
+     while (commonPrefixLen < oldLen &&
+         commonPrefixLen < newLen &&
+         equals(oldTokens[commonPrefixLen], newTokens[commonPrefixLen])) {
+         commonPrefixLen++;
+     }
+     // Find common suffix (but don't overlap with prefix)
+     let commonSuffixLen = 0;
+     while (commonSuffixLen < oldLen - commonPrefixLen &&
+         commonSuffixLen < newLen - commonPrefixLen &&
+         equals(oldTokens[oldLen - 1 - commonSuffixLen], newTokens[newLen - 1 - commonSuffixLen])) {
+         commonSuffixLen++;
+     }
+     // Extract the differing middle portions
+     const oldMiddle = oldTokens.slice(commonPrefixLen, oldLen - commonSuffixLen);
+     const newMiddle = newTokens.slice(commonPrefixLen, newLen - commonSuffixLen);
+     // If middles are empty, we only have common prefix/suffix
+     if (oldMiddle.length === 0 && newMiddle.length === 0) {
+         return [{ value: oldTokens.join(""), count: oldTokens.length }];
+     }
+     // Compute diff on the middle portion using Myers algorithm
+     const middleDiff = myersDiff(oldMiddle, newMiddle, equals);
+     // Build result with prefix, middle diff, and suffix
+     const result = [];
+     if (commonPrefixLen > 0) {
+         result.push({
+             value: oldTokens.slice(0, commonPrefixLen).join(""),
+             count: commonPrefixLen,
+         });
+     }
+     result.push(...middleDiff);
+     if (commonSuffixLen > 0) {
+         result.push({
+             value: oldTokens.slice(oldLen - commonSuffixLen).join(""),
+             count: commonSuffixLen,
+         });
+     }
+     return mergeConsecutiveChanges(result);
+ }
+ /**
+  * Myers diff algorithm implementation.
+  * Uses the "middle snake" approach for better memory efficiency.
+  */
+ function myersDiff(oldTokens, newTokens, equals) {
+     const oldLen = oldTokens.length;
+     const newLen = newTokens.length;
+     const maxD = oldLen + newLen;
+     // V array indexed by k = x - y (diagonal)
+     // We use an object to handle negative indices
+     const v = { 1: 0 };
+     // Store the path for backtracking
+     const trace = [];
+     // Iterate through edit distances
+     outer: for (let d = 0; d <= maxD; d++) {
+         trace.push({ ...v });
+         // Iterate through diagonals
+         for (let k = -d; k <= d; k += 2) {
+             // Decide whether to go down or right
+             let x;
+             if (k === -d || (k !== d && v[k - 1] < v[k + 1])) {
+                 x = v[k + 1]; // Move down (insert)
+             }
+             else {
+                 x = v[k - 1] + 1; // Move right (delete)
+             }
+             let y = x - k;
+             // Follow diagonal (matches)
+             while (x < oldLen && y < newLen && equals(oldTokens[x], newTokens[y])) {
+                 x++;
+                 y++;
+             }
+             v[k] = x;
+             // Check if we've reached the end
+             if (x >= oldLen && y >= newLen) {
+                 break outer;
+             }
+         }
+     }
+     // Backtrack to build the diff
+     return backtrack(trace, oldTokens, newTokens);
+ }
+ /**
+  * Backtrack through the trace to build the diff result.
+  */
+ function backtrack(trace, oldTokens, newTokens) {
+     const changes = [];
+     let x = oldTokens.length;
+     let y = newTokens.length;
+     for (let d = trace.length - 1; d >= 0; d--) {
+         const v = trace[d];
+         const k = x - y;
+         let prevK;
+         if (k === -d || (k !== d && v[k - 1] < v[k + 1])) {
+             prevK = k + 1;
+         }
+         else {
+             prevK = k - 1;
+         }
+         const prevX = v[prevK] ?? 0;
+         const prevY = prevX - prevK;
+         // Add diagonal matches (unchanged)
+         while (x > prevX && y > prevY) {
+             x--;
+             y--;
+             changes.unshift({ value: oldTokens[x], count: 1 });
+         }
+         if (d > 0) {
+             if (x === prevX) {
+                 // Insertion (went down)
+                 y--;
+                 changes.unshift({ value: newTokens[y], added: true, count: 1 });
+             }
+             else {
+                 // Deletion (went right)
+                 x--;
+                 changes.unshift({ value: oldTokens[x], removed: true, count: 1 });
+             }
+         }
+     }
+     return changes;
+ }
+ /**
+  * Merge consecutive changes of the same type.
+  */
+ function mergeConsecutiveChanges(changes) {
+     if (changes.length === 0)
+         return [];
+     const result = [];
+     for (const change of changes) {
+         const last = result[result.length - 1];
+         if (last &&
+             last.added === change.added &&
+             last.removed === change.removed) {
+             last.value += change.value;
+             last.count = (last.count || 1) + (change.count || 1);
+         }
+         else {
+             result.push({ ...change });
+         }
+     }
+     return result;
+ }
+ /**
+  * Split text into lines, preserving line endings.
+  * Handles both Unix (\n) and Windows (\r\n) line endings.
+  */
+ function splitLines(text) {
+     if (!text)
+         return [];
+     const lines = [];
+     let current = "";
+     for (let i = 0; i < text.length; i++) {
+         const char = text[i];
+         current += char;
+         if (char === "\n") {
+             lines.push(current);
+             current = "";
+         }
+     }
+     // Don't forget the last line if it doesn't end with newline
+     if (current.length > 0) {
+         lines.push(current);
+     }
+     return lines;
+ }
+ /**
+  * Extended word character class - matches jsdiff's extendedWordChars.
+  * Includes: a-zA-Z0-9_, soft hyphen, Latin Extended-A/B, IPA Extensions,
+  * Spacing Modifier Letters, and Latin Extended Additional.
+  *
+  * @see https://github.com/kpdecker/jsdiff/blob/master/src/diff/word.ts
+  */
+ const EXTENDED_WORD_CHARS = "a-zA-Z0-9_\\u00AD\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02C6\\u02C8-\\u02D7\\u02DE-\\u02FF\\u1E00-\\u1EFF";
+ /**
+  * Tokenization regex matching jsdiff's approach.
+  * Matches: word character runs, whitespace runs, or single non-word chars.
+  */
+ const TOKENIZE_REGEX = new RegExp(`[${EXTENDED_WORD_CHARS}]+|\\s+|[^${EXTENDED_WORD_CHARS}]`, "gu");
+ /**
+  * Split text into tokens using jsdiff's tokenization approach.
+  * Each token is one of:
+  * - A word (extended word characters)
+  * - A whitespace run
+  * - A single punctuation/symbol character
+  */
+ function tokenizeWords(text) {
+     if (!text)
+         return [];
+     return text.match(TOKENIZE_REGEX) || [];
+ }
+ /**
+  * Find the longest common prefix between two strings.
+  */
+ function longestCommonPrefix(a, b) {
+     let i = 0;
+     while (i < a.length && i < b.length && a[i] === b[i]) {
+         i++;
+     }
+     return a.slice(0, i);
+ }
+ /**
+  * Find the longest common suffix between two strings.
+  */
+ function longestCommonSuffix(a, b) {
+     let i = 0;
+     while (i < a.length &&
+         i < b.length &&
+         a[a.length - 1 - i] === b[b.length - 1 - i]) {
+         i++;
+     }
+     return a.slice(a.length - i);
+ }
+ /**
+  * Check if a string is only whitespace.
+  */
+ function isWhitespace(str) {
+     return /^\s*$/.test(str);
+ }
+ /**
+  * Deduplicate whitespace in change objects.
+  * This is a simplified version of jsdiff's dedupeWhitespaceInChangeObjects.
+  *
+  * Handles three main scenarios:
+  * 1. Deletion followed by insertion - extract common leading/trailing whitespace
+  * 2. Lone insertion after unchanged - strip duplicate leading whitespace
+  * 3. Lone deletion between unchanged - distribute whitespace properly
+  */
+ function dedupeWhitespaceInChangeObjects(changes) {
+     const result = [];
+     for (let i = 0; i < changes.length; i++) {
+         const change = changes[i];
+         // Scenario 1: Deletion followed by insertion
+         if (change.removed && changes[i + 1]?.added) {
+             const deletion = change;
+             const insertion = changes[i + 1];
+             // Find common prefix (must be whitespace)
+             const commonPrefix = longestCommonPrefix(deletion.value, insertion.value);
+             const wsPrefix = commonPrefix.match(/^\s*/)?.[0] || "";
+             // Find common suffix (must be whitespace)
+             const delWithoutPrefix = deletion.value.slice(wsPrefix.length);
+             const insWithoutPrefix = insertion.value.slice(wsPrefix.length);
+             const commonSuffix = longestCommonSuffix(delWithoutPrefix, insWithoutPrefix);
+             const wsSuffix = commonSuffix.match(/\s*$/)?.[0] || "";
+             // Build the cleaned changes
+             if (wsPrefix) {
+                 result.push({ value: wsPrefix, count: 1 });
+             }
+             const cleanedDel = deletion.value.slice(wsPrefix.length, deletion.value.length - wsSuffix.length);
+             const cleanedIns = insertion.value.slice(wsPrefix.length, insertion.value.length - wsSuffix.length);
+             if (cleanedDel) {
+                 result.push({ value: cleanedDel, removed: true, count: 1 });
+             }
+             if (cleanedIns) {
+                 result.push({ value: cleanedIns, added: true, count: 1 });
+             }
+             if (wsSuffix) {
+                 result.push({ value: wsSuffix, count: 1 });
+             }
+             i++; // Skip the insertion since we processed it
+             continue;
+         }
+         // Scenario 2: Lone insertion after unchanged text
+         if (change.added && i > 0 && !changes[i - 1].added && !changes[i - 1].removed) {
+             const prev = result[result.length - 1];
+             if (prev && !prev.added && !prev.removed) {
+                 // Check for duplicate leading whitespace
+                 const leadingWs = change.value.match(/^\s*/)?.[0] || "";
+                 const trailingWs = prev.value.match(/\s*$/)?.[0] || "";
+                 if (leadingWs && trailingWs) {
+                     const overlap = longestCommonSuffix(trailingWs, leadingWs);
+                     if (overlap) {
+                         // Remove overlap from the insertion
+                         result.push({
+                             value: change.value.slice(overlap.length),
+                             added: true,
+                             count: 1,
+                         });
+                         continue;
+                     }
+                 }
+             }
+         }
+         // Scenario 3: Lone deletion between unchanged text
+         if (change.removed &&
+             !changes[i + 1]?.added &&
+             i > 0 &&
+             !changes[i - 1]?.added &&
+             !changes[i - 1]?.removed) {
+             const prev = result[result.length - 1];
+             const next = changes[i + 1];
+             if (prev && next && !next.added && !next.removed) {
+                 const leadingWs = change.value.match(/^\s*/)?.[0] || "";
+                 const trailingWs = change.value.match(/\s*$/)?.[0] || "";
+                 const prevTrailingWs = prev.value.match(/\s*$/)?.[0] || "";
+                 const nextLeadingWs = next.value.match(/^\s*/)?.[0] || "";
+                 // If deletion starts/ends with whitespace that overlaps with neighbors
+                 if (leadingWs && prevTrailingWs) {
+                     const overlap = longestCommonSuffix(prevTrailingWs, leadingWs);
+                     if (overlap.length === leadingWs.length) {
+                         // Leading whitespace is already in prev, strip it
+                         result.push({
+                             value: change.value.slice(leadingWs.length),
+                             removed: true,
+                             count: 1,
+                         });
+                         continue;
+                     }
+                 }
+                 if (trailingWs && nextLeadingWs) {
+                     const overlap = longestCommonPrefix(trailingWs, nextLeadingWs);
+                     if (overlap.length === trailingWs.length) {
+                         // Trailing whitespace will be in next, strip it
+                         result.push({
+                             value: change.value.slice(0, -trailingWs.length) || change.value,
+                             removed: true,
+                             count: 1,
+                         });
+                         continue;
+                     }
+                 }
+             }
+         }
+         // Default: just add the change as-is
+         result.push({ ...change });
+     }
+     return mergeConsecutiveChanges(result);
+ }
+ /**
+  * Compare two strings line by line.
+  * Similar to Diff.diffLines from the 'diff' package.
+  */
+ export function diffLines(oldStr, newStr) {
+     const oldLines = splitLines(oldStr);
+     const newLines = splitLines(newStr);
+     return computeDiff(oldLines, newLines);
+ }
+ /**
+  * Compare two strings word by word, preserving whitespace.
+  * Similar to Diff.diffWordsWithSpace from the 'diff' package.
+  *
+  * Features matching jsdiff:
+  * - Extended Unicode word character support
+  * - Proper tokenization (words, whitespace runs, single punctuation)
+  * - Whitespace deduplication in consecutive changes
+  */
+ export function diffWordsWithSpace(oldStr, newStr) {
+     const oldWords = tokenizeWords(oldStr);
+     const newWords = tokenizeWords(newStr);
+     const diff = computeDiff(oldWords, newWords);
+     return dedupeWhitespaceInChangeObjects(diff);
+ }
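
The two exports above are intended as replacements for Diff.diffLines and Diff.diffWordsWithSpace from the removed 'diff' dependency (see the package.json changes below). A minimal usage sketch, not part of the published file and using a hypothetical import path, based on the change-object shape this file produces (value, optional added/removed flags, and a count):

// Minimal usage sketch; the module path is hypothetical.
import { diffLines, diffWordsWithSpace } from "./diff.js";

const before = "The quick brown fox\njumps over the dog\n";
const after = "The quick red fox\njumps over the lazy dog\n";

// Word-level diff: whitespace runs are kept as their own tokens.
for (const part of diffWordsWithSpace(before, after)) {
    const marker = part.added ? "+" : part.removed ? "-" : " ";
    console.log(marker, JSON.stringify(part.value));
}

// Line-level diff: each value keeps its trailing newline (see splitLines above).
for (const part of diffLines(before, after)) {
    const marker = part.added ? "+" : part.removed ? "-" : " ";
    console.log(marker, JSON.stringify(part.value));
}

Unchanged and removed values are taken from the old string, and added values from the new string, matching the jsdiff convention.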
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@deepcitation/deepcitation-js",
-   "version": "1.1.22",
+   "version": "1.1.24",
    "description": "DeepCitation JavaScript SDK for deterministic AI citation verification",
    "type": "module",
    "private": false,
@@ -13,15 +13,13 @@
    },
    "main": "./lib/index.js",
    "types": "./lib/index.d.ts",
-   "sideEffects": [
-     "*.css"
-   ],
+   "sideEffects": false,
    "files": [
      "lib",
      "LICENSE"
    ],
    "scripts": {
-     "build": "rimraf lib && tsc && cp src/react/styles.css lib/react/styles.css",
+     "build": "rimraf lib && tsc",
      "build:watch": "rimraf lib && tsc --watch",
      "test": "bun test ./src/__tests__/*.test.ts ./src/__tests__/*.test.tsx",
      "test:jest": "jest",
@@ -33,15 +31,20 @@
    "engines": {
      "node": ">=22"
    },
-   "dependencies": {
-     "diff": "^8.0.2"
-   },
    "peerDependencies": {
-     "react": ">=17.0.0"
+     "react": ">=17.0.0",
+     "react-dom": ">=17.0.0",
+     "@radix-ui/react-popover": "^1.0.0"
    },
    "peerDependenciesMeta": {
      "react": {
        "optional": true
+     },
+     "react-dom": {
+       "optional": true
+     },
+     "@radix-ui/react-popover": {
+       "optional": true
      }
    },
    "devDependencies": {
@@ -59,6 +62,7 @@
      "happy-dom": "^20.0.11",
      "jest": "^29.7.0",
      "jest-environment-jsdom": "^29.7.0",
+     "@radix-ui/react-popover": "^1.1.14",
      "react": "19.2.3",
      "react-dom": "19.2.3",
      "rimraf": "^5.0.5",
@@ -76,8 +80,7 @@
        "types": "./lib/react/index.d.ts",
        "import": "./lib/react/index.js",
        "require": "./lib/react/index.js"
-     },
-     "./react/styles.css": "./lib/react/styles.css"
+     }
    },
    "keywords": [
      "citation",