zotero-bridge 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/database.ts CHANGED
@@ -1409,4 +1409,339 @@ export class ZoteroDatabase {
1409
1409
  run(sql: string, params: any[] = []): { changes: number; lastInsertRowid: number } {
1410
1410
  return this.execute(sql, params);
1411
1411
  }
1412
+
1413
+ // ============================================
1414
+ // Duplicate Detection & Attachment Validation
1415
+ // ============================================
1416
+
1417
/**
 * Find groups of items in one library that share the same title, DOI, or ISBN.
 *
 * Matching rules per field:
 * - `title`: exact value match; note and attachment items are excluded.
 * - `doi`:   case-insensitive match; empty values are ignored.
 * - `isbn`:  match after removing hyphens and spaces; empty values are ignored.
 *
 * @param field - Field to compare. Any value other than `'title'` or `'doi'`
 *   is handled as `'isbn'`.
 * @param libraryID - Library whose items are compared (defaults to 1).
 * @returns One row per duplicate group, ordered by `count` descending. Each row
 *   carries the matched value under a key named after the field (`title`,
 *   `doi` or `isbn`), `itemIDs` (the GROUP_CONCAT of the item IDs in the
 *   group) and `count`.
 */
findDuplicates(field: 'title' | 'doi' | 'isbn' = 'title', libraryID: number = 1): any[] {
  // The three lookups differ only in the field name, the grouping expression
  // and one extra filter, so they are described as data and share one query.
  const rules = {
    title: {
      fieldName: 'title',
      groupBy: 'iv.value',
      // Resolve note/attachment type IDs by name: the numeric itemTypeID
      // values are not guaranteed to be the same in every Zotero database.
      extraFilter: `AND i.itemTypeID NOT IN (
          SELECT itemTypeID FROM itemTypes WHERE typeName IN ('note', 'attachment')
        )`
    },
    doi: {
      fieldName: 'DOI',
      groupBy: 'LOWER(iv.value)',
      extraFilter: `AND iv.value != ''`
    },
    isbn: {
      fieldName: 'ISBN',
      groupBy: `REPLACE(REPLACE(iv.value, '-', ''), ' ', '')`,
      extraFilter: `AND iv.value != ''`
    }
  };

  // Anything that is not 'title' or 'doi' falls through to the ISBN rule.
  const column: 'title' | 'doi' | 'isbn' =
    field === 'title' || field === 'doi' ? field : 'isbn';
  const rule = rules[column];

  // Only fixed, in-code fragments are interpolated into the SQL text;
  // caller-supplied values are always bound as parameters.
  return this.queryAll(`
    SELECT
      iv.value as ${column},
      GROUP_CONCAT(i.itemID) as itemIDs,
      COUNT(*) as count
    FROM items i
    JOIN itemData id ON i.itemID = id.itemID
    JOIN itemDataValues iv ON id.valueID = iv.valueID
    JOIN fields f ON id.fieldID = f.fieldID
    WHERE f.fieldName = ?
      AND i.libraryID = ?
      ${rule.extraFilter}
    GROUP BY ${rule.groupBy}
    HAVING COUNT(*) > 1
    ORDER BY count DESC
  `, [rule.fieldName, libraryID]);
}
1475
+
1476
/**
 * Check whether attachment files are present on disk.
 *
 * - With a truthy `itemID`, the attachments whose parent is that item are checked.
 * - Otherwise, with `checkAll` set, at most 1000 attachment rows that have a
 *   non-null path are checked.
 * - With neither, nothing is queried and an empty result is returned.
 *
 * @param itemID - Parent item whose child attachments should be checked.
 * @param checkAll - Check attachments across the database when no `itemID` is given.
 * @returns `valid` and `missing` hold the attachment rows, each extended with
 *   `fullPath` (as resolved by `getAttachmentPath`) and an `exists` flag;
 *   `total` is the number of rows examined.
 */
validateAttachments(itemID?: number, checkAll: boolean = false): {
  valid: any[];
  missing: any[];
  total: number;
} {
  // Nothing was asked for: skip the database entirely.
  if (!itemID && !checkAll) {
    return { valid: [], missing: [], total: 0 };
  }

  const rows: any[] = itemID
    ? this.queryAll(`
        SELECT ia.itemID, ia.parentItemID, ia.path, ia.contentType, i.key
        FROM itemAttachments ia
        JOIN items i ON ia.itemID = i.itemID
        WHERE ia.parentItemID = ? AND ia.path IS NOT NULL
      `, [itemID])
    : this.queryAll(`
        SELECT ia.itemID, ia.parentItemID, ia.path, ia.contentType, i.key
        FROM itemAttachments ia
        JOIN items i ON ia.itemID = i.itemID
        WHERE ia.path IS NOT NULL
        LIMIT 1000
      `);

  const present: any[] = [];
  const absent: any[] = [];

  for (const row of rows) {
    const resolved = this.getAttachmentPath(row.itemID);
    // An unresolvable path is reported as missing without touching the disk.
    const onDisk = resolved ? existsSync(resolved) : false;
    const bucket = onDisk ? present : absent;
    bucket.push({ ...row, fullPath: resolved, exists: onDisk });
  }

  return { valid: present, missing: absent, total: rows.length };
}
1531
+
1532
/**
 * Return the first attachment of an item whose file is actually on disk.
 *
 * Helpful when an item has several attachment records of the same content
 * type but only one of them still has a file behind it.
 *
 * @param parentItemID - Item whose child attachments are inspected.
 * @param contentType - Content type the attachment must have
 *   (defaults to `'application/pdf'`).
 * @returns The attachment row extended with `fullPath` and `exists: true`,
 *   or `null` when no candidate has an existing file.
 */
getValidAttachment(parentItemID: number, contentType: string = 'application/pdf'): any | null {
  const candidates = this.queryAll(`
    SELECT ia.itemID, ia.path, ia.contentType, i.key
    FROM itemAttachments ia
    JOIN items i ON ia.itemID = i.itemID
    WHERE ia.parentItemID = ?
      AND ia.contentType = ?
      AND ia.path IS NOT NULL
  `, [parentItemID, contentType]);

  // Walk the candidates in query order and stop at the first file that exists.
  for (let idx = 0; idx < candidates.length; idx++) {
    const candidate = candidates[idx];
    const location = this.getAttachmentPath(candidate.itemID);
    if (!location || !existsSync(location)) {
      continue;
    }
    return { ...candidate, fullPath: location, exists: true };
  }

  return null;
}
1560
+
1561
/**
 * Look items up by DOI or title and report whether each has a PDF on disk.
 *
 * `doi` takes precedence over `title`; when neither is given the result is
 * empty. DOI lookups are exact and case-insensitive; title lookups are
 * substring matches (SQL `LIKE`) capped at 50 rows.
 *
 * @param options.title - Text to look for inside item titles.
 * @param options.doi - DOI to match. Surrounding whitespace, a leading
 *   `http(s)://doi.org/` or `http(s)://dx.doi.org/` resolver URL, and a
 *   leading `doi:` label are stripped before matching.
 * @param options.requireValidPDF - When truthy, only items that have an
 *   existing PDF are returned.
 * @returns When `requireValidPDF` is falsy: one entry per match, made of the
 *   item details plus a `hasValidPDF` flag. When truthy: only items with an
 *   existing PDF, made of the item details plus the `validAttachment` record.
 */
findItemsWithValidPDF(options: {
  title?: string;
  doi?: string;
  requireValidPDF?: boolean;
}): any[] {
  let items: any[];

  if (options.doi) {
    // Trim BEFORE stripping prefixes: the prefix patterns are anchored at the
    // start of the string, so leading whitespace would otherwise leave the
    // resolver URL in place and the lookup would never match.
    const normalizedDOI = options.doi
      .trim()
      .replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, '')
      .replace(/^doi:/i, '')
      .trim(); // covers "doi: 10.1234/abc"

    items = this.queryAll(`
      SELECT DISTINCT i.itemID, i.key, i.dateAdded, iv.value as doi
      FROM items i
      JOIN itemData id ON i.itemID = id.itemID
      JOIN itemDataValues iv ON id.valueID = iv.valueID
      JOIN fields f ON id.fieldID = f.fieldID
      WHERE f.fieldName = 'DOI' AND LOWER(iv.value) = LOWER(?)
    `, [normalizedDOI]);
  } else if (options.title) {
    items = this.queryAll(`
      SELECT DISTINCT i.itemID, i.key, i.dateAdded, iv.value as title
      FROM items i
      JOIN itemData id ON i.itemID = id.itemID
      JOIN itemDataValues iv ON id.valueID = iv.valueID
      JOIN fields f ON id.fieldID = f.fieldID
      WHERE f.fieldName = 'title' AND iv.value LIKE ?
      LIMIT 50
    `, [`%${options.title}%`]);
  } else {
    // No search criterion supplied.
    return [];
  }

  if (!options.requireValidPDF) {
    // Keep every match and merely annotate it with the PDF check result.
    return items.map(item => ({
      ...this.getItemDetails(item.itemID),
      hasValidPDF: this.getValidAttachment(item.itemID) !== null
    }));
  }

  // Keep only the matches that have a PDF file on disk.
  const results: any[] = [];
  for (const item of items) {
    const validAttachment = this.getValidAttachment(item.itemID);
    if (validAttachment) {
      results.push({
        ...this.getItemDetails(item.itemID),
        validAttachment
      });
    }
  }
  return results;
}
1617
+
1618
/**
 * List attachment records whose file cannot be found on disk.
 *
 * Only attachments whose stored path starts with `storage:` are considered.
 *
 * @param limit - Maximum number of attachment rows to scan (defaults to 100).
 *   The cap is applied in the SQL query, before the disk check, so the number
 *   of orphans returned can be smaller than `limit`.
 * @returns One entry per orphan with `itemID`, `parentItemID`, `key`, the
 *   stored `path`, the resolved `expectedPath`, and a `reason`:
 *   `'invalid_path'` when no path could be resolved, `'file_not_found'`
 *   when the resolved path does not exist.
 */
findOrphanAttachments(limit: number = 100): any[] {
  // parentExists is selected by the query but is not part of the returned entries.
  const rows = this.queryAll(`
    SELECT ia.itemID, ia.parentItemID, ia.path, ia.contentType, i.key,
      parent.itemID as parentExists
    FROM itemAttachments ia
    JOIN items i ON ia.itemID = i.itemID
    LEFT JOIN items parent ON ia.parentItemID = parent.itemID
    WHERE ia.path LIKE 'storage:%'
    LIMIT ?
  `, [limit]);

  const found: any[] = [];

  for (const row of rows) {
    const expected = this.getAttachmentPath(row.itemID);

    // The file is where it should be: not an orphan.
    if (expected && existsSync(expected)) {
      continue;
    }

    const why = expected ? 'file_not_found' : 'invalid_path';
    found.push({
      itemID: row.itemID,
      parentItemID: row.parentItemID,
      key: row.key,
      path: row.path,
      expectedPath: expected,
      reason: why
    });
  }

  return found;
}
1650
+
1651
/**
 * Remove the database records of orphan attachments (use with caution!).
 *
 * Scans up to 500 attachment rows via `findOrphanAttachments`. Nothing is
 * deleted when `dryRun` is true or the database is read-only; in both cases
 * the result reports `dryRun: true`.
 *
 * @param dryRun - When true (the default), only report what would be deleted.
 * @returns `orphans` is the full list that was found (including any whose
 *   deletion failed), `deleted` is the number of orphans removed, and
 *   `dryRun` tells whether deletion was skipped.
 */
deleteOrphanAttachments(dryRun: boolean = true): {
  orphans: any[];
  deleted: number;
  dryRun: boolean;
} {
  const orphans = this.findOrphanAttachments(500);

  const writesAllowed = !dryRun && !this.readonly;
  if (!writesAllowed) {
    return { orphans, deleted: 0, dryRun: true };
  }

  let removed = 0;
  orphans.forEach((entry) => {
    try {
      // NOTE(review): these two statements are not visibly wrapped in a
      // transaction — if the second one throws, the itemAttachments row is
      // already gone. Confirm how execute() handles atomicity.
      this.execute('DELETE FROM itemAttachments WHERE itemID = ?', [entry.itemID]);
      this.execute('DELETE FROM items WHERE itemID = ?', [entry.itemID]);
      removed += 1;
    } catch (error) {
      // Best effort: log the failure and carry on with the remaining orphans.
      console.error(`Failed to delete orphan ${entry.itemID}:`, error);
    }
  });

  return { orphans, deleted: removed, dryRun: false };
}
1686
+
1687
/**
 * Merge items by copying notes and tags from source items onto a target item.
 *
 * Each distinct source ID is processed once; the target's own ID is skipped
 * if it appears among the sources. Notes are re-created on the target with a
 * `[Merged] ` title prefix. This method itself does not delete the source
 * items. A failure while copying notes or tags from one source is recorded
 * in `errors` and does not stop the remaining work.
 *
 * @param targetItemID - Item that receives the notes and tags.
 * @param sourceItemIDs - Items whose notes and tags are copied. Repeated IDs
 *   are ignored after their first occurrence.
 * @returns `success` is true only when no error was recorded; `transferred`
 *   counts the notes and tags passed to the target; `errors` lists one
 *   message per failed step. When the target does not exist, nothing is
 *   copied and `errors` is `['Target item not found']`.
 */
mergeItems(targetItemID: number, sourceItemIDs: number[]): {
  success: boolean;
  transferred: {
    notes: number;
    tags: number;
  };
  errors: string[];
} {
  const errors: string[] = [];
  let notesTransferred = 0;
  let tagsTransferred = 0;

  // Verify target exists
  const target = this.getItemDetails(targetItemID);
  if (!target) {
    return {
      success: false,
      transferred: { notes: 0, tags: 0 },
      errors: ['Target item not found']
    };
  }

  // De-duplicate first: a source listed twice would otherwise have its notes
  // copied twice onto the target and be counted twice in the totals.
  const uniqueSourceIDs = Array.from(new Set(sourceItemIDs));

  for (const sourceID of uniqueSourceIDs) {
    // Merging an item into itself is a no-op.
    if (sourceID === targetItemID) continue;

    // Transfer notes
    try {
      const notes = this.getItemNotes(sourceID);
      for (const note of notes) {
        this.addItemNote(targetItemID, note.note, `[Merged] ${note.title || ''}`);
        notesTransferred++;
      }
    } catch (error) {
      errors.push(`Failed to transfer notes from ${sourceID}: ${error}`);
    }

    // Transfer tags (independently of notes, so one failing does not block the other)
    try {
      const tags = this.getItemTags(sourceID);
      for (const tag of tags) {
        this.addTagToItem(targetItemID, tag.name, tag.type);
        tagsTransferred++;
      }
    } catch (error) {
      errors.push(`Failed to transfer tags from ${sourceID}: ${error}`);
    }
  }

  return {
    success: errors.length === 0,
    transferred: {
      notes: notesTransferred,
      tags: tagsTransferred
    },
    errors
  };
}
1412
1747
  }