hazo_files 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -621,7 +621,7 @@ var LocalStorageModule = class extends BaseStorageModule {
621
621
  await super.initialize(config);
622
622
  const localConfig = this.getProviderConfig();
623
623
  this.basePath = path2.resolve(localConfig.basePath);
624
- this.allowedExtensions = localConfig.allowedExtensions || [];
624
+ this.allowedExtensions = (localConfig.allowedExtensions || []).map((ext) => ext.trim().replace(/^\./, "").toLowerCase()).filter((ext) => ext.length > 0);
625
625
  this.maxFileSize = localConfig.maxFileSize || 0;
626
626
  await fs2.promises.mkdir(this.basePath, { recursive: true });
627
627
  }
@@ -2112,6 +2112,70 @@ function updateExtractionById(fileData, id, newData, options = {}) {
2112
2112
  };
2113
2113
  }
2114
2114
 
2115
+ // src/common/ref-utils.ts
2116
/**
 * Build a reference identifier of the form `ref_<epoch-ms>_<base-36 suffix>`.
 *
 * The suffix is taken from `Math.random()`, so it is not cryptographically
 * strong and can occasionally be shorter than nine characters.
 *
 * @returns {string} A new reference id.
 */
function generateRefId() {
  const timestampPart = Date.now();
  const randomPart = Math.random().toString(36).substring(2, 11);
  return "ref_" + timestampPart + "_" + randomPart;
}
2119
/**
 * Parse the JSON text stored in a record's `file_refs` field.
 *
 * Never throws: empty input, malformed JSON and non-array JSON all yield `[]`.
 * Array entries that are not plain objects (null, primitives, nested arrays)
 * are dropped, because callers read properties such as `ref_id` and
 * `entity_type` from every entry and would throw on a `null` element.
 *
 * @param json Serialized reference array, possibly null, undefined or empty.
 * @returns The object entries of the parsed array, or an empty array.
 */
function parseFileRefs(json) {
  if (!json) return [];
  let parsed;
  try {
    parsed = JSON.parse(json);
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  return parsed.filter(
    (entry) => entry !== null && typeof entry === "object" && !Array.isArray(entry)
  );
}
2129
/**
 * Serialize a reference array to the JSON text kept in `file_refs`.
 *
 * @param refs Reference objects to serialize.
 * @returns The `JSON.stringify` output for `refs`.
 */
function stringifyFileRefs(refs) {
  const serialized = JSON.stringify(refs);
  return serialized;
}
2132
/**
 * Create a new reference object from caller-supplied options.
 *
 * `ref_id` comes from generateRefId() and `created_at` is the current time as
 * an ISO string. `created_by`, `visibility`, `label` and `metadata` are only
 * copied onto the result when the corresponding option is truthy.
 *
 * @param options Object carrying `entity_type`, `entity_id` and the optional fields above.
 * @returns The newly built reference object.
 */
function createFileRef(options) {
  const ref_id = generateRefId();
  const created_at = (/* @__PURE__ */ new Date()).toISOString();
  return {
    ref_id,
    entity_type: options.entity_type,
    entity_id: options.entity_id,
    created_at,
    ...(options.created_by ? { created_by: options.created_by } : {}),
    ...(options.visibility ? { visibility: options.visibility } : {}),
    ...(options.label ? { label: options.label } : {}),
    ...(options.metadata ? { metadata: options.metadata } : {})
  };
}
2145
/**
 * Return a copy of `refs` without the entries whose `ref_id` equals `refId`.
 * The input array is not modified.
 *
 * @param refs Reference objects to filter.
 * @param refId Identifier of the reference to drop.
 * @returns A new array holding every other reference, in the original order.
 */
function removeRefFromArray(refs, refId) {
  const isDifferentRef = (candidate) => candidate.ref_id !== refId;
  return refs.filter(isDifferentRef);
}
2148
/**
 * Return a copy of `refs` without the entries that match `criteria`.
 *
 * A reference matches when it agrees with every criterion that is set
 * (`entity_type` and/or `entity_id`). When neither criterion is set, nothing
 * matches and every reference is kept.
 *
 * @param refs Reference objects to filter.
 * @param criteria Object with optional `entity_type` and `entity_id`.
 * @returns A new array with the matching references removed.
 */
function removeRefsByCriteriaFromArray(refs, criteria) {
  const wantedType = criteria.entity_type;
  const wantedId = criteria.entity_id;
  const hasCriteria = Boolean(wantedType || wantedId);
  const matchesAll = (ref) =>
    (!wantedType || ref.entity_type === wantedType) &&
    (!wantedId || ref.entity_id === wantedId);
  return refs.filter((ref) => !(hasCriteria && matchesAll(ref)));
}
2155
/**
 * Return a copy of `record` with every V2 field present.
 *
 * Each V2 field keeps its existing value only when that value has the expected
 * JavaScript type; otherwise it gets a default: `"[]"` for `file_refs`, `0`
 * for `ref_count`, `"active"` for `status`, and `null` for the rest.
 *
 * @param record A file metadata record, with or without V2 fields.
 * @returns A new object containing the record's fields plus normalized V2 fields.
 */
function toV2Record(record) {
  const source = record;
  const valueOr = (field, expectedType, fallback) =>
    typeof source[field] === expectedType ? source[field] : fallback;
  return {
    ...record,
    file_refs: valueOr("file_refs", "string", "[]"),
    ref_count: valueOr("ref_count", "number", 0),
    status: valueOr("status", "string", "active"),
    scope_id: valueOr("scope_id", "string", null),
    uploaded_by: valueOr("uploaded_by", "string", null),
    original_filename: valueOr("original_filename", "string", null),
    storage_verified_at: valueOr("storage_verified_at", "string", null),
    deleted_at: valueOr("deleted_at", "string", null)
  };
}
2169
/**
 * Combine a record with its parsed references and an orphan flag.
 *
 * The record is first normalized with toV2Record(), then its `file_refs` text
 * is parsed with parseFileRefs(). `is_orphaned` is true when no references
 * were parsed.
 *
 * @param record A file metadata record.
 * @returns An object `{ record, refs, is_orphaned }`.
 */
function buildFileWithStatus(record) {
  const normalized = toV2Record(record);
  const parsedRefs = parseFileRefs(normalized.file_refs);
  const hasNoRefs = parsedRefs.length === 0;
  return { record: normalized, refs: parsedRefs, is_orphaned: hasNoRefs };
}
2178
+
2115
2179
  // src/services/file-metadata-service.ts
2116
2180
  var FileMetadataService = class {
2117
2181
  constructor(crudService, options = {}) {
@@ -2153,9 +2217,15 @@ var FileMetadataService = class {
2153
2217
  changed_at: timestamp,
2154
2218
  file_hash: input.file_hash || null,
2155
2219
  file_size: input.file_size ?? null,
2156
- file_changed_at: input.file_hash ? timestamp : null
2157
- // Set content changed time if hash is provided
2220
+ file_changed_at: input.file_hash ? timestamp : null,
2221
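+ // V2 defaults (file_refs, ref_count, status) are always written; only scope_id, uploaded_by and original_filename below are added conditionally. NOTE(review): confirm inserts still succeed on pre-migration DBs that lack these columns.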
2222
+ file_refs: "[]",
2223
+ ref_count: 0,
2224
+ status: "active"
2158
2225
  };
2226
+ if (input.scope_id !== void 0) record.scope_id = input.scope_id;
2227
+ if (input.uploaded_by !== void 0) record.uploaded_by = input.uploaded_by;
2228
+ if (input.original_filename !== void 0) record.original_filename = input.original_filename;
2159
2229
  const results = await this.crud.insert(record);
2160
2230
  this.logger?.debug?.("Recorded file upload", { path: input.file_path });
2161
2231
  return results[0] || null;
@@ -2514,6 +2584,254 @@ var FileMetadataService = class {
2514
2584
  return false;
2515
2585
  }
2516
2586
  }
2587
+ // ============================================
2588
+ // Reference Tracking Methods (V2)
2589
+ // ============================================
2590
+ /**
2591
+ * Find a record by ID
2592
+ */
2593
+ async findById(id) {
2594
+ try {
2595
+ const results = await this.crud.findBy({ id });
2596
+ return results[0] || null;
2597
+ } catch (error) {
2598
+ this.logError("findById", error);
2599
+ return null;
2600
+ }
2601
+ }
2602
+ /**
2603
+ * Find multiple records by IDs
2604
+ */
2605
+ async findByIds(ids) {
2606
+ try {
2607
+ const results = [];
2608
+ for (const id of ids) {
2609
+ const record = await this.findById(id);
2610
+ if (record) results.push(record);
2611
+ }
2612
+ return results;
2613
+ } catch (error) {
2614
+ this.logError("findByIds", error);
2615
+ return [];
2616
+ }
2617
+ }
2618
+ /**
2619
+ * Add a reference to a file
2620
+ * @returns The new ref_id, or null on failure
2621
+ */
2622
+ async addRef(fileId, options) {
2623
+ try {
2624
+ const record = await this.findById(fileId);
2625
+ if (!record) {
2626
+ this.logger?.warn?.("Cannot add ref: file not found", { fileId });
2627
+ return null;
2628
+ }
2629
+ const v2 = toV2Record(record);
2630
+ const refs = parseFileRefs(v2.file_refs);
2631
+ const newRef = createFileRef(options);
2632
+ const updatedRefs = [...refs, newRef];
2633
+ await this.crud.updateById(fileId, {
2634
+ file_refs: stringifyFileRefs(updatedRefs),
2635
+ ref_count: updatedRefs.length,
2636
+ status: "active",
2637
+ changed_at: this.now()
2638
+ });
2639
+ this.logger?.debug?.("Added ref", { fileId, ref_id: newRef.ref_id });
2640
+ return { ref_id: newRef.ref_id };
2641
+ } catch (error) {
2642
+ this.logError("addRef", error);
2643
+ return null;
2644
+ }
2645
+ }
2646
+ /**
2647
+ * Remove a specific reference from a file
2648
+ * @returns Remaining ref count, or null on failure
2649
+ */
2650
+ async removeRef(fileId, refId) {
2651
+ try {
2652
+ const record = await this.findById(fileId);
2653
+ if (!record) {
2654
+ this.logger?.warn?.("Cannot remove ref: file not found", { fileId });
2655
+ return null;
2656
+ }
2657
+ const v2 = toV2Record(record);
2658
+ const refs = parseFileRefs(v2.file_refs);
2659
+ const updatedRefs = removeRefFromArray(refs, refId);
2660
+ await this.crud.updateById(fileId, {
2661
+ file_refs: stringifyFileRefs(updatedRefs),
2662
+ ref_count: updatedRefs.length,
2663
+ changed_at: this.now()
2664
+ });
2665
+ this.logger?.debug?.("Removed ref", { fileId, refId, remaining: updatedRefs.length });
2666
+ return { remaining_refs: updatedRefs.length };
2667
+ } catch (error) {
2668
+ this.logError("removeRef", error);
2669
+ return null;
2670
+ }
2671
+ }
2672
+ /**
2673
+ * Remove references matching criteria across all records.
2674
+ * Scans all records and removes matching refs (AND semantics).
2675
+ */
2676
+ async removeRefsByCriteria(criteria) {
2677
+ try {
2678
+ let totalRemoved = 0;
2679
+ if (criteria.file_id) {
2680
+ const record = await this.findById(criteria.file_id);
2681
+ if (record) {
2682
+ const removed = await this.removeRefsFromRecord(record, criteria);
2683
+ totalRemoved += removed;
2684
+ }
2685
+ return { removed_count: totalRemoved };
2686
+ }
2687
+ let records;
2688
+ if (criteria.scope_id) {
2689
+ records = await this.crud.findBy({ scope_id: criteria.scope_id });
2690
+ } else {
2691
+ records = await this.crud.list();
2692
+ }
2693
+ for (const record of records) {
2694
+ const removed = await this.removeRefsFromRecord(record, criteria);
2695
+ totalRemoved += removed;
2696
+ }
2697
+ this.logger?.debug?.("Removed refs by criteria", { criteria, removed_count: totalRemoved });
2698
+ return { removed_count: totalRemoved };
2699
+ } catch (error) {
2700
+ this.logError("removeRefsByCriteria", error);
2701
+ return { removed_count: 0 };
2702
+ }
2703
+ }
2704
+ /**
2705
+ * Helper: remove matching refs from a single record
2706
+ */
2707
+ async removeRefsFromRecord(record, criteria) {
2708
+ const v2 = toV2Record(record);
2709
+ const refs = parseFileRefs(v2.file_refs);
2710
+ if (refs.length === 0) return 0;
2711
+ const updatedRefs = removeRefsByCriteriaFromArray(refs, {
2712
+ entity_type: criteria.entity_type,
2713
+ entity_id: criteria.entity_id
2714
+ });
2715
+ const removedCount = refs.length - updatedRefs.length;
2716
+ if (removedCount > 0) {
2717
+ await this.crud.updateById(record.id, {
2718
+ file_refs: stringifyFileRefs(updatedRefs),
2719
+ ref_count: updatedRefs.length,
2720
+ changed_at: this.now()
2721
+ });
2722
+ }
2723
+ return removedCount;
2724
+ }
2725
+ /**
2726
+ * Get all references for a file
2727
+ */
2728
+ async getRefs(fileId) {
2729
+ try {
2730
+ const record = await this.findById(fileId);
2731
+ if (!record) return null;
2732
+ return parseFileRefs(toV2Record(record).file_refs);
2733
+ } catch (error) {
2734
+ this.logError("getRefs", error);
2735
+ return null;
2736
+ }
2737
+ }
2738
+ /**
2739
+ * Get a file with its status and parsed refs
2740
+ */
2741
+ async getFileWithStatus(fileId) {
2742
+ try {
2743
+ const record = await this.findById(fileId);
2744
+ if (!record) return null;
2745
+ return buildFileWithStatus(record);
2746
+ } catch (error) {
2747
+ this.logError("getFileWithStatus", error);
2748
+ return null;
2749
+ }
2750
+ }
2751
+ /**
2752
+ * Get multiple files with status
2753
+ */
2754
+ async getFilesWithStatus(fileIds) {
2755
+ try {
2756
+ const records = await this.findByIds(fileIds);
2757
+ return records.map(buildFileWithStatus);
2758
+ } catch (error) {
2759
+ this.logError("getFilesWithStatus", error);
2760
+ return [];
2761
+ }
2762
+ }
2763
+ /**
2764
+ * Update the status of a file
2765
+ */
2766
+ async updateStatus(fileId, status) {
2767
+ try {
2768
+ const patch = {
2769
+ status,
2770
+ changed_at: this.now()
2771
+ };
2772
+ if (status === "soft_deleted") {
2773
+ patch.deleted_at = this.now();
2774
+ }
2775
+ await this.crud.updateById(fileId, patch);
2776
+ this.logger?.debug?.("Updated status", { fileId, status });
2777
+ return true;
2778
+ } catch (error) {
2779
+ this.logError("updateStatus", error);
2780
+ return false;
2781
+ }
2782
+ }
2783
+ /**
2784
+ * Soft-delete a file (set status to soft_deleted, record deleted_at)
2785
+ */
2786
+ async softDelete(fileId) {
2787
+ return this.updateStatus(fileId, "soft_deleted");
2788
+ }
2789
+ /**
2790
+ * Update specific V2 fields on a record
2791
+ */
2792
+ async updateFields(fileId, fields) {
2793
+ try {
2794
+ await this.crud.updateById(fileId, {
2795
+ ...fields,
2796
+ changed_at: this.now()
2797
+ });
2798
+ this.logger?.debug?.("Updated fields", { fileId, fields: Object.keys(fields) });
2799
+ return true;
2800
+ } catch (error) {
2801
+ this.logError("updateFields", error);
2802
+ return false;
2803
+ }
2804
+ }
2805
+ /**
2806
+ * Find orphaned files (zero references)
2807
+ */
2808
+ async findOrphaned(options) {
2809
+ try {
2810
+ let records;
2811
+ if (options?.scope_id) {
2812
+ records = await this.crud.findBy({ scope_id: options.scope_id });
2813
+ } else if (options?.storage_type) {
2814
+ records = await this.crud.findBy({ storage_type: options.storage_type });
2815
+ } else {
2816
+ records = await this.crud.list();
2817
+ }
2818
+ let orphaned = records.map(buildFileWithStatus).filter((f) => f.is_orphaned && f.record.status !== "soft_deleted");
2819
+ if (options?.olderThanMs) {
2820
+ const cutoff = Date.now() - options.olderThanMs;
2821
+ orphaned = orphaned.filter((f) => {
2822
+ const createdAt = new Date(f.record.created_at).getTime();
2823
+ return createdAt < cutoff;
2824
+ });
2825
+ }
2826
+ if (options?.limit && orphaned.length > options.limit) {
2827
+ orphaned = orphaned.slice(0, options.limit);
2828
+ }
2829
+ return orphaned;
2830
+ } catch (error) {
2831
+ this.logError("findOrphaned", error);
2832
+ return [];
2833
+ }
2834
+ }
2517
2835
  };
2518
2836
  function createFileMetadataService(crudService, options) {
2519
2837
  return new FileMetadataService(crudService, options);
@@ -2883,6 +3201,132 @@ var TrackedFileManager = class extends FileManager {
2883
3201
  const record = await this.metadataService.findByPath(path3, this.getStorageType());
2884
3202
  return record?.file_size ?? null;
2885
3203
  }
3204
+ // ============ Reference Tracking Methods (V2) ============
3205
+ /**
3206
+ * Add a reference to a file
3207
+ */
3208
+ async addRef(fileId, options) {
3209
+ if (!this.isTrackingEnabled()) return null;
3210
+ return this.metadataService.addRef(fileId, options);
3211
+ }
3212
+ /**
3213
+ * Remove a reference from a file
3214
+ */
3215
+ async removeRef(fileId, refId) {
3216
+ if (!this.isTrackingEnabled()) return null;
3217
+ return this.metadataService.removeRef(fileId, refId);
3218
+ }
3219
+ /**
3220
+ * Get a file by its database ID with status information
3221
+ */
3222
+ async getFileById(fileId) {
3223
+ if (!this.isTrackingEnabled()) return null;
3224
+ return this.metadataService.getFileWithStatus(fileId);
3225
+ }
3226
+ /**
3227
+ * Get multiple files by their database IDs with status information
3228
+ */
3229
+ async getFilesById(fileIds) {
3230
+ if (!this.isTrackingEnabled()) return [];
3231
+ return this.metadataService.getFilesWithStatus(fileIds);
3232
+ }
3233
+ /**
3234
+ * Soft-delete a file (marks as soft_deleted, does not remove physical file)
3235
+ */
3236
+ async softDeleteFile(fileId) {
3237
+ if (!this.isTrackingEnabled()) return false;
3238
+ return this.metadataService.softDelete(fileId);
3239
+ }
3240
+ /**
3241
+ * Find orphaned files (files with zero references)
3242
+ */
3243
+ async findOrphanedFiles(options) {
3244
+ if (!this.isTrackingEnabled()) return [];
3245
+ return this.metadataService.findOrphaned(options);
3246
+ }
3247
+ /**
3248
+ * Cleanup orphaned files — removes physical files and/or DB records
3249
+ */
3250
+ async cleanupOrphanedFiles(options) {
3251
+ if (!this.isTrackingEnabled()) return { cleaned: 0, errors: [] };
3252
+ const orphaned = await this.metadataService.findOrphaned(options);
3253
+ let cleaned = 0;
3254
+ const errors = [];
3255
+ for (const file of orphaned) {
3256
+ try {
3257
+ if (options?.softDeleteOnly) {
3258
+ await this.metadataService.softDelete(file.record.id);
3259
+ cleaned++;
3260
+ continue;
3261
+ }
3262
+ const deletePhysical = options?.deletePhysicalFiles !== false;
3263
+ if (deletePhysical) {
3264
+ const deleteResult = await super.deleteFile(file.record.file_path);
3265
+ if (!deleteResult.success) {
3266
+ if (deleteResult.error && !deleteResult.error.includes("not found")) {
3267
+ errors.push(`Failed to delete physical file ${file.record.file_path}: ${deleteResult.error}`);
3268
+ }
3269
+ }
3270
+ }
3271
+ await this.metadataService.recordDelete(file.record.file_path, file.record.storage_type);
3272
+ cleaned++;
3273
+ } catch (error) {
3274
+ const msg = error instanceof Error ? error.message : String(error);
3275
+ errors.push(`Error cleaning up ${file.record.file_path}: ${msg}`);
3276
+ }
3277
+ }
3278
+ return { cleaned, errors };
3279
+ }
3280
+ /**
3281
+ * Verify that a file's physical storage exists and update its status
3282
+ */
3283
+ async verifyFileExistence(fileId) {
3284
+ if (!this.isTrackingEnabled()) return null;
3285
+ const record = await this.metadataService.findById(fileId);
3286
+ if (!record) return null;
3287
+ const fileExists = await this.exists(record.file_path);
3288
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString();
3289
+ await this.metadataService.updateFields(fileId, {
3290
+ storage_verified_at: timestamp,
3291
+ ...fileExists ? {} : { status: "missing" }
3292
+ });
3293
+ return fileExists;
3294
+ }
3295
+ /**
3296
+ * Upload a file and optionally add an initial reference
3297
+ */
3298
+ async uploadFileWithRef(source, remotePath, options) {
3299
+ const uploadResult = await this.uploadFile(source, remotePath, {
3300
+ ...options,
3301
+ awaitRecording: true
3302
+ });
3303
+ if (!uploadResult.success || !uploadResult.data || !this.isTrackingEnabled()) {
3304
+ return uploadResult;
3305
+ }
3306
+ const record = await this.metadataService.findByPath(remotePath, this.getStorageType());
3307
+ if (!record) {
3308
+ return uploadResult;
3309
+ }
3310
+ const fieldsToUpdate = {};
3311
+ if (options?.scope_id) fieldsToUpdate.scope_id = options.scope_id;
3312
+ if (options?.uploaded_by) fieldsToUpdate.uploaded_by = options.uploaded_by;
3313
+ if (Object.keys(fieldsToUpdate).length > 0) {
3314
+ await this.metadataService.updateFields(record.id, fieldsToUpdate);
3315
+ }
3316
+ let refId;
3317
+ if (options?.ref) {
3318
+ const refResult = await this.metadataService.addRef(record.id, options.ref);
3319
+ if (refResult) refId = refResult.ref_id;
3320
+ }
3321
+ return {
3322
+ success: true,
3323
+ data: {
3324
+ ...uploadResult.data,
3325
+ file_id: record.id,
3326
+ ref_id: refId
3327
+ }
3328
+ };
3329
+ }
2886
3330
  };
2887
3331
  function createTrackedFileManager(options) {
2888
3332
  return new TrackedFileManager(options);
@@ -3931,13 +4375,25 @@ var HAZO_FILES_TABLE_SCHEMA = {
3931
4375
  storage_type TEXT NOT NULL,
3932
4376
  file_hash TEXT,
3933
4377
  file_size INTEGER,
3934
- file_changed_at TEXT
4378
+ file_changed_at TEXT,
4379
+ file_refs TEXT DEFAULT '[]',
4380
+ ref_count INTEGER DEFAULT 0,
4381
+ status TEXT DEFAULT 'active',
4382
+ scope_id TEXT,
4383
+ uploaded_by TEXT,
4384
+ storage_verified_at TEXT,
4385
+ deleted_at TEXT,
4386
+ original_filename TEXT
3935
4387
  )`,
3936
4388
  indexes: [
3937
4389
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
3938
4390
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_storage ON hazo_files (storage_type)",
3939
4391
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_hazo_files_path_storage ON hazo_files (file_path, storage_type)",
3940
- "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)"
4392
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)",
4393
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4394
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4395
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4396
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
3941
4397
  ]
3942
4398
  },
3943
4399
  postgres: {
@@ -3952,13 +4408,25 @@ var HAZO_FILES_TABLE_SCHEMA = {
3952
4408
  storage_type TEXT NOT NULL,
3953
4409
  file_hash TEXT,
3954
4410
  file_size BIGINT,
3955
- file_changed_at TIMESTAMP WITH TIME ZONE
4411
+ file_changed_at TIMESTAMP WITH TIME ZONE,
4412
+ file_refs TEXT DEFAULT '[]',
4413
+ ref_count INTEGER DEFAULT 0,
4414
+ status TEXT DEFAULT 'active',
4415
+ scope_id UUID,
4416
+ uploaded_by UUID,
4417
+ storage_verified_at TIMESTAMP WITH TIME ZONE,
4418
+ deleted_at TIMESTAMP WITH TIME ZONE,
4419
+ original_filename TEXT
3956
4420
  )`,
3957
4421
  indexes: [
3958
4422
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
3959
4423
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_storage ON hazo_files (storage_type)",
3960
4424
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_hazo_files_path_storage ON hazo_files (file_path, storage_type)",
3961
- "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)"
4425
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)",
4426
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4427
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4428
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4429
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
3962
4430
  ]
3963
4431
  },
3964
4432
  columns: [
@@ -3972,7 +4440,15 @@ var HAZO_FILES_TABLE_SCHEMA = {
3972
4440
  "storage_type",
3973
4441
  "file_hash",
3974
4442
  "file_size",
3975
- "file_changed_at"
4443
+ "file_changed_at",
4444
+ "file_refs",
4445
+ "ref_count",
4446
+ "status",
4447
+ "scope_id",
4448
+ "uploaded_by",
4449
+ "storage_verified_at",
4450
+ "deleted_at",
4451
+ "original_filename"
3976
4452
  ]
3977
4453
  };
3978
4454
  function getSchemaForTable(tableName, dbType) {
@@ -3985,6 +4461,78 @@ function getSchemaForTable(tableName, dbType) {
3985
4461
  )
3986
4462
  };
3987
4463
  }
4464
+ var HAZO_FILES_MIGRATION_V2 = {
4465
+ tableName: HAZO_FILES_DEFAULT_TABLE_NAME,
4466
+ sqlite: {
4467
+ alterStatements: [
4468
+ "ALTER TABLE hazo_files ADD COLUMN file_refs TEXT DEFAULT '[]'",
4469
+ "ALTER TABLE hazo_files ADD COLUMN ref_count INTEGER DEFAULT 0",
4470
+ "ALTER TABLE hazo_files ADD COLUMN status TEXT DEFAULT 'active'",
4471
+ "ALTER TABLE hazo_files ADD COLUMN scope_id TEXT",
4472
+ "ALTER TABLE hazo_files ADD COLUMN uploaded_by TEXT",
4473
+ "ALTER TABLE hazo_files ADD COLUMN storage_verified_at TEXT",
4474
+ "ALTER TABLE hazo_files ADD COLUMN deleted_at TEXT",
4475
+ "ALTER TABLE hazo_files ADD COLUMN original_filename TEXT"
4476
+ ],
4477
+ indexes: [
4478
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4479
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4480
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4481
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
4482
+ ],
4483
+ backfill: `UPDATE hazo_files SET
4484
+ file_refs = COALESCE(file_refs, '[]'),
4485
+ ref_count = COALESCE(ref_count, 0),
4486
+ status = COALESCE(status, 'active')
4487
+ WHERE file_refs IS NULL OR ref_count IS NULL OR status IS NULL`
4488
+ },
4489
+ postgres: {
4490
+ alterStatements: [
4491
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS file_refs TEXT DEFAULT '[]'",
4492
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS ref_count INTEGER DEFAULT 0",
4493
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS status TEXT DEFAULT 'active'",
4494
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS scope_id UUID",
4495
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS uploaded_by UUID",
4496
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS storage_verified_at TIMESTAMP WITH TIME ZONE",
4497
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMP WITH TIME ZONE",
4498
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS original_filename TEXT"
4499
+ ],
4500
+ indexes: [
4501
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4502
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4503
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4504
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
4505
+ ],
4506
+ backfill: `UPDATE hazo_files SET
4507
+ file_refs = COALESCE(file_refs, '[]'),
4508
+ ref_count = COALESCE(ref_count, 0),
4509
+ status = COALESCE(status, 'active')
4510
+ WHERE file_refs IS NULL OR ref_count IS NULL OR status IS NULL`
4511
+ },
4512
+ newColumns: [
4513
+ "file_refs",
4514
+ "ref_count",
4515
+ "status",
4516
+ "scope_id",
4517
+ "uploaded_by",
4518
+ "storage_verified_at",
4519
+ "deleted_at",
4520
+ "original_filename"
4521
+ ]
4522
+ };
4523
/**
 * Build the V2 migration SQL for a custom table name.
 *
 * Every occurrence of the default table name in the ALTER statements, index
 * statements and backfill statement is replaced with `tableName`. This
 * includes occurrences inside index names.
 *
 * The name is substituted literally: `$` sequences in `tableName` are not
 * treated as replacement patterns.
 * NOTE(review): `tableName` is spliced into SQL unescaped; callers must pass a
 * trusted identifier.
 *
 * @param tableName Table name to use instead of the default.
 * @param dbType Key of a database section in HAZO_FILES_MIGRATION_V2.
 * @returns `{ alterStatements, indexes, backfill }` for the given table.
 * @throws {Error} When `dbType` does not name a migration section.
 */
function getMigrationForTable(tableName, dbType) {
  const migration = HAZO_FILES_MIGRATION_V2[dbType];
  if (!migration || !Array.isArray(migration.alterStatements)) {
    throw new Error(`Unsupported database type for V2 migration: ${String(dbType)}`);
  }
  const defaultName = HAZO_FILES_MIGRATION_V2.tableName;
  const replacement = String(tableName);
  // split/join replaces all occurrences without regex or "$&"-style expansion.
  const rename = (sql) => sql.split(defaultName).join(replacement);
  return {
    alterStatements: migration.alterStatements.map(rename),
    indexes: migration.indexes.map(rename),
    backfill: rename(migration.backfill)
  };
}
3988
4536
  var HAZO_FILES_NAMING_DEFAULT_TABLE_NAME = "hazo_files_naming";
3989
4537
  var HAZO_FILES_NAMING_TABLE_SCHEMA = {
3990
4538
  tableName: HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
@@ -4042,6 +4590,24 @@ function getNamingSchemaForTable(tableName, dbType) {
4042
4590
  };
4043
4591
  }
4044
4592
 
4593
+ // src/migrations/add-reference-tracking.ts
4594
/**
 * Apply the V2 (reference tracking) schema migration.
 *
 * Runs each ALTER statement, then each index statement, through
 * `executor.run`. The migration may be re-run: an ALTER that fails because
 * the column already exists is ignored. Any other ALTER failure is rethrown
 * instead of being swallowed, so a partially applied migration cannot go
 * unnoticed.
 *
 * @param executor Object exposing an async `run(sql)` method.
 * @param dbType Key of a database section in HAZO_FILES_MIGRATION_V2.
 * @param tableName Optional custom table name; the default table is used when omitted.
 * @throws Rethrows ALTER errors other than "column already exists", and any
 *         error from index creation.
 */
async function migrateToV2(executor, dbType, tableName) {
  const migration = tableName
    ? getMigrationForTable(tableName, dbType)
    : HAZO_FILES_MIGRATION_V2[dbType];
  // SQLite reports "duplicate column name: x"; PostgreSQL uses SQLSTATE 42701
  // with a message ending in "already exists".
  const isColumnAlreadyPresent = (error) => {
    const isObject = error !== null && typeof error === "object";
    const message = isObject && typeof error.message === "string" ? error.message : String(error);
    const code = isObject ? error.code : undefined;
    return code === "42701" || /duplicate column|already exists/i.test(message);
  };
  for (const stmt of migration.alterStatements) {
    try {
      await executor.run(stmt);
    } catch (error) {
      if (!isColumnAlreadyPresent(error)) throw error;
    }
  }
  for (const idx of migration.indexes) {
    await executor.run(idx);
  }
}
4606
/**
 * Run the V2 backfill statement from the selected migration section.
 *
 * @param executor Object exposing an async `run(sql)` method.
 * @param dbType Key of a database section in HAZO_FILES_MIGRATION_V2.
 * @param tableName Optional custom table name; the default table is used when omitted.
 */
async function backfillV2Defaults(executor, dbType, tableName) {
  let migration;
  if (tableName) {
    migration = getMigrationForTable(tableName, dbType);
  } else {
    migration = HAZO_FILES_MIGRATION_V2[dbType];
  }
  const { backfill } = migration;
  await executor.run(backfill);
}
4610
+
4045
4611
  // src/server/index.ts
4046
4612
  try {
4047
4613
  __require("server-only");
@@ -4063,6 +4629,7 @@ export {
4063
4629
  GoogleDriveAuth,
4064
4630
  GoogleDriveModule,
4065
4631
  HAZO_FILES_DEFAULT_TABLE_NAME,
4632
+ HAZO_FILES_MIGRATION_V2,
4066
4633
  HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
4067
4634
  HAZO_FILES_NAMING_TABLE_SCHEMA,
4068
4635
  HAZO_FILES_TABLE_SCHEMA,
@@ -4080,6 +4647,8 @@ export {
4080
4647
  TrackedFileManager,
4081
4648
  UploadExtractService,
4082
4649
  addExtractionToFileData,
4650
+ backfillV2Defaults,
4651
+ buildFileWithStatus,
4083
4652
  clearExtractions,
4084
4653
  clonePattern,
4085
4654
  computeFileHash,
@@ -4093,6 +4662,7 @@ export {
4093
4662
  createFileItem,
4094
4663
  createFileManager,
4095
4664
  createFileMetadataService,
4665
+ createFileRef,
4096
4666
  createFolderItem,
4097
4667
  createGoogleDriveAuth,
4098
4668
  createGoogleDriveModule,
@@ -4116,6 +4686,7 @@ export {
4116
4686
  generateExtractionId,
4117
4687
  generateId,
4118
4688
  generatePreviewName,
4689
+ generateRefId,
4119
4690
  generateSampleConfig,
4120
4691
  generateSegmentId,
4121
4692
  getBaseName,
@@ -4129,6 +4700,7 @@ export {
4129
4700
  getFileCategory,
4130
4701
  getFileMetadataValues,
4131
4702
  getMergedData,
4703
+ getMigrationForTable,
4132
4704
  getMimeType,
4133
4705
  getNameWithoutExtension,
4134
4706
  getNamingSchemaForTable,
@@ -4160,20 +4732,26 @@ export {
4160
4732
  joinPath,
4161
4733
  loadConfig,
4162
4734
  loadConfigAsync,
4735
+ migrateToV2,
4163
4736
  normalizePath,
4164
4737
  parseConfig,
4165
4738
  parseFileData,
4739
+ parseFileRefs,
4166
4740
  parsePatternString,
4167
4741
  patternToString,
4168
4742
  recalculateMergedData,
4169
4743
  registerModule,
4170
4744
  removeExtractionById,
4171
4745
  removeExtractionByIndex,
4746
+ removeRefFromArray,
4747
+ removeRefsByCriteriaFromArray,
4172
4748
  sanitizeFilename,
4173
4749
  saveConfig,
4174
4750
  sortItems,
4175
4751
  stringifyFileData,
4752
+ stringifyFileRefs,
4176
4753
  successResult,
4754
+ toV2Record,
4177
4755
  updateExtractionById,
4178
4756
  validateExtractionData,
4179
4757
  validateFileDataStructure,