hazo_files 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -614,7 +614,7 @@ var LocalStorageModule = class extends BaseStorageModule {
614
614
  await super.initialize(config);
615
615
  const localConfig = this.getProviderConfig();
616
616
  this.basePath = path2.resolve(localConfig.basePath);
617
- this.allowedExtensions = localConfig.allowedExtensions || [];
617
+ this.allowedExtensions = (localConfig.allowedExtensions || []).map((ext) => ext.trim().replace(/^\./, "").toLowerCase()).filter((ext) => ext.length > 0);
618
618
  this.maxFileSize = localConfig.maxFileSize || 0;
619
619
  await fs2.promises.mkdir(this.basePath, { recursive: true });
620
620
  }
@@ -2105,6 +2105,70 @@ function updateExtractionById(fileData, id, newData, options = {}) {
2105
2105
  };
2106
2106
  }
2107
2107
 
2108
+ // src/common/ref-utils.ts
2109
/**
 * Generate an identifier for a file reference.
 *
 * Format: `ref_<epoch-ms>_<9 base-36 characters>`. The random suffix is
 * right-padded with "0" because `Math.random().toString(36)` may produce
 * fewer than nine fractional digits (for example `0.5` renders as "0.i"),
 * which would otherwise give a short or even empty suffix.
 *
 * The id is built from `Math.random()`, so it is not cryptographically secure.
 *
 * @returns {string} A new reference id.
 */
function generateRefId() {
  const suffix = Math.random().toString(36).slice(2, 11).padEnd(9, "0");
  return `ref_${Date.now()}_${suffix}`;
}
2112
/**
 * Parse the JSON text stored in a record's `file_refs` field.
 *
 * Never throws: missing, non-string, malformed, or non-array input yields
 * an empty array. Entries that are not plain objects (null, primitives,
 * nested arrays) are dropped so callers can safely read properties such as
 * `ref_id` on every element.
 *
 * @param json JSON text expected to encode an array of reference objects.
 * @returns {Array<object>} The parsed reference objects (possibly empty).
 */
function parseFileRefs(json) {
  if (typeof json !== "string" || json === "") return [];
  let parsed;
  try {
    parsed = JSON.parse(json);
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  return parsed.filter(
    (entry) => entry !== null && typeof entry === "object" && !Array.isArray(entry)
  );
}
2122
/**
 * Serialize a list of references to the JSON text stored in `file_refs`.
 *
 * @param refs Value to serialize (normally an array of reference objects).
 * @returns The result of `JSON.stringify(refs)`.
 */
function stringifyFileRefs(refs) {
  const serialized = JSON.stringify(refs);
  return serialized;
}
2125
/**
 * Build a new reference object from caller-supplied options.
 *
 * `ref_id` and `created_at` are generated here; `entity_type` and
 * `entity_id` are copied as given. The optional fields `created_by`,
 * `visibility`, `label` and `metadata` are copied only when truthy.
 *
 * @param options Reference options (entity_type, entity_id, plus optional fields).
 * @returns {object} The new reference object.
 */
function createFileRef(options) {
  const fileRef = {
    ref_id: generateRefId(),
    entity_type: options.entity_type,
    entity_id: options.entity_id,
    created_at: new Date().toISOString()
  };
  const optionalKeys = ["created_by", "visibility", "label", "metadata"];
  for (const key of optionalKeys) {
    if (options[key]) {
      fileRef[key] = options[key];
    }
  }
  return fileRef;
}
2138
/**
 * Return a copy of `refs` without the entries whose `ref_id` equals `refId`.
 * The input array is not modified.
 *
 * @param refs Array of reference objects.
 * @param refId Id of the reference to drop.
 * @returns {Array<object>} New array containing every other reference.
 */
function removeRefFromArray(refs, refId) {
  const isDifferentRef = (candidate) => !(candidate.ref_id === refId);
  return refs.filter(isDifferentRef);
}
2141
/**
 * Return a copy of `refs` without the entries matching `criteria`.
 *
 * A reference is removed only when it matches every criterion that is set
 * (`entity_type` and/or `entity_id`). When neither criterion is set,
 * nothing is removed.
 *
 * @param refs Array of reference objects.
 * @param criteria Object with optional `entity_type` and `entity_id`.
 * @returns {Array<object>} New array of the references that are kept.
 */
function removeRefsByCriteriaFromArray(refs, criteria) {
  const shouldKeep = (ref) => {
    const { entity_type: wantedType, entity_id: wantedId } = criteria;
    const typeDiffers = Boolean(wantedType) && ref.entity_type !== wantedType;
    if (typeDiffers) return true;
    const idDiffers = Boolean(wantedId) && ref.entity_id !== wantedId;
    if (idDiffers) return true;
    // Every supplied criterion matched; keep only if none was supplied.
    return !(wantedType || wantedId);
  };
  return refs.filter(shouldKeep);
}
2148
/**
 * Return a copy of `record` with every V2 field present.
 *
 * Existing values are kept when they already have the expected type
 * (number for `ref_count`, string for the rest). Otherwise the defaults
 * are: `file_refs` "[]", `ref_count` 0, `status` "active", and null for
 * `scope_id`, `uploaded_by`, `original_filename`, `storage_verified_at`
 * and `deleted_at`.
 *
 * @param record A file metadata record (V1 or V2 shape).
 * @returns {object} A new object with all of the record's own fields plus the V2 fields.
 */
function toV2Record(record) {
  const source = record;
  const stringOr = (key, fallback) =>
    typeof source[key] === "string" ? source[key] : fallback;

  const upgraded = { ...record };
  upgraded.file_refs = stringOr("file_refs", "[]");
  upgraded.ref_count = typeof source.ref_count === "number" ? source.ref_count : 0;
  upgraded.status = stringOr("status", "active");
  upgraded.scope_id = stringOr("scope_id", null);
  upgraded.uploaded_by = stringOr("uploaded_by", null);
  upgraded.original_filename = stringOr("original_filename", null);
  upgraded.storage_verified_at = stringOr("storage_verified_at", null);
  upgraded.deleted_at = stringOr("deleted_at", null);
  return upgraded;
}
2162
/**
 * Combine a record with its parsed references and an orphan flag.
 *
 * @param record A file metadata record (V1 or V2 shape).
 * @returns {{record: object, refs: Array<object>, is_orphaned: boolean}}
 *   `record` is the V2-normalized record, `refs` the parsed `file_refs`,
 *   and `is_orphaned` is true when there are no references.
 */
function buildFileWithStatus(record) {
  const normalized = toV2Record(record);
  const parsedRefs = parseFileRefs(normalized.file_refs);
  const hasNoRefs = parsedRefs.length === 0;
  return { record: normalized, refs: parsedRefs, is_orphaned: hasNoRefs };
}
2171
+
2108
2172
  // src/services/file-metadata-service.ts
2109
2173
  var FileMetadataService = class {
2110
2174
  constructor(crudService, options = {}) {
@@ -2146,9 +2210,15 @@ var FileMetadataService = class {
2146
2210
  changed_at: timestamp,
2147
2211
  file_hash: input.file_hash || null,
2148
2212
  file_size: input.file_size ?? null,
2149
- file_changed_at: input.file_hash ? timestamp : null
2150
- // Set content changed time if hash is provided
2213
+ file_changed_at: input.file_hash ? timestamp : null,
2214
+ // V2 defaults included conditionally to avoid breaking pre-migration DBs
2215
+ file_refs: "[]",
2216
+ ref_count: 0,
2217
+ status: "active"
2151
2218
  };
2219
+ if (input.scope_id !== void 0) record.scope_id = input.scope_id;
2220
+ if (input.uploaded_by !== void 0) record.uploaded_by = input.uploaded_by;
2221
+ if (input.original_filename !== void 0) record.original_filename = input.original_filename;
2152
2222
  const results = await this.crud.insert(record);
2153
2223
  this.logger?.debug?.("Recorded file upload", { path: input.file_path });
2154
2224
  return results[0] || null;
@@ -2507,6 +2577,254 @@ var FileMetadataService = class {
2507
2577
  return false;
2508
2578
  }
2509
2579
  }
2580
+ // ============================================
2581
+ // Reference Tracking Methods (V2)
2582
+ // ============================================
2583
+ /**
2584
+ * Find a record by ID
2585
+ */
2586
+ async findById(id) {
2587
+ try {
2588
+ const results = await this.crud.findBy({ id });
2589
+ return results[0] || null;
2590
+ } catch (error) {
2591
+ this.logError("findById", error);
2592
+ return null;
2593
+ }
2594
+ }
2595
+ /**
2596
+ * Find multiple records by IDs
2597
+ */
2598
+ async findByIds(ids) {
2599
+ try {
2600
+ const results = [];
2601
+ for (const id of ids) {
2602
+ const record = await this.findById(id);
2603
+ if (record) results.push(record);
2604
+ }
2605
+ return results;
2606
+ } catch (error) {
2607
+ this.logError("findByIds", error);
2608
+ return [];
2609
+ }
2610
+ }
2611
+ /**
2612
+ * Add a reference to a file
2613
+ * @returns The new ref_id, or null on failure
2614
+ */
2615
+ async addRef(fileId, options) {
2616
+ try {
2617
+ const record = await this.findById(fileId);
2618
+ if (!record) {
2619
+ this.logger?.warn?.("Cannot add ref: file not found", { fileId });
2620
+ return null;
2621
+ }
2622
+ const v2 = toV2Record(record);
2623
+ const refs = parseFileRefs(v2.file_refs);
2624
+ const newRef = createFileRef(options);
2625
+ const updatedRefs = [...refs, newRef];
2626
+ await this.crud.updateById(fileId, {
2627
+ file_refs: stringifyFileRefs(updatedRefs),
2628
+ ref_count: updatedRefs.length,
2629
+ status: "active",
2630
+ changed_at: this.now()
2631
+ });
2632
+ this.logger?.debug?.("Added ref", { fileId, ref_id: newRef.ref_id });
2633
+ return { ref_id: newRef.ref_id };
2634
+ } catch (error) {
2635
+ this.logError("addRef", error);
2636
+ return null;
2637
+ }
2638
+ }
2639
+ /**
2640
+ * Remove a specific reference from a file
2641
+ * @returns Remaining ref count, or null on failure
2642
+ */
2643
+ async removeRef(fileId, refId) {
2644
+ try {
2645
+ const record = await this.findById(fileId);
2646
+ if (!record) {
2647
+ this.logger?.warn?.("Cannot remove ref: file not found", { fileId });
2648
+ return null;
2649
+ }
2650
+ const v2 = toV2Record(record);
2651
+ const refs = parseFileRefs(v2.file_refs);
2652
+ const updatedRefs = removeRefFromArray(refs, refId);
2653
+ await this.crud.updateById(fileId, {
2654
+ file_refs: stringifyFileRefs(updatedRefs),
2655
+ ref_count: updatedRefs.length,
2656
+ changed_at: this.now()
2657
+ });
2658
+ this.logger?.debug?.("Removed ref", { fileId, refId, remaining: updatedRefs.length });
2659
+ return { remaining_refs: updatedRefs.length };
2660
+ } catch (error) {
2661
+ this.logError("removeRef", error);
2662
+ return null;
2663
+ }
2664
+ }
2665
+ /**
2666
+ * Remove references matching criteria across all records.
2667
+ * Scans all records and removes matching refs (AND semantics).
2668
+ */
2669
+ async removeRefsByCriteria(criteria) {
2670
+ try {
2671
+ let totalRemoved = 0;
2672
+ if (criteria.file_id) {
2673
+ const record = await this.findById(criteria.file_id);
2674
+ if (record) {
2675
+ const removed = await this.removeRefsFromRecord(record, criteria);
2676
+ totalRemoved += removed;
2677
+ }
2678
+ return { removed_count: totalRemoved };
2679
+ }
2680
+ let records;
2681
+ if (criteria.scope_id) {
2682
+ records = await this.crud.findBy({ scope_id: criteria.scope_id });
2683
+ } else {
2684
+ records = await this.crud.list();
2685
+ }
2686
+ for (const record of records) {
2687
+ const removed = await this.removeRefsFromRecord(record, criteria);
2688
+ totalRemoved += removed;
2689
+ }
2690
+ this.logger?.debug?.("Removed refs by criteria", { criteria, removed_count: totalRemoved });
2691
+ return { removed_count: totalRemoved };
2692
+ } catch (error) {
2693
+ this.logError("removeRefsByCriteria", error);
2694
+ return { removed_count: 0 };
2695
+ }
2696
+ }
2697
+ /**
2698
+ * Helper: remove matching refs from a single record
2699
+ */
2700
+ async removeRefsFromRecord(record, criteria) {
2701
+ const v2 = toV2Record(record);
2702
+ const refs = parseFileRefs(v2.file_refs);
2703
+ if (refs.length === 0) return 0;
2704
+ const updatedRefs = removeRefsByCriteriaFromArray(refs, {
2705
+ entity_type: criteria.entity_type,
2706
+ entity_id: criteria.entity_id
2707
+ });
2708
+ const removedCount = refs.length - updatedRefs.length;
2709
+ if (removedCount > 0) {
2710
+ await this.crud.updateById(record.id, {
2711
+ file_refs: stringifyFileRefs(updatedRefs),
2712
+ ref_count: updatedRefs.length,
2713
+ changed_at: this.now()
2714
+ });
2715
+ }
2716
+ return removedCount;
2717
+ }
2718
+ /**
2719
+ * Get all references for a file
2720
+ */
2721
+ async getRefs(fileId) {
2722
+ try {
2723
+ const record = await this.findById(fileId);
2724
+ if (!record) return null;
2725
+ return parseFileRefs(toV2Record(record).file_refs);
2726
+ } catch (error) {
2727
+ this.logError("getRefs", error);
2728
+ return null;
2729
+ }
2730
+ }
2731
+ /**
2732
+ * Get a file with its status and parsed refs
2733
+ */
2734
+ async getFileWithStatus(fileId) {
2735
+ try {
2736
+ const record = await this.findById(fileId);
2737
+ if (!record) return null;
2738
+ return buildFileWithStatus(record);
2739
+ } catch (error) {
2740
+ this.logError("getFileWithStatus", error);
2741
+ return null;
2742
+ }
2743
+ }
2744
+ /**
2745
+ * Get multiple files with status
2746
+ */
2747
+ async getFilesWithStatus(fileIds) {
2748
+ try {
2749
+ const records = await this.findByIds(fileIds);
2750
+ return records.map(buildFileWithStatus);
2751
+ } catch (error) {
2752
+ this.logError("getFilesWithStatus", error);
2753
+ return [];
2754
+ }
2755
+ }
2756
+ /**
2757
+ * Update the status of a file
2758
+ */
2759
+ async updateStatus(fileId, status) {
2760
+ try {
2761
+ const patch = {
2762
+ status,
2763
+ changed_at: this.now()
2764
+ };
2765
+ if (status === "soft_deleted") {
2766
+ patch.deleted_at = this.now();
2767
+ }
2768
+ await this.crud.updateById(fileId, patch);
2769
+ this.logger?.debug?.("Updated status", { fileId, status });
2770
+ return true;
2771
+ } catch (error) {
2772
+ this.logError("updateStatus", error);
2773
+ return false;
2774
+ }
2775
+ }
2776
+ /**
2777
+ * Soft-delete a file (set status to soft_deleted, record deleted_at)
2778
+ */
2779
+ async softDelete(fileId) {
2780
+ return this.updateStatus(fileId, "soft_deleted");
2781
+ }
2782
+ /**
2783
+ * Update specific V2 fields on a record
2784
+ */
2785
+ async updateFields(fileId, fields) {
2786
+ try {
2787
+ await this.crud.updateById(fileId, {
2788
+ ...fields,
2789
+ changed_at: this.now()
2790
+ });
2791
+ this.logger?.debug?.("Updated fields", { fileId, fields: Object.keys(fields) });
2792
+ return true;
2793
+ } catch (error) {
2794
+ this.logError("updateFields", error);
2795
+ return false;
2796
+ }
2797
+ }
2798
+ /**
2799
+ * Find orphaned files (zero references)
2800
+ */
2801
+ async findOrphaned(options) {
2802
+ try {
2803
+ let records;
2804
+ if (options?.scope_id) {
2805
+ records = await this.crud.findBy({ scope_id: options.scope_id });
2806
+ } else if (options?.storage_type) {
2807
+ records = await this.crud.findBy({ storage_type: options.storage_type });
2808
+ } else {
2809
+ records = await this.crud.list();
2810
+ }
2811
+ let orphaned = records.map(buildFileWithStatus).filter((f) => f.is_orphaned && f.record.status !== "soft_deleted");
2812
+ if (options?.olderThanMs) {
2813
+ const cutoff = Date.now() - options.olderThanMs;
2814
+ orphaned = orphaned.filter((f) => {
2815
+ const createdAt = new Date(f.record.created_at).getTime();
2816
+ return createdAt < cutoff;
2817
+ });
2818
+ }
2819
+ if (options?.limit && orphaned.length > options.limit) {
2820
+ orphaned = orphaned.slice(0, options.limit);
2821
+ }
2822
+ return orphaned;
2823
+ } catch (error) {
2824
+ this.logError("findOrphaned", error);
2825
+ return [];
2826
+ }
2827
+ }
2510
2828
  };
2511
2829
  function createFileMetadataService(crudService, options) {
2512
2830
  return new FileMetadataService(crudService, options);
@@ -2876,6 +3194,132 @@ var TrackedFileManager = class extends FileManager {
2876
3194
  const record = await this.metadataService.findByPath(path3, this.getStorageType());
2877
3195
  return record?.file_size ?? null;
2878
3196
  }
3197
+ // ============ Reference Tracking Methods (V2) ============
3198
+ /**
3199
+ * Add a reference to a file
3200
+ */
3201
+ async addRef(fileId, options) {
3202
+ if (!this.isTrackingEnabled()) return null;
3203
+ return this.metadataService.addRef(fileId, options);
3204
+ }
3205
+ /**
3206
+ * Remove a reference from a file
3207
+ */
3208
+ async removeRef(fileId, refId) {
3209
+ if (!this.isTrackingEnabled()) return null;
3210
+ return this.metadataService.removeRef(fileId, refId);
3211
+ }
3212
+ /**
3213
+ * Get a file by its database ID with status information
3214
+ */
3215
+ async getFileById(fileId) {
3216
+ if (!this.isTrackingEnabled()) return null;
3217
+ return this.metadataService.getFileWithStatus(fileId);
3218
+ }
3219
+ /**
3220
+ * Get multiple files by their database IDs with status information
3221
+ */
3222
+ async getFilesById(fileIds) {
3223
+ if (!this.isTrackingEnabled()) return [];
3224
+ return this.metadataService.getFilesWithStatus(fileIds);
3225
+ }
3226
+ /**
3227
+ * Soft-delete a file (marks as soft_deleted, does not remove physical file)
3228
+ */
3229
+ async softDeleteFile(fileId) {
3230
+ if (!this.isTrackingEnabled()) return false;
3231
+ return this.metadataService.softDelete(fileId);
3232
+ }
3233
+ /**
3234
+ * Find orphaned files (files with zero references)
3235
+ */
3236
+ async findOrphanedFiles(options) {
3237
+ if (!this.isTrackingEnabled()) return [];
3238
+ return this.metadataService.findOrphaned(options);
3239
+ }
3240
+ /**
3241
+ * Cleanup orphaned files — removes physical files and/or DB records
3242
+ */
3243
+ async cleanupOrphanedFiles(options) {
3244
+ if (!this.isTrackingEnabled()) return { cleaned: 0, errors: [] };
3245
+ const orphaned = await this.metadataService.findOrphaned(options);
3246
+ let cleaned = 0;
3247
+ const errors = [];
3248
+ for (const file of orphaned) {
3249
+ try {
3250
+ if (options?.softDeleteOnly) {
3251
+ await this.metadataService.softDelete(file.record.id);
3252
+ cleaned++;
3253
+ continue;
3254
+ }
3255
+ const deletePhysical = options?.deletePhysicalFiles !== false;
3256
+ if (deletePhysical) {
3257
+ const deleteResult = await super.deleteFile(file.record.file_path);
3258
+ if (!deleteResult.success) {
3259
+ if (deleteResult.error && !deleteResult.error.includes("not found")) {
3260
+ errors.push(`Failed to delete physical file ${file.record.file_path}: ${deleteResult.error}`);
3261
+ }
3262
+ }
3263
+ }
3264
+ await this.metadataService.recordDelete(file.record.file_path, file.record.storage_type);
3265
+ cleaned++;
3266
+ } catch (error) {
3267
+ const msg = error instanceof Error ? error.message : String(error);
3268
+ errors.push(`Error cleaning up ${file.record.file_path}: ${msg}`);
3269
+ }
3270
+ }
3271
+ return { cleaned, errors };
3272
+ }
3273
+ /**
3274
+ * Verify that a file's physical storage exists and update its status
3275
+ */
3276
+ async verifyFileExistence(fileId) {
3277
+ if (!this.isTrackingEnabled()) return null;
3278
+ const record = await this.metadataService.findById(fileId);
3279
+ if (!record) return null;
3280
+ const fileExists = await this.exists(record.file_path);
3281
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString();
3282
+ await this.metadataService.updateFields(fileId, {
3283
+ storage_verified_at: timestamp,
3284
+ ...fileExists ? {} : { status: "missing" }
3285
+ });
3286
+ return fileExists;
3287
+ }
3288
+ /**
3289
+ * Upload a file and optionally add an initial reference
3290
+ */
3291
+ async uploadFileWithRef(source, remotePath, options) {
3292
+ const uploadResult = await this.uploadFile(source, remotePath, {
3293
+ ...options,
3294
+ awaitRecording: true
3295
+ });
3296
+ if (!uploadResult.success || !uploadResult.data || !this.isTrackingEnabled()) {
3297
+ return uploadResult;
3298
+ }
3299
+ const record = await this.metadataService.findByPath(remotePath, this.getStorageType());
3300
+ if (!record) {
3301
+ return uploadResult;
3302
+ }
3303
+ const fieldsToUpdate = {};
3304
+ if (options?.scope_id) fieldsToUpdate.scope_id = options.scope_id;
3305
+ if (options?.uploaded_by) fieldsToUpdate.uploaded_by = options.uploaded_by;
3306
+ if (Object.keys(fieldsToUpdate).length > 0) {
3307
+ await this.metadataService.updateFields(record.id, fieldsToUpdate);
3308
+ }
3309
+ let refId;
3310
+ if (options?.ref) {
3311
+ const refResult = await this.metadataService.addRef(record.id, options.ref);
3312
+ if (refResult) refId = refResult.ref_id;
3313
+ }
3314
+ return {
3315
+ success: true,
3316
+ data: {
3317
+ ...uploadResult.data,
3318
+ file_id: record.id,
3319
+ ref_id: refId
3320
+ }
3321
+ };
3322
+ }
2879
3323
  };
2880
3324
  function createTrackedFileManager(options) {
2881
3325
  return new TrackedFileManager(options);
@@ -3860,13 +4304,25 @@ var HAZO_FILES_TABLE_SCHEMA = {
3860
4304
  storage_type TEXT NOT NULL,
3861
4305
  file_hash TEXT,
3862
4306
  file_size INTEGER,
3863
- file_changed_at TEXT
4307
+ file_changed_at TEXT,
4308
+ file_refs TEXT DEFAULT '[]',
4309
+ ref_count INTEGER DEFAULT 0,
4310
+ status TEXT DEFAULT 'active',
4311
+ scope_id TEXT,
4312
+ uploaded_by TEXT,
4313
+ storage_verified_at TEXT,
4314
+ deleted_at TEXT,
4315
+ original_filename TEXT
3864
4316
  )`,
3865
4317
  indexes: [
3866
4318
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
3867
4319
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_storage ON hazo_files (storage_type)",
3868
4320
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_hazo_files_path_storage ON hazo_files (file_path, storage_type)",
3869
- "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)"
4321
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)",
4322
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4323
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4324
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4325
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
3870
4326
  ]
3871
4327
  },
3872
4328
  postgres: {
@@ -3881,13 +4337,25 @@ var HAZO_FILES_TABLE_SCHEMA = {
3881
4337
  storage_type TEXT NOT NULL,
3882
4338
  file_hash TEXT,
3883
4339
  file_size BIGINT,
3884
- file_changed_at TIMESTAMP WITH TIME ZONE
4340
+ file_changed_at TIMESTAMP WITH TIME ZONE,
4341
+ file_refs TEXT DEFAULT '[]',
4342
+ ref_count INTEGER DEFAULT 0,
4343
+ status TEXT DEFAULT 'active',
4344
+ scope_id UUID,
4345
+ uploaded_by UUID,
4346
+ storage_verified_at TIMESTAMP WITH TIME ZONE,
4347
+ deleted_at TIMESTAMP WITH TIME ZONE,
4348
+ original_filename TEXT
3885
4349
  )`,
3886
4350
  indexes: [
3887
4351
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
3888
4352
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_storage ON hazo_files (storage_type)",
3889
4353
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_hazo_files_path_storage ON hazo_files (file_path, storage_type)",
3890
- "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)"
4354
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)",
4355
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4356
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4357
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4358
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
3891
4359
  ]
3892
4360
  },
3893
4361
  columns: [
@@ -3901,7 +4369,15 @@ var HAZO_FILES_TABLE_SCHEMA = {
3901
4369
  "storage_type",
3902
4370
  "file_hash",
3903
4371
  "file_size",
3904
- "file_changed_at"
4372
+ "file_changed_at",
4373
+ "file_refs",
4374
+ "ref_count",
4375
+ "status",
4376
+ "scope_id",
4377
+ "uploaded_by",
4378
+ "storage_verified_at",
4379
+ "deleted_at",
4380
+ "original_filename"
3905
4381
  ]
3906
4382
  };
3907
4383
  function getSchemaForTable(tableName, dbType) {
@@ -3914,6 +4390,78 @@ function getSchemaForTable(tableName, dbType) {
3914
4390
  )
3915
4391
  };
3916
4392
  }
4393
/**
 * V2 (reference tracking) migration for the files table.
 *
 * For each dialect it holds the ALTER statements that add the V2 columns,
 * the CREATE INDEX statements for them, and a backfill UPDATE that fills
 * NULL `file_refs`, `ref_count` and `status` with their defaults. The
 * Postgres ALTER statements use `ADD COLUMN IF NOT EXISTS`; the SQLite
 * ones do not.
 */
var HAZO_FILES_MIGRATION_V2 = (() => {
  // [column name, SQLite definition, Postgres definition]
  const columnSpecs = [
    ["file_refs", "TEXT DEFAULT '[]'", "TEXT DEFAULT '[]'"],
    ["ref_count", "INTEGER DEFAULT 0", "INTEGER DEFAULT 0"],
    ["status", "TEXT DEFAULT 'active'", "TEXT DEFAULT 'active'"],
    ["scope_id", "TEXT", "UUID"],
    ["uploaded_by", "TEXT", "UUID"],
    ["storage_verified_at", "TEXT", "TIMESTAMP WITH TIME ZONE"],
    ["deleted_at", "TEXT", "TIMESTAMP WITH TIME ZONE"],
    ["original_filename", "TEXT", "TEXT"]
  ];
  // A fresh array per dialect so the two lists stay independent objects.
  const buildIndexes = () => [
    "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
    "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
    "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
    "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
  ];
  const backfill = `UPDATE hazo_files SET
        file_refs = COALESCE(file_refs, '[]'),
        ref_count = COALESCE(ref_count, 0),
        status = COALESCE(status, 'active')
      WHERE file_refs IS NULL OR ref_count IS NULL OR status IS NULL`;
  return {
    tableName: HAZO_FILES_DEFAULT_TABLE_NAME,
    sqlite: {
      alterStatements: columnSpecs.map(
        ([column, sqliteDef]) => `ALTER TABLE hazo_files ADD COLUMN ${column} ${sqliteDef}`
      ),
      indexes: buildIndexes(),
      backfill
    },
    postgres: {
      alterStatements: columnSpecs.map(
        ([column, , postgresDef]) =>
          `ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS ${column} ${postgresDef}`
      ),
      indexes: buildIndexes(),
      backfill
    },
    newColumns: columnSpecs.map(([column]) => column)
  };
})();
4452
/**
 * Return the V2 migration statements for a custom table name.
 *
 * Every occurrence of the default table name in the ALTER, CREATE INDEX
 * and backfill statements is replaced by `tableName` (this also renames
 * the indexes, whose names contain the default table name).
 *
 * The replacement uses a function so `tableName` is inserted literally;
 * with a plain replacement string, sequences such as `$&` or `$1` would be
 * expanded by `String.prototype.replace`.
 *
 * SECURITY: `tableName` is interpolated into SQL text without quoting or
 * validation. Only pass trusted identifiers.
 *
 * @param tableName Table name to substitute for the default one.
 * @param dbType Dialect key, "sqlite" or "postgres".
 * @returns {{alterStatements: string[], indexes: string[], backfill: string}}
 * @throws {TypeError} When `dbType` is not a supported dialect.
 */
function getMigrationForTable(tableName, dbType) {
  const migration = HAZO_FILES_MIGRATION_V2[dbType];
  if (!migration || !Array.isArray(migration.alterStatements)) {
    throw new TypeError(`Unsupported database type for V2 migration: ${String(dbType)}`);
  }
  const defaultName = HAZO_FILES_MIGRATION_V2.tableName;
  // Build the pattern once; escape it so the name is matched literally.
  const pattern = new RegExp(defaultName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g");
  const rename = (sql) => sql.replace(pattern, () => tableName);
  return {
    alterStatements: migration.alterStatements.map(rename),
    indexes: migration.indexes.map(rename),
    backfill: rename(migration.backfill)
  };
}
3917
4465
  var HAZO_FILES_NAMING_DEFAULT_TABLE_NAME = "hazo_files_naming";
3918
4466
  var HAZO_FILES_NAMING_TABLE_SCHEMA = {
3919
4467
  tableName: HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
@@ -3970,6 +4518,24 @@ function getNamingSchemaForTable(tableName, dbType) {
3970
4518
  )
3971
4519
  };
3972
4520
  }
4521
+
4522
+ // src/migrations/add-reference-tracking.ts
4523
/**
 * Apply the V2 (reference tracking) schema migration.
 *
 * Runs each ALTER statement, then each CREATE INDEX statement, through
 * `executor.run`. Re-running is tolerated: an ALTER that fails because the
 * column is already there ("duplicate column" / "already exists" in the
 * error message) is ignored. Any other ALTER failure is rethrown rather
 * than silently swallowed, so problems such as a missing table surface at
 * the statement that caused them.
 *
 * @param executor Object with an async `run(sql)` method.
 * @param dbType Dialect key, "sqlite" or "postgres".
 * @param tableName Optional custom table name; the default table is used
 *   when omitted.
 * @returns {Promise<void>}
 */
async function migrateToV2(executor, dbType, tableName) {
  const migration = tableName ? getMigrationForTable(tableName, dbType) : HAZO_FILES_MIGRATION_V2[dbType];
  const columnAlreadyPresent = /duplicate column|already exists/i;
  for (const stmt of migration.alterStatements) {
    try {
      await executor.run(stmt);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const causeMessage = error instanceof Error && error.cause instanceof Error ? error.cause.message : "";
      if (!columnAlreadyPresent.test(message) && !columnAlreadyPresent.test(causeMessage)) {
        throw error;
      }
      // Column was added by an earlier run; nothing to do.
    }
  }
  for (const idx of migration.indexes) {
    await executor.run(idx);
  }
}
4535
/**
 * Run the V2 backfill statement, which fills NULL `file_refs`,
 * `ref_count` and `status` values with their defaults.
 *
 * @param executor Object with an async `run(sql)` method.
 * @param dbType Dialect key, "sqlite" or "postgres".
 * @param tableName Optional custom table name; the default table is used
 *   when omitted.
 * @returns {Promise<void>}
 */
async function backfillV2Defaults(executor, dbType, tableName) {
  let migration;
  if (tableName) {
    migration = getMigrationForTable(tableName, dbType);
  } else {
    migration = HAZO_FILES_MIGRATION_V2[dbType];
  }
  const backfillSql = migration.backfill;
  await executor.run(backfillSql);
}
3973
4539
  export {
3974
4540
  ALL_SYSTEM_VARIABLES,
3975
4541
  AuthenticationError,
@@ -3986,6 +4552,7 @@ export {
3986
4552
  GoogleDriveAuth,
3987
4553
  GoogleDriveModule,
3988
4554
  HAZO_FILES_DEFAULT_TABLE_NAME,
4555
+ HAZO_FILES_MIGRATION_V2,
3989
4556
  HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
3990
4557
  HAZO_FILES_NAMING_TABLE_SCHEMA,
3991
4558
  HAZO_FILES_TABLE_SCHEMA,
@@ -4003,6 +4570,8 @@ export {
4003
4570
  TrackedFileManager,
4004
4571
  UploadExtractService,
4005
4572
  addExtractionToFileData,
4573
+ backfillV2Defaults,
4574
+ buildFileWithStatus,
4006
4575
  clearExtractions,
4007
4576
  clonePattern,
4008
4577
  computeFileHash,
@@ -4015,6 +4584,7 @@ export {
4015
4584
  createFileItem,
4016
4585
  createFileManager,
4017
4586
  createFileMetadataService,
4587
+ createFileRef,
4018
4588
  createFolderItem,
4019
4589
  createGoogleDriveAuth,
4020
4590
  createGoogleDriveModule,
@@ -4037,6 +4607,7 @@ export {
4037
4607
  generateExtractionId,
4038
4608
  generateId,
4039
4609
  generatePreviewName,
4610
+ generateRefId,
4040
4611
  generateSampleConfig,
4041
4612
  generateSegmentId,
4042
4613
  getBaseName,
@@ -4050,6 +4621,7 @@ export {
4050
4621
  getFileCategory,
4051
4622
  getFileMetadataValues,
4052
4623
  getMergedData,
4624
+ getMigrationForTable,
4053
4625
  getMimeType,
4054
4626
  getNameWithoutExtension,
4055
4627
  getNamingSchemaForTable,
@@ -4081,20 +4653,26 @@ export {
4081
4653
  joinPath,
4082
4654
  loadConfig,
4083
4655
  loadConfigAsync,
4656
+ migrateToV2,
4084
4657
  normalizePath,
4085
4658
  parseConfig,
4086
4659
  parseFileData,
4660
+ parseFileRefs,
4087
4661
  parsePatternString,
4088
4662
  patternToString,
4089
4663
  recalculateMergedData,
4090
4664
  registerModule,
4091
4665
  removeExtractionById,
4092
4666
  removeExtractionByIndex,
4667
+ removeRefFromArray,
4668
+ removeRefsByCriteriaFromArray,
4093
4669
  sanitizeFilename,
4094
4670
  saveConfig,
4095
4671
  sortItems,
4096
4672
  stringifyFileData,
4673
+ stringifyFileRefs,
4097
4674
  successResult,
4675
+ toV2Record,
4098
4676
  updateExtractionById,
4099
4677
  validateExtractionData,
4100
4678
  validateFileDataStructure,