hazo_files 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -45,6 +45,7 @@ __export(index_exports, {
45
45
  GoogleDriveAuth: () => GoogleDriveAuth,
46
46
  GoogleDriveModule: () => GoogleDriveModule,
47
47
  HAZO_FILES_DEFAULT_TABLE_NAME: () => HAZO_FILES_DEFAULT_TABLE_NAME,
48
+ HAZO_FILES_MIGRATION_V2: () => HAZO_FILES_MIGRATION_V2,
48
49
  HAZO_FILES_NAMING_DEFAULT_TABLE_NAME: () => HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
49
50
  HAZO_FILES_NAMING_TABLE_SCHEMA: () => HAZO_FILES_NAMING_TABLE_SCHEMA,
50
51
  HAZO_FILES_TABLE_SCHEMA: () => HAZO_FILES_TABLE_SCHEMA,
@@ -62,6 +63,8 @@ __export(index_exports, {
62
63
  TrackedFileManager: () => TrackedFileManager,
63
64
  UploadExtractService: () => UploadExtractService,
64
65
  addExtractionToFileData: () => addExtractionToFileData,
66
+ backfillV2Defaults: () => backfillV2Defaults,
67
+ buildFileWithStatus: () => buildFileWithStatus,
65
68
  clearExtractions: () => clearExtractions,
66
69
  clonePattern: () => clonePattern,
67
70
  computeFileHash: () => computeFileHash,
@@ -74,6 +77,7 @@ __export(index_exports, {
74
77
  createFileItem: () => createFileItem,
75
78
  createFileManager: () => createFileManager,
76
79
  createFileMetadataService: () => createFileMetadataService,
80
+ createFileRef: () => createFileRef,
77
81
  createFolderItem: () => createFolderItem,
78
82
  createGoogleDriveAuth: () => createGoogleDriveAuth,
79
83
  createGoogleDriveModule: () => createGoogleDriveModule,
@@ -96,6 +100,7 @@ __export(index_exports, {
96
100
  generateExtractionId: () => generateExtractionId,
97
101
  generateId: () => generateId,
98
102
  generatePreviewName: () => generatePreviewName,
103
+ generateRefId: () => generateRefId,
99
104
  generateSampleConfig: () => generateSampleConfig,
100
105
  generateSegmentId: () => generateSegmentId,
101
106
  getBaseName: () => getBaseName,
@@ -109,6 +114,7 @@ __export(index_exports, {
109
114
  getFileCategory: () => getFileCategory,
110
115
  getFileMetadataValues: () => getFileMetadataValues,
111
116
  getMergedData: () => getMergedData,
117
+ getMigrationForTable: () => getMigrationForTable,
112
118
  getMimeType: () => getMimeType,
113
119
  getNameWithoutExtension: () => getNameWithoutExtension,
114
120
  getNamingSchemaForTable: () => getNamingSchemaForTable,
@@ -140,20 +146,26 @@ __export(index_exports, {
140
146
  joinPath: () => joinPath,
141
147
  loadConfig: () => loadConfig,
142
148
  loadConfigAsync: () => loadConfigAsync,
149
+ migrateToV2: () => migrateToV2,
143
150
  normalizePath: () => normalizePath,
144
151
  parseConfig: () => parseConfig,
145
152
  parseFileData: () => parseFileData,
153
+ parseFileRefs: () => parseFileRefs,
146
154
  parsePatternString: () => parsePatternString,
147
155
  patternToString: () => patternToString,
148
156
  recalculateMergedData: () => recalculateMergedData,
149
157
  registerModule: () => registerModule,
150
158
  removeExtractionById: () => removeExtractionById,
151
159
  removeExtractionByIndex: () => removeExtractionByIndex,
160
+ removeRefFromArray: () => removeRefFromArray,
161
+ removeRefsByCriteriaFromArray: () => removeRefsByCriteriaFromArray,
152
162
  sanitizeFilename: () => sanitizeFilename,
153
163
  saveConfig: () => saveConfig,
154
164
  sortItems: () => sortItems,
155
165
  stringifyFileData: () => stringifyFileData,
166
+ stringifyFileRefs: () => stringifyFileRefs,
156
167
  successResult: () => successResult,
168
+ toV2Record: () => toV2Record,
157
169
  updateExtractionById: () => updateExtractionById,
158
170
  validateExtractionData: () => validateExtractionData,
159
171
  validateFileDataStructure: () => validateFileDataStructure,
@@ -778,7 +790,7 @@ var LocalStorageModule = class extends BaseStorageModule {
778
790
  await super.initialize(config);
779
791
  const localConfig = this.getProviderConfig();
780
792
  this.basePath = path2.resolve(localConfig.basePath);
781
- this.allowedExtensions = localConfig.allowedExtensions || [];
793
+ this.allowedExtensions = (localConfig.allowedExtensions || []).map((ext) => ext.trim().replace(/^\./, "").toLowerCase()).filter((ext) => ext.length > 0);
782
794
  this.maxFileSize = localConfig.maxFileSize || 0;
783
795
  await fs2.promises.mkdir(this.basePath, { recursive: true });
784
796
  }
@@ -2269,6 +2281,70 @@ function updateExtractionById(fileData, id, newData, options = {}) {
2269
2281
  };
2270
2282
  }
2271
2283
 
2284
+ // src/common/ref-utils.ts
2285
/**
 * Generate a reference identifier of the form `ref_<epoch ms>_<suffix>`.
 *
 * The suffix is always 9 base-36 characters. `Math.random().toString(36)` can
 * produce fewer than 9 fractional digits (0.5 becomes "0.i"), so the slice is
 * right-padded with "0" to keep the ID length stable.
 * Math.random() is not cryptographically secure; do not treat the ID as a secret.
 *
 * @returns {string} A new reference ID.
 */
function generateRefId() {
  const suffix = Math.random().toString(36).substring(2, 11).padEnd(9, "0");
  return `ref_${Date.now()}_${suffix}`;
}
2288
/**
 * Parse the JSON text stored in a `file_refs` column into an array of refs.
 *
 * Never throws: empty input, invalid JSON, or JSON that is not an array all
 * yield an empty array. Entries that are not plain objects (null, numbers,
 * strings, nested arrays) are dropped so callers can safely read
 * `ref.ref_id` / `ref.entity_type` on every element.
 *
 * @param {string|null|undefined} json - Serialized refs.
 * @returns {object[]} Parsed reference objects (possibly empty).
 */
function parseFileRefs(json) {
  if (!json) return [];
  try {
    const parsed = JSON.parse(json);
    if (!Array.isArray(parsed)) return [];
    return parsed.filter(
      (entry) => entry !== null && typeof entry === "object" && !Array.isArray(entry)
    );
  } catch {
    // Corrupt column content is treated as "no references".
    return [];
  }
}
2298
/**
 * Serialize refs for storage in the `file_refs` column.
 *
 * Always returns a JSON array string: a non-array argument (undefined, null,
 * anything else) is stored as "[]" instead of `undefined` or "null", which
 * matches the column default.
 *
 * @param {object[]} refs - References to serialize.
 * @returns {string} JSON text of the array.
 */
function stringifyFileRefs(refs) {
  return JSON.stringify(Array.isArray(refs) ? refs : []);
}
2301
/**
 * Build a new file reference object.
 *
 * `ref_id` comes from generateRefId() and `created_at` is the current time as
 * an ISO string. `created_by`, `visibility`, `label` and `metadata` are copied
 * from `options` only when they are truthy.
 *
 * @param {object} options - Must carry `entity_type` and `entity_id`.
 * @returns {object} The new reference.
 */
function createFileRef(options) {
  const fileRef = {
    ref_id: generateRefId(),
    entity_type: options.entity_type,
    entity_id: options.entity_id,
    created_at: new Date().toISOString()
  };
  const optionalKeys = ["created_by", "visibility", "label", "metadata"];
  for (const key of optionalKeys) {
    const value = options[key];
    if (value) {
      fileRef[key] = value;
    }
  }
  return fileRef;
}
2314
/**
 * Return a copy of `refs` without the entry whose `ref_id` equals `refId`.
 * The input array is not modified.
 *
 * @param {object[]} refs - Current references.
 * @param {string} refId - ID of the reference to drop.
 * @returns {object[]} A new array of the remaining references.
 */
function removeRefFromArray(refs, refId) {
  const isOtherRef = (entry) => entry.ref_id !== refId;
  return refs.filter(isOtherRef);
}
2317
/**
 * Return a copy of `refs` without the entries that match `criteria`.
 *
 * A ref is removed only when it matches every criterion that is set
 * (AND semantics). A criterion with a falsy value is ignored, and when
 * neither `entity_type` nor `entity_id` is set every ref is kept.
 *
 * @param {object[]} refs - Current references.
 * @param {{entity_type?: string, entity_id?: string}} criteria - Match filter.
 * @returns {object[]} A new array of the references that were kept.
 */
function removeRefsByCriteriaFromArray(refs, criteria) {
  const wantedType = criteria.entity_type;
  const wantedId = criteria.entity_id;
  const hasCriteria = Boolean(wantedType) || Boolean(wantedId);
  return refs.filter((ref) => {
    const matchesAll =
      (!wantedType || ref.entity_type === wantedType) &&
      (!wantedId || ref.entity_id === wantedId);
    return !(hasCriteria && matchesAll);
  });
}
2324
/**
 * Normalize a stored record to the V2 shape, filling defaults for V2 columns
 * that are missing or have an unexpected type.
 *
 * Defaults: `file_refs` "[]", `ref_count` 0, `status` "active"; every other
 * V2 field falls back to null. All original properties are preserved.
 *
 * The timestamp fields (`storage_verified_at`, `deleted_at`) also accept
 * `Date` instances and convert them to ISO strings. The Postgres schema
 * declares them as TIMESTAMP WITH TIME ZONE, which some drivers return as
 * `Date` objects; without this they would be reset to null.
 * NOTE(review): which driver the executor wraps is not visible here - confirm.
 *
 * @param {object} record - Row as returned by the CRUD service.
 * @returns {object} A new object with all V2 fields present.
 */
function toV2Record(record) {
  const asText = (value) => (typeof value === "string" ? value : null);
  const asTimestamp = (value) => {
    if (typeof value === "string") return value;
    // An invalid Date would make toISOString() throw, so treat it as absent.
    if (value instanceof Date && !Number.isNaN(value.getTime())) return value.toISOString();
    return null;
  };
  return {
    ...record,
    file_refs: typeof record.file_refs === "string" ? record.file_refs : "[]",
    ref_count: typeof record.ref_count === "number" ? record.ref_count : 0,
    status: typeof record.status === "string" ? record.status : "active",
    scope_id: asText(record.scope_id),
    uploaded_by: asText(record.uploaded_by),
    original_filename: asText(record.original_filename),
    storage_verified_at: asTimestamp(record.storage_verified_at),
    deleted_at: asTimestamp(record.deleted_at)
  };
}
2338
/**
 * Combine a record with its parsed references and an orphan flag.
 *
 * @param {object} record - Row as returned by the CRUD service.
 * @returns {{record: object, refs: object[], is_orphaned: boolean}} The
 *   V2-normalized record, its parsed refs, and `is_orphaned` (true when the
 *   record has no refs).
 */
function buildFileWithStatus(record) {
  const normalized = toV2Record(record);
  const refs = parseFileRefs(normalized.file_refs);
  const is_orphaned = refs.length === 0;
  return { record: normalized, refs, is_orphaned };
}
2347
+
2272
2348
  // src/services/file-metadata-service.ts
2273
2349
  var FileMetadataService = class {
2274
2350
  constructor(crudService, options = {}) {
@@ -2310,9 +2386,15 @@ var FileMetadataService = class {
2310
2386
  changed_at: timestamp,
2311
2387
  file_hash: input.file_hash || null,
2312
2388
  file_size: input.file_size ?? null,
2313
- file_changed_at: input.file_hash ? timestamp : null
2314
- // Set content changed time if hash is provided
2389
+ file_changed_at: input.file_hash ? timestamp : null,
2390
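+ // V2 defaults (file_refs, ref_count, status) are always written; the optional V2 fields below are only set when provided. NOTE(review): this presumably requires the V2 columns to exist - confirm behavior on pre-migration DBs.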
2391
+ file_refs: "[]",
2392
+ ref_count: 0,
2393
+ status: "active"
2315
2394
  };
2395
+ if (input.scope_id !== void 0) record.scope_id = input.scope_id;
2396
+ if (input.uploaded_by !== void 0) record.uploaded_by = input.uploaded_by;
2397
+ if (input.original_filename !== void 0) record.original_filename = input.original_filename;
2316
2398
  const results = await this.crud.insert(record);
2317
2399
  this.logger?.debug?.("Recorded file upload", { path: input.file_path });
2318
2400
  return results[0] || null;
@@ -2671,6 +2753,254 @@ var FileMetadataService = class {
2671
2753
  return false;
2672
2754
  }
2673
2755
  }
2756
+ // ============================================
2757
+ // Reference Tracking Methods (V2)
2758
+ // ============================================
2759
+ /**
2760
+ * Find a record by ID
2761
+ */
2762
+ async findById(id) {
2763
+ try {
2764
+ const results = await this.crud.findBy({ id });
2765
+ return results[0] || null;
2766
+ } catch (error) {
2767
+ this.logError("findById", error);
2768
+ return null;
2769
+ }
2770
+ }
2771
+ /**
2772
+ * Find multiple records by IDs
2773
+ */
2774
+ async findByIds(ids) {
2775
+ try {
2776
+ const results = [];
2777
+ for (const id of ids) {
2778
+ const record = await this.findById(id);
2779
+ if (record) results.push(record);
2780
+ }
2781
+ return results;
2782
+ } catch (error) {
2783
+ this.logError("findByIds", error);
2784
+ return [];
2785
+ }
2786
+ }
2787
+ /**
2788
+ * Add a reference to a file
2789
+ * @returns The new ref_id, or null on failure
2790
+ */
2791
+ async addRef(fileId, options) {
2792
+ try {
2793
+ const record = await this.findById(fileId);
2794
+ if (!record) {
2795
+ this.logger?.warn?.("Cannot add ref: file not found", { fileId });
2796
+ return null;
2797
+ }
2798
+ const v2 = toV2Record(record);
2799
+ const refs = parseFileRefs(v2.file_refs);
2800
+ const newRef = createFileRef(options);
2801
+ const updatedRefs = [...refs, newRef];
2802
+ await this.crud.updateById(fileId, {
2803
+ file_refs: stringifyFileRefs(updatedRefs),
2804
+ ref_count: updatedRefs.length,
2805
+ status: "active",
2806
+ changed_at: this.now()
2807
+ });
2808
+ this.logger?.debug?.("Added ref", { fileId, ref_id: newRef.ref_id });
2809
+ return { ref_id: newRef.ref_id };
2810
+ } catch (error) {
2811
+ this.logError("addRef", error);
2812
+ return null;
2813
+ }
2814
+ }
2815
+ /**
2816
+ * Remove a specific reference from a file
2817
+ * @returns Remaining ref count, or null on failure
2818
+ */
2819
+ async removeRef(fileId, refId) {
2820
+ try {
2821
+ const record = await this.findById(fileId);
2822
+ if (!record) {
2823
+ this.logger?.warn?.("Cannot remove ref: file not found", { fileId });
2824
+ return null;
2825
+ }
2826
+ const v2 = toV2Record(record);
2827
+ const refs = parseFileRefs(v2.file_refs);
2828
+ const updatedRefs = removeRefFromArray(refs, refId);
2829
+ await this.crud.updateById(fileId, {
2830
+ file_refs: stringifyFileRefs(updatedRefs),
2831
+ ref_count: updatedRefs.length,
2832
+ changed_at: this.now()
2833
+ });
2834
+ this.logger?.debug?.("Removed ref", { fileId, refId, remaining: updatedRefs.length });
2835
+ return { remaining_refs: updatedRefs.length };
2836
+ } catch (error) {
2837
+ this.logError("removeRef", error);
2838
+ return null;
2839
+ }
2840
+ }
2841
+ /**
2842
+ * Remove references matching criteria across all records.
2843
+ * Scans all records and removes matching refs (AND semantics).
2844
+ */
2845
+ async removeRefsByCriteria(criteria) {
2846
+ try {
2847
+ let totalRemoved = 0;
2848
+ if (criteria.file_id) {
2849
+ const record = await this.findById(criteria.file_id);
2850
+ if (record) {
2851
+ const removed = await this.removeRefsFromRecord(record, criteria);
2852
+ totalRemoved += removed;
2853
+ }
2854
+ return { removed_count: totalRemoved };
2855
+ }
2856
+ let records;
2857
+ if (criteria.scope_id) {
2858
+ records = await this.crud.findBy({ scope_id: criteria.scope_id });
2859
+ } else {
2860
+ records = await this.crud.list();
2861
+ }
2862
+ for (const record of records) {
2863
+ const removed = await this.removeRefsFromRecord(record, criteria);
2864
+ totalRemoved += removed;
2865
+ }
2866
+ this.logger?.debug?.("Removed refs by criteria", { criteria, removed_count: totalRemoved });
2867
+ return { removed_count: totalRemoved };
2868
+ } catch (error) {
2869
+ this.logError("removeRefsByCriteria", error);
2870
+ return { removed_count: 0 };
2871
+ }
2872
+ }
2873
+ /**
2874
+ * Helper: remove matching refs from a single record
2875
+ */
2876
+ async removeRefsFromRecord(record, criteria) {
2877
+ const v2 = toV2Record(record);
2878
+ const refs = parseFileRefs(v2.file_refs);
2879
+ if (refs.length === 0) return 0;
2880
+ const updatedRefs = removeRefsByCriteriaFromArray(refs, {
2881
+ entity_type: criteria.entity_type,
2882
+ entity_id: criteria.entity_id
2883
+ });
2884
+ const removedCount = refs.length - updatedRefs.length;
2885
+ if (removedCount > 0) {
2886
+ await this.crud.updateById(record.id, {
2887
+ file_refs: stringifyFileRefs(updatedRefs),
2888
+ ref_count: updatedRefs.length,
2889
+ changed_at: this.now()
2890
+ });
2891
+ }
2892
+ return removedCount;
2893
+ }
2894
+ /**
2895
+ * Get all references for a file
2896
+ */
2897
+ async getRefs(fileId) {
2898
+ try {
2899
+ const record = await this.findById(fileId);
2900
+ if (!record) return null;
2901
+ return parseFileRefs(toV2Record(record).file_refs);
2902
+ } catch (error) {
2903
+ this.logError("getRefs", error);
2904
+ return null;
2905
+ }
2906
+ }
2907
+ /**
2908
+ * Get a file with its status and parsed refs
2909
+ */
2910
+ async getFileWithStatus(fileId) {
2911
+ try {
2912
+ const record = await this.findById(fileId);
2913
+ if (!record) return null;
2914
+ return buildFileWithStatus(record);
2915
+ } catch (error) {
2916
+ this.logError("getFileWithStatus", error);
2917
+ return null;
2918
+ }
2919
+ }
2920
+ /**
2921
+ * Get multiple files with status
2922
+ */
2923
+ async getFilesWithStatus(fileIds) {
2924
+ try {
2925
+ const records = await this.findByIds(fileIds);
2926
+ return records.map(buildFileWithStatus);
2927
+ } catch (error) {
2928
+ this.logError("getFilesWithStatus", error);
2929
+ return [];
2930
+ }
2931
+ }
2932
+ /**
2933
+ * Update the status of a file
2934
+ */
2935
+ async updateStatus(fileId, status) {
2936
+ try {
2937
+ const patch = {
2938
+ status,
2939
+ changed_at: this.now()
2940
+ };
2941
+ if (status === "soft_deleted") {
2942
+ patch.deleted_at = this.now();
2943
+ }
2944
+ await this.crud.updateById(fileId, patch);
2945
+ this.logger?.debug?.("Updated status", { fileId, status });
2946
+ return true;
2947
+ } catch (error) {
2948
+ this.logError("updateStatus", error);
2949
+ return false;
2950
+ }
2951
+ }
2952
+ /**
2953
+ * Soft-delete a file (set status to soft_deleted, record deleted_at)
2954
+ */
2955
+ async softDelete(fileId) {
2956
+ return this.updateStatus(fileId, "soft_deleted");
2957
+ }
2958
+ /**
2959
+ * Update specific V2 fields on a record
2960
+ */
2961
+ async updateFields(fileId, fields) {
2962
+ try {
2963
+ await this.crud.updateById(fileId, {
2964
+ ...fields,
2965
+ changed_at: this.now()
2966
+ });
2967
+ this.logger?.debug?.("Updated fields", { fileId, fields: Object.keys(fields) });
2968
+ return true;
2969
+ } catch (error) {
2970
+ this.logError("updateFields", error);
2971
+ return false;
2972
+ }
2973
+ }
2974
+ /**
2975
+ * Find orphaned files (zero references)
2976
+ */
2977
+ async findOrphaned(options) {
2978
+ try {
2979
+ let records;
2980
+ if (options?.scope_id) {
2981
+ records = await this.crud.findBy({ scope_id: options.scope_id });
2982
+ } else if (options?.storage_type) {
2983
+ records = await this.crud.findBy({ storage_type: options.storage_type });
2984
+ } else {
2985
+ records = await this.crud.list();
2986
+ }
2987
+ let orphaned = records.map(buildFileWithStatus).filter((f) => f.is_orphaned && f.record.status !== "soft_deleted");
2988
+ if (options?.olderThanMs) {
2989
+ const cutoff = Date.now() - options.olderThanMs;
2990
+ orphaned = orphaned.filter((f) => {
2991
+ const createdAt = new Date(f.record.created_at).getTime();
2992
+ return createdAt < cutoff;
2993
+ });
2994
+ }
2995
+ if (options?.limit && orphaned.length > options.limit) {
2996
+ orphaned = orphaned.slice(0, options.limit);
2997
+ }
2998
+ return orphaned;
2999
+ } catch (error) {
3000
+ this.logError("findOrphaned", error);
3001
+ return [];
3002
+ }
3003
+ }
2674
3004
  };
2675
3005
  function createFileMetadataService(crudService, options) {
2676
3006
  return new FileMetadataService(crudService, options);
@@ -3040,6 +3370,132 @@ var TrackedFileManager = class extends FileManager {
3040
3370
  const record = await this.metadataService.findByPath(path3, this.getStorageType());
3041
3371
  return record?.file_size ?? null;
3042
3372
  }
3373
+ // ============ Reference Tracking Methods (V2) ============
3374
+ /**
3375
+ * Add a reference to a file
3376
+ */
3377
+ async addRef(fileId, options) {
3378
+ if (!this.isTrackingEnabled()) return null;
3379
+ return this.metadataService.addRef(fileId, options);
3380
+ }
3381
+ /**
3382
+ * Remove a reference from a file
3383
+ */
3384
+ async removeRef(fileId, refId) {
3385
+ if (!this.isTrackingEnabled()) return null;
3386
+ return this.metadataService.removeRef(fileId, refId);
3387
+ }
3388
+ /**
3389
+ * Get a file by its database ID with status information
3390
+ */
3391
+ async getFileById(fileId) {
3392
+ if (!this.isTrackingEnabled()) return null;
3393
+ return this.metadataService.getFileWithStatus(fileId);
3394
+ }
3395
+ /**
3396
+ * Get multiple files by their database IDs with status information
3397
+ */
3398
+ async getFilesById(fileIds) {
3399
+ if (!this.isTrackingEnabled()) return [];
3400
+ return this.metadataService.getFilesWithStatus(fileIds);
3401
+ }
3402
+ /**
3403
+ * Soft-delete a file (marks as soft_deleted, does not remove physical file)
3404
+ */
3405
+ async softDeleteFile(fileId) {
3406
+ if (!this.isTrackingEnabled()) return false;
3407
+ return this.metadataService.softDelete(fileId);
3408
+ }
3409
+ /**
3410
+ * Find orphaned files (files with zero references)
3411
+ */
3412
+ async findOrphanedFiles(options) {
3413
+ if (!this.isTrackingEnabled()) return [];
3414
+ return this.metadataService.findOrphaned(options);
3415
+ }
3416
+ /**
3417
+ * Cleanup orphaned files — removes physical files and/or DB records
3418
+ */
3419
+ async cleanupOrphanedFiles(options) {
3420
+ if (!this.isTrackingEnabled()) return { cleaned: 0, errors: [] };
3421
+ const orphaned = await this.metadataService.findOrphaned(options);
3422
+ let cleaned = 0;
3423
+ const errors = [];
3424
+ for (const file of orphaned) {
3425
+ try {
3426
+ if (options?.softDeleteOnly) {
3427
+ await this.metadataService.softDelete(file.record.id);
3428
+ cleaned++;
3429
+ continue;
3430
+ }
3431
+ const deletePhysical = options?.deletePhysicalFiles !== false;
3432
+ if (deletePhysical) {
3433
+ const deleteResult = await super.deleteFile(file.record.file_path);
3434
+ if (!deleteResult.success) {
3435
+ if (deleteResult.error && !deleteResult.error.includes("not found")) {
3436
+ errors.push(`Failed to delete physical file ${file.record.file_path}: ${deleteResult.error}`);
3437
+ }
3438
+ }
3439
+ }
3440
+ await this.metadataService.recordDelete(file.record.file_path, file.record.storage_type);
3441
+ cleaned++;
3442
+ } catch (error) {
3443
+ const msg = error instanceof Error ? error.message : String(error);
3444
+ errors.push(`Error cleaning up ${file.record.file_path}: ${msg}`);
3445
+ }
3446
+ }
3447
+ return { cleaned, errors };
3448
+ }
3449
+ /**
3450
+ * Verify that a file's physical storage exists and update its status
3451
+ */
3452
+ async verifyFileExistence(fileId) {
3453
+ if (!this.isTrackingEnabled()) return null;
3454
+ const record = await this.metadataService.findById(fileId);
3455
+ if (!record) return null;
3456
+ const fileExists = await this.exists(record.file_path);
3457
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString();
3458
+ await this.metadataService.updateFields(fileId, {
3459
+ storage_verified_at: timestamp,
3460
+ ...fileExists ? {} : { status: "missing" }
3461
+ });
3462
+ return fileExists;
3463
+ }
3464
+ /**
3465
+ * Upload a file and optionally add an initial reference
3466
+ */
3467
+ async uploadFileWithRef(source, remotePath, options) {
3468
+ const uploadResult = await this.uploadFile(source, remotePath, {
3469
+ ...options,
3470
+ awaitRecording: true
3471
+ });
3472
+ if (!uploadResult.success || !uploadResult.data || !this.isTrackingEnabled()) {
3473
+ return uploadResult;
3474
+ }
3475
+ const record = await this.metadataService.findByPath(remotePath, this.getStorageType());
3476
+ if (!record) {
3477
+ return uploadResult;
3478
+ }
3479
+ const fieldsToUpdate = {};
3480
+ if (options?.scope_id) fieldsToUpdate.scope_id = options.scope_id;
3481
+ if (options?.uploaded_by) fieldsToUpdate.uploaded_by = options.uploaded_by;
3482
+ if (Object.keys(fieldsToUpdate).length > 0) {
3483
+ await this.metadataService.updateFields(record.id, fieldsToUpdate);
3484
+ }
3485
+ let refId;
3486
+ if (options?.ref) {
3487
+ const refResult = await this.metadataService.addRef(record.id, options.ref);
3488
+ if (refResult) refId = refResult.ref_id;
3489
+ }
3490
+ return {
3491
+ success: true,
3492
+ data: {
3493
+ ...uploadResult.data,
3494
+ file_id: record.id,
3495
+ ref_id: refId
3496
+ }
3497
+ };
3498
+ }
3043
3499
  };
3044
3500
  function createTrackedFileManager(options) {
3045
3501
  return new TrackedFileManager(options);
@@ -4024,13 +4480,25 @@ var HAZO_FILES_TABLE_SCHEMA = {
4024
4480
  storage_type TEXT NOT NULL,
4025
4481
  file_hash TEXT,
4026
4482
  file_size INTEGER,
4027
- file_changed_at TEXT
4483
+ file_changed_at TEXT,
4484
+ file_refs TEXT DEFAULT '[]',
4485
+ ref_count INTEGER DEFAULT 0,
4486
+ status TEXT DEFAULT 'active',
4487
+ scope_id TEXT,
4488
+ uploaded_by TEXT,
4489
+ storage_verified_at TEXT,
4490
+ deleted_at TEXT,
4491
+ original_filename TEXT
4028
4492
  )`,
4029
4493
  indexes: [
4030
4494
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
4031
4495
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_storage ON hazo_files (storage_type)",
4032
4496
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_hazo_files_path_storage ON hazo_files (file_path, storage_type)",
4033
- "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)"
4497
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)",
4498
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4499
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4500
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4501
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
4034
4502
  ]
4035
4503
  },
4036
4504
  postgres: {
@@ -4045,13 +4513,25 @@ var HAZO_FILES_TABLE_SCHEMA = {
4045
4513
  storage_type TEXT NOT NULL,
4046
4514
  file_hash TEXT,
4047
4515
  file_size BIGINT,
4048
- file_changed_at TIMESTAMP WITH TIME ZONE
4516
+ file_changed_at TIMESTAMP WITH TIME ZONE,
4517
+ file_refs TEXT DEFAULT '[]',
4518
+ ref_count INTEGER DEFAULT 0,
4519
+ status TEXT DEFAULT 'active',
4520
+ scope_id UUID,
4521
+ uploaded_by UUID,
4522
+ storage_verified_at TIMESTAMP WITH TIME ZONE,
4523
+ deleted_at TIMESTAMP WITH TIME ZONE,
4524
+ original_filename TEXT
4049
4525
  )`,
4050
4526
  indexes: [
4051
4527
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
4052
4528
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_storage ON hazo_files (storage_type)",
4053
4529
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_hazo_files_path_storage ON hazo_files (file_path, storage_type)",
4054
- "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)"
4530
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)",
4531
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4532
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4533
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4534
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
4055
4535
  ]
4056
4536
  },
4057
4537
  columns: [
@@ -4065,7 +4545,15 @@ var HAZO_FILES_TABLE_SCHEMA = {
4065
4545
  "storage_type",
4066
4546
  "file_hash",
4067
4547
  "file_size",
4068
- "file_changed_at"
4548
+ "file_changed_at",
4549
+ "file_refs",
4550
+ "ref_count",
4551
+ "status",
4552
+ "scope_id",
4553
+ "uploaded_by",
4554
+ "storage_verified_at",
4555
+ "deleted_at",
4556
+ "original_filename"
4069
4557
  ]
4070
4558
  };
4071
4559
  function getSchemaForTable(tableName, dbType) {
@@ -4078,6 +4566,78 @@ function getSchemaForTable(tableName, dbType) {
4078
4566
  )
4079
4567
  };
4080
4568
  }
4569
// V2 migration definition for the file-tracking table, written against the
// default table name. Each dialect entry (`sqlite`, `postgres`) provides:
//   alterStatements - one ALTER TABLE ... ADD COLUMN per new V2 column
//   indexes         - CREATE INDEX IF NOT EXISTS statements for V2 columns
//   backfill        - UPDATE that fills file_refs / ref_count / status where NULL
// `newColumns` lists the names of the columns added by this migration.
+ var HAZO_FILES_MIGRATION_V2 = {
4570
+ tableName: HAZO_FILES_DEFAULT_TABLE_NAME,
4571
// SQLite statements have no IF NOT EXISTS guard on ADD COLUMN.
+ sqlite: {
4572
+ alterStatements: [
4573
+ "ALTER TABLE hazo_files ADD COLUMN file_refs TEXT DEFAULT '[]'",
4574
+ "ALTER TABLE hazo_files ADD COLUMN ref_count INTEGER DEFAULT 0",
4575
+ "ALTER TABLE hazo_files ADD COLUMN status TEXT DEFAULT 'active'",
4576
+ "ALTER TABLE hazo_files ADD COLUMN scope_id TEXT",
4577
+ "ALTER TABLE hazo_files ADD COLUMN uploaded_by TEXT",
4578
+ "ALTER TABLE hazo_files ADD COLUMN storage_verified_at TEXT",
4579
+ "ALTER TABLE hazo_files ADD COLUMN deleted_at TEXT",
4580
+ "ALTER TABLE hazo_files ADD COLUMN original_filename TEXT"
4581
+ ],
4582
+ indexes: [
4583
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4584
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4585
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4586
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
4587
+ ],
4588
+ backfill: `UPDATE hazo_files SET
4589
+ file_refs = COALESCE(file_refs, '[]'),
4590
+ ref_count = COALESCE(ref_count, 0),
4591
+ status = COALESCE(status, 'active')
4592
+ WHERE file_refs IS NULL OR ref_count IS NULL OR status IS NULL`
4593
+ },
4594
// Postgres statements use ADD COLUMN IF NOT EXISTS, and UUID / TIMESTAMP WITH
// TIME ZONE types where the SQLite variant uses TEXT.
+ postgres: {
4595
+ alterStatements: [
4596
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS file_refs TEXT DEFAULT '[]'",
4597
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS ref_count INTEGER DEFAULT 0",
4598
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS status TEXT DEFAULT 'active'",
4599
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS scope_id UUID",
4600
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS uploaded_by UUID",
4601
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS storage_verified_at TIMESTAMP WITH TIME ZONE",
4602
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMP WITH TIME ZONE",
4603
+ "ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS original_filename TEXT"
4604
+ ],
4605
+ indexes: [
4606
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4607
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4608
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4609
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
4610
+ ],
4611
+ backfill: `UPDATE hazo_files SET
4612
+ file_refs = COALESCE(file_refs, '[]'),
4613
+ ref_count = COALESCE(ref_count, 0),
4614
+ status = COALESCE(status, 'active')
4615
+ WHERE file_refs IS NULL OR ref_count IS NULL OR status IS NULL`
4616
+ },
4617
+ newColumns: [
4618
+ "file_refs",
4619
+ "ref_count",
4620
+ "status",
4621
+ "scope_id",
4622
+ "uploaded_by",
4623
+ "storage_verified_at",
4624
+ "deleted_at",
4625
+ "original_filename"
4626
+ ]
4627
+ };
4628
/**
 * Build the V2 migration statements for a custom table name.
 *
 * Every occurrence of the default table name is replaced by `tableName`,
 * including inside index names. The replacement is a literal split/join, so
 * neither name is interpreted as a regular expression and `$` sequences in
 * `tableName` are not expanded as replacement patterns.
 *
 * NOTE: `tableName` is interpolated into SQL as-is; it must come from trusted
 * configuration, never from user input.
 *
 * @param {string} tableName - Name of the table to migrate.
 * @param {string} dbType - Dialect key of the migration ("sqlite" or "postgres").
 * @returns {{alterStatements: string[], indexes: string[], backfill: string}}
 * @throws {TypeError} When `dbType` does not name a migration dialect.
 */
function getMigrationForTable(tableName, dbType) {
  const migration = HAZO_FILES_MIGRATION_V2[dbType];
  const isDialect =
    migration !== null &&
    typeof migration === "object" &&
    Array.isArray(migration.alterStatements) &&
    Array.isArray(migration.indexes) &&
    typeof migration.backfill === "string";
  if (!isDialect) {
    throw new TypeError(`Unsupported database type for V2 migration: ${String(dbType)}`);
  }
  const defaultName = HAZO_FILES_MIGRATION_V2.tableName;
  const rename = (sql) => sql.split(defaultName).join(tableName);
  return {
    alterStatements: migration.alterStatements.map(rename),
    indexes: migration.indexes.map(rename),
    backfill: rename(migration.backfill)
  };
}
4081
4641
  var HAZO_FILES_NAMING_DEFAULT_TABLE_NAME = "hazo_files_naming";
4082
4642
  var HAZO_FILES_NAMING_TABLE_SCHEMA = {
4083
4643
  tableName: HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
@@ -4134,6 +4694,24 @@ function getNamingSchemaForTable(tableName, dbType) {
4134
4694
  )
4135
4695
  };
4136
4696
  }
4697
+
4698
+ // src/migrations/add-reference-tracking.ts
4699
/**
 * Apply the V2 schema migration: add the new columns, then create the indexes.
 *
 * The migration is safe to re-run. An ALTER statement that fails because the
 * column already exists is skipped. Any other failure (missing table, locked
 * database, permission error) is rethrown instead of being silently swallowed.
 * Index statements are not guarded; their errors propagate.
 *
 * @param {{run: Function}} executor - Object whose `run(sql)` executes one statement.
 * @param {string} dbType - Dialect key ("sqlite" or "postgres").
 * @param {string} [tableName] - Custom table name; the default table is used
 *   when omitted.
 * @returns {Promise<void>}
 */
async function migrateToV2(executor, dbType, tableName) {
  // Walk a short `cause` chain so wrapped driver errors are recognized too.
  const isColumnAlreadyPresent = (error) => {
    let current = error;
    for (let depth = 0; current != null && depth < 5; depth++) {
      const text = typeof current === "string" ? current : String(current.message ?? "");
      if (/duplicate column|already exists/i.test(text)) return true;
      current = current.cause;
    }
    return false;
  };
  const migration = tableName ? getMigrationForTable(tableName, dbType) : HAZO_FILES_MIGRATION_V2[dbType];
  for (const stmt of migration.alterStatements) {
    try {
      await executor.run(stmt);
    } catch (error) {
      if (!isColumnAlreadyPresent(error)) {
        throw error;
      }
    }
  }
  for (const idx of migration.indexes) {
    await executor.run(idx);
  }
}
4711
/**
 * Run the V2 backfill statement, which fills default values into the V2
 * columns of rows where they are NULL.
 *
 * @param {{run: Function}} executor - Object whose `run(sql)` executes one statement.
 * @param {string} dbType - Dialect key of the migration.
 * @param {string} [tableName] - Custom table name; the default table is used
 *   when omitted.
 * @returns {Promise<void>}
 */
async function backfillV2Defaults(executor, dbType, tableName) {
  let statements;
  if (tableName) {
    statements = getMigrationForTable(tableName, dbType);
  } else {
    statements = HAZO_FILES_MIGRATION_V2[dbType];
  }
  const { backfill } = statements;
  await executor.run(backfill);
}
4137
4715
  // Annotate the CommonJS export names for ESM import in node:
4138
4716
  0 && (module.exports = {
4139
4717
  ALL_SYSTEM_VARIABLES,
@@ -4151,6 +4729,7 @@ function getNamingSchemaForTable(tableName, dbType) {
4151
4729
  GoogleDriveAuth,
4152
4730
  GoogleDriveModule,
4153
4731
  HAZO_FILES_DEFAULT_TABLE_NAME,
4732
+ HAZO_FILES_MIGRATION_V2,
4154
4733
  HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
4155
4734
  HAZO_FILES_NAMING_TABLE_SCHEMA,
4156
4735
  HAZO_FILES_TABLE_SCHEMA,
@@ -4168,6 +4747,8 @@ function getNamingSchemaForTable(tableName, dbType) {
4168
4747
  TrackedFileManager,
4169
4748
  UploadExtractService,
4170
4749
  addExtractionToFileData,
4750
+ backfillV2Defaults,
4751
+ buildFileWithStatus,
4171
4752
  clearExtractions,
4172
4753
  clonePattern,
4173
4754
  computeFileHash,
@@ -4180,6 +4761,7 @@ function getNamingSchemaForTable(tableName, dbType) {
4180
4761
  createFileItem,
4181
4762
  createFileManager,
4182
4763
  createFileMetadataService,
4764
+ createFileRef,
4183
4765
  createFolderItem,
4184
4766
  createGoogleDriveAuth,
4185
4767
  createGoogleDriveModule,
@@ -4202,6 +4784,7 @@ function getNamingSchemaForTable(tableName, dbType) {
4202
4784
  generateExtractionId,
4203
4785
  generateId,
4204
4786
  generatePreviewName,
4787
+ generateRefId,
4205
4788
  generateSampleConfig,
4206
4789
  generateSegmentId,
4207
4790
  getBaseName,
@@ -4215,6 +4798,7 @@ function getNamingSchemaForTable(tableName, dbType) {
4215
4798
  getFileCategory,
4216
4799
  getFileMetadataValues,
4217
4800
  getMergedData,
4801
+ getMigrationForTable,
4218
4802
  getMimeType,
4219
4803
  getNameWithoutExtension,
4220
4804
  getNamingSchemaForTable,
@@ -4246,20 +4830,26 @@ function getNamingSchemaForTable(tableName, dbType) {
4246
4830
  joinPath,
4247
4831
  loadConfig,
4248
4832
  loadConfigAsync,
4833
+ migrateToV2,
4249
4834
  normalizePath,
4250
4835
  parseConfig,
4251
4836
  parseFileData,
4837
+ parseFileRefs,
4252
4838
  parsePatternString,
4253
4839
  patternToString,
4254
4840
  recalculateMergedData,
4255
4841
  registerModule,
4256
4842
  removeExtractionById,
4257
4843
  removeExtractionByIndex,
4844
+ removeRefFromArray,
4845
+ removeRefsByCriteriaFromArray,
4258
4846
  sanitizeFilename,
4259
4847
  saveConfig,
4260
4848
  sortItems,
4261
4849
  stringifyFileData,
4850
+ stringifyFileRefs,
4262
4851
  successResult,
4852
+ toV2Record,
4263
4853
  updateExtractionById,
4264
4854
  validateExtractionData,
4265
4855
  validateFileDataStructure,