hazo_files 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,6 +45,7 @@ __export(index_exports, {
45
45
  GoogleDriveAuth: () => GoogleDriveAuth,
46
46
  GoogleDriveModule: () => GoogleDriveModule,
47
47
  HAZO_FILES_DEFAULT_TABLE_NAME: () => HAZO_FILES_DEFAULT_TABLE_NAME,
48
+ HAZO_FILES_MIGRATION_V2: () => HAZO_FILES_MIGRATION_V2,
48
49
  HAZO_FILES_NAMING_DEFAULT_TABLE_NAME: () => HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
49
50
  HAZO_FILES_NAMING_TABLE_SCHEMA: () => HAZO_FILES_NAMING_TABLE_SCHEMA,
50
51
  HAZO_FILES_TABLE_SCHEMA: () => HAZO_FILES_TABLE_SCHEMA,
@@ -62,6 +63,8 @@ __export(index_exports, {
62
63
  TrackedFileManager: () => TrackedFileManager,
63
64
  UploadExtractService: () => UploadExtractService,
64
65
  addExtractionToFileData: () => addExtractionToFileData,
66
+ backfillV2Defaults: () => backfillV2Defaults,
67
+ buildFileWithStatus: () => buildFileWithStatus,
65
68
  clearExtractions: () => clearExtractions,
66
69
  clonePattern: () => clonePattern,
67
70
  computeFileHash: () => computeFileHash,
@@ -75,6 +78,7 @@ __export(index_exports, {
75
78
  createFileItem: () => createFileItem,
76
79
  createFileManager: () => createFileManager,
77
80
  createFileMetadataService: () => createFileMetadataService,
81
+ createFileRef: () => createFileRef,
78
82
  createFolderItem: () => createFolderItem,
79
83
  createGoogleDriveAuth: () => createGoogleDriveAuth,
80
84
  createGoogleDriveModule: () => createGoogleDriveModule,
@@ -98,6 +102,7 @@ __export(index_exports, {
98
102
  generateExtractionId: () => generateExtractionId,
99
103
  generateId: () => generateId,
100
104
  generatePreviewName: () => generatePreviewName,
105
+ generateRefId: () => generateRefId,
101
106
  generateSampleConfig: () => generateSampleConfig,
102
107
  generateSegmentId: () => generateSegmentId,
103
108
  getBaseName: () => getBaseName,
@@ -111,6 +116,7 @@ __export(index_exports, {
111
116
  getFileCategory: () => getFileCategory,
112
117
  getFileMetadataValues: () => getFileMetadataValues,
113
118
  getMergedData: () => getMergedData,
119
+ getMigrationForTable: () => getMigrationForTable,
114
120
  getMimeType: () => getMimeType,
115
121
  getNameWithoutExtension: () => getNameWithoutExtension,
116
122
  getNamingSchemaForTable: () => getNamingSchemaForTable,
@@ -142,20 +148,26 @@ __export(index_exports, {
142
148
  joinPath: () => joinPath,
143
149
  loadConfig: () => loadConfig,
144
150
  loadConfigAsync: () => loadConfigAsync,
151
+ migrateToV2: () => migrateToV2,
145
152
  normalizePath: () => normalizePath,
146
153
  parseConfig: () => parseConfig,
147
154
  parseFileData: () => parseFileData,
155
+ parseFileRefs: () => parseFileRefs,
148
156
  parsePatternString: () => parsePatternString,
149
157
  patternToString: () => patternToString,
150
158
  recalculateMergedData: () => recalculateMergedData,
151
159
  registerModule: () => registerModule,
152
160
  removeExtractionById: () => removeExtractionById,
153
161
  removeExtractionByIndex: () => removeExtractionByIndex,
162
+ removeRefFromArray: () => removeRefFromArray,
163
+ removeRefsByCriteriaFromArray: () => removeRefsByCriteriaFromArray,
154
164
  sanitizeFilename: () => sanitizeFilename,
155
165
  saveConfig: () => saveConfig,
156
166
  sortItems: () => sortItems,
157
167
  stringifyFileData: () => stringifyFileData,
168
+ stringifyFileRefs: () => stringifyFileRefs,
158
169
  successResult: () => successResult,
170
+ toV2Record: () => toV2Record,
159
171
  updateExtractionById: () => updateExtractionById,
160
172
  validateExtractionData: () => validateExtractionData,
161
173
  validateFileDataStructure: () => validateFileDataStructure,
@@ -780,7 +792,7 @@ var LocalStorageModule = class extends BaseStorageModule {
780
792
  await super.initialize(config);
781
793
  const localConfig = this.getProviderConfig();
782
794
  this.basePath = path2.resolve(localConfig.basePath);
783
- this.allowedExtensions = localConfig.allowedExtensions || [];
795
+ this.allowedExtensions = (localConfig.allowedExtensions || []).map((ext) => ext.trim().replace(/^\./, "").toLowerCase()).filter((ext) => ext.length > 0);
784
796
  this.maxFileSize = localConfig.maxFileSize || 0;
785
797
  await fs2.promises.mkdir(this.basePath, { recursive: true });
786
798
  }
@@ -2271,6 +2283,70 @@ function updateExtractionById(fileData, id, newData, options = {}) {
2271
2283
  };
2272
2284
  }
2273
2285
 
2286
+ // src/common/ref-utils.ts
2287
/**
 * Build an identifier for a file reference.
 *
 * The id is the literal prefix `ref_`, the current epoch time in
 * milliseconds, and up to nine base-36 characters derived from
 * `Math.random()`, joined with underscores.
 *
 * @returns {string} A new id, e.g. `ref_1700000000000_k3j9x0q2a`.
 */
function generateRefId() {
  const createdAtMs = Date.now();
  const randomSuffix = Math.random().toString(36).substring(2, 11);
  return `ref_${createdAtMs}_${randomSuffix}`;
}
2290
/**
 * Parse the JSON text stored in a record's `file_refs` field.
 *
 * Never throws: empty input, malformed JSON, and JSON whose top-level value
 * is not an array all yield an empty list. Array entries that are not plain
 * objects (null, primitives, nested arrays) are dropped, so callers that read
 * `ref.ref_id` / `ref.entity_type` cannot crash on a corrupted value and junk
 * entries are not counted as references.
 *
 * @param {string|null|undefined} json - Serialized array of reference objects.
 * @returns {Array<object>} The reference objects found in `json`.
 */
function parseFileRefs(json) {
  if (!json) return [];
  let parsed;
  try {
    parsed = JSON.parse(json);
  } catch {
    // Malformed JSON is treated the same as "no references".
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  return parsed.filter(
    (entry) => entry !== null && typeof entry === "object" && !Array.isArray(entry)
  );
}
2300
/**
 * Serialize a list of file references to JSON text (the inverse of
 * `JSON.parse` on the stored value).
 *
 * @param {Array<object>} refs - Reference objects to serialize.
 * @returns {string} The result of `JSON.stringify(refs)`.
 */
function stringifyFileRefs(refs) {
  const serialized = JSON.stringify(refs);
  return serialized;
}
2303
/**
 * Create a new reference object for a file.
 *
 * `ref_id` comes from `generateRefId()` and `created_at` is the current time
 * as an ISO-8601 string. The optional fields `created_by`, `visibility`,
 * `label` and `metadata` are copied only when their value is truthy.
 *
 * @param {object} options - Must provide `entity_type` and `entity_id`; may
 *   provide `created_by`, `visibility`, `label`, `metadata`.
 * @returns {object} The new reference object.
 */
function createFileRef(options) {
  const optionalKeys = ["created_by", "visibility", "label", "metadata"];
  const ref = {
    ref_id: generateRefId(),
    entity_type: options.entity_type,
    entity_id: options.entity_id,
    created_at: new Date().toISOString()
  };
  for (const key of optionalKeys) {
    const value = options[key];
    if (value) {
      ref[key] = value;
    }
  }
  return ref;
}
2316
/**
 * Return a new array without the reference whose `ref_id` equals `refId`.
 * The input array is not modified.
 *
 * @param {Array<object>} refs - Current references.
 * @param {string} refId - Id of the reference to drop.
 * @returns {Array<object>} References whose `ref_id` differs from `refId`.
 */
function removeRefFromArray(refs, refId) {
  const remaining = [];
  for (const candidate of refs) {
    if (candidate.ref_id !== refId) {
      remaining.push(candidate);
    }
  }
  return remaining;
}
2319
/**
 * Return a new array without the references that match `criteria`.
 *
 * A reference is removed only when it matches every criterion that is set
 * (`entity_type` and/or `entity_id`, compared with strict equality). When
 * neither criterion is set, nothing is removed and a copy is returned.
 *
 * @param {Array<object>} refs - Current references.
 * @param {{entity_type?: string, entity_id?: string}} criteria - Match rules.
 * @returns {Array<object>} The references that were kept.
 */
function removeRefsByCriteriaFromArray(refs, criteria) {
  return refs.filter((ref) => {
    const wantsType = Boolean(criteria.entity_type);
    const wantsId = Boolean(criteria.entity_id);
    const typeMatches = !wantsType || ref.entity_type === criteria.entity_type;
    const idMatches = !wantsId || ref.entity_id === criteria.entity_id;
    const isTargeted = (wantsType || wantsId) && typeMatches && idMatches;
    return !isTargeted;
  });
}
2326
/**
 * Return a copy of `record` with every V2 field present and well-typed.
 *
 * Existing values are kept when they already have the expected type;
 * otherwise a default is substituted: `file_refs` -> "[]", `ref_count` -> 0,
 * `status` -> "active", and `scope_id`, `uploaded_by`, `original_filename`,
 * `storage_verified_at`, `deleted_at` -> null.
 *
 * @param {object} record - A stored record, possibly without V2 fields.
 * @returns {object} A new object; `record` itself is not modified.
 */
function toV2Record(record) {
  const stringOr = (value, fallback) => (typeof value === "string" ? value : fallback);
  const {
    file_refs,
    ref_count,
    status,
    scope_id,
    uploaded_by,
    original_filename,
    storage_verified_at,
    deleted_at
  } = record;
  return {
    ...record,
    file_refs: stringOr(file_refs, "[]"),
    ref_count: typeof ref_count === "number" ? ref_count : 0,
    status: stringOr(status, "active"),
    scope_id: stringOr(scope_id, null),
    uploaded_by: stringOr(uploaded_by, null),
    original_filename: stringOr(original_filename, null),
    storage_verified_at: stringOr(storage_verified_at, null),
    deleted_at: stringOr(deleted_at, null)
  };
}
2340
/**
 * Combine a stored record with its parsed references.
 *
 * @param {object} record - A stored record (V2 fields optional).
 * @returns {{record: object, refs: Array<object>, is_orphaned: boolean}}
 *   `record` is the result of `toV2Record`, `refs` is its parsed `file_refs`,
 *   and `is_orphaned` is true when there are no references.
 */
function buildFileWithStatus(record) {
  const normalized = toV2Record(record);
  const refs = parseFileRefs(normalized.file_refs);
  const is_orphaned = refs.length === 0;
  return { record: normalized, refs, is_orphaned };
}
2349
+
2274
2350
  // src/services/file-metadata-service.ts
2275
2351
  var FileMetadataService = class {
2276
2352
  constructor(crudService, options = {}) {
@@ -2312,9 +2388,15 @@ var FileMetadataService = class {
2312
2388
  changed_at: timestamp,
2313
2389
  file_hash: input.file_hash || null,
2314
2390
  file_size: input.file_size ?? null,
2315
- file_changed_at: input.file_hash ? timestamp : null
2316
- // Set content changed time if hash is provided
2391
+ file_changed_at: input.file_hash ? timestamp : null,
2392
+ // V2 defaults included conditionally to avoid breaking pre-migration DBs
2393
+ file_refs: "[]",
2394
+ ref_count: 0,
2395
+ status: "active"
2317
2396
  };
2397
+ if (input.scope_id !== void 0) record.scope_id = input.scope_id;
2398
+ if (input.uploaded_by !== void 0) record.uploaded_by = input.uploaded_by;
2399
+ if (input.original_filename !== void 0) record.original_filename = input.original_filename;
2318
2400
  const results = await this.crud.insert(record);
2319
2401
  this.logger?.debug?.("Recorded file upload", { path: input.file_path });
2320
2402
  return results[0] || null;
@@ -2673,6 +2755,254 @@ var FileMetadataService = class {
2673
2755
  return false;
2674
2756
  }
2675
2757
  }
2758
+ // ============================================
2759
+ // Reference Tracking Methods (V2)
2760
+ // ============================================
2761
+ /**
2762
+ * Find a record by ID
2763
+ */
2764
+ async findById(id) {
2765
+ try {
2766
+ const results = await this.crud.findBy({ id });
2767
+ return results[0] || null;
2768
+ } catch (error) {
2769
+ this.logError("findById", error);
2770
+ return null;
2771
+ }
2772
+ }
2773
+ /**
2774
+ * Find multiple records by IDs
2775
+ */
2776
+ async findByIds(ids) {
2777
+ try {
2778
+ const results = [];
2779
+ for (const id of ids) {
2780
+ const record = await this.findById(id);
2781
+ if (record) results.push(record);
2782
+ }
2783
+ return results;
2784
+ } catch (error) {
2785
+ this.logError("findByIds", error);
2786
+ return [];
2787
+ }
2788
+ }
2789
+ /**
2790
+ * Add a reference to a file
2791
+ * @returns The new ref_id, or null on failure
2792
+ */
2793
+ async addRef(fileId, options) {
2794
+ try {
2795
+ const record = await this.findById(fileId);
2796
+ if (!record) {
2797
+ this.logger?.warn?.("Cannot add ref: file not found", { fileId });
2798
+ return null;
2799
+ }
2800
+ const v2 = toV2Record(record);
2801
+ const refs = parseFileRefs(v2.file_refs);
2802
+ const newRef = createFileRef(options);
2803
+ const updatedRefs = [...refs, newRef];
2804
+ await this.crud.updateById(fileId, {
2805
+ file_refs: stringifyFileRefs(updatedRefs),
2806
+ ref_count: updatedRefs.length,
2807
+ status: "active",
2808
+ changed_at: this.now()
2809
+ });
2810
+ this.logger?.debug?.("Added ref", { fileId, ref_id: newRef.ref_id });
2811
+ return { ref_id: newRef.ref_id };
2812
+ } catch (error) {
2813
+ this.logError("addRef", error);
2814
+ return null;
2815
+ }
2816
+ }
2817
+ /**
2818
+ * Remove a specific reference from a file
2819
+ * @returns Remaining ref count, or null on failure
2820
+ */
2821
+ async removeRef(fileId, refId) {
2822
+ try {
2823
+ const record = await this.findById(fileId);
2824
+ if (!record) {
2825
+ this.logger?.warn?.("Cannot remove ref: file not found", { fileId });
2826
+ return null;
2827
+ }
2828
+ const v2 = toV2Record(record);
2829
+ const refs = parseFileRefs(v2.file_refs);
2830
+ const updatedRefs = removeRefFromArray(refs, refId);
2831
+ await this.crud.updateById(fileId, {
2832
+ file_refs: stringifyFileRefs(updatedRefs),
2833
+ ref_count: updatedRefs.length,
2834
+ changed_at: this.now()
2835
+ });
2836
+ this.logger?.debug?.("Removed ref", { fileId, refId, remaining: updatedRefs.length });
2837
+ return { remaining_refs: updatedRefs.length };
2838
+ } catch (error) {
2839
+ this.logError("removeRef", error);
2840
+ return null;
2841
+ }
2842
+ }
2843
+ /**
2844
+ * Remove references matching criteria across all records.
2845
+ * Scans all records and removes matching refs (AND semantics).
2846
+ */
2847
+ async removeRefsByCriteria(criteria) {
2848
+ try {
2849
+ let totalRemoved = 0;
2850
+ if (criteria.file_id) {
2851
+ const record = await this.findById(criteria.file_id);
2852
+ if (record) {
2853
+ const removed = await this.removeRefsFromRecord(record, criteria);
2854
+ totalRemoved += removed;
2855
+ }
2856
+ return { removed_count: totalRemoved };
2857
+ }
2858
+ let records;
2859
+ if (criteria.scope_id) {
2860
+ records = await this.crud.findBy({ scope_id: criteria.scope_id });
2861
+ } else {
2862
+ records = await this.crud.list();
2863
+ }
2864
+ for (const record of records) {
2865
+ const removed = await this.removeRefsFromRecord(record, criteria);
2866
+ totalRemoved += removed;
2867
+ }
2868
+ this.logger?.debug?.("Removed refs by criteria", { criteria, removed_count: totalRemoved });
2869
+ return { removed_count: totalRemoved };
2870
+ } catch (error) {
2871
+ this.logError("removeRefsByCriteria", error);
2872
+ return { removed_count: 0 };
2873
+ }
2874
+ }
2875
+ /**
2876
+ * Helper: remove matching refs from a single record
2877
+ */
2878
+ async removeRefsFromRecord(record, criteria) {
2879
+ const v2 = toV2Record(record);
2880
+ const refs = parseFileRefs(v2.file_refs);
2881
+ if (refs.length === 0) return 0;
2882
+ const updatedRefs = removeRefsByCriteriaFromArray(refs, {
2883
+ entity_type: criteria.entity_type,
2884
+ entity_id: criteria.entity_id
2885
+ });
2886
+ const removedCount = refs.length - updatedRefs.length;
2887
+ if (removedCount > 0) {
2888
+ await this.crud.updateById(record.id, {
2889
+ file_refs: stringifyFileRefs(updatedRefs),
2890
+ ref_count: updatedRefs.length,
2891
+ changed_at: this.now()
2892
+ });
2893
+ }
2894
+ return removedCount;
2895
+ }
2896
+ /**
2897
+ * Get all references for a file
2898
+ */
2899
+ async getRefs(fileId) {
2900
+ try {
2901
+ const record = await this.findById(fileId);
2902
+ if (!record) return null;
2903
+ return parseFileRefs(toV2Record(record).file_refs);
2904
+ } catch (error) {
2905
+ this.logError("getRefs", error);
2906
+ return null;
2907
+ }
2908
+ }
2909
+ /**
2910
+ * Get a file with its status and parsed refs
2911
+ */
2912
+ async getFileWithStatus(fileId) {
2913
+ try {
2914
+ const record = await this.findById(fileId);
2915
+ if (!record) return null;
2916
+ return buildFileWithStatus(record);
2917
+ } catch (error) {
2918
+ this.logError("getFileWithStatus", error);
2919
+ return null;
2920
+ }
2921
+ }
2922
+ /**
2923
+ * Get multiple files with status
2924
+ */
2925
+ async getFilesWithStatus(fileIds) {
2926
+ try {
2927
+ const records = await this.findByIds(fileIds);
2928
+ return records.map(buildFileWithStatus);
2929
+ } catch (error) {
2930
+ this.logError("getFilesWithStatus", error);
2931
+ return [];
2932
+ }
2933
+ }
2934
+ /**
2935
+ * Update the status of a file
2936
+ */
2937
+ async updateStatus(fileId, status) {
2938
+ try {
2939
+ const patch = {
2940
+ status,
2941
+ changed_at: this.now()
2942
+ };
2943
+ if (status === "soft_deleted") {
2944
+ patch.deleted_at = this.now();
2945
+ }
2946
+ await this.crud.updateById(fileId, patch);
2947
+ this.logger?.debug?.("Updated status", { fileId, status });
2948
+ return true;
2949
+ } catch (error) {
2950
+ this.logError("updateStatus", error);
2951
+ return false;
2952
+ }
2953
+ }
2954
+ /**
2955
+ * Soft-delete a file (set status to soft_deleted, record deleted_at)
2956
+ */
2957
+ async softDelete(fileId) {
2958
+ return this.updateStatus(fileId, "soft_deleted");
2959
+ }
2960
+ /**
2961
+ * Update specific V2 fields on a record
2962
+ */
2963
+ async updateFields(fileId, fields) {
2964
+ try {
2965
+ await this.crud.updateById(fileId, {
2966
+ ...fields,
2967
+ changed_at: this.now()
2968
+ });
2969
+ this.logger?.debug?.("Updated fields", { fileId, fields: Object.keys(fields) });
2970
+ return true;
2971
+ } catch (error) {
2972
+ this.logError("updateFields", error);
2973
+ return false;
2974
+ }
2975
+ }
2976
+ /**
2977
+ * Find orphaned files (zero references)
2978
+ */
2979
+ async findOrphaned(options) {
2980
+ try {
2981
+ let records;
2982
+ if (options?.scope_id) {
2983
+ records = await this.crud.findBy({ scope_id: options.scope_id });
2984
+ } else if (options?.storage_type) {
2985
+ records = await this.crud.findBy({ storage_type: options.storage_type });
2986
+ } else {
2987
+ records = await this.crud.list();
2988
+ }
2989
+ let orphaned = records.map(buildFileWithStatus).filter((f) => f.is_orphaned && f.record.status !== "soft_deleted");
2990
+ if (options?.olderThanMs) {
2991
+ const cutoff = Date.now() - options.olderThanMs;
2992
+ orphaned = orphaned.filter((f) => {
2993
+ const createdAt = new Date(f.record.created_at).getTime();
2994
+ return createdAt < cutoff;
2995
+ });
2996
+ }
2997
+ if (options?.limit && orphaned.length > options.limit) {
2998
+ orphaned = orphaned.slice(0, options.limit);
2999
+ }
3000
+ return orphaned;
3001
+ } catch (error) {
3002
+ this.logError("findOrphaned", error);
3003
+ return [];
3004
+ }
3005
+ }
2676
3006
  };
2677
3007
  function createFileMetadataService(crudService, options) {
2678
3008
  return new FileMetadataService(crudService, options);
@@ -3042,6 +3372,132 @@ var TrackedFileManager = class extends FileManager {
3042
3372
  const record = await this.metadataService.findByPath(path3, this.getStorageType());
3043
3373
  return record?.file_size ?? null;
3044
3374
  }
3375
+ // ============ Reference Tracking Methods (V2) ============
3376
+ /**
3377
+ * Add a reference to a file
3378
+ */
3379
+ async addRef(fileId, options) {
3380
+ if (!this.isTrackingEnabled()) return null;
3381
+ return this.metadataService.addRef(fileId, options);
3382
+ }
3383
+ /**
3384
+ * Remove a reference from a file
3385
+ */
3386
+ async removeRef(fileId, refId) {
3387
+ if (!this.isTrackingEnabled()) return null;
3388
+ return this.metadataService.removeRef(fileId, refId);
3389
+ }
3390
+ /**
3391
+ * Get a file by its database ID with status information
3392
+ */
3393
+ async getFileById(fileId) {
3394
+ if (!this.isTrackingEnabled()) return null;
3395
+ return this.metadataService.getFileWithStatus(fileId);
3396
+ }
3397
+ /**
3398
+ * Get multiple files by their database IDs with status information
3399
+ */
3400
+ async getFilesById(fileIds) {
3401
+ if (!this.isTrackingEnabled()) return [];
3402
+ return this.metadataService.getFilesWithStatus(fileIds);
3403
+ }
3404
+ /**
3405
+ * Soft-delete a file (marks as soft_deleted, does not remove physical file)
3406
+ */
3407
+ async softDeleteFile(fileId) {
3408
+ if (!this.isTrackingEnabled()) return false;
3409
+ return this.metadataService.softDelete(fileId);
3410
+ }
3411
+ /**
3412
+ * Find orphaned files (files with zero references)
3413
+ */
3414
+ async findOrphanedFiles(options) {
3415
+ if (!this.isTrackingEnabled()) return [];
3416
+ return this.metadataService.findOrphaned(options);
3417
+ }
3418
+ /**
3419
+ * Cleanup orphaned files — removes physical files and/or DB records
3420
+ */
3421
+ async cleanupOrphanedFiles(options) {
3422
+ if (!this.isTrackingEnabled()) return { cleaned: 0, errors: [] };
3423
+ const orphaned = await this.metadataService.findOrphaned(options);
3424
+ let cleaned = 0;
3425
+ const errors = [];
3426
+ for (const file of orphaned) {
3427
+ try {
3428
+ if (options?.softDeleteOnly) {
3429
+ await this.metadataService.softDelete(file.record.id);
3430
+ cleaned++;
3431
+ continue;
3432
+ }
3433
+ const deletePhysical = options?.deletePhysicalFiles !== false;
3434
+ if (deletePhysical) {
3435
+ const deleteResult = await super.deleteFile(file.record.file_path);
3436
+ if (!deleteResult.success) {
3437
+ if (deleteResult.error && !deleteResult.error.includes("not found")) {
3438
+ errors.push(`Failed to delete physical file ${file.record.file_path}: ${deleteResult.error}`);
3439
+ }
3440
+ }
3441
+ }
3442
+ await this.metadataService.recordDelete(file.record.file_path, file.record.storage_type);
3443
+ cleaned++;
3444
+ } catch (error) {
3445
+ const msg = error instanceof Error ? error.message : String(error);
3446
+ errors.push(`Error cleaning up ${file.record.file_path}: ${msg}`);
3447
+ }
3448
+ }
3449
+ return { cleaned, errors };
3450
+ }
3451
+ /**
3452
+ * Verify that a file's physical storage exists and update its status
3453
+ */
3454
+ async verifyFileExistence(fileId) {
3455
+ if (!this.isTrackingEnabled()) return null;
3456
+ const record = await this.metadataService.findById(fileId);
3457
+ if (!record) return null;
3458
+ const fileExists = await this.exists(record.file_path);
3459
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString();
3460
+ await this.metadataService.updateFields(fileId, {
3461
+ storage_verified_at: timestamp,
3462
+ ...fileExists ? {} : { status: "missing" }
3463
+ });
3464
+ return fileExists;
3465
+ }
3466
+ /**
3467
+ * Upload a file and optionally add an initial reference
3468
+ */
3469
+ async uploadFileWithRef(source, remotePath, options) {
3470
+ const uploadResult = await this.uploadFile(source, remotePath, {
3471
+ ...options,
3472
+ awaitRecording: true
3473
+ });
3474
+ if (!uploadResult.success || !uploadResult.data || !this.isTrackingEnabled()) {
3475
+ return uploadResult;
3476
+ }
3477
+ const record = await this.metadataService.findByPath(remotePath, this.getStorageType());
3478
+ if (!record) {
3479
+ return uploadResult;
3480
+ }
3481
+ const fieldsToUpdate = {};
3482
+ if (options?.scope_id) fieldsToUpdate.scope_id = options.scope_id;
3483
+ if (options?.uploaded_by) fieldsToUpdate.uploaded_by = options.uploaded_by;
3484
+ if (Object.keys(fieldsToUpdate).length > 0) {
3485
+ await this.metadataService.updateFields(record.id, fieldsToUpdate);
3486
+ }
3487
+ let refId;
3488
+ if (options?.ref) {
3489
+ const refResult = await this.metadataService.addRef(record.id, options.ref);
3490
+ if (refResult) refId = refResult.ref_id;
3491
+ }
3492
+ return {
3493
+ success: true,
3494
+ data: {
3495
+ ...uploadResult.data,
3496
+ file_id: record.id,
3497
+ ref_id: refId
3498
+ }
3499
+ };
3500
+ }
3045
3501
  };
3046
3502
  function createTrackedFileManager(options) {
3047
3503
  return new TrackedFileManager(options);
@@ -4090,13 +4546,25 @@ var HAZO_FILES_TABLE_SCHEMA = {
4090
4546
  storage_type TEXT NOT NULL,
4091
4547
  file_hash TEXT,
4092
4548
  file_size INTEGER,
4093
- file_changed_at TEXT
4549
+ file_changed_at TEXT,
4550
+ file_refs TEXT DEFAULT '[]',
4551
+ ref_count INTEGER DEFAULT 0,
4552
+ status TEXT DEFAULT 'active',
4553
+ scope_id TEXT,
4554
+ uploaded_by TEXT,
4555
+ storage_verified_at TEXT,
4556
+ deleted_at TEXT,
4557
+ original_filename TEXT
4094
4558
  )`,
4095
4559
  indexes: [
4096
4560
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
4097
4561
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_storage ON hazo_files (storage_type)",
4098
4562
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_hazo_files_path_storage ON hazo_files (file_path, storage_type)",
4099
- "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)"
4563
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)",
4564
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4565
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4566
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4567
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
4100
4568
  ]
4101
4569
  },
4102
4570
  postgres: {
@@ -4111,13 +4579,25 @@ var HAZO_FILES_TABLE_SCHEMA = {
4111
4579
  storage_type TEXT NOT NULL,
4112
4580
  file_hash TEXT,
4113
4581
  file_size BIGINT,
4114
- file_changed_at TIMESTAMP WITH TIME ZONE
4582
+ file_changed_at TIMESTAMP WITH TIME ZONE,
4583
+ file_refs TEXT DEFAULT '[]',
4584
+ ref_count INTEGER DEFAULT 0,
4585
+ status TEXT DEFAULT 'active',
4586
+ scope_id UUID,
4587
+ uploaded_by UUID,
4588
+ storage_verified_at TIMESTAMP WITH TIME ZONE,
4589
+ deleted_at TIMESTAMP WITH TIME ZONE,
4590
+ original_filename TEXT
4115
4591
  )`,
4116
4592
  indexes: [
4117
4593
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
4118
4594
  "CREATE INDEX IF NOT EXISTS idx_hazo_files_storage ON hazo_files (storage_type)",
4119
4595
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_hazo_files_path_storage ON hazo_files (file_path, storage_type)",
4120
- "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)"
4596
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_hash ON hazo_files (file_hash)",
4597
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
4598
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
4599
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
4600
+ "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
4121
4601
  ]
4122
4602
  },
4123
4603
  columns: [
@@ -4131,7 +4611,15 @@ var HAZO_FILES_TABLE_SCHEMA = {
4131
4611
  "storage_type",
4132
4612
  "file_hash",
4133
4613
  "file_size",
4134
- "file_changed_at"
4614
+ "file_changed_at",
4615
+ "file_refs",
4616
+ "ref_count",
4617
+ "status",
4618
+ "scope_id",
4619
+ "uploaded_by",
4620
+ "storage_verified_at",
4621
+ "deleted_at",
4622
+ "original_filename"
4135
4623
  ]
4136
4624
  };
4137
4625
  function getSchemaForTable(tableName, dbType) {
@@ -4144,6 +4632,78 @@ function getSchemaForTable(tableName, dbType) {
4144
4632
  )
4145
4633
  };
4146
4634
  }
4635
/**
 * V2 (reference tracking) migration for the default table.
 *
 * For each database flavour (`sqlite`, `postgres`) it holds the
 * `ALTER TABLE ... ADD COLUMN` statements, the index statements and a
 * backfill statement that replaces NULLs in `file_refs`, `ref_count` and
 * `status` with their defaults. `newColumns` lists the added column names.
 * All SQL targets the table name `hazo_files`.
 */
var HAZO_FILES_MIGRATION_V2 = (() => {
  // [column name, SQLite column definition, PostgreSQL column definition]
  const columnDefs = [
    ["file_refs", "TEXT DEFAULT '[]'", "TEXT DEFAULT '[]'"],
    ["ref_count", "INTEGER DEFAULT 0", "INTEGER DEFAULT 0"],
    ["status", "TEXT DEFAULT 'active'", "TEXT DEFAULT 'active'"],
    ["scope_id", "TEXT", "UUID"],
    ["uploaded_by", "TEXT", "UUID"],
    ["storage_verified_at", "TEXT", "TIMESTAMP WITH TIME ZONE"],
    ["deleted_at", "TEXT", "TIMESTAMP WITH TIME ZONE"],
    ["original_filename", "TEXT", "TEXT"]
  ];
  // Built per flavour so the two index arrays are separate objects.
  const buildIndexes = () => [
    "CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
    "CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
    "CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
    "CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
  ];
  const backfill = [
    "UPDATE hazo_files SET",
    "file_refs = COALESCE(file_refs, '[]'),",
    "ref_count = COALESCE(ref_count, 0),",
    "status = COALESCE(status, 'active')",
    "WHERE file_refs IS NULL OR ref_count IS NULL OR status IS NULL"
  ].join("\n");
  return {
    tableName: HAZO_FILES_DEFAULT_TABLE_NAME,
    sqlite: {
      alterStatements: columnDefs.map(
        ([name, sqliteDef]) => `ALTER TABLE hazo_files ADD COLUMN ${name} ${sqliteDef}`
      ),
      indexes: buildIndexes(),
      backfill
    },
    postgres: {
      alterStatements: columnDefs.map(
        ([name, , postgresDef]) => `ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS ${name} ${postgresDef}`
      ),
      indexes: buildIndexes(),
      backfill
    },
    newColumns: columnDefs.map(([name]) => name)
  };
})();
4694
/**
 * Return the V2 migration SQL for a custom table name.
 *
 * Every occurrence of the default table name in the statements (including
 * inside index names) is replaced by `tableName`. The replacement is literal,
 * so characters such as `$` in `tableName` are inserted as-is rather than
 * being interpreted as replacement patterns.
 *
 * NOTE(review): `tableName` is spliced into SQL unescaped — it must come from
 * trusted configuration.
 *
 * @param {string} tableName - Table to target.
 * @param {string} dbType - Key of a database flavour in `HAZO_FILES_MIGRATION_V2`.
 * @returns {{alterStatements: string[], indexes: string[], backfill: string}}
 * @throws {TypeError} When `dbType` does not name a migration flavour.
 */
function getMigrationForTable(tableName, dbType) {
  const migration = HAZO_FILES_MIGRATION_V2[dbType];
  if (!migration || !Array.isArray(migration.alterStatements)) {
    throw new TypeError(`Unsupported database type for V2 migration: ${String(dbType)}`);
  }
  const defaultName = HAZO_FILES_MIGRATION_V2.tableName;
  // split/join performs a global, literal replacement.
  const retarget = (sql) => sql.split(defaultName).join(tableName);
  return {
    alterStatements: migration.alterStatements.map(retarget),
    indexes: migration.indexes.map(retarget),
    backfill: retarget(migration.backfill)
  };
}
4147
4707
  var HAZO_FILES_NAMING_DEFAULT_TABLE_NAME = "hazo_files_naming";
4148
4708
  var HAZO_FILES_NAMING_TABLE_SCHEMA = {
4149
4709
  tableName: HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
@@ -4201,6 +4761,24 @@ function getNamingSchemaForTable(tableName, dbType) {
4201
4761
  };
4202
4762
  }
4203
4763
 
4764
+ // src/migrations/add-reference-tracking.ts
4765
/**
 * Apply the V2 (reference tracking) schema changes: add the new columns, then
 * create the new indexes.
 *
 * The function is safe to re-run: an ALTER statement that fails because the
 * column already exists is ignored. Any other failure (missing table, lost
 * connection, syntax error, ...) is rethrown instead of being silently
 * swallowed, so the caller sees the real cause rather than a later, less
 * specific index error.
 *
 * @param {{run: function(string): Promise<*>}} executor - Runs one SQL statement.
 * @param {string} dbType - Key of a database flavour in `HAZO_FILES_MIGRATION_V2`.
 * @param {string} [tableName] - Custom table name; the default table is used
 *   when omitted.
 * @returns {Promise<void>}
 */
async function migrateToV2(executor, dbType, tableName) {
  const migration = tableName ? getMigrationForTable(tableName, dbType) : HAZO_FILES_MIGRATION_V2[dbType];
  // SQLite reports "duplicate column name: x"; PostgreSQL reports
  // 'column "x" of relation "y" already exists'.
  const columnExistsPattern = /duplicate column|already exists/i;
  const isColumnExistsError = (error) => {
    const messages = [error?.message ?? String(error), error?.cause?.message ?? ""];
    return messages.some((message) => columnExistsPattern.test(String(message)));
  };
  for (const stmt of migration.alterStatements) {
    try {
      await executor.run(stmt);
    } catch (error) {
      if (!isColumnExistsError(error)) throw error;
    }
  }
  for (const idx of migration.indexes) {
    await executor.run(idx);
  }
}
4777
/**
 * Run the V2 backfill statement, which fills NULL `file_refs`, `ref_count`
 * and `status` values with their defaults.
 *
 * @param {{run: function(string): Promise<*>}} executor - Runs one SQL statement.
 * @param {string} dbType - Key of a database flavour in `HAZO_FILES_MIGRATION_V2`.
 * @param {string} [tableName] - Custom table name; the default table is used
 *   when omitted.
 * @returns {Promise<void>}
 */
async function backfillV2Defaults(executor, dbType, tableName) {
  const { backfill } = tableName
    ? getMigrationForTable(tableName, dbType)
    : HAZO_FILES_MIGRATION_V2[dbType];
  await executor.run(backfill);
}
4781
+
4204
4782
  // src/server/index.ts
4205
4783
  try {
4206
4784
  require("server-only");
@@ -4223,6 +4801,7 @@ try {
4223
4801
  GoogleDriveAuth,
4224
4802
  GoogleDriveModule,
4225
4803
  HAZO_FILES_DEFAULT_TABLE_NAME,
4804
+ HAZO_FILES_MIGRATION_V2,
4226
4805
  HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
4227
4806
  HAZO_FILES_NAMING_TABLE_SCHEMA,
4228
4807
  HAZO_FILES_TABLE_SCHEMA,
@@ -4240,6 +4819,8 @@ try {
4240
4819
  TrackedFileManager,
4241
4820
  UploadExtractService,
4242
4821
  addExtractionToFileData,
4822
+ backfillV2Defaults,
4823
+ buildFileWithStatus,
4243
4824
  clearExtractions,
4244
4825
  clonePattern,
4245
4826
  computeFileHash,
@@ -4253,6 +4834,7 @@ try {
4253
4834
  createFileItem,
4254
4835
  createFileManager,
4255
4836
  createFileMetadataService,
4837
+ createFileRef,
4256
4838
  createFolderItem,
4257
4839
  createGoogleDriveAuth,
4258
4840
  createGoogleDriveModule,
@@ -4276,6 +4858,7 @@ try {
4276
4858
  generateExtractionId,
4277
4859
  generateId,
4278
4860
  generatePreviewName,
4861
+ generateRefId,
4279
4862
  generateSampleConfig,
4280
4863
  generateSegmentId,
4281
4864
  getBaseName,
@@ -4289,6 +4872,7 @@ try {
4289
4872
  getFileCategory,
4290
4873
  getFileMetadataValues,
4291
4874
  getMergedData,
4875
+ getMigrationForTable,
4292
4876
  getMimeType,
4293
4877
  getNameWithoutExtension,
4294
4878
  getNamingSchemaForTable,
@@ -4320,20 +4904,26 @@ try {
4320
4904
  joinPath,
4321
4905
  loadConfig,
4322
4906
  loadConfigAsync,
4907
+ migrateToV2,
4323
4908
  normalizePath,
4324
4909
  parseConfig,
4325
4910
  parseFileData,
4911
+ parseFileRefs,
4326
4912
  parsePatternString,
4327
4913
  patternToString,
4328
4914
  recalculateMergedData,
4329
4915
  registerModule,
4330
4916
  removeExtractionById,
4331
4917
  removeExtractionByIndex,
4918
+ removeRefFromArray,
4919
+ removeRefsByCriteriaFromArray,
4332
4920
  sanitizeFilename,
4333
4921
  saveConfig,
4334
4922
  sortItems,
4335
4923
  stringifyFileData,
4924
+ stringifyFileRefs,
4336
4925
  successResult,
4926
+ toV2Record,
4337
4927
  updateExtractionById,
4338
4928
  validateExtractionData,
4339
4929
  validateFileDataStructure,