@proofhound/core 0.1.12 → 0.1.13

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/dist/server/modules/annotation/annotation.controller.d.ts +3 -3
  2. package/dist/server/modules/annotation/annotation.module.d.ts.map +1 -1
  3. package/dist/server/modules/annotation/annotation.module.js +2 -1
  4. package/dist/server/modules/annotation/annotation.module.js.map +1 -1
  5. package/dist/server/modules/annotation/annotation.repository.d.ts +4 -1
  6. package/dist/server/modules/annotation/annotation.repository.d.ts.map +1 -1
  7. package/dist/server/modules/annotation/annotation.repository.js +31 -6
  8. package/dist/server/modules/annotation/annotation.repository.js.map +1 -1
  9. package/dist/server/modules/canary-release/canary-release.module.d.ts.map +1 -1
  10. package/dist/server/modules/canary-release/canary-release.module.js +2 -1
  11. package/dist/server/modules/canary-release/canary-release.module.js.map +1 -1
  12. package/dist/server/modules/canary-release/canary-release.repository.d.ts +4 -1
  13. package/dist/server/modules/canary-release/canary-release.repository.d.ts.map +1 -1
  14. package/dist/server/modules/canary-release/canary-release.repository.js +29 -4
  15. package/dist/server/modules/canary-release/canary-release.repository.js.map +1 -1
  16. package/dist/server/modules/dataset/dataset-import.repository.d.ts +3 -1
  17. package/dist/server/modules/dataset/dataset-import.repository.d.ts.map +1 -1
  18. package/dist/server/modules/dataset/dataset-import.repository.js +44 -9
  19. package/dist/server/modules/dataset/dataset-import.repository.js.map +1 -1
  20. package/dist/server/modules/dataset/dataset-sample-offload.d.ts +36 -0
  21. package/dist/server/modules/dataset/dataset-sample-offload.d.ts.map +1 -0
  22. package/dist/server/modules/dataset/dataset-sample-offload.js +41 -0
  23. package/dist/server/modules/dataset/dataset-sample-offload.js.map +1 -0
  24. package/dist/server/modules/dataset/dataset-sample-payload.d.ts +21 -0
  25. package/dist/server/modules/dataset/dataset-sample-payload.d.ts.map +1 -0
  26. package/dist/server/modules/dataset/dataset-sample-payload.js +72 -0
  27. package/dist/server/modules/dataset/dataset-sample-payload.js.map +1 -0
  28. package/dist/server/modules/dataset/dataset-sample-projection.d.ts +11 -0
  29. package/dist/server/modules/dataset/dataset-sample-projection.d.ts.map +1 -0
  30. package/dist/server/modules/dataset/dataset-sample-projection.js +49 -0
  31. package/dist/server/modules/dataset/dataset-sample-projection.js.map +1 -0
  32. package/dist/server/modules/dataset/dataset.module.d.ts.map +1 -1
  33. package/dist/server/modules/dataset/dataset.module.js +2 -0
  34. package/dist/server/modules/dataset/dataset.module.js.map +1 -1
  35. package/dist/server/modules/dataset/dataset.repository.d.ts +6 -1
  36. package/dist/server/modules/dataset/dataset.repository.d.ts.map +1 -1
  37. package/dist/server/modules/dataset/dataset.repository.js +59 -12
  38. package/dist/server/modules/dataset/dataset.repository.js.map +1 -1
  39. package/dist/server/modules/experiment/experiment.module.d.ts.map +1 -1
  40. package/dist/server/modules/experiment/experiment.module.js +2 -0
  41. package/dist/server/modules/experiment/experiment.module.js.map +1 -1
  42. package/dist/server/modules/experiment/experiment.workflow.d.ts +7 -1
  43. package/dist/server/modules/experiment/experiment.workflow.d.ts.map +1 -1
  44. package/dist/server/modules/experiment/experiment.workflow.js +33 -4
  45. package/dist/server/modules/experiment/experiment.workflow.js.map +1 -1
  46. package/dist/server/modules/optimization/optimization.module.d.ts.map +1 -1
  47. package/dist/server/modules/optimization/optimization.module.js +2 -0
  48. package/dist/server/modules/optimization/optimization.module.js.map +1 -1
  49. package/dist/server/modules/optimization/optimization.repository.d.ts +5 -1
  50. package/dist/server/modules/optimization/optimization.repository.d.ts.map +1 -1
  51. package/dist/server/modules/optimization/optimization.repository.js +30 -8
  52. package/dist/server/modules/optimization/optimization.repository.js.map +1 -1
  53. package/dist/server/modules/run-result/run-result-compaction-sweeper.d.ts +15 -0
  54. package/dist/server/modules/run-result/run-result-compaction-sweeper.d.ts.map +1 -0
  55. package/dist/server/modules/run-result/run-result-compaction-sweeper.js +74 -0
  56. package/dist/server/modules/run-result/run-result-compaction-sweeper.js.map +1 -0
  57. package/dist/server/modules/run-result/run-result-compaction.d.ts +38 -0
  58. package/dist/server/modules/run-result/run-result-compaction.d.ts.map +1 -0
  59. package/dist/server/modules/run-result/run-result-compaction.js +74 -0
  60. package/dist/server/modules/run-result/run-result-compaction.js.map +1 -0
  61. package/dist/server/modules/run-result/run-result-compactor.d.ts +58 -0
  62. package/dist/server/modules/run-result/run-result-compactor.d.ts.map +1 -0
  63. package/dist/server/modules/run-result/run-result-compactor.js +188 -0
  64. package/dist/server/modules/run-result/run-result-compactor.js.map +1 -0
  65. package/dist/server/modules/run-result/run-result-payload.d.ts +30 -0
  66. package/dist/server/modules/run-result/run-result-payload.d.ts.map +1 -0
  67. package/dist/server/modules/run-result/run-result-payload.js +82 -0
  68. package/dist/server/modules/run-result/run-result-payload.js.map +1 -0
  69. package/dist/server/modules/run-result/run-result-payload.reader.d.ts +20 -0
  70. package/dist/server/modules/run-result/run-result-payload.reader.d.ts.map +1 -0
  71. package/dist/server/modules/run-result/run-result-payload.reader.js +99 -0
  72. package/dist/server/modules/run-result/run-result-payload.reader.js.map +1 -0
  73. package/dist/server/modules/run-result/run-result.controller.d.ts +4 -0
  74. package/dist/server/modules/run-result/run-result.controller.d.ts.map +1 -1
  75. package/dist/server/modules/run-result/run-result.module.d.ts.map +1 -1
  76. package/dist/server/modules/run-result/run-result.module.js +14 -2
  77. package/dist/server/modules/run-result/run-result.module.js.map +1 -1
  78. package/dist/server/modules/run-result/run-result.repository.d.ts +3 -1
  79. package/dist/server/modules/run-result/run-result.repository.d.ts.map +1 -1
  80. package/dist/server/modules/run-result/run-result.repository.js +63 -4
  81. package/dist/server/modules/run-result/run-result.repository.js.map +1 -1
  82. package/package.json +12 -12
@@ -17,7 +17,12 @@ const common_1 = require("@nestjs/common");
17
17
  const drizzle_orm_1 = require("drizzle-orm");
18
18
  const db_1 = require("@proofhound/db");
19
19
  const database_constants_1 = require("../../../shared/database/database.constants");
20
- const { datasetImports, datasetImportSamples, datasets, projects } = db_1.schema;
20
+ const object_storage_provider_1 = require("../../common/contracts/object-storage.provider");
21
+ const dataset_sample_offload_1 = require("./dataset-sample-offload");
22
+ const { datasetImports, datasetImportSamples, datasetSamples, datasets, projects } = db_1.schema;
23
+ // Per-shard batch for offload-at-promote. Bounded so a batch's data stays in memory only briefly
24
+ // (large image/base64 samples make per-row size unpredictable); each batch becomes one R2 shard.
25
+ const PROMOTE_SHARD_BATCH = 200;
21
26
  // Thrown inside the promote transaction so the caller can map to the right HTTP status while the tx rolls back.
22
27
  class DatasetImportEmptyError extends Error {
23
28
  }
@@ -26,8 +31,9 @@ class DatasetNameTakenError extends Error {
26
31
  }
27
32
  exports.DatasetNameTakenError = DatasetNameTakenError;
28
33
  let DatasetImportRepository = class DatasetImportRepository {
29
- constructor(db) {
34
+ constructor(db, storage) {
30
35
  this.db = db;
36
+ this.storage = storage;
31
37
  }
32
38
  async findProjectAccess(projectId) {
33
39
  const rows = await this.db
@@ -129,12 +135,41 @@ let DatasetImportRepository = class DatasetImportRepository {
129
135
  hasImages: args.hasImages,
130
136
  createdBy: args.actorUserId,
131
137
  });
132
- await tx.execute((0, drizzle_orm_1.sql) `
133
- INSERT INTO ph_assets.dataset_samples (dataset_id, data, external_id)
134
- SELECT ${args.datasetId}::uuid, data, external_id
135
- FROM ph_assets.dataset_import_samples
136
- WHERE import_id = ${args.importId}::uuid
137
- `);
138
+ if (this.storage.isEnabled()) {
139
+ // Offload-at-promote (SPEC 22 §7.2): stream staging into shards + projected rows. The pure
140
+ // orchestration lives in dataset-sample-offload.ts; here we just bind the tx / storage I/O.
141
+ const project = { projectId: args.projectId, source: 'local' };
142
+ const { storagePrefix } = await (0, dataset_sample_offload_1.offloadStagingToShards)({
143
+ datasetId: args.datasetId,
144
+ sampleCount,
145
+ batchSize: PROMOTE_SHARD_BATCH,
146
+ fieldSchema: args.fieldSchema,
147
+ readBatch: (offset, limit) => tx
148
+ .select({ data: datasetImportSamples.data, externalId: datasetImportSamples.externalId })
149
+ .from(datasetImportSamples)
150
+ .where((0, drizzle_orm_1.eq)(datasetImportSamples.importId, args.importId))
151
+ .orderBy((0, drizzle_orm_1.asc)(datasetImportSamples.rowIndex))
152
+ .limit(limit)
153
+ .offset(offset),
154
+ putShard: (name, body) => this.storage.putObject({ project, resourceType: 'dataset_normalized', resourceId: args.datasetId, name }, body, {
155
+ codec: 'gzip',
156
+ }),
157
+ insertRows: async (rows) => {
158
+ await tx.insert(datasetSamples).values(rows);
159
+ },
160
+ });
161
+ if (storagePrefix) {
162
+ await tx.update(datasets).set({ storagePrefix }).where((0, drizzle_orm_1.eq)(datasets.id, args.datasetId));
163
+ }
164
+ }
165
+ else {
166
+ await tx.execute((0, drizzle_orm_1.sql) `
167
+ INSERT INTO ph_assets.dataset_samples (dataset_id, data, external_id)
168
+ SELECT ${args.datasetId}::uuid, data, external_id
169
+ FROM ph_assets.dataset_import_samples
170
+ WHERE import_id = ${args.importId}::uuid
171
+ `);
172
+ }
138
173
  await tx
139
174
  .update(datasetImports)
140
175
  .set({ status: 'ready', datasetId: args.datasetId, updatedAt: new Date() })
@@ -171,6 +206,6 @@ exports.DatasetImportRepository = DatasetImportRepository;
171
206
  exports.DatasetImportRepository = DatasetImportRepository = __decorate([
172
207
  (0, common_1.Injectable)(),
173
208
  __param(0, (0, common_1.Inject)(database_constants_1.DATABASE_CLIENT)),
174
- __metadata("design:paramtypes", [Object])
209
+ __metadata("design:paramtypes", [Object, object_storage_provider_1.ObjectStorageProvider])
175
210
  ], DatasetImportRepository);
176
211
  //# sourceMappingURL=dataset-import.repository.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"dataset-import.repository.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-import.repository.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,2CAAoD;AACpD,6CAAqE;AAErE,uCAAwC;AAExC,oFAA8E;AAE9E,MAAM,EAAE,cAAc,EAAE,oBAAoB,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,WAAM,CAAC;AA4C5E,gHAAgH;AAChH,MAAa,uBAAwB,SAAQ,KAAK;CAAG;AAArD,0DAAqD;AACrD,MAAa,qBAAsB,SAAQ,KAAK;CAAG;AAAnD,sDAAmD;AAG5C,IAAM,uBAAuB,GAA7B,MAAM,uBAAuB;IAClC,YAAsD,EAAY;QAAZ,OAAE,GAAF,EAAE,CAAU;IAAG,CAAC;IAEtE,KAAK,CAAC,iBAAiB,CAAC,SAAiB;QACvC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,CAAC;aAC3B,IAAI,CAAC,QAAQ,CAAC;aACd,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,CAAC,EAAE,IAAA,oBAAM,EAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;aAClE,KAAK,CAAC,CAAC,CAAC,CAAC;QACZ,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,SAAiB,EAAE,IAAY;QACtD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,CAAC;aAC3B,IAAI,CAAC,QAAQ,CAAC;aACd,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAA,oBAAM,EAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;aAClG,KAAK,CAAC,CAAC,CAAC,CAAC;QACZ,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,IAA6B;QAC9C,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,IAAI,CAAC,EAAE;aACxB,MAAM,CAAC,cAAc,CAAC;aACtB,MAAM,CAAC;YACN,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI;YACnB,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,IAAI;YACjD,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,aAAa;YACrC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,QAAQ;YACtC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,aAAa;YAChD,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,WAAW,IAAI,IAAI;YACpD,YAAY,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY;YACnC,iBAAiB,EAAE,IAAI,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI;YACrD,SAAS,EAAE,IAAI,CAAC,WAAW;SAC5B,CAAC;aACD,SAAS,EAAE,CAAC;QACf,IAAI,CAAC,GAAG;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QACnE,OAA
O,GAAuB,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,SAAiB,EAAE,QAAgB;QACtD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,EAAE;aACR,IAAI,CAAC,cAAc,CAAC;aACpB,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC,CAAC;aACpF,KAAK,CAAC,CAAC,CAAC,CAAC;QACZ,OAAQ,IAAI,CAAC,CAAC,CAAkC,IAAI,IAAI,CAAC;IAC3D,CAAC;IAED,uGAAuG;IACvG,KAAK,CAAC,WAAW,CAAC,QAAgB,EAAE,IAAsB,EAAE,gBAAwB;QAClF,OAAO,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YACtC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,MAAM,EAAE;qBACL,MAAM,CAAC,oBAAoB,CAAC;qBAC5B,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,UAAU,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;qBAC7G,mBAAmB,EAAE,CAAC;YAC3B,CAAC;YACD,MAAM,CAAC,OAAO,CAAC,GAAG,MAAM,EAAE;iBACvB,MAAM,CAAC,cAAc,CAAC;iBACtB,GAAG,CAAC;gBACH,YAAY,EAAE,IAAA,iBAAG,EAAA,YAAY,cAAc,CAAC,YAAY,KAAK,gBAAgB,GAAG;gBAChF,SAAS,EAAE,IAAI,IAAI,EAAE;aACtB,CAAC;iBACD,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC;iBACtC,SAAS,CAAC,EAAE,YAAY,EAAE,cAAc,CAAC,YAAY,EAAE,CAAC,CAAC;YAC5D,OAAO,OAAO,EAAE,YAAY,IAAI,gBAAgB,CAAC;QACnD,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,yBAAyB,CAAC,QAAgB,EAAE,KAAa;QAC7D,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,IAAI,EAAE,oBAAoB,CAAC,IAAI,EAAE,CAAC;aAC3C,IAAI,CAAC,oBAAoB,CAAC;aAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;aAClD,OAAO,CAAC,IAAA,iBAAG,EAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;aAC3C,KAAK,CAAC,KAAK,CAAC,CAAC;QAChB,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CACtB,GAAG,CAAC,IAAI,IAAI,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,IAAgC,CAAC,CAAC,CAAC,EAAE,CAClH,CAAC;IACJ,CAAC;IAED,6HAA6H;IAC7H,KAAK,CAAC,OAAO,CAAC,IAA8B;QAC1C,OAAO,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YACtC,MAAM,CAAC,QAAQ,CAAC,GAAG,MAAM,EAAE;iBACxB,MAAM,CAAC,EAAE,KAAK,EAAE,IAAA,iBAAG,EAAQ,eAAe,EAAE,CAAC;iBAC7C,I
AAI,CAAC,oBAAoB,CAAC;iBAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC3D,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC;YACjD,IAAI,WAAW,KAAK,CAAC;gBAAE,MAAM,IAAI,uBAAuB,EAAE,CAAC;YAE3D,MAAM,KAAK,GAAG,MAAM,EAAE;iBACnB,MAAM,CAAC,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,CAAC;iBAC3B,IAAI,CAAC,QAAQ,CAAC;iBACd,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAA,oBAAM,EAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;iBAC5G,KAAK,CAAC,CAAC,CAAC,CAAC;YACZ,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAM,IAAI,qBAAqB,EAAE,CAAC;YAExD,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;gBAC/B,EAAE,EAAE,IAAI,CAAC,SAAS;gBAClB,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,WAAW;gBACX,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,SAAS,EAAE,IAAI,CAAC,WAAW;aAC5B,CAAC,CAAC;YAEH,MAAM,EAAE,CAAC,OAAO,CAAC,IAAA,iBAAG,EAAA;;iBAET,IAAI,CAAC,SAAS;;4BAEH,IAAI,CAAC,QAAQ;OAClC,CAAC,CAAC;YAEH,MAAM,EAAE;iBACL,MAAM,CAAC,cAAc,CAAC;iBACtB,GAAG,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,EAAE,CAAC;iBAC1E,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAE/C,MAAM,EAAE,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAE9F,OAAO,EAAE,WAAW,EAAE,CAAC;QACzB,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,SAAiB,EAAE,QAAgB;QACpD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,EAAE;aAC1B,MAAM,CAAC,cAAc,CAAC;aACtB,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC,CAAC;aACpF,SAAS,CAAC,EAAE,EAAE,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC,CAAC;QACxC,OAAO,OAAO,CAAC,MAAM,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,SAAe;QACtC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,EAAE,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC;aACjC,IAAI,CAAC,cAAc,CAAC;aACpB,KAAK,CAAC,I
AAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,IAAA,gBAAE,EAAC,cAAc,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC;QAC/F,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,GAAa;QACpC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAC/B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,EAAE;aAC1B,MAAM,CAAC,cAAc,CAAC;aACtB,KAAK,CAAC,IAAA,qBAAO,EAAC,cAAc,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;aACtC,SAAS,CAAC,EAAE,EAAE,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC,CAAC;QACxC,OAAO,OAAO,CAAC,MAAM,CAAC;IACxB,CAAC;CACF,CAAA;AAzJY,0DAAuB;kCAAvB,uBAAuB;IADnC,IAAA,mBAAU,GAAE;IAEE,WAAA,IAAA,eAAM,EAAC,oCAAe,CAAC,CAAA;;GADzB,uBAAuB,CAyJnC"}
1
+ {"version":3,"file":"dataset-import.repository.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-import.repository.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,2CAAoD;AACpD,6CAAqE;AAErE,uCAAwC;AAExC,oFAA8E;AAC9E,4FAAuF;AACvF,qEAAkE;AAElE,MAAM,EAAE,cAAc,EAAE,oBAAoB,EAAE,cAAc,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,WAAM,CAAC;AAE5F,iGAAiG;AACjG,iGAAiG;AACjG,MAAM,mBAAmB,GAAG,GAAG,CAAC;AA4ChC,gHAAgH;AAChH,MAAa,uBAAwB,SAAQ,KAAK;CAAG;AAArD,0DAAqD;AACrD,MAAa,qBAAsB,SAAQ,KAAK;CAAG;AAAnD,sDAAmD;AAG5C,IAAM,uBAAuB,GAA7B,MAAM,uBAAuB;IAClC,YAC4C,EAAY,EACrC,OAA8B;QADL,OAAE,GAAF,EAAE,CAAU;QACrC,YAAO,GAAP,OAAO,CAAuB;IAC9C,CAAC;IAEJ,KAAK,CAAC,iBAAiB,CAAC,SAAiB;QACvC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,CAAC;aAC3B,IAAI,CAAC,QAAQ,CAAC;aACd,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,CAAC,EAAE,IAAA,oBAAM,EAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;aAClE,KAAK,CAAC,CAAC,CAAC,CAAC;QACZ,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,SAAiB,EAAE,IAAY;QACtD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,CAAC;aAC3B,IAAI,CAAC,QAAQ,CAAC;aACd,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAA,oBAAM,EAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;aAClG,KAAK,CAAC,CAAC,CAAC,CAAC;QACZ,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,IAA6B;QAC9C,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,IAAI,CAAC,EAAE;aACxB,MAAM,CAAC,cAAc,CAAC;aACtB,MAAM,CAAC;YACN,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI;YACnB,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,IAAI;YACjD,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,aAAa;YACrC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,QAAQ;YACtC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,aAAa;YAChD,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,WAAW,IAAI,IAAI;YACpD,YAAY,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY;YACnC,iBAAiB,EAAE,IAAI,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI
;YACrD,SAAS,EAAE,IAAI,CAAC,WAAW;SAC5B,CAAC;aACD,SAAS,EAAE,CAAC;QACf,IAAI,CAAC,GAAG;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QACnE,OAAO,GAAuB,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,SAAiB,EAAE,QAAgB;QACtD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,EAAE;aACR,IAAI,CAAC,cAAc,CAAC;aACpB,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC,CAAC;aACpF,KAAK,CAAC,CAAC,CAAC,CAAC;QACZ,OAAQ,IAAI,CAAC,CAAC,CAAkC,IAAI,IAAI,CAAC;IAC3D,CAAC;IAED,uGAAuG;IACvG,KAAK,CAAC,WAAW,CAAC,QAAgB,EAAE,IAAsB,EAAE,gBAAwB;QAClF,OAAO,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YACtC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,MAAM,EAAE;qBACL,MAAM,CAAC,oBAAoB,CAAC;qBAC5B,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,UAAU,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;qBAC7G,mBAAmB,EAAE,CAAC;YAC3B,CAAC;YACD,MAAM,CAAC,OAAO,CAAC,GAAG,MAAM,EAAE;iBACvB,MAAM,CAAC,cAAc,CAAC;iBACtB,GAAG,CAAC;gBACH,YAAY,EAAE,IAAA,iBAAG,EAAA,YAAY,cAAc,CAAC,YAAY,KAAK,gBAAgB,GAAG;gBAChF,SAAS,EAAE,IAAI,IAAI,EAAE;aACtB,CAAC;iBACD,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC;iBACtC,SAAS,CAAC,EAAE,YAAY,EAAE,cAAc,CAAC,YAAY,EAAE,CAAC,CAAC;YAC5D,OAAO,OAAO,EAAE,YAAY,IAAI,gBAAgB,CAAC;QACnD,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,yBAAyB,CAAC,QAAgB,EAAE,KAAa;QAC7D,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,IAAI,EAAE,oBAAoB,CAAC,IAAI,EAAE,CAAC;aAC3C,IAAI,CAAC,oBAAoB,CAAC;aAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;aAClD,OAAO,CAAC,IAAA,iBAAG,EAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;aAC3C,KAAK,CAAC,KAAK,CAAC,CAAC;QAChB,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CACtB,GAAG,CAAC,IAAI,IAAI,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,IAAgC,CAAC,CAAC,CAAC,EAAE,CAClH,CAAC;IACJ,CAAC;IAED,6HAA6H;IAC7H,KAAK,CAAC,OAAO,CAAC,IAA8B;QAC1C,OAAO,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,KAA
K,EAAE,EAAE,EAAE,EAAE;YACtC,MAAM,CAAC,QAAQ,CAAC,GAAG,MAAM,EAAE;iBACxB,MAAM,CAAC,EAAE,KAAK,EAAE,IAAA,iBAAG,EAAQ,eAAe,EAAE,CAAC;iBAC7C,IAAI,CAAC,oBAAoB,CAAC;iBAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC3D,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC;YACjD,IAAI,WAAW,KAAK,CAAC;gBAAE,MAAM,IAAI,uBAAuB,EAAE,CAAC;YAE3D,MAAM,KAAK,GAAG,MAAM,EAAE;iBACnB,MAAM,CAAC,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,CAAC;iBAC3B,IAAI,CAAC,QAAQ,CAAC;iBACd,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAA,oBAAM,EAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;iBAC5G,KAAK,CAAC,CAAC,CAAC,CAAC;YACZ,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAM,IAAI,qBAAqB,EAAE,CAAC;YAExD,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;gBAC/B,EAAE,EAAE,IAAI,CAAC,SAAS;gBAClB,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,WAAW;gBACX,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,SAAS,EAAE,IAAI,CAAC,WAAW;aAC5B,CAAC,CAAC;YAEH,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;gBAC7B,2FAA2F;gBAC3F,4FAA4F;gBAC5F,MAAM,OAAO,GAAG,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,OAAgB,EAAE,CAAC;gBACxE,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,IAAA,+CAAsB,EAAC;oBACrD,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,WAAW;oBACX,SAAS,EAAE,mBAAmB;oBAC9B,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,SAAS,EAAE,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAC3B,EAAE;yBACC,MAAM,CAAC,EAAE,IAAI,EAAE,oBAAoB,CAAC,IAAI,EAAE,UAAU,EAAE,oBAAoB,CAAC,UAAU,EAAE,CAAC;yBACxF,IAAI,CAAC,oBAAoB,CAAC;yBAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;yBACvD,OAAO,CAAC,IAAA,iBAAG,EAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;yBAC3C,KAAK,CAAC,KAAK,CAAC;yBACZ,MAAM,CAAC,MAAM,CAAC;oBACnB,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CACvB,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,UAAU,EAAE,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE;wBAC9G,KAAK,EAAE,MAAM;qBACd,CAAC;oBACJ,UAAU,EAAE,KAAK,E
AAE,IAAI,EAAE,EAAE;wBACzB,MAAM,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBAC/C,CAAC;iBACF,CAAC,CAAC;gBACH,IAAI,aAAa,EAAE,CAAC;oBAClB,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,EAAE,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC1F,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,EAAE,CAAC,OAAO,CAAC,IAAA,iBAAG,EAAA;;mBAET,IAAI,CAAC,SAAS;;8BAEH,IAAI,CAAC,QAAQ;SAClC,CAAC,CAAC;YACL,CAAC;YAED,MAAM,EAAE;iBACL,MAAM,CAAC,cAAc,CAAC;iBACtB,GAAG,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,EAAE,CAAC;iBAC1E,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAE/C,MAAM,EAAE,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAE9F,OAAO,EAAE,WAAW,EAAE,CAAC;QACzB,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,SAAiB,EAAE,QAAgB;QACpD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,EAAE;aAC1B,MAAM,CAAC,cAAc,CAAC;aACtB,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC,CAAC;aACpF,SAAS,CAAC,EAAE,EAAE,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC,CAAC;QACxC,OAAO,OAAO,CAAC,MAAM,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,SAAe;QACtC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,EAAE,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC;aACjC,IAAI,CAAC,cAAc,CAAC;aACpB,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,IAAA,gBAAE,EAAC,cAAc,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC;QAC/F,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,GAAa;QACpC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAC/B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,EAAE;aAC1B,MAAM,CAAC,cAAc,CAAC;aACtB,KAAK,CAAC,IAAA,qBAAO,EAAC,cAAc,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;aACtC,SAAS,CAAC,EAAE,EAAE,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC,CAAC;QACxC,OAAO,OAAO,CAAC,MAAM,CAAC;IACxB,CAAC;CACF,CAAA;AA1LY,0DAAuB;kCAAvB,uBAAuB;IADnC,IAAA,mBAAU,GAAE;IA
GR,WAAA,IAAA,eAAM,EAAC,oCAAe,CAAC,CAAA;6CACE,+CAAqB;GAHtC,uBAAuB,CA0LnC"}
@@ -0,0 +1,36 @@
1
+ import { Buffer } from 'node:buffer';
2
+ import type { DatasetFieldSchemaDto } from '@proofhound/shared';
3
+ import type { ObjectCodec, StoredObjectRef } from '../../common/contracts/object-storage.provider';
4
+ export interface StagingSample {
5
+ data: unknown;
6
+ externalId: string | null;
7
+ }
8
+ export interface DatasetSampleOffloadRow {
9
+ datasetId: string;
10
+ data: null;
11
+ externalId: string | null;
12
+ payloadRef: {
13
+ shard: StoredObjectRef;
14
+ rowIndex: number;
15
+ };
16
+ searchPreview: string | null;
17
+ expectedOutputScalar: string | null;
18
+ labelScalar: string | null;
19
+ categoryScalar: string | null;
20
+ indexValues: Record<string, string> | null;
21
+ }
22
+ export interface OffloadStagingOptions {
23
+ datasetId: string;
24
+ sampleCount: number;
25
+ batchSize: number;
26
+ fieldSchema: DatasetFieldSchemaDto[];
27
+ codec?: ObjectCodec;
28
+ readBatch: (offset: number, limit: number) => Promise<StagingSample[]>;
29
+ putShard: (name: string, body: Buffer) => Promise<StoredObjectRef>;
30
+ insertRows: (rows: DatasetSampleOffloadRow[]) => Promise<void>;
31
+ }
32
+ export declare function offloadStagingToShards(opts: OffloadStagingOptions): Promise<{
33
+ shards: number;
34
+ storagePrefix: string | null;
35
+ }>;
36
+ //# sourceMappingURL=dataset-sample-offload.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset-sample-offload.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-offload.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAChE,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,gDAAgD,CAAC;AAInG,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,OAAO,CAAC;IACd,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED,MAAM,WAAW,uBAAuB;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,EAAE;QAAE,KAAK,EAAE,eAAe,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IACzD,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;CAC5C;AAED,MAAM,WAAW,qBAAqB;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,qBAAqB,EAAE,CAAC;IACrC,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,aAAa,EAAE,CAAC,CAAC;IACvE,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,eAAe,CAAC,CAAC;IACnE,UAAU,EAAE,CAAC,IAAI,EAAE,uBAAuB,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CAChE;AAED,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,qBAAqB,GAC1B,OAAO,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,CAAC,CA2C3D"}
@@ -0,0 +1,41 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.offloadStagingToShards = offloadStagingToShards;
4
+ const run_result_payload_1 = require("../run-result/run-result-payload");
5
+ const dataset_sample_projection_1 = require("./dataset-sample-projection");
6
+ async function offloadStagingToShards(opts) {
7
+ const codec = opts.codec ?? 'gzip';
8
+ let shardSeq = 0;
9
+ let offset = 0;
10
+ let firstShardKey = null;
11
+ while (offset < opts.sampleCount) {
12
+ const batch = await opts.readBatch(offset, opts.batchSize);
13
+ if (batch.length === 0)
14
+ break;
15
+ const body = await (0, run_result_payload_1.encodeShard)(batch.map((row) => row.data), codec);
16
+ const shardRef = await opts.putShard(`shard-${String(shardSeq).padStart(5, '0')}.jsonl.gz`, body);
17
+ firstShardKey ??= shardRef.key;
18
+ await opts.insertRows(batch.map((row, rowIndex) => {
19
+ const data = (row.data ?? null);
20
+ const projection = (0, dataset_sample_projection_1.projectDatasetSample)(data, opts.fieldSchema);
21
+ return {
22
+ datasetId: opts.datasetId,
23
+ data: null,
24
+ externalId: row.externalId,
25
+ payloadRef: { shard: shardRef, rowIndex },
26
+ searchPreview: projection.searchPreview,
27
+ expectedOutputScalar: projection.expectedOutputScalar,
28
+ labelScalar: projection.labelScalar,
29
+ categoryScalar: projection.categoryScalar,
30
+ indexValues: projection.indexValues,
31
+ };
32
+ }));
33
+ offset += batch.length;
34
+ shardSeq += 1;
35
+ }
36
+ return {
37
+ shards: shardSeq,
38
+ storagePrefix: firstShardKey ? firstShardKey.slice(0, firstShardKey.lastIndexOf('/') + 1) : null,
39
+ };
40
+ }
41
+ //# sourceMappingURL=dataset-sample-offload.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset-sample-offload.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-offload.ts"],"names":[],"mappings":";;AAwCA,wDA6CC;AA5ED,yEAA+D;AAC/D,2EAAmE;AA8B5D,KAAK,UAAU,sBAAsB,CAC1C,IAA2B;IAE3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC;IACnC,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,aAAa,GAAkB,IAAI,CAAC;IAExC,OAAO,MAAM,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACjC,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAC3D,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM;QAE9B,MAAM,IAAI,GAAG,MAAM,IAAA,gCAAW,EAC5B,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAC5B,KAAK,CACN,CAAC;QACF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,MAAM,CAAC,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;QAClG,aAAa,KAAK,QAAQ,CAAC,GAAG,CAAC;QAE/B,MAAM,IAAI,CAAC,UAAU,CACnB,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;YAC1B,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,IAAI,CAAmC,CAAC;YAClE,MAAM,UAAU,GAAG,IAAA,gDAAoB,EAAC,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YAChE,OAAO;gBACL,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,IAAI,EAAE,IAAI;gBACV,UAAU,EAAE,GAAG,CAAC,UAAU;gBAC1B,UAAU,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE;gBACzC,aAAa,EAAE,UAAU,CAAC,aAAa;gBACvC,oBAAoB,EAAE,UAAU,CAAC,oBAAoB;gBACrD,WAAW,EAAE,UAAU,CAAC,WAAW;gBACnC,cAAc,EAAE,UAAU,CAAC,cAAc;gBACzC,WAAW,EAAE,UAAU,CAAC,WAAW;aACpC,CAAC;QACJ,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC;QACvB,QAAQ,IAAI,CAAC,CAAC;IAChB,CAAC;IAED,OAAO;QACL,MAAM,EAAE,QAAQ;QAChB,aAAa,EAAE,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;KACjG,CAAC;AACJ,CAAC"}
@@ -0,0 +1,21 @@
1
+ import { ObjectStorageProvider, type StoredObjectRef } from '../../common/contracts/object-storage.provider';
2
+ /** Stored in `dataset_samples.payload_ref`: the shard holding this sample's data + its line. */
3
+ export interface DatasetSamplePayloadRef { // pointer from one sample row to its offloaded content
4
+ shard: StoredObjectRef; // object the reader passes to storage.getObject(); its `codec` is handed to decodeShard
5
+ rowIndex: number; // array index of this sample within the decoded shard lines
6
+ }
7
+ export interface DatasetSamplePayloadRow { // minimal row shape DatasetSamplePayloadReader needs to resolve a sample
8
+ data: unknown; // inline content; when non-null it is returned as-is and the shard is never read
9
+ payloadRef: DatasetSamplePayloadRef | null; // where to read the content when `data` is null; null means nothing was offloaded
10
+ }
11
+ export declare class DatasetSamplePayloadReader { // read seam for sample content: inline value first, shard lookup otherwise
12
+ private readonly storage; // provider used for isEnabled() checks and getObject() reads
13
+ constructor(storage: ObjectStorageProvider);
14
+ /** Resolve one sample's data: inline when present, else from its shard. */
15
+ hydrate(row: DatasetSamplePayloadRow): Promise<unknown>; // resolves to null when neither source yields a value
16
+ /** Batch: groups rows by shard so a 500-sample batch is one GET per shard, not one per sample. */
17
+ hydrateMany(rows: DatasetSamplePayloadRow[]): Promise<unknown[]>; // result[i] corresponds to rows[i]
18
+ /** The ref to read from, or null when the inline value should be used (cache / no offload / disabled). */
19
+ private shardRef;
20
+ }
21
+ //# sourceMappingURL=dataset-sample-payload.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset-sample-payload.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-payload.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,qBAAqB,EAAE,KAAK,eAAe,EAAE,MAAM,gDAAgD,CAAC;AAG7G,gGAAgG;AAChG,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,eAAe,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,OAAO,CAAC;IACd,UAAU,EAAE,uBAAuB,GAAG,IAAI,CAAC;CAC5C;AAED,qBACa,0BAA0B;IACzB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBAAP,OAAO,EAAE,qBAAqB;IAE3D,2EAA2E;IACrE,OAAO,CAAC,GAAG,EAAE,uBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC;IAO7D,kGAAkG;IAC5F,WAAW,CAAC,IAAI,EAAE,uBAAuB,EAAE,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAqBtE,0GAA0G;IAC1G,OAAO,CAAC,QAAQ;CAIjB"}
@@ -0,0 +1,72 @@
1
+ "use strict";
2
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
3
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
4
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
5
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
6
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
7
+ };
8
+ var __metadata = (this && this.__metadata) || function (k, v) {
9
+ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.DatasetSamplePayloadReader = void 0;
13
+ // DatasetSamplePayloadReader — the read seam for dataset-sample content (SPEC 22 §7.3).
14
+ //
15
+ // The worker hot paths (experiment rendering, optimization rounds) load a sample's full `data`. Once
16
+ // promote tiers it out, `data` is null and the authoritative content lives in an object-storage shard.
17
+ // This seam returns the inline value when present (a small-sample cache, or no offload), else reads
18
+ // the shard. Pure pass-through when storage is disabled / the row was never offloaded. It reuses the
19
+ // generic JSONL shard codec from the run-result seam (one data object per line).
20
+ const common_1 = require("@nestjs/common");
21
+ const object_storage_provider_1 = require("../../common/contracts/object-storage.provider");
22
+ const run_result_payload_1 = require("../run-result/run-result-payload");
23
let DatasetSamplePayloadReader = class DatasetSamplePayloadReader {
    /**
     * @param storage Object-storage provider; this class calls its `isEnabled()` and `getObject()`.
     */
    constructor(storage) {
        this.storage = storage;
    }
    /**
     * Resolve the content of a single sample.
     *
     * Returns the inline `row.data` (or null) when no shard read is needed; otherwise fetches
     * the referenced shard, decodes it, and returns the line at `rowIndex` (null when absent).
     */
    async hydrate(row) {
        const target = this.shardRef(row);
        if (!target) {
            return row.data ?? null;
        }
        const decoded = await (0, run_result_payload_1.decodeShard)(await this.storage.getObject(target.shard), target.shard.codec);
        const line = decoded[target.rowIndex];
        return line ?? null;
    }
    /**
     * Resolve many samples at once; `result[i]` belongs to `rows[i]`.
     *
     * Rows that need a shard read are bucketed by shard identity so each distinct shard is
     * fetched and decoded once, however many rows point into it.
     */
    async hydrateMany(rows) {
        // Start from the inline values; shard-backed positions are overwritten below.
        const resolved = rows.map((row) => row.data ?? null);
        const groups = new Map();
        rows.forEach((row, position) => {
            const target = this.shardRef(row);
            if (!target) {
                return;
            }
            const key = shardKey(target.shard);
            const existing = groups.get(key);
            const slot = { index: position, rowIndex: target.rowIndex };
            if (existing === undefined) {
                groups.set(key, { shard: target.shard, entries: [slot] });
            }
            else {
                existing.entries.push(slot);
            }
        });
        // Kick off every shard read before awaiting any of them.
        const reads = [];
        for (const { shard, entries } of groups.values()) {
            reads.push((async () => {
                const decoded = await (0, run_result_payload_1.decodeShard)(await this.storage.getObject(shard), shard.codec);
                entries.forEach(({ index, rowIndex }) => {
                    resolved[index] = decoded[rowIndex] ?? null;
                });
            })());
        }
        await Promise.all(reads);
        return resolved;
    }
    /**
     * Decide where a row's content comes from: returns `row.payloadRef` when the shard must be
     * read, or null when the inline value is used (inline data present, no ref, or storage disabled).
     */
    shardRef(row) {
        if (row.data != null) {
            return null;
        }
        if (row.payloadRef == null) {
            return null;
        }
        if (!this.storage.isEnabled()) {
            return null;
        }
        return row.payloadRef;
    }
};
64
+ exports.DatasetSamplePayloadReader = DatasetSamplePayloadReader;
65
+ exports.DatasetSamplePayloadReader = DatasetSamplePayloadReader = __decorate([
66
+ (0, common_1.Injectable)(),
67
+ __metadata("design:paramtypes", [object_storage_provider_1.ObjectStorageProvider])
68
+ ], DatasetSamplePayloadReader);
69
/**
 * Build the grouping key for a stored-object reference: `provider:bucket:key`,
 * with a missing (null/undefined) bucket rendered as an empty segment.
 */
function shardKey(ref) {
    const { provider, bucket, key } = ref;
    const bucketSegment = bucket ?? '';
    return `${provider}:${bucketSegment}:${key}`;
}
72
+ //# sourceMappingURL=dataset-sample-payload.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset-sample-payload.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-payload.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,wFAAwF;AACxF,EAAE;AACF,qGAAqG;AACrG,uGAAuG;AACvG,oGAAoG;AACpG,qGAAqG;AACrG,iFAAiF;AACjF,2CAA4C;AAC5C,4FAA6G;AAC7G,yEAA+D;AAcxD,IAAM,0BAA0B,GAAhC,MAAM,0BAA0B;IACrC,YAA6B,OAA8B;QAA9B,YAAO,GAAP,OAAO,CAAuB;IAAG,CAAC;IAE/D,2EAA2E;IAC3E,KAAK,CAAC,OAAO,CAAC,GAA4B;QACxC,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,CAAC,GAAG;YAAE,OAAO,GAAG,CAAC,IAAI,IAAI,IAAI,CAAC;QAClC,MAAM,KAAK,GAAG,MAAM,IAAA,gCAAW,EAA0B,MAAM,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACnH,OAAO,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC;IACrC,CAAC;IAED,kGAAkG;IAClG,KAAK,CAAC,WAAW,CAAC,IAA+B;QAC/C,MAAM,GAAG,GAAc,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,GAAG,EAA2F,CAAC;QACnH,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE;YAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAC/B,IAAI,CAAC,GAAG;gBAAE,OAAO;YACjB,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAChC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC/B,IAAI,KAAK;gBAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;;gBAC5D,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC;QAC5F,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,GAAG,CACf,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE;YACrD,MAAM,KAAK,GAAG,MAAM,IAAA,gCAAW,EAA0B,MAAM,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;YAC3G,KAAK,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,OAAO;gBAAE,GAAG,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC;QAClF,CAAC,CAAC,CACH,CAAC;QACF,OAAO,GAAG,CAAC;IACb,CAAC;IAED,0GAA0G;IAClG,QAAQ,CAAC,GAA4B;QAC3C,IAAI,GAAG,CAAC,IAAI,IAAI,IAAI,IAAI,GAAG,CAAC,UAAU,IAAI,I
AAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YAAE,OAAO,IAAI,CAAC;QACzF,OAAO,GAAG,CAAC,UAAU,CAAC;IACxB,CAAC;CACF,CAAA;AAtCY,gEAA0B;qCAA1B,0BAA0B;IADtC,IAAA,mBAAU,GAAE;qCAE2B,+CAAqB;GADhD,0BAA0B,CAsCtC;AAED,SAAS,QAAQ,CAAC,GAAoB;IACpC,OAAO,GAAG,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,MAAM,IAAI,EAAE,IAAI,GAAG,CAAC,GAAG,EAAE,CAAC;AAC1D,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { DatasetFieldSchemaDto } from '@proofhound/shared';
2
+ export interface DatasetSampleProjection { // DB-side summary columns derived from one sample by projectDatasetSample
3
+ searchPreview: string | null; // the sample as text (JSON for objects), cut to a fixed maximum length; null when empty
4
+ expectedOutputScalar: string | null; // scalar value of the field whose role is 'expected_output', else null
5
+ labelScalar: string | null; // NOTE(review): the compiled projectDatasetSample always sets this to null
6
+ categoryScalar: string | null; // NOTE(review): the compiled projectDatasetSample always sets this to null
7
+ indexValues: Record<string, string> | null; // short scalar values of non-image fields keyed by field name; null when none qualify
8
+ }
9
+ /** Build the DB-side projection for one sample from its data + the dataset's field schema. */
10
+ export declare function projectDatasetSample(data: Record<string, unknown> | null, fieldSchema: DatasetFieldSchemaDto[]): DatasetSampleProjection;
11
+ //# sourceMappingURL=dataset-sample-projection.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset-sample-projection.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-projection.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAEhE,MAAM,WAAW,uBAAuB;IACtC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;CAC5C;AAoBD,8FAA8F;AAC9F,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,EACpC,WAAW,EAAE,qBAAqB,EAAE,GACnC,uBAAuB,CAwBzB"}
@@ -0,0 +1,49 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.projectDatasetSample = projectDatasetSample;
4
/** Maximum length of `searchPreview`, in UTF-16 code units. */
const PREVIEW_MAX = 1000;
/** Strings longer than this (UTF-16 code units) are not kept as scalar projections. */
const SCALAR_MAX = 200;
/** Field roles whose values are left out of `indexValues`. */
const IMAGE_ROLES = new Set(['image', 'image_url', 'image_base64']);
/**
 * Reduce a raw field value to a short scalar string.
 *
 * @param value Any field value.
 * @returns The string itself when it is at most SCALAR_MAX long, `String(value)` for numbers
 *   and booleans, and null for everything else (null/undefined, long strings, objects, arrays).
 */
function asScalar(value) {
    if (value === null || value === undefined)
        return null;
    if (typeof value === 'string')
        return value.length > SCALAR_MAX ? null : value;
    if (typeof value === 'number' || typeof value === 'boolean')
        return String(value);
    return null;
}
/**
 * Build the text preview of a sample: the value itself when it is a string, its JSON otherwise,
 * truncated to at most PREVIEW_MAX code units.
 *
 * @param data Sample content.
 * @returns The preview, or null when there is nothing to show.
 */
function previewOf(data) {
    if (data === null || data === undefined)
        return null;
    const text = typeof data === 'string' ? data : JSON.stringify(data);
    // JSON.stringify returns undefined for values it cannot serialise (functions, symbols).
    if (text === undefined || text.length === 0)
        return null;
    if (text.length <= PREVIEW_MAX)
        return text;
    // Do not cut a surrogate pair in half: if the last kept unit is a high surrogate whose
    // low half would be dropped, drop the high half too so the preview stays well-formed.
    const lastKept = text.charCodeAt(PREVIEW_MAX - 1);
    const firstDropped = text.charCodeAt(PREVIEW_MAX);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    return text.slice(0, splitsPair ? PREVIEW_MAX - 1 : PREVIEW_MAX);
}
/**
 * Build the DB-side projection for one sample from its data + the dataset's field schema.
 *
 * @param data Sample content keyed by field name; null/undefined yields an all-null projection.
 * @param fieldSchema Field descriptors; only `name` and `role` are read here.
 * @returns `searchPreview`, `expectedOutputScalar` (from the first field with role
 *   'expected_output'), `indexValues` (short scalars of the non-image fields, or null when
 *   none qualify), and `labelScalar` / `categoryScalar`, which are always null here.
 */
function projectDatasetSample(data, fieldSchema) {
    if (data == null) {
        return { searchPreview: null, expectedOutputScalar: null, labelScalar: null, categoryScalar: null, indexValues: null };
    }
    const expectedField = fieldSchema.find((f) => f.role === 'expected_output')?.name;
    const expectedOutputScalar = expectedField ? asScalar(data[expectedField]) : null;
    // index_values: short scalar values of the non-image fields, so distribution / filter on any
    // configurable field works off `index_values->>field` once `data` is offloaded.
    const indexEntries = [];
    for (const field of fieldSchema) {
        if (IMAGE_ROLES.has(field.role))
            continue;
        const scalar = asScalar(data[field.name]);
        if (scalar !== null)
            indexEntries.push([field.name, scalar]);
    }
    return {
        searchPreview: previewOf(data),
        expectedOutputScalar,
        labelScalar: null,
        categoryScalar: null,
        // Object.fromEntries defines own data properties, so a field named `__proto__` is stored
        // like any other key instead of being swallowed by the prototype setter.
        indexValues: indexEntries.length > 0 ? Object.fromEntries(indexEntries) : null,
    };
}
49
+ //# sourceMappingURL=dataset-sample-projection.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset-sample-projection.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-projection.ts"],"names":[],"mappings":";;AAsCA,oDA2BC;AA9CD,MAAM,WAAW,GAAG,IAAI,CAAC;AACzB,MAAM,UAAU,GAAG,GAAG,CAAC;AACvB,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;AAEpE,SAAS,QAAQ,CAAC,KAAc;IAC9B,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC;IAC/E,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,SAAS;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IAClF,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,SAAS,CAAC,IAAa;IAC9B,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IACrD,MAAM,IAAI,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACpE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACnC,OAAO,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACvE,CAAC;AAED,8FAA8F;AAC9F,SAAgB,oBAAoB,CAClC,IAAoC,EACpC,WAAoC;IAEpC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,EAAE,aAAa,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IACzH,CAAC;IAED,MAAM,aAAa,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,iBAAiB,CAAC,EAAE,IAAI,CAAC;IAClF,MAAM,oBAAoB,GAAG,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAElF,6FAA6F;IAC7F,gFAAgF;IAChF,MAAM,WAAW,GAA2B,EAAE,CAAC;IAC/C,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;QAChC,IAAI,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC;YAAE,SAAS;QAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;QAC1C,IAAI,MAAM,KAAK,IAAI;YAAE,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;IACxD,CAAC;IAED,OAAO;QACL,aAAa,EAAE,SAAS,CAAC,IAAI,CAAC;QAC9B,oBAAoB;QACpB,WAAW,EAAE,IAAI;QACjB,cAAc,EAAE,IAAI;QACpB,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC
,IAAI;KACtE,CAAC;AACJ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"dataset.module.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.module.ts"],"names":[],"mappings":"AAUA,qBAYa,aAAa;CAAG"}
1
+ {"version":3,"file":"dataset.module.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.module.ts"],"names":[],"mappings":"AAWA,qBAaa,aAAa;CAAG"}
@@ -9,6 +9,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
9
9
  exports.DatasetModule = void 0;
10
10
  const common_1 = require("@nestjs/common");
11
11
  const database_module_1 = require("../../../shared/database/database.module");
12
+ const dataset_sample_payload_1 = require("./dataset-sample-payload");
12
13
  const dataset_deletion_hook_1 = require("./dataset-deletion.hook");
13
14
  const dataset_import_controller_1 = require("./dataset-import.controller");
14
15
  const dataset_import_repository_1 = require("./dataset-import.repository");
@@ -25,6 +26,7 @@ exports.DatasetModule = DatasetModule = __decorate([
25
26
  controllers: [dataset_controller_1.DatasetController, dataset_import_controller_1.DatasetImportController],
26
27
  providers: [
27
28
  dataset_repository_1.DatasetRepository,
29
+ dataset_sample_payload_1.DatasetSamplePayloadReader,
28
30
  { provide: dataset_deletion_hook_1.DatasetDeletionHook, useClass: dataset_deletion_hook_1.LocalDatasetDeletionHook },
29
31
  dataset_service_1.DatasetService,
30
32
  dataset_import_repository_1.DatasetImportRepository,
@@ -1 +1 @@
1
- {"version":3,"file":"dataset.module.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.module.ts"],"names":[],"mappings":";;;;;;;;;AAAA,2CAAwC;AACxC,8EAA0E;AAC1E,mEAAwF;AACxF,2EAAsE;AACtE,2EAAsE;AACtE,qEAAgE;AAChE,6DAAyD;AACzD,6DAAyD;AACzD,uDAAmD;AAc5C,IAAM,aAAa,GAAnB,MAAM,aAAa;CAAG,CAAA;AAAhB,sCAAa;wBAAb,aAAa;IAZzB,IAAA,eAAM,EAAC;QACN,OAAO,EAAE,CAAC,gCAAc,CAAC;QACzB,WAAW,EAAE,CAAC,sCAAiB,EAAE,mDAAuB,CAAC;QACzD,SAAS,EAAE;YACT,sCAAiB;YACjB,EAAE,OAAO,EAAE,2CAAmB,EAAE,QAAQ,EAAE,gDAAwB,EAAE;YACpE,gCAAc;YACd,mDAAuB;YACvB,6CAAoB;SACrB;QACD,OAAO,EAAE,CAAC,gCAAc,EAAE,6CAAoB,CAAC;KAChD,CAAC;GACW,aAAa,CAAG"}
1
+ {"version":3,"file":"dataset.module.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.module.ts"],"names":[],"mappings":";;;;;;;;;AAAA,2CAAwC;AACxC,8EAA0E;AAC1E,qEAAsE;AACtE,mEAAwF;AACxF,2EAAsE;AACtE,2EAAsE;AACtE,qEAAgE;AAChE,6DAAyD;AACzD,6DAAyD;AACzD,uDAAmD;AAe5C,IAAM,aAAa,GAAnB,MAAM,aAAa;CAAG,CAAA;AAAhB,sCAAa;wBAAb,aAAa;IAbzB,IAAA,eAAM,EAAC;QACN,OAAO,EAAE,CAAC,gCAAc,CAAC;QACzB,WAAW,EAAE,CAAC,sCAAiB,EAAE,mDAAuB,CAAC;QACzD,SAAS,EAAE;YACT,sCAAiB;YACjB,mDAA0B;YAC1B,EAAE,OAAO,EAAE,2CAAmB,EAAE,QAAQ,EAAE,gDAAwB,EAAE;YACpE,gCAAc;YACd,mDAAuB;YACvB,6CAAoB;SACrB;QACD,OAAO,EAAE,CAAC,gCAAc,EAAE,6CAAoB,CAAC;KAChD,CAAC;GACW,aAAa,CAAG"}
@@ -1,5 +1,7 @@
1
1
  import type { DbClient } from '@proofhound/db';
2
2
  import type { CreateDatasetDto, DatasetFieldSchemaDto } from '@proofhound/shared';
3
+ import { ObjectStorageProvider } from '../../common/contracts/object-storage.provider';
4
+ import { DatasetSamplePayloadReader } from './dataset-sample-payload';
3
5
  export interface DatasetProjectAccessRow {
4
6
  id: string;
5
7
  }
@@ -58,7 +60,10 @@ export interface DatasetDeletionImpactRows {
58
60
  }
59
61
  export declare class DatasetRepository {
60
62
  private readonly db;
61
- constructor(db: DbClient);
63
+ private readonly sampleReader;
64
+ private readonly storage;
65
+ constructor(db: DbClient, sampleReader: DatasetSamplePayloadReader, storage: ObjectStorageProvider);
66
+ private hydrateSampleRows;
62
67
  private datasetSelectFields;
63
68
  findProjectAccess(_actorUserId: string, projectId: string, _isSuperAdmin: boolean): Promise<DatasetProjectAccessRow | null>;
64
69
  findDatasetByProjectAndName(projectId: string, name: string): Promise<DatasetRow | null>;
@@ -1 +1 @@
1
- {"version":3,"file":"dataset.repository.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.repository.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,KAAK,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAKlF,MAAM,WAAW,uBAAuB;IACtC,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,SAAS,EAAE,IAAI,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,UAAU,EAAE,IAAI,GAAG,IAAI,CAAC;IACxB,SAAS,EAAE,IAAI,GAAG,IAAI,CAAC;CACxB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,OAAO,CAAC;IACd,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,SAAS,EAAE,IAAI,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,uBAAuB;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,GAAG,EAAE,gBAAgB,CAAC;IACtB,WAAW,EAAE,qBAAqB,EAAE,CAAC;IACrC,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;CACpC;AAED,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED,MAAM,WAAW,wBAAwB;IACvC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,SAAS,EAAE,IAAI,GAAG,IAAI,CAAC;CACxB;AAED,MAAM,WAAW,yBAAyB;IACxC,WAAW,EAAE,wBAAwB,EAAE,CAAC;IACxC,aAAa,EAAE,wBAAwB,EAAE,CAAC;CAC3C;AAED,qBACa,iBAAiB;IACS,OAAO,CAAC,QAAQ,CAAC,EAAE;gBAAF,EAAE,EAAE,QAAQ;IAElE,OAAO,CAAC,mBAAmB,CAgBzB;IAEI,iBAAiB,CACrB,YAAY,EAAE,MAAM,EACpB,SAAS,EAAE,MAAM,EACjB,aAAa,EAAE,OAAO,GACrB,OAAO,CAAC,uBAAuB,GAAG,IAAI,CAAC;IASpC,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,G
AAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAUxF,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC;IAQtD,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAUjF,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQnE,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQnE,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAUlE,sBAAsB,CAC1B,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,GAC1D,OAAO,CAAC;QAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAyBjD,6BAA6B,CACjC,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAkB7C,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAuNxE,kBAAkB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,yBAAyB,CAAC;IA4C5F,qBAAqB,CACzB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,yBAAyB,GAC9B,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAavB,sBAAsB,CAC1B,UAAU,EAAE,MAAM,EAAE,GACnB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAuCjE,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAW1E,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAW5E,wBAAwB,CAAC,IAAI,EAAE,uBAAuB,GAAG,OAAO,CAAC,UAAU,CAAC;IAoClF,OAAO,CAAC,aAAa;CAMtB"}
1
+ {"version":3,"file":"dataset.repository.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.repository.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,KAAK,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAElF,OAAO,EAAE,qBAAqB,EAAE,MAAM,gDAAgD,CAAC;AAEvF,OAAO,EAAgC,0BAA0B,EAAE,MAAM,0BAA0B,CAAC;AAOpG,MAAM,WAAW,uBAAuB;IACtC,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,SAAS,EAAE,IAAI,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,UAAU,EAAE,IAAI,GAAG,IAAI,CAAC;IACxB,SAAS,EAAE,IAAI,GAAG,IAAI,CAAC;CACxB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,OAAO,CAAC;IACd,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,SAAS,EAAE,IAAI,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,uBAAuB;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,GAAG,EAAE,gBAAgB,CAAC;IACtB,WAAW,EAAE,qBAAqB,EAAE,CAAC;IACrC,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;CACpC;AAED,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED,MAAM,WAAW,wBAAwB;IACvC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,SAAS,EAAE,IAAI,GAAG,IAAI,CAAC;CACxB;AAED,MAAM,WAAW,yBAAyB;IACxC,WAAW,EAAE,wBAAwB,EAAE,CAAC;IACxC,aAAa,EAAE,wBAAwB,EAAE,CAAC;CAC3C;AAED,qBACa,iBAAiB;IAED,OAAO,CAAC,QAAQ,CAAC,EAAE;IAC5C,OAAO,CAAC,QAAQ,CAAC,YAAY;IAC7B,OAAO,CAAC,QAAQ,CAAC,OAAO;gBAFkB,EAAE,EAAE,QAAQ,EACrC,YAAY,EAAE,0BAA0B,EACxC,OAAO,EAAE,qBAA
qB;YAKnC,iBAAiB;IAU/B,OAAO,CAAC,mBAAmB,CAgBzB;IAEI,iBAAiB,CACrB,YAAY,EAAE,MAAM,EACpB,SAAS,EAAE,MAAM,EACjB,aAAa,EAAE,OAAO,GACrB,OAAO,CAAC,uBAAuB,GAAG,IAAI,CAAC;IASpC,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAUxF,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC;IAQtD,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAUjF,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQnE,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQnE,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAWlE,sBAAsB,CAC1B,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,GAC1D,OAAO,CAAC;QAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IA6BjD,6BAA6B,CACjC,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAsB7C,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAuNxE,kBAAkB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,yBAAyB,CAAC;IA4C5F,qBAAqB,CACzB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,yBAAyB,GAC9B,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAavB,sBAAsB,CAC1B,UAAU,EAAE,MAAM,EAAE,GACnB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAuCjE,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAW1E,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAW5E,wBAAwB,CAAC,IAAI,EAAE,uBAAuB,GAAG,OAAO,CAAC,UAAU,CAAC;IAiElF,OAAO,CAAC,aAAa;CAMtB"}
@@ -17,10 +17,17 @@ const common_1 = require("@nestjs/common");
17
17
  const drizzle_orm_1 = require("drizzle-orm");
18
18
  const db_1 = require("@proofhound/db");
19
19
  const database_constants_1 = require("../../../shared/database/database.constants");
20
+ const object_storage_provider_1 = require("../../common/contracts/object-storage.provider");
21
+ const dataset_sample_offload_1 = require("./dataset-sample-offload");
22
+ const dataset_sample_payload_1 = require("./dataset-sample-payload");
20
23
  const { optimizations, datasetSamples, datasets, experiments, projects, promptVersions } = db_1.schema;
24
+ // Per-shard batch for small-file create offload (samples are already in memory; one shard per batch).
25
+ const CREATE_SHARD_BATCH = 200;
21
26
  let DatasetRepository = class DatasetRepository {
22
- constructor(db) {
27
+ constructor(db, sampleReader, storage) {
23
28
  this.db = db;
29
+ this.sampleReader = sampleReader;
30
+ this.storage = storage;
24
31
  this.datasetSelectFields = {
25
32
  id: datasets.id,
26
33
  projectId: datasets.projectId,
@@ -39,6 +46,15 @@ let DatasetRepository = class DatasetRepository {
39
46
  deletedAt: datasets.deletedAt,
40
47
  };
41
48
  }
49
+ // Resolve each row's `data` through the seam (inline when present, else from its shard) so callers
50
+ // that render full sample content keep working after a dataset is offloaded (SPEC 22 §7.3).
51
+ async hydrateSampleRows(rows) {
52
+ const hydrated = await this.sampleReader.hydrateMany(rows.map((r) => ({ data: r.data, payloadRef: r.payloadRef ?? null })));
53
+ rows.forEach((r, i) => {
54
+ r.data = hydrated[i] ?? null;
55
+ });
56
+ return rows;
57
+ }
42
58
  async findProjectAccess(_actorUserId, projectId, _isSuperAdmin) {
43
59
  const rows = await this.db
44
60
  .select({ id: projects.id })
@@ -85,18 +101,20 @@ let DatasetRepository = class DatasetRepository {
85
101
  }
86
102
  // Full scan — only for export (complete dump). Detail browsing must use listDatasetSamplesPage.
87
103
  async listDatasetSamples(datasetId) {
88
- return this.db
104
+ const rows = await this.db
89
105
  .select()
90
106
  .from(datasetSamples)
91
107
  .where((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId))
92
108
  .orderBy((0, drizzle_orm_1.asc)(datasetSamples.createdAt), (0, drizzle_orm_1.asc)(datasetSamples.id));
109
+ return this.hydrateSampleRows(rows);
93
110
  }
94
111
  // Server-side paginated browse with optional cross-field search (data::text ILIKE), so the detail page
95
112
  // never loads an entire (potentially 100k+ sample) dataset into memory.
96
113
  async listDatasetSamplesPage(datasetId, options) {
97
114
  const searchTerm = options.search?.trim();
115
+ // Search matches inline data or, once a sample is offloaded, its search_preview (SPEC 22 §7.3).
98
116
  const where = searchTerm
99
- ? (0, drizzle_orm_1.and)((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId), (0, drizzle_orm_1.sql) `${datasetSamples.data}::text ILIKE ${`%${searchTerm}%`}`)
117
+ ? (0, drizzle_orm_1.and)((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId), (0, drizzle_orm_1.sql) `(${datasetSamples.data}::text ILIKE ${`%${searchTerm}%`} OR ${datasetSamples.searchPreview} ILIKE ${`%${searchTerm}%`})`)
100
118
  : (0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId);
101
119
  const [rows, countResult] = await Promise.all([
102
120
  this.db
@@ -111,16 +129,20 @@ let DatasetRepository = class DatasetRepository {
111
129
  .from(datasetSamples)
112
130
  .where(where),
113
131
  ]);
114
- return { rows, total: Number(countResult[0]?.count ?? 0) };
132
+ return { rows: await this.hydrateSampleRows(rows), total: Number(countResult[0]?.count ?? 0) };
115
133
  }
116
134
  // SQL GROUP BY on the expected-output field so list/detail never load all sample rows into memory.
117
135
  // Mirrors DatasetService.toCategoryLabel: only scalar (string/number/boolean), non-blank, trimmed labels count.
118
136
  async aggregateCategoryDistribution(datasetId, fieldName) {
119
- const label = (0, drizzle_orm_1.sql) `btrim(${datasetSamples.data} ->> ${fieldName})`;
137
+ // Read the field from inline data (scalar only), or from index_values once the sample is offloaded
138
+ // (index_values holds only short scalars by construction) (SPEC 22 §7.3).
139
+ const value = (0, drizzle_orm_1.sql) `COALESCE(${datasetSamples.data} ->> ${fieldName}, ${datasetSamples.indexValues} ->> ${fieldName})`;
140
+ const label = (0, drizzle_orm_1.sql) `btrim(${value})`;
120
141
  const rows = await this.db
121
142
  .select({ label, count: (0, drizzle_orm_1.sql) `count(*)::int` })
122
143
  .from(datasetSamples)
123
- .where((0, drizzle_orm_1.and)((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId), (0, drizzle_orm_1.sql) `jsonb_typeof(${datasetSamples.data} -> ${fieldName}) IN ('string', 'number', 'boolean')`, (0, drizzle_orm_1.sql) `btrim(${datasetSamples.data} ->> ${fieldName}) <> ''`))
144
+ .where((0, drizzle_orm_1.and)((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId), (0, drizzle_orm_1.sql) `(jsonb_typeof(${datasetSamples.data} -> ${fieldName}) IN ('string', 'number', 'boolean')
145
+ OR (${datasetSamples.data} IS NULL AND ${datasetSamples.indexValues} ->> ${fieldName} IS NOT NULL))`, (0, drizzle_orm_1.sql) `btrim(${value}) <> ''`))
124
146
  // GROUP BY ordinal: the same ${fieldName} binds to different param positions in select vs group-by,
125
147
  // so Postgres won't match the expressions textually. Referencing select column 1 sidesteps that.
126
148
  .groupBy((0, drizzle_orm_1.sql) `1`);
@@ -448,11 +470,35 @@ let DatasetRepository = class DatasetRepository {
448
470
  if (!dataset) {
449
471
  throw new Error('Dataset insert returned no row');
450
472
  }
451
- await tx.insert(datasetSamples).values(args.dto.samples.map((sample) => ({
452
- datasetId: dataset.id,
453
- data: sample,
454
- externalId: this.getExternalId(sample, args.externalIdFieldName),
455
- })));
473
+ if (this.storage.isEnabled()) {
474
+ // Small-file create mirrors offload-at-promote (SPEC 22 §7.2): the samples are already in
475
+ // memory, so the batch reader just slices them. Object storage off → the inline insert below.
476
+ const samples = args.dto.samples;
477
+ const { storagePrefix } = await (0, dataset_sample_offload_1.offloadStagingToShards)({
478
+ datasetId: dataset.id,
479
+ sampleCount: samples.length,
480
+ batchSize: CREATE_SHARD_BATCH,
481
+ fieldSchema: args.fieldSchema,
482
+ readBatch: async (offset, limit) => samples.slice(offset, offset + limit).map((sample) => ({
483
+ data: sample,
484
+ externalId: this.getExternalId(sample, args.externalIdFieldName),
485
+ })),
486
+ putShard: (name, body) => this.storage.putObject({ project: { projectId: args.projectId, source: 'local' }, resourceType: 'dataset_normalized', resourceId: dataset.id, name }, body, { codec: 'gzip' }),
487
+ insertRows: async (rows) => {
488
+ await tx.insert(datasetSamples).values(rows);
489
+ },
490
+ });
491
+ if (storagePrefix) {
492
+ await tx.update(datasets).set({ storagePrefix }).where((0, drizzle_orm_1.eq)(datasets.id, dataset.id));
493
+ }
494
+ }
495
+ else {
496
+ await tx.insert(datasetSamples).values(args.dto.samples.map((sample) => ({
497
+ datasetId: dataset.id,
498
+ data: sample,
499
+ externalId: this.getExternalId(sample, args.externalIdFieldName),
500
+ })));
501
+ }
456
502
  return {
457
503
  ...dataset,
458
504
  createdByDisplayName: null,
@@ -472,6 +518,7 @@ exports.DatasetRepository = DatasetRepository;
472
518
  exports.DatasetRepository = DatasetRepository = __decorate([
473
519
  (0, common_1.Injectable)(),
474
520
  __param(0, (0, common_1.Inject)(database_constants_1.DATABASE_CLIENT)),
475
- __metadata("design:paramtypes", [Object])
521
+ __metadata("design:paramtypes", [Object, dataset_sample_payload_1.DatasetSamplePayloadReader,
522
+ object_storage_provider_1.ObjectStorageProvider])
476
523
  ], DatasetRepository);
477
524
  //# sourceMappingURL=dataset.repository.js.map