@proofhound/core 0.1.12 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/modules/annotation/annotation.controller.d.ts +3 -3
- package/dist/server/modules/annotation/annotation.module.d.ts.map +1 -1
- package/dist/server/modules/annotation/annotation.module.js +2 -1
- package/dist/server/modules/annotation/annotation.module.js.map +1 -1
- package/dist/server/modules/annotation/annotation.repository.d.ts +4 -1
- package/dist/server/modules/annotation/annotation.repository.d.ts.map +1 -1
- package/dist/server/modules/annotation/annotation.repository.js +31 -6
- package/dist/server/modules/annotation/annotation.repository.js.map +1 -1
- package/dist/server/modules/canary-release/canary-release.module.d.ts.map +1 -1
- package/dist/server/modules/canary-release/canary-release.module.js +2 -1
- package/dist/server/modules/canary-release/canary-release.module.js.map +1 -1
- package/dist/server/modules/canary-release/canary-release.repository.d.ts +4 -1
- package/dist/server/modules/canary-release/canary-release.repository.d.ts.map +1 -1
- package/dist/server/modules/canary-release/canary-release.repository.js +29 -4
- package/dist/server/modules/canary-release/canary-release.repository.js.map +1 -1
- package/dist/server/modules/dataset/dataset-import.repository.d.ts +3 -1
- package/dist/server/modules/dataset/dataset-import.repository.d.ts.map +1 -1
- package/dist/server/modules/dataset/dataset-import.repository.js +44 -9
- package/dist/server/modules/dataset/dataset-import.repository.js.map +1 -1
- package/dist/server/modules/dataset/dataset-sample-offload.d.ts +36 -0
- package/dist/server/modules/dataset/dataset-sample-offload.d.ts.map +1 -0
- package/dist/server/modules/dataset/dataset-sample-offload.js +41 -0
- package/dist/server/modules/dataset/dataset-sample-offload.js.map +1 -0
- package/dist/server/modules/dataset/dataset-sample-payload.d.ts +21 -0
- package/dist/server/modules/dataset/dataset-sample-payload.d.ts.map +1 -0
- package/dist/server/modules/dataset/dataset-sample-payload.js +72 -0
- package/dist/server/modules/dataset/dataset-sample-payload.js.map +1 -0
- package/dist/server/modules/dataset/dataset-sample-projection.d.ts +11 -0
- package/dist/server/modules/dataset/dataset-sample-projection.d.ts.map +1 -0
- package/dist/server/modules/dataset/dataset-sample-projection.js +49 -0
- package/dist/server/modules/dataset/dataset-sample-projection.js.map +1 -0
- package/dist/server/modules/dataset/dataset.module.d.ts.map +1 -1
- package/dist/server/modules/dataset/dataset.module.js +2 -0
- package/dist/server/modules/dataset/dataset.module.js.map +1 -1
- package/dist/server/modules/dataset/dataset.repository.d.ts +6 -1
- package/dist/server/modules/dataset/dataset.repository.d.ts.map +1 -1
- package/dist/server/modules/dataset/dataset.repository.js +59 -12
- package/dist/server/modules/dataset/dataset.repository.js.map +1 -1
- package/dist/server/modules/experiment/experiment.module.d.ts.map +1 -1
- package/dist/server/modules/experiment/experiment.module.js +2 -0
- package/dist/server/modules/experiment/experiment.module.js.map +1 -1
- package/dist/server/modules/experiment/experiment.workflow.d.ts +7 -1
- package/dist/server/modules/experiment/experiment.workflow.d.ts.map +1 -1
- package/dist/server/modules/experiment/experiment.workflow.js +33 -4
- package/dist/server/modules/experiment/experiment.workflow.js.map +1 -1
- package/dist/server/modules/optimization/optimization.module.d.ts.map +1 -1
- package/dist/server/modules/optimization/optimization.module.js +2 -0
- package/dist/server/modules/optimization/optimization.module.js.map +1 -1
- package/dist/server/modules/optimization/optimization.repository.d.ts +5 -1
- package/dist/server/modules/optimization/optimization.repository.d.ts.map +1 -1
- package/dist/server/modules/optimization/optimization.repository.js +30 -8
- package/dist/server/modules/optimization/optimization.repository.js.map +1 -1
- package/dist/server/modules/run-result/run-result-compaction-sweeper.d.ts +15 -0
- package/dist/server/modules/run-result/run-result-compaction-sweeper.d.ts.map +1 -0
- package/dist/server/modules/run-result/run-result-compaction-sweeper.js +74 -0
- package/dist/server/modules/run-result/run-result-compaction-sweeper.js.map +1 -0
- package/dist/server/modules/run-result/run-result-compaction.d.ts +38 -0
- package/dist/server/modules/run-result/run-result-compaction.d.ts.map +1 -0
- package/dist/server/modules/run-result/run-result-compaction.js +74 -0
- package/dist/server/modules/run-result/run-result-compaction.js.map +1 -0
- package/dist/server/modules/run-result/run-result-compactor.d.ts +58 -0
- package/dist/server/modules/run-result/run-result-compactor.d.ts.map +1 -0
- package/dist/server/modules/run-result/run-result-compactor.js +188 -0
- package/dist/server/modules/run-result/run-result-compactor.js.map +1 -0
- package/dist/server/modules/run-result/run-result-payload.d.ts +30 -0
- package/dist/server/modules/run-result/run-result-payload.d.ts.map +1 -0
- package/dist/server/modules/run-result/run-result-payload.js +82 -0
- package/dist/server/modules/run-result/run-result-payload.js.map +1 -0
- package/dist/server/modules/run-result/run-result-payload.reader.d.ts +20 -0
- package/dist/server/modules/run-result/run-result-payload.reader.d.ts.map +1 -0
- package/dist/server/modules/run-result/run-result-payload.reader.js +99 -0
- package/dist/server/modules/run-result/run-result-payload.reader.js.map +1 -0
- package/dist/server/modules/run-result/run-result.controller.d.ts +4 -0
- package/dist/server/modules/run-result/run-result.controller.d.ts.map +1 -1
- package/dist/server/modules/run-result/run-result.module.d.ts.map +1 -1
- package/dist/server/modules/run-result/run-result.module.js +14 -2
- package/dist/server/modules/run-result/run-result.module.js.map +1 -1
- package/dist/server/modules/run-result/run-result.repository.d.ts +3 -1
- package/dist/server/modules/run-result/run-result.repository.d.ts.map +1 -1
- package/dist/server/modules/run-result/run-result.repository.js +63 -4
- package/dist/server/modules/run-result/run-result.repository.js.map +1 -1
- package/package.json +12 -12
|
@@ -17,7 +17,12 @@ const common_1 = require("@nestjs/common");
|
|
|
17
17
|
const drizzle_orm_1 = require("drizzle-orm");
|
|
18
18
|
const db_1 = require("@proofhound/db");
|
|
19
19
|
const database_constants_1 = require("../../../shared/database/database.constants");
|
|
20
|
-
const
|
|
20
|
+
const object_storage_provider_1 = require("../../common/contracts/object-storage.provider");
|
|
21
|
+
const dataset_sample_offload_1 = require("./dataset-sample-offload");
|
|
22
|
+
const { datasetImports, datasetImportSamples, datasetSamples, datasets, projects } = db_1.schema;
|
|
23
|
+
// Per-shard batch for offload-at-promote. Bounded so a batch's data stays in memory only briefly
|
|
24
|
+
// (large image/base64 samples make per-row size unpredictable); each batch becomes one R2 shard.
|
|
25
|
+
const PROMOTE_SHARD_BATCH = 200;
|
|
21
26
|
// Thrown inside the promote transaction so the caller can map to the right HTTP status while the tx rolls back.
|
|
22
27
|
class DatasetImportEmptyError extends Error {
|
|
23
28
|
}
|
|
@@ -26,8 +31,9 @@ class DatasetNameTakenError extends Error {
|
|
|
26
31
|
}
|
|
27
32
|
exports.DatasetNameTakenError = DatasetNameTakenError;
|
|
28
33
|
let DatasetImportRepository = class DatasetImportRepository {
|
|
29
|
-
constructor(db) {
|
|
34
|
+
constructor(db, storage) {
|
|
30
35
|
this.db = db;
|
|
36
|
+
this.storage = storage;
|
|
31
37
|
}
|
|
32
38
|
async findProjectAccess(projectId) {
|
|
33
39
|
const rows = await this.db
|
|
@@ -129,12 +135,41 @@ let DatasetImportRepository = class DatasetImportRepository {
|
|
|
129
135
|
hasImages: args.hasImages,
|
|
130
136
|
createdBy: args.actorUserId,
|
|
131
137
|
});
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
+
if (this.storage.isEnabled()) {
|
|
139
|
+
// Offload-at-promote (SPEC 22 §7.2): stream staging into shards + projected rows. The pure
|
|
140
|
+
// orchestration lives in dataset-sample-offload.ts; here we just bind the tx / storage I/O.
|
|
141
|
+
const project = { projectId: args.projectId, source: 'local' };
|
|
142
|
+
const { storagePrefix } = await (0, dataset_sample_offload_1.offloadStagingToShards)({
|
|
143
|
+
datasetId: args.datasetId,
|
|
144
|
+
sampleCount,
|
|
145
|
+
batchSize: PROMOTE_SHARD_BATCH,
|
|
146
|
+
fieldSchema: args.fieldSchema,
|
|
147
|
+
readBatch: (offset, limit) => tx
|
|
148
|
+
.select({ data: datasetImportSamples.data, externalId: datasetImportSamples.externalId })
|
|
149
|
+
.from(datasetImportSamples)
|
|
150
|
+
.where((0, drizzle_orm_1.eq)(datasetImportSamples.importId, args.importId))
|
|
151
|
+
.orderBy((0, drizzle_orm_1.asc)(datasetImportSamples.rowIndex))
|
|
152
|
+
.limit(limit)
|
|
153
|
+
.offset(offset),
|
|
154
|
+
putShard: (name, body) => this.storage.putObject({ project, resourceType: 'dataset_normalized', resourceId: args.datasetId, name }, body, {
|
|
155
|
+
codec: 'gzip',
|
|
156
|
+
}),
|
|
157
|
+
insertRows: async (rows) => {
|
|
158
|
+
await tx.insert(datasetSamples).values(rows);
|
|
159
|
+
},
|
|
160
|
+
});
|
|
161
|
+
if (storagePrefix) {
|
|
162
|
+
await tx.update(datasets).set({ storagePrefix }).where((0, drizzle_orm_1.eq)(datasets.id, args.datasetId));
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
else {
|
|
166
|
+
await tx.execute((0, drizzle_orm_1.sql) `
|
|
167
|
+
INSERT INTO ph_assets.dataset_samples (dataset_id, data, external_id)
|
|
168
|
+
SELECT ${args.datasetId}::uuid, data, external_id
|
|
169
|
+
FROM ph_assets.dataset_import_samples
|
|
170
|
+
WHERE import_id = ${args.importId}::uuid
|
|
171
|
+
`);
|
|
172
|
+
}
|
|
138
173
|
await tx
|
|
139
174
|
.update(datasetImports)
|
|
140
175
|
.set({ status: 'ready', datasetId: args.datasetId, updatedAt: new Date() })
|
|
@@ -171,6 +206,6 @@ exports.DatasetImportRepository = DatasetImportRepository;
|
|
|
171
206
|
exports.DatasetImportRepository = DatasetImportRepository = __decorate([
|
|
172
207
|
(0, common_1.Injectable)(),
|
|
173
208
|
__param(0, (0, common_1.Inject)(database_constants_1.DATABASE_CLIENT)),
|
|
174
|
-
__metadata("design:paramtypes", [Object])
|
|
209
|
+
__metadata("design:paramtypes", [Object, object_storage_provider_1.ObjectStorageProvider])
|
|
175
210
|
], DatasetImportRepository);
|
|
176
211
|
//# sourceMappingURL=dataset-import.repository.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dataset-import.repository.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-import.repository.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,2CAAoD;AACpD,6CAAqE;AAErE,uCAAwC;AAExC,oFAA8E;
|
|
1
|
+
{"version":3,"file":"dataset-import.repository.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-import.repository.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,2CAAoD;AACpD,6CAAqE;AAErE,uCAAwC;AAExC,oFAA8E;AAC9E,4FAAuF;AACvF,qEAAkE;AAElE,MAAM,EAAE,cAAc,EAAE,oBAAoB,EAAE,cAAc,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,WAAM,CAAC;AAE5F,iGAAiG;AACjG,iGAAiG;AACjG,MAAM,mBAAmB,GAAG,GAAG,CAAC;AA4ChC,gHAAgH;AAChH,MAAa,uBAAwB,SAAQ,KAAK;CAAG;AAArD,0DAAqD;AACrD,MAAa,qBAAsB,SAAQ,KAAK;CAAG;AAAnD,sDAAmD;AAG5C,IAAM,uBAAuB,GAA7B,MAAM,uBAAuB;IAClC,YAC4C,EAAY,EACrC,OAA8B;QADL,OAAE,GAAF,EAAE,CAAU;QACrC,YAAO,GAAP,OAAO,CAAuB;IAC9C,CAAC;IAEJ,KAAK,CAAC,iBAAiB,CAAC,SAAiB;QACvC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,CAAC;aAC3B,IAAI,CAAC,QAAQ,CAAC;aACd,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,CAAC,EAAE,IAAA,oBAAM,EAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;aAClE,KAAK,CAAC,CAAC,CAAC,CAAC;QACZ,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,SAAiB,EAAE,IAAY;QACtD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,CAAC;aAC3B,IAAI,CAAC,QAAQ,CAAC;aACd,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAA,oBAAM,EAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;aAClG,KAAK,CAAC,CAAC,CAAC,CAAC;QACZ,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,IAA6B;QAC9C,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,IAAI,CAAC,EAAE;aACxB,MAAM,CAAC,cAAc,CAAC;aACtB,MAAM,CAAC;YACN,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI;YACnB,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,IAAI;YACjD,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,aAAa;YACrC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,QAAQ;YACtC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,aAAa;YAChD,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,WAAW,IAAI,IAAI;YACpD,YAAY,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY;YACnC,iBAAiB,EAAE,IAAI,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI;Y
ACrD,SAAS,EAAE,IAAI,CAAC,WAAW;SAC5B,CAAC;aACD,SAAS,EAAE,CAAC;QACf,IAAI,CAAC,GAAG;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QACnE,OAAO,GAAuB,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,SAAiB,EAAE,QAAgB;QACtD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,EAAE;aACR,IAAI,CAAC,cAAc,CAAC;aACpB,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC,CAAC;aACpF,KAAK,CAAC,CAAC,CAAC,CAAC;QACZ,OAAQ,IAAI,CAAC,CAAC,CAAkC,IAAI,IAAI,CAAC;IAC3D,CAAC;IAED,uGAAuG;IACvG,KAAK,CAAC,WAAW,CAAC,QAAgB,EAAE,IAAsB,EAAE,gBAAwB;QAClF,OAAO,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YACtC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,MAAM,EAAE;qBACL,MAAM,CAAC,oBAAoB,CAAC;qBAC5B,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,UAAU,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;qBAC7G,mBAAmB,EAAE,CAAC;YAC3B,CAAC;YACD,MAAM,CAAC,OAAO,CAAC,GAAG,MAAM,EAAE;iBACvB,MAAM,CAAC,cAAc,CAAC;iBACtB,GAAG,CAAC;gBACH,YAAY,EAAE,IAAA,iBAAG,EAAA,YAAY,cAAc,CAAC,YAAY,KAAK,gBAAgB,GAAG;gBAChF,SAAS,EAAE,IAAI,IAAI,EAAE;aACtB,CAAC;iBACD,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC;iBACtC,SAAS,CAAC,EAAE,YAAY,EAAE,cAAc,CAAC,YAAY,EAAE,CAAC,CAAC;YAC5D,OAAO,OAAO,EAAE,YAAY,IAAI,gBAAgB,CAAC;QACnD,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,yBAAyB,CAAC,QAAgB,EAAE,KAAa;QAC7D,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,IAAI,EAAE,oBAAoB,CAAC,IAAI,EAAE,CAAC;aAC3C,IAAI,CAAC,oBAAoB,CAAC;aAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;aAClD,OAAO,CAAC,IAAA,iBAAG,EAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;aAC3C,KAAK,CAAC,KAAK,CAAC,CAAC;QAChB,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CACtB,GAAG,CAAC,IAAI,IAAI,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,IAAgC,CAAC,CAAC,CAAC,EAAE,CAClH,CAAC;IACJ,CAAC;IAED,6HAA6H;IAC7H,KAAK,CAAC,OAAO,CAAC,IAA8B;QAC1C,OAAO,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,
EAAE,EAAE,EAAE,EAAE;YACtC,MAAM,CAAC,QAAQ,CAAC,GAAG,MAAM,EAAE;iBACxB,MAAM,CAAC,EAAE,KAAK,EAAE,IAAA,iBAAG,EAAQ,eAAe,EAAE,CAAC;iBAC7C,IAAI,CAAC,oBAAoB,CAAC;iBAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC3D,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC;YACjD,IAAI,WAAW,KAAK,CAAC;gBAAE,MAAM,IAAI,uBAAuB,EAAE,CAAC;YAE3D,MAAM,KAAK,GAAG,MAAM,EAAE;iBACnB,MAAM,CAAC,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE,CAAC;iBAC3B,IAAI,CAAC,QAAQ,CAAC;iBACd,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAA,oBAAM,EAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;iBAC5G,KAAK,CAAC,CAAC,CAAC,CAAC;YACZ,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAM,IAAI,qBAAqB,EAAE,CAAC;YAExD,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;gBAC/B,EAAE,EAAE,IAAI,CAAC,SAAS;gBAClB,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,WAAW;gBACX,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,SAAS,EAAE,IAAI,CAAC,WAAW;aAC5B,CAAC,CAAC;YAEH,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;gBAC7B,2FAA2F;gBAC3F,4FAA4F;gBAC5F,MAAM,OAAO,GAAG,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,OAAgB,EAAE,CAAC;gBACxE,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,IAAA,+CAAsB,EAAC;oBACrD,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,WAAW;oBACX,SAAS,EAAE,mBAAmB;oBAC9B,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,SAAS,EAAE,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAC3B,EAAE;yBACC,MAAM,CAAC,EAAE,IAAI,EAAE,oBAAoB,CAAC,IAAI,EAAE,UAAU,EAAE,oBAAoB,CAAC,UAAU,EAAE,CAAC;yBACxF,IAAI,CAAC,oBAAoB,CAAC;yBAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;yBACvD,OAAO,CAAC,IAAA,iBAAG,EAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;yBAC3C,KAAK,CAAC,KAAK,CAAC;yBACZ,MAAM,CAAC,MAAM,CAAC;oBACnB,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CACvB,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,UAAU,EAAE,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE;wBAC9G,KAAK,EAAE,MAAM;qBACd,CAAC;oBACJ,UAAU,EAAE,KAAK,EAA
E,IAAI,EAAE,EAAE;wBACzB,MAAM,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBAC/C,CAAC;iBACF,CAAC,CAAC;gBACH,IAAI,aAAa,EAAE,CAAC;oBAClB,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,QAAQ,CAAC,EAAE,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC1F,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,EAAE,CAAC,OAAO,CAAC,IAAA,iBAAG,EAAA;;mBAET,IAAI,CAAC,SAAS;;8BAEH,IAAI,CAAC,QAAQ;SAClC,CAAC,CAAC;YACL,CAAC;YAED,MAAM,EAAE;iBACL,MAAM,CAAC,cAAc,CAAC;iBACtB,GAAG,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,EAAE,CAAC;iBAC1E,KAAK,CAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAE/C,MAAM,EAAE,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,oBAAoB,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAE9F,OAAO,EAAE,WAAW,EAAE,CAAC;QACzB,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,SAAiB,EAAE,QAAgB;QACpD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,EAAE;aAC1B,MAAM,CAAC,cAAc,CAAC;aACtB,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,IAAA,gBAAE,EAAC,cAAc,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC,CAAC;aACpF,SAAS,CAAC,EAAE,EAAE,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC,CAAC;QACxC,OAAO,OAAO,CAAC,MAAM,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,SAAe;QACtC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE;aACvB,MAAM,CAAC,EAAE,EAAE,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC;aACjC,IAAI,CAAC,cAAc,CAAC;aACpB,KAAK,CAAC,IAAA,iBAAG,EAAC,IAAA,gBAAE,EAAC,cAAc,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,IAAA,gBAAE,EAAC,cAAc,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC;QAC/F,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,GAAa;QACpC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAC/B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,EAAE;aAC1B,MAAM,CAAC,cAAc,CAAC;aACtB,KAAK,CAAC,IAAA,qBAAO,EAAC,cAAc,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;aACtC,SAAS,CAAC,EAAE,EAAE,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC,CAAC;QACxC,OAAO,OAAO,CAAC,MAAM,CAAC;IACxB,CAAC;CACF,CAAA;AA1LY,0DAAuB;kCAAvB,uBAAuB;IADnC,IAAA,mBAAU,GAAE;IAGR
,WAAA,IAAA,eAAM,EAAC,oCAAe,CAAC,CAAA;6CACE,+CAAqB;GAHtC,uBAAuB,CA0LnC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
|
+
import type { DatasetFieldSchemaDto } from '@proofhound/shared';
|
|
3
|
+
import type { ObjectCodec, StoredObjectRef } from '../../common/contracts/object-storage.provider';
|
|
4
|
+
/** One staged sample as returned by `OffloadStagingOptions.readBatch`. */
export interface StagingSample {
    /** The sample's content; written as one line of the shard and fed to the projection. */
    data: unknown;
    /** External identifier copied unchanged onto the inserted row; may be null. */
    externalId: string | null;
}
|
|
8
|
+
/**
 * Row handed to `OffloadStagingOptions.insertRows` for every offloaded sample. The content
 * itself is not stored inline: `data` is always null and `payloadRef` says where to find it.
 */
export interface DatasetSampleOffloadRow {
    /** Dataset the row belongs to (taken from `OffloadStagingOptions.datasetId`). */
    datasetId: string;
    /** Always null for an offloaded row; the content lives in the referenced shard. */
    data: null;
    /** External identifier carried over from the staged sample; may be null. */
    externalId: string | null;
    /** Location of the sample's content. */
    payloadRef: {
        /** Reference returned by `putShard` for the shard holding this sample. */
        shard: StoredObjectRef;
        /** Zero-based position of the sample within the batch that produced the shard. */
        rowIndex: number;
    };
    /** `searchPreview` value produced by the sample projection. */
    searchPreview: string | null;
    /** `expectedOutputScalar` value produced by the sample projection. */
    expectedOutputScalar: string | null;
    /** `labelScalar` value produced by the sample projection. */
    labelScalar: string | null;
    /** `categoryScalar` value produced by the sample projection. */
    categoryScalar: string | null;
    /** `indexValues` map produced by the sample projection, or null. */
    indexValues: Record<string, string> | null;
}
|
|
22
|
+
/** Inputs and I/O callbacks for `offloadStagingToShards`. */
export interface OffloadStagingOptions {
    /** Dataset id stamped on every inserted row. */
    datasetId: string;
    /** Number of staged samples to process; the loop stops once this offset is reached. */
    sampleCount: number;
    /** Row limit passed to `readBatch`; every non-empty batch becomes one shard. */
    batchSize: number;
    /** Field schema forwarded to the sample projection. */
    fieldSchema: DatasetFieldSchemaDto[];
    /** Codec used to encode each shard body; the implementation falls back to 'gzip' when omitted. */
    codec?: ObjectCodec;
    /** Reads up to `limit` staged samples starting at `offset`; an empty result ends the loop. */
    readBatch: (offset: number, limit: number) => Promise<StagingSample[]>;
    /** Stores one encoded shard under `name` and resolves to its object reference. */
    putShard: (name: string, body: Buffer) => Promise<StoredObjectRef>;
    /** Persists the projected rows of one batch. */
    insertRows: (rows: DatasetSampleOffloadRow[]) => Promise<void>;
}
|
|
32
|
+
/**
 * Reads staged samples batch by batch, stores each batch as one shard and inserts one
 * projected row per sample.
 *
 * @param opts identifiers, batch sizing and the read / put / insert callbacks.
 * @returns `shards` — number of shards written; `storagePrefix` — the first shard's key up to
 *          and including its last '/', or null when no shard was written.
 */
export declare function offloadStagingToShards(opts: OffloadStagingOptions): Promise<{
    shards: number;
    storagePrefix: string | null;
}>;
|
|
36
|
+
//# sourceMappingURL=dataset-sample-offload.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dataset-sample-offload.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-offload.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAChE,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,gDAAgD,CAAC;AAInG,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,OAAO,CAAC;IACd,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED,MAAM,WAAW,uBAAuB;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,EAAE;QAAE,KAAK,EAAE,eAAe,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IACzD,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;CAC5C;AAED,MAAM,WAAW,qBAAqB;IACpC,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,qBAAqB,EAAE,CAAC;IACrC,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,aAAa,EAAE,CAAC,CAAC;IACvE,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,eAAe,CAAC,CAAC;IACnE,UAAU,EAAE,CAAC,IAAI,EAAE,uBAAuB,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CAChE;AAED,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,qBAAqB,GAC1B,OAAO,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,CAAC,CA2C3D"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.offloadStagingToShards = offloadStagingToShards;
|
|
4
|
+
const run_result_payload_1 = require("../run-result/run-result-payload");
|
|
5
|
+
const dataset_sample_projection_1 = require("./dataset-sample-projection");
|
|
6
|
+
/**
 * Streams staged samples into object-storage shards and inserts one projected row per sample.
 *
 * For every non-empty batch returned by `opts.readBatch` the payloads are encoded into a single
 * shard body, stored through `opts.putShard`, and then passed to `opts.insertRows` as rows whose
 * `data` is null and whose `payloadRef` holds the shard reference plus the sample's position
 * inside that batch.
 *
 * @param {object} opts
 * @param {string} opts.datasetId id stamped on every inserted row.
 * @param {number} opts.sampleCount number of staged samples to promote; never exceeded.
 * @param {number} opts.batchSize maximum rows per batch / shard.
 * @param {Array} opts.fieldSchema schema forwarded to `projectDatasetSample`.
 * @param {string} [opts.codec='gzip'] codec forwarded to `encodeShard`.
 * @param {(offset: number, limit: number) => Promise<Array>} opts.readBatch staged-sample reader.
 * @param {(name: string, body: Buffer) => Promise<object>} opts.putShard shard writer.
 * @param {(rows: Array) => Promise<void>} opts.insertRows projected-row writer.
 * @returns {Promise<{ shards: number, storagePrefix: (string|null) }>} the number of shards
 *          written and the first shard's key up to and including its last '/', or null when
 *          nothing was written.
 */
async function offloadStagingToShards(opts) {
    const codec = opts.codec ?? 'gzip';
    let shardSeq = 0;
    let offset = 0;
    let firstShardKey = null;
    while (offset < opts.sampleCount) {
        // Ask only for the rows still owed, so the final batch cannot promote more than
        // `sampleCount` samples when staging holds extra rows.
        // NOTE(review): extra rows are possible if staging can grow after the caller counted it —
        // confirm against the caller's transaction isolation.
        const limit = Math.min(opts.batchSize, opts.sampleCount - offset);
        const batch = await opts.readBatch(offset, limit);
        if (batch.length === 0)
            break;
        // Normalise once so the shard line and the projection see the same value for a row.
        const payloads = batch.map((row) => row.data ?? null);
        const body = await (0, run_result_payload_1.encodeShard)(payloads, codec);
        // The shard name always ends in `.jsonl.gz`, whatever `codec` is.
        const shardName = `shard-${String(shardSeq).padStart(5, '0')}.jsonl.gz`;
        const shardRef = await opts.putShard(shardName, body);
        firstShardKey ??= shardRef.key;
        await opts.insertRows(batch.map((row, rowIndex) => {
            const projection = (0, dataset_sample_projection_1.projectDatasetSample)(payloads[rowIndex], opts.fieldSchema);
            return {
                datasetId: opts.datasetId,
                data: null,
                externalId: row.externalId,
                // rowIndex is the position inside this batch, i.e. the line within the shard.
                payloadRef: { shard: shardRef, rowIndex },
                searchPreview: projection.searchPreview,
                expectedOutputScalar: projection.expectedOutputScalar,
                labelScalar: projection.labelScalar,
                categoryScalar: projection.categoryScalar,
                indexValues: projection.indexValues,
            };
        }));
        offset += batch.length;
        shardSeq += 1;
    }
    return {
        shards: shardSeq,
        storagePrefix: firstShardKey ? firstShardKey.slice(0, firstShardKey.lastIndexOf('/') + 1) : null,
    };
}
|
|
41
|
+
//# sourceMappingURL=dataset-sample-offload.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dataset-sample-offload.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-offload.ts"],"names":[],"mappings":";;AAwCA,wDA6CC;AA5ED,yEAA+D;AAC/D,2EAAmE;AA8B5D,KAAK,UAAU,sBAAsB,CAC1C,IAA2B;IAE3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC;IACnC,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,aAAa,GAAkB,IAAI,CAAC;IAExC,OAAO,MAAM,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACjC,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAC3D,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM;QAE9B,MAAM,IAAI,GAAG,MAAM,IAAA,gCAAW,EAC5B,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAC5B,KAAK,CACN,CAAC;QACF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,MAAM,CAAC,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;QAClG,aAAa,KAAK,QAAQ,CAAC,GAAG,CAAC;QAE/B,MAAM,IAAI,CAAC,UAAU,CACnB,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;YAC1B,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,IAAI,CAAmC,CAAC;YAClE,MAAM,UAAU,GAAG,IAAA,gDAAoB,EAAC,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YAChE,OAAO;gBACL,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,IAAI,EAAE,IAAI;gBACV,UAAU,EAAE,GAAG,CAAC,UAAU;gBAC1B,UAAU,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE;gBACzC,aAAa,EAAE,UAAU,CAAC,aAAa;gBACvC,oBAAoB,EAAE,UAAU,CAAC,oBAAoB;gBACrD,WAAW,EAAE,UAAU,CAAC,WAAW;gBACnC,cAAc,EAAE,UAAU,CAAC,cAAc;gBACzC,WAAW,EAAE,UAAU,CAAC,WAAW;aACpC,CAAC;QACJ,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC;QACvB,QAAQ,IAAI,CAAC,CAAC;IAChB,CAAC;IAED,OAAO;QACL,MAAM,EAAE,QAAQ;QAChB,aAAa,EAAE,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;KACjG,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { ObjectStorageProvider, type StoredObjectRef } from '../../common/contracts/object-storage.provider';
|
|
2
|
+
/**
 * Value stored in `dataset_samples.payload_ref`: which shard holds a sample's data and
 * which line of that shard it is.
 */
export interface DatasetSamplePayloadRef {
    /** Object-storage reference of the shard. */
    shard: StoredObjectRef;
    /** Index of the sample's line within the decoded shard. */
    rowIndex: number;
}
|
|
7
|
+
/** Minimal row shape the payload reader needs in order to resolve a sample's content. */
export interface DatasetSamplePayloadRow {
    /** Inline content; when non-null it is returned as-is and the shard is not read. */
    data: unknown;
    /** Shard location used when `data` is null; null when the row was never offloaded. */
    payloadRef: DatasetSamplePayloadRef | null;
}
|
|
11
|
+
/** Read seam that resolves dataset-sample content from the inline value or from its shard. */
export declare class DatasetSamplePayloadReader {
    private readonly storage;
    constructor(storage: ObjectStorageProvider);
    /** Returns the sample's inline data when usable, otherwise the matching line of its shard (null if absent). */
    hydrate(row: DatasetSamplePayloadRow): Promise<unknown>;
    /** Batch variant: results keep the order of `rows`, and each distinct shard is fetched once. */
    hydrateMany(rows: DatasetSamplePayloadRow[]): Promise<unknown[]>;
    /** Yields the ref to read from, or null when the inline value applies (data present / no ref / storage disabled). */
    private shardRef;
}
|
|
21
|
+
//# sourceMappingURL=dataset-sample-payload.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dataset-sample-payload.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-payload.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,qBAAqB,EAAE,KAAK,eAAe,EAAE,MAAM,gDAAgD,CAAC;AAG7G,gGAAgG;AAChG,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,eAAe,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,OAAO,CAAC;IACd,UAAU,EAAE,uBAAuB,GAAG,IAAI,CAAC;CAC5C;AAED,qBACa,0BAA0B;IACzB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBAAP,OAAO,EAAE,qBAAqB;IAE3D,2EAA2E;IACrE,OAAO,CAAC,GAAG,EAAE,uBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC;IAO7D,kGAAkG;IAC5F,WAAW,CAAC,IAAI,EAAE,uBAAuB,EAAE,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAqBtE,0GAA0G;IAC1G,OAAO,CAAC,QAAQ;CAIjB"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
3
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
4
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
5
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
6
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
7
|
+
};
|
|
8
|
+
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
9
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.DatasetSamplePayloadReader = void 0;
|
|
13
|
+
// DatasetSamplePayloadReader — the read seam for dataset-sample content (SPEC 22 §7.3).
|
|
14
|
+
//
|
|
15
|
+
// The worker hot paths (experiment rendering, optimization rounds) load a sample's full `data`. Once
|
|
16
|
+
// promote tiers it out, `data` is null and the authoritative content lives in an object-storage shard.
|
|
17
|
+
// This seam returns the inline value when present (a small-sample cache, or no offload), else reads
|
|
18
|
+
// the shard. Pure pass-through when storage is disabled / the row was never offloaded. It reuses the
|
|
19
|
+
// generic JSONL shard codec from the run-result seam (one data object per line).
|
|
20
|
+
const common_1 = require("@nestjs/common");
|
|
21
|
+
const object_storage_provider_1 = require("../../common/contracts/object-storage.provider");
|
|
22
|
+
const run_result_payload_1 = require("../run-result/run-result-payload");
|
|
23
|
+
let DatasetSamplePayloadReader = class DatasetSamplePayloadReader {
    /**
     * @param storage object-storage provider; this class calls its `isEnabled()` and `getObject(ref)`.
     */
    constructor(storage) {
        this.storage = storage;
    }
    /**
     * Resolves a single sample: the inline value when it applies, otherwise the sample's
     * line from its shard. Yields null when neither produces a value.
     */
    async hydrate(row) {
        const target = this.shardRef(row);
        if (target) {
            const decoded = await (0, run_result_payload_1.decodeShard)(await this.storage.getObject(target.shard), target.shard.codec);
            return decoded[target.rowIndex] ?? null;
        }
        return row.data ?? null;
    }
    /**
     * Resolves many samples at once. Rows are bucketed by shard so that every distinct shard
     * is fetched and decoded a single time; the result array is aligned with `rows`.
     */
    async hydrateMany(rows) {
        // Start from the inline values; offloaded slots are overwritten below.
        const resolved = rows.map((sample) => sample.data ?? null);
        const groups = new Map();
        rows.forEach((row, position) => {
            const target = this.shardRef(row);
            if (!target)
                return;
            const groupId = shardKey(target.shard);
            let bucket = groups.get(groupId);
            if (!bucket) {
                bucket = { shard: target.shard, entries: [] };
                groups.set(groupId, bucket);
            }
            bucket.entries.push({ index: position, rowIndex: target.rowIndex });
        });
        const loadGroup = async ({ shard, entries }) => {
            const decoded = await (0, run_result_payload_1.decodeShard)(await this.storage.getObject(shard), shard.codec);
            for (const entry of entries) {
                resolved[entry.index] = decoded[entry.rowIndex] ?? null;
            }
        };
        // All shards are fetched concurrently.
        await Promise.all([...groups.values()].map((group) => loadGroup(group)));
        return resolved;
    }
    /**
     * Picks the shard ref to read, or null when the inline value must be used: data is
     * present, the row has no ref, or storage is disabled.
     */
    shardRef(row) {
        const offloaded = row.data == null && row.payloadRef != null && this.storage.isEnabled();
        return offloaded ? row.payloadRef : null;
    }
};
|
|
64
|
+
exports.DatasetSamplePayloadReader = DatasetSamplePayloadReader;
|
|
65
|
+
exports.DatasetSamplePayloadReader = DatasetSamplePayloadReader = __decorate([
|
|
66
|
+
(0, common_1.Injectable)(),
|
|
67
|
+
__metadata("design:paramtypes", [object_storage_provider_1.ObjectStorageProvider])
|
|
68
|
+
], DatasetSamplePayloadReader);
|
|
69
|
+
/**
 * Grouping key for a shard reference: provider, bucket and object key joined with ':'.
 * A null/undefined bucket contributes an empty segment.
 */
function shardKey(ref) {
    const { provider, bucket, key } = ref;
    const bucketPart = bucket ?? '';
    return `${provider}:${bucketPart}:${key}`;
}
|
|
72
|
+
//# sourceMappingURL=dataset-sample-payload.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dataset-sample-payload.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-payload.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,wFAAwF;AACxF,EAAE;AACF,qGAAqG;AACrG,uGAAuG;AACvG,oGAAoG;AACpG,qGAAqG;AACrG,iFAAiF;AACjF,2CAA4C;AAC5C,4FAA6G;AAC7G,yEAA+D;AAcxD,IAAM,0BAA0B,GAAhC,MAAM,0BAA0B;IACrC,YAA6B,OAA8B;QAA9B,YAAO,GAAP,OAAO,CAAuB;IAAG,CAAC;IAE/D,2EAA2E;IAC3E,KAAK,CAAC,OAAO,CAAC,GAA4B;QACxC,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,CAAC,GAAG;YAAE,OAAO,GAAG,CAAC,IAAI,IAAI,IAAI,CAAC;QAClC,MAAM,KAAK,GAAG,MAAM,IAAA,gCAAW,EAA0B,MAAM,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACnH,OAAO,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC;IACrC,CAAC;IAED,kGAAkG;IAClG,KAAK,CAAC,WAAW,CAAC,IAA+B;QAC/C,MAAM,GAAG,GAAc,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,GAAG,EAA2F,CAAC;QACnH,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE;YAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAC/B,IAAI,CAAC,GAAG;gBAAE,OAAO;YACjB,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAChC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC/B,IAAI,KAAK;gBAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;;gBAC5D,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC;QAC5F,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,GAAG,CACf,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE;YACrD,MAAM,KAAK,GAAG,MAAM,IAAA,gCAAW,EAA0B,MAAM,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;YAC3G,KAAK,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,OAAO;gBAAE,GAAG,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC;QAClF,CAAC,CAAC,CACH,CAAC;QACF,OAAO,GAAG,CAAC;IACb,CAAC;IAED,0GAA0G;IAClG,QAAQ,CAAC,GAA4B;QAC3C,IAAI,GAAG,CAAC,IAAI,IAAI,IAAI,IAAI,GAAG,CAAC,UAAU,IAAI,IAA
I,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;YAAE,OAAO,IAAI,CAAC;QACzF,OAAO,GAAG,CAAC,UAAU,CAAC;IACxB,CAAC;CACF,CAAA;AAtCY,gEAA0B;qCAA1B,0BAA0B;IADtC,IAAA,mBAAU,GAAE;qCAE2B,+CAAqB;GADhD,0BAA0B,CAsCtC;AAED,SAAS,QAAQ,CAAC,GAAoB;IACpC,OAAO,GAAG,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,MAAM,IAAI,EAAE,IAAI,GAAG,CAAC,GAAG,EAAE,CAAC;AAC1D,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { DatasetFieldSchemaDto } from '@proofhound/shared';
|
|
2
|
+
/** DB-side projection of one dataset sample, as produced by `projectDatasetSample`. */
export interface DatasetSampleProjection {
    /**
     * Text form of the sample (a string as-is, anything else via JSON.stringify), cut to at most
     * 1000 characters; null when the sample is null or its text is empty.
     */
    searchPreview: string | null;
    /**
     * Scalar value of the first schema field whose role is 'expected_output'; null when there is
     * no such field, the value is not a string/number/boolean, or it is a string longer than 200
     * characters.
     */
    expectedOutputScalar: string | null;
    /** Set to null by `projectDatasetSample`. */
    labelScalar: string | null;
    /** Set to null by `projectDatasetSample`. */
    categoryScalar: string | null;
    /**
     * Field name -> short scalar value, for schema fields whose role is not 'image', 'image_url'
     * or 'image_base64'; null when no field yields a scalar.
     */
    indexValues: Record<string, string> | null;
}
|
|
9
|
+
/**
 * Build the DB-side projection for one sample from its data + the dataset's field schema.
 *
 * @param data The sample's payload keyed by field name; null yields a projection whose members
 *             are all null.
 * @param fieldSchema The dataset's field descriptors; each entry's `name` and `role` are read.
 * @returns The projection to store alongside the sample.
 */
export declare function projectDatasetSample(data: Record<string, unknown> | null, fieldSchema: DatasetFieldSchemaDto[]): DatasetSampleProjection;
|
|
11
|
+
//# sourceMappingURL=dataset-sample-projection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dataset-sample-projection.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-projection.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAEhE,MAAM,WAAW,uBAAuB;IACtC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;CAC5C;AAoBD,8FAA8F;AAC9F,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,EACpC,WAAW,EAAE,qBAAqB,EAAE,GACnC,uBAAuB,CAwBzB"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.projectDatasetSample = projectDatasetSample;
|
|
4
|
+
const PREVIEW_MAX = 1000;
|
|
5
|
+
const SCALAR_MAX = 200;
|
|
6
|
+
const IMAGE_ROLES = new Set(['image', 'image_url', 'image_base64']);
|
|
7
|
+
/**
 * Reduce a field value to a short scalar string.
 *
 * Strings pass through unless they exceed SCALAR_MAX characters; numbers and booleans are
 * stringified; everything else (null, undefined, objects, arrays, ...) yields null.
 */
function asScalar(value) {
    switch (typeof value) {
        case 'string':
            // Over-long strings are dropped rather than truncated.
            return value.length > SCALAR_MAX ? null : value;
        case 'number':
        case 'boolean':
            return String(value);
        default:
            // Covers undefined, null (typeof 'object'), objects, arrays, functions, ...
            return null;
    }
}
|
|
16
|
+
/**
 * Produce the bounded text preview of a sample.
 *
 * Strings are used as-is; any other value is serialised with JSON.stringify. The result is cut
 * to at most `maxLength` UTF-16 code units without splitting a surrogate pair, so the preview
 * never ends in a lone high surrogate.
 *
 * @param data The sample (or any JSON-serialisable value).
 * @param maxLength Upper bound on the preview length; defaults to PREVIEW_MAX.
 * @returns The preview text, or null when there is nothing to show (null/undefined input,
 *          a value JSON.stringify cannot represent, or empty text).
 */
function previewOf(data, maxLength = PREVIEW_MAX) {
    if (data === null || data === undefined)
        return null;
    const text = typeof data === 'string' ? data : JSON.stringify(data);
    // JSON.stringify returns undefined (not a string) for functions and symbols.
    if (text === undefined || text.length === 0)
        return null;
    if (text.length <= maxLength)
        return text;
    let end = maxLength;
    // If the cut falls between the two halves of a surrogate pair, drop the dangling high half
    // instead of emitting an unpaired surrogate.
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair)
        end -= 1;
    const cut = text.slice(0, end);
    return cut.length > 0 ? cut : null;
}
|
|
24
|
+
/**
 * Build the DB-side projection for one sample from its data + the dataset's field schema.
 *
 * @param data Sample payload keyed by field name. null or undefined yields a projection whose
 *             members are all null.
 * @param fieldSchema The dataset's field descriptors; each entry's `name` and `role` are read.
 * @returns `{ searchPreview, expectedOutputScalar, labelScalar, categoryScalar, indexValues }`.
 */
function projectDatasetSample(data, fieldSchema) {
    // `== null` matches both null and undefined, so a missing payload returns the empty
    // projection instead of throwing on the property reads below.
    if (data == null) {
        return { searchPreview: null, expectedOutputScalar: null, labelScalar: null, categoryScalar: null, indexValues: null };
    }
    // Only the first field tagged 'expected_output' is projected.
    const expectedField = fieldSchema.find((f) => f.role === 'expected_output')?.name;
    const expectedOutputScalar = expectedField ? asScalar(data[expectedField]) : null;
    // index_values: short scalar values of the non-image fields, so distribution / filter on any
    // configurable field works off `index_values->>field` once `data` is offloaded.
    const indexValues = {};
    for (const field of fieldSchema) {
        if (IMAGE_ROLES.has(field.role))
            continue;
        const scalar = asScalar(data[field.name]);
        if (scalar !== null)
            indexValues[field.name] = scalar;
    }
    return {
        searchPreview: previewOf(data),
        expectedOutputScalar,
        labelScalar: null,
        categoryScalar: null,
        // An empty map is stored as null rather than `{}`.
        indexValues: Object.keys(indexValues).length > 0 ? indexValues : null,
    };
}
|
|
49
|
+
//# sourceMappingURL=dataset-sample-projection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dataset-sample-projection.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset-sample-projection.ts"],"names":[],"mappings":";;AAsCA,oDA2BC;AA9CD,MAAM,WAAW,GAAG,IAAI,CAAC;AACzB,MAAM,UAAU,GAAG,GAAG,CAAC;AACvB,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;AAEpE,SAAS,QAAQ,CAAC,KAAc;IAC9B,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IACvD,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC;IAC/E,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,SAAS;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IAClF,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,SAAS,CAAC,IAAa;IAC9B,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IACrD,MAAM,IAAI,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACpE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACnC,OAAO,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACvE,CAAC;AAED,8FAA8F;AAC9F,SAAgB,oBAAoB,CAClC,IAAoC,EACpC,WAAoC;IAEpC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,EAAE,aAAa,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IACzH,CAAC;IAED,MAAM,aAAa,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,iBAAiB,CAAC,EAAE,IAAI,CAAC;IAClF,MAAM,oBAAoB,GAAG,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAElF,6FAA6F;IAC7F,gFAAgF;IAChF,MAAM,WAAW,GAA2B,EAAE,CAAC;IAC/C,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;QAChC,IAAI,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC;YAAE,SAAS;QAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;QAC1C,IAAI,MAAM,KAAK,IAAI;YAAE,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;IACxD,CAAC;IAED,OAAO;QACL,aAAa,EAAE,SAAS,CAAC,IAAI,CAAC;QAC9B,oBAAoB;QACpB,WAAW,EAAE,IAAI;QACjB,cAAc,EAAE,IAAI;QACpB,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,I
AAI;KACtE,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dataset.module.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.module.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"dataset.module.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.module.ts"],"names":[],"mappings":"AAWA,qBAaa,aAAa;CAAG"}
|
|
@@ -9,6 +9,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
9
9
|
exports.DatasetModule = void 0;
|
|
10
10
|
const common_1 = require("@nestjs/common");
|
|
11
11
|
const database_module_1 = require("../../../shared/database/database.module");
|
|
12
|
+
const dataset_sample_payload_1 = require("./dataset-sample-payload");
|
|
12
13
|
const dataset_deletion_hook_1 = require("./dataset-deletion.hook");
|
|
13
14
|
const dataset_import_controller_1 = require("./dataset-import.controller");
|
|
14
15
|
const dataset_import_repository_1 = require("./dataset-import.repository");
|
|
@@ -25,6 +26,7 @@ exports.DatasetModule = DatasetModule = __decorate([
|
|
|
25
26
|
controllers: [dataset_controller_1.DatasetController, dataset_import_controller_1.DatasetImportController],
|
|
26
27
|
providers: [
|
|
27
28
|
dataset_repository_1.DatasetRepository,
|
|
29
|
+
dataset_sample_payload_1.DatasetSamplePayloadReader,
|
|
28
30
|
{ provide: dataset_deletion_hook_1.DatasetDeletionHook, useClass: dataset_deletion_hook_1.LocalDatasetDeletionHook },
|
|
29
31
|
dataset_service_1.DatasetService,
|
|
30
32
|
dataset_import_repository_1.DatasetImportRepository,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dataset.module.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.module.ts"],"names":[],"mappings":";;;;;;;;;AAAA,2CAAwC;AACxC,8EAA0E;AAC1E,mEAAwF;AACxF,2EAAsE;AACtE,2EAAsE;AACtE,qEAAgE;AAChE,6DAAyD;AACzD,6DAAyD;AACzD,uDAAmD;
|
|
1
|
+
{"version":3,"file":"dataset.module.js","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.module.ts"],"names":[],"mappings":";;;;;;;;;AAAA,2CAAwC;AACxC,8EAA0E;AAC1E,qEAAsE;AACtE,mEAAwF;AACxF,2EAAsE;AACtE,2EAAsE;AACtE,qEAAgE;AAChE,6DAAyD;AACzD,6DAAyD;AACzD,uDAAmD;AAe5C,IAAM,aAAa,GAAnB,MAAM,aAAa;CAAG,CAAA;AAAhB,sCAAa;wBAAb,aAAa;IAbzB,IAAA,eAAM,EAAC;QACN,OAAO,EAAE,CAAC,gCAAc,CAAC;QACzB,WAAW,EAAE,CAAC,sCAAiB,EAAE,mDAAuB,CAAC;QACzD,SAAS,EAAE;YACT,sCAAiB;YACjB,mDAA0B;YAC1B,EAAE,OAAO,EAAE,2CAAmB,EAAE,QAAQ,EAAE,gDAAwB,EAAE;YACpE,gCAAc;YACd,mDAAuB;YACvB,6CAAoB;SACrB;QACD,OAAO,EAAE,CAAC,gCAAc,EAAE,6CAAoB,CAAC;KAChD,CAAC;GACW,aAAa,CAAG"}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import type { DbClient } from '@proofhound/db';
|
|
2
2
|
import type { CreateDatasetDto, DatasetFieldSchemaDto } from '@proofhound/shared';
|
|
3
|
+
import { ObjectStorageProvider } from '../../common/contracts/object-storage.provider';
|
|
4
|
+
import { DatasetSamplePayloadReader } from './dataset-sample-payload';
|
|
3
5
|
export interface DatasetProjectAccessRow {
|
|
4
6
|
id: string;
|
|
5
7
|
}
|
|
@@ -58,7 +60,10 @@ export interface DatasetDeletionImpactRows {
|
|
|
58
60
|
}
|
|
59
61
|
export declare class DatasetRepository {
|
|
60
62
|
private readonly db;
|
|
61
|
-
|
|
63
|
+
private readonly sampleReader;
|
|
64
|
+
private readonly storage;
|
|
65
|
+
constructor(db: DbClient, sampleReader: DatasetSamplePayloadReader, storage: ObjectStorageProvider);
|
|
66
|
+
private hydrateSampleRows;
|
|
62
67
|
private datasetSelectFields;
|
|
63
68
|
findProjectAccess(_actorUserId: string, projectId: string, _isSuperAdmin: boolean): Promise<DatasetProjectAccessRow | null>;
|
|
64
69
|
findDatasetByProjectAndName(projectId: string, name: string): Promise<DatasetRow | null>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dataset.repository.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.repository.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,KAAK,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"dataset.repository.d.ts","sourceRoot":"","sources":["../../../../src/server/modules/dataset/dataset.repository.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,KAAK,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAElF,OAAO,EAAE,qBAAqB,EAAE,MAAM,gDAAgD,CAAC;AAEvF,OAAO,EAAgC,0BAA0B,EAAE,MAAM,0BAA0B,CAAC;AAOpG,MAAM,WAAW,uBAAuB;IACtC,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,SAAS,EAAE,IAAI,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,UAAU,EAAE,IAAI,GAAG,IAAI,CAAC;IACxB,SAAS,EAAE,IAAI,GAAG,IAAI,CAAC;CACxB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,OAAO,CAAC;IACd,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,SAAS,EAAE,IAAI,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,uBAAuB;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,GAAG,EAAE,gBAAgB,CAAC;IACtB,WAAW,EAAE,qBAAqB,EAAE,CAAC;IACrC,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;CACpC;AAED,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED,MAAM,WAAW,wBAAwB;IACvC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,SAAS,EAAE,IAAI,GAAG,IAAI,CAAC;CACxB;AAED,MAAM,WAAW,yBAAyB;IACxC,WAAW,EAAE,wBAAwB,EAAE,CAAC;IACxC,aAAa,EAAE,wBAAwB,EAAE,CAAC;CAC3C;AAED,qBACa,iBAAiB;IAED,OAAO,CAAC,QAAQ,CAAC,EAAE;IAC5C,OAAO,CAAC,QAAQ,CAAC,YAAY;IAC7B,OAAO,CAAC,QAAQ,CAAC,OAAO;gBAFkB,EAAE,EAAE,QAAQ,EACrC,YAAY,EAAE,0BAA0B,EACxC,OAAO,EAAE,qBAAqB
;YAKnC,iBAAiB;IAU/B,OAAO,CAAC,mBAAmB,CAgBzB;IAEI,iBAAiB,CACrB,YAAY,EAAE,MAAM,EACpB,SAAS,EAAE,MAAM,EACjB,aAAa,EAAE,OAAO,GACrB,OAAO,CAAC,uBAAuB,GAAG,IAAI,CAAC;IASpC,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAUxF,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC;IAQtD,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAUjF,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQnE,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQnE,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAWlE,sBAAsB,CAC1B,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,GAC1D,OAAO,CAAC;QAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IA6BjD,6BAA6B,CACjC,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAsB7C,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAuNxE,kBAAkB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,yBAAyB,CAAC;IA4C5F,qBAAqB,CACzB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,yBAAyB,GAC9B,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAavB,sBAAsB,CAC1B,UAAU,EAAE,MAAM,EAAE,GACnB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAuCjE,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAW1E,2BAA2B,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAW5E,wBAAwB,CAAC,IAAI,EAAE,uBAAuB,GAAG,OAAO,CAAC,UAAU,CAAC;IAiElF,OAAO,CAAC,aAAa;CAMtB"}
|
|
@@ -17,10 +17,17 @@ const common_1 = require("@nestjs/common");
|
|
|
17
17
|
const drizzle_orm_1 = require("drizzle-orm");
|
|
18
18
|
const db_1 = require("@proofhound/db");
|
|
19
19
|
const database_constants_1 = require("../../../shared/database/database.constants");
|
|
20
|
+
const object_storage_provider_1 = require("../../common/contracts/object-storage.provider");
|
|
21
|
+
const dataset_sample_offload_1 = require("./dataset-sample-offload");
|
|
22
|
+
const dataset_sample_payload_1 = require("./dataset-sample-payload");
|
|
20
23
|
const { optimizations, datasetSamples, datasets, experiments, projects, promptVersions } = db_1.schema;
|
|
24
|
+
// Per-shard batch for small-file create offload (samples are already in memory; one shard per batch).
|
|
25
|
+
const CREATE_SHARD_BATCH = 200;
|
|
21
26
|
let DatasetRepository = class DatasetRepository {
|
|
22
|
-
constructor(db) {
|
|
27
|
+
constructor(db, sampleReader, storage) {
|
|
23
28
|
this.db = db;
|
|
29
|
+
this.sampleReader = sampleReader;
|
|
30
|
+
this.storage = storage;
|
|
24
31
|
this.datasetSelectFields = {
|
|
25
32
|
id: datasets.id,
|
|
26
33
|
projectId: datasets.projectId,
|
|
@@ -39,6 +46,15 @@ let DatasetRepository = class DatasetRepository {
|
|
|
39
46
|
deletedAt: datasets.deletedAt,
|
|
40
47
|
};
|
|
41
48
|
}
|
|
49
|
+
// Resolve each row's `data` through the seam (inline when present, else from its shard) so callers
|
|
50
|
+
// that render full sample content keep working after a dataset is offloaded (SPEC 22 §7.3).
|
|
51
|
+
async hydrateSampleRows(rows) {
|
|
52
|
+
const hydrated = await this.sampleReader.hydrateMany(rows.map((r) => ({ data: r.data, payloadRef: r.payloadRef ?? null })));
|
|
53
|
+
rows.forEach((r, i) => {
|
|
54
|
+
r.data = hydrated[i] ?? null;
|
|
55
|
+
});
|
|
56
|
+
return rows;
|
|
57
|
+
}
|
|
42
58
|
async findProjectAccess(_actorUserId, projectId, _isSuperAdmin) {
|
|
43
59
|
const rows = await this.db
|
|
44
60
|
.select({ id: projects.id })
|
|
@@ -85,18 +101,20 @@ let DatasetRepository = class DatasetRepository {
|
|
|
85
101
|
}
|
|
86
102
|
// Full scan — only for export (complete dump). Detail browsing must use listDatasetSamplesPage.
|
|
87
103
|
async listDatasetSamples(datasetId) {
|
|
88
|
-
|
|
104
|
+
const rows = await this.db
|
|
89
105
|
.select()
|
|
90
106
|
.from(datasetSamples)
|
|
91
107
|
.where((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId))
|
|
92
108
|
.orderBy((0, drizzle_orm_1.asc)(datasetSamples.createdAt), (0, drizzle_orm_1.asc)(datasetSamples.id));
|
|
109
|
+
return this.hydrateSampleRows(rows);
|
|
93
110
|
}
|
|
94
111
|
// Server-side paginated browse with optional cross-field search (data::text ILIKE), so the detail page
|
|
95
112
|
// never loads an entire (potentially 100k+ sample) dataset into memory.
|
|
96
113
|
async listDatasetSamplesPage(datasetId, options) {
|
|
97
114
|
const searchTerm = options.search?.trim();
|
|
115
|
+
// Search matches inline data or, once a sample is offloaded, its search_preview (SPEC 22 §7.3).
|
|
98
116
|
const where = searchTerm
|
|
99
|
-
? (0, drizzle_orm_1.and)((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId), (0, drizzle_orm_1.sql)
|
|
117
|
+
? (0, drizzle_orm_1.and)((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId), (0, drizzle_orm_1.sql) `(${datasetSamples.data}::text ILIKE ${`%${searchTerm}%`} OR ${datasetSamples.searchPreview} ILIKE ${`%${searchTerm}%`})`)
|
|
100
118
|
: (0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId);
|
|
101
119
|
const [rows, countResult] = await Promise.all([
|
|
102
120
|
this.db
|
|
@@ -111,16 +129,20 @@ let DatasetRepository = class DatasetRepository {
|
|
|
111
129
|
.from(datasetSamples)
|
|
112
130
|
.where(where),
|
|
113
131
|
]);
|
|
114
|
-
return { rows, total: Number(countResult[0]?.count ?? 0) };
|
|
132
|
+
return { rows: await this.hydrateSampleRows(rows), total: Number(countResult[0]?.count ?? 0) };
|
|
115
133
|
}
|
|
116
134
|
// SQL GROUP BY on the expected-output field so list/detail never load all sample rows into memory.
|
|
117
135
|
// Mirrors DatasetService.toCategoryLabel: only scalar (string/number/boolean), non-blank, trimmed labels count.
|
|
118
136
|
async aggregateCategoryDistribution(datasetId, fieldName) {
|
|
119
|
-
|
|
137
|
+
// Read the field from inline data (scalar only), or from index_values once the sample is offloaded
|
|
138
|
+
// (index_values holds only short scalars by construction) (SPEC 22 §7.3).
|
|
139
|
+
const value = (0, drizzle_orm_1.sql) `COALESCE(${datasetSamples.data} ->> ${fieldName}, ${datasetSamples.indexValues} ->> ${fieldName})`;
|
|
140
|
+
const label = (0, drizzle_orm_1.sql) `btrim(${value})`;
|
|
120
141
|
const rows = await this.db
|
|
121
142
|
.select({ label, count: (0, drizzle_orm_1.sql) `count(*)::int` })
|
|
122
143
|
.from(datasetSamples)
|
|
123
|
-
.where((0, drizzle_orm_1.and)((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId), (0, drizzle_orm_1.sql) `jsonb_typeof(${datasetSamples.data} -> ${fieldName}) IN ('string', 'number', 'boolean')
|
|
144
|
+
.where((0, drizzle_orm_1.and)((0, drizzle_orm_1.eq)(datasetSamples.datasetId, datasetId), (0, drizzle_orm_1.sql) `(jsonb_typeof(${datasetSamples.data} -> ${fieldName}) IN ('string', 'number', 'boolean')
|
|
145
|
+
OR (${datasetSamples.data} IS NULL AND ${datasetSamples.indexValues} ->> ${fieldName} IS NOT NULL))`, (0, drizzle_orm_1.sql) `btrim(${value}) <> ''`))
|
|
124
146
|
// GROUP BY ordinal: the same ${fieldName} binds to different param positions in select vs group-by,
|
|
125
147
|
// so Postgres won't match the expressions textually. Referencing select column 1 sidesteps that.
|
|
126
148
|
.groupBy((0, drizzle_orm_1.sql) `1`);
|
|
@@ -448,11 +470,35 @@ let DatasetRepository = class DatasetRepository {
|
|
|
448
470
|
if (!dataset) {
|
|
449
471
|
throw new Error('Dataset insert returned no row');
|
|
450
472
|
}
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
473
|
+
if (this.storage.isEnabled()) {
|
|
474
|
+
// Small-file create mirrors offload-at-promote (SPEC 22 §7.2): the samples are already in
|
|
475
|
+
// memory, so the batch reader just slices them. Object storage off → the inline insert below.
|
|
476
|
+
const samples = args.dto.samples;
|
|
477
|
+
const { storagePrefix } = await (0, dataset_sample_offload_1.offloadStagingToShards)({
|
|
478
|
+
datasetId: dataset.id,
|
|
479
|
+
sampleCount: samples.length,
|
|
480
|
+
batchSize: CREATE_SHARD_BATCH,
|
|
481
|
+
fieldSchema: args.fieldSchema,
|
|
482
|
+
readBatch: async (offset, limit) => samples.slice(offset, offset + limit).map((sample) => ({
|
|
483
|
+
data: sample,
|
|
484
|
+
externalId: this.getExternalId(sample, args.externalIdFieldName),
|
|
485
|
+
})),
|
|
486
|
+
putShard: (name, body) => this.storage.putObject({ project: { projectId: args.projectId, source: 'local' }, resourceType: 'dataset_normalized', resourceId: dataset.id, name }, body, { codec: 'gzip' }),
|
|
487
|
+
insertRows: async (rows) => {
|
|
488
|
+
await tx.insert(datasetSamples).values(rows);
|
|
489
|
+
},
|
|
490
|
+
});
|
|
491
|
+
if (storagePrefix) {
|
|
492
|
+
await tx.update(datasets).set({ storagePrefix }).where((0, drizzle_orm_1.eq)(datasets.id, dataset.id));
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
else {
|
|
496
|
+
await tx.insert(datasetSamples).values(args.dto.samples.map((sample) => ({
|
|
497
|
+
datasetId: dataset.id,
|
|
498
|
+
data: sample,
|
|
499
|
+
externalId: this.getExternalId(sample, args.externalIdFieldName),
|
|
500
|
+
})));
|
|
501
|
+
}
|
|
456
502
|
return {
|
|
457
503
|
...dataset,
|
|
458
504
|
createdByDisplayName: null,
|
|
@@ -472,6 +518,7 @@ exports.DatasetRepository = DatasetRepository;
|
|
|
472
518
|
exports.DatasetRepository = DatasetRepository = __decorate([
|
|
473
519
|
(0, common_1.Injectable)(),
|
|
474
520
|
__param(0, (0, common_1.Inject)(database_constants_1.DATABASE_CLIENT)),
|
|
475
|
-
__metadata("design:paramtypes", [Object
|
|
521
|
+
__metadata("design:paramtypes", [Object, dataset_sample_payload_1.DatasetSamplePayloadReader,
|
|
522
|
+
object_storage_provider_1.ObjectStorageProvider])
|
|
476
523
|
], DatasetRepository);
|
|
477
524
|
//# sourceMappingURL=dataset.repository.js.map
|