@ekairos/dataset 1.22.58-beta.development.0 → 1.22.59-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/materialize.js +64 -1
- package/package.json +4 -4
|
@@ -3,13 +3,66 @@ import { createTransformDatasetContext } from "../transform/transform-dataset.ag
|
|
|
3
3
|
import { datasetInferAndUpdateSchemaStep, datasetReadOneStep, } from "../dataset/steps.js";
|
|
4
4
|
import { registerDatasetAgentMaterializers } from "./agentMaterializers.js";
|
|
5
5
|
import { buildFileDefaultInstructions, buildRawSourceInstructions, buildTransformInstructions, } from "./instructions.js";
|
|
6
|
-
import { createOrUpdateDatasetMetadata, uploadInlineTextSource, } from "./persistence.js";
|
|
6
|
+
import { createOrUpdateDatasetMetadata, materializeRowsToDataset, uploadInlineTextSource, } from "./persistence.js";
|
|
7
7
|
import { getDomainDescriptor } from "./sourceRows.js";
|
|
8
8
|
import { materializeQuerySource } from "./materializeQuery.js";
|
|
9
9
|
import { createDatasetSandboxStep } from "../sandbox/steps.js";
|
|
10
10
|
/**
 * Builds the id used for an intermediate dataset derived from one source of a
 * target dataset.
 *
 * The result has the shape `<targetDatasetId>__<sourceKind>_<index>`.
 *
 * @param {string} targetDatasetId - Id of the dataset being materialized.
 * @param {string} sourceKind - Kind label of the source.
 * @param {number} index - Position of the source; appended after the kind.
 * @returns {string} The composed intermediate dataset id.
 */
function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
    // String.prototype.concat stringifies each argument the same way a template literal does.
    return "".concat(targetDatasetId, "__", sourceKind, "_", index);
}
|
|
13
|
+
/**
 * Coerces a parsed JSON value into a list of row objects.
 *
 * - An array yields one row per element.
 * - A non-array object yields a single row (the object itself).
 * - Anything else (primitive, null, undefined) yields a single `{ value }` row.
 *
 * Array elements that are not keyed records — primitives, null, and nested
 * arrays — are wrapped as `{ value: element }` so that every returned row is a
 * keyed object rather than a bare value or a positional array.
 *
 * @param {unknown} value - Any value, typically the result of `JSON.parse`.
 * @returns {object[]} One row object per logical record found in `value`.
 */
function normalizeParsedTextRows(value) {
    // `typeof [] === "object"`, so arrays must be excluded explicitly.
    const isRecord = (candidate) => candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);
    const toRow = (item) => (isRecord(item) ? item : { value: item });
    if (Array.isArray(value)) {
        return value.map(toRow);
    }
    return [toRow(value)];
}
|
|
21
|
+
/**
 * Turns an inline text source into dataset rows.
 *
 * When the source looks like JSON (by MIME type or file name) the text is
 * parsed and handed to `normalizeParsedTextRows`; otherwise, or when parsing
 * fails, the whole text becomes a single `{ text }` row.
 *
 * JSON Lines input is recognised by a `.jsonl` / `.ndjson` file name or by a
 * MIME type mentioning `jsonl`, `ndjson` or `json-lines`. Blank lines are
 * skipped and each remaining line is parsed on its own. If line-by-line
 * parsing fails, the text is retried as one JSON document before giving up.
 *
 * @param {{ text?: unknown, mimeType?: unknown, name?: unknown }} source
 *   Source descriptor; missing fields are treated as empty strings.
 * @returns {object[]} Parsed rows, or `[{ text }]` as the best-effort fallback.
 */
function materializeRawTextRows(source) {
    const text = String(source.text ?? "");
    const mimeType = String(source.mimeType ?? "").toLowerCase();
    const name = String(source.name ?? "").toLowerCase();
    const isJsonLines = name.endsWith(".jsonl") ||
        name.endsWith(".ndjson") ||
        /jsonl|ndjson|json-lines/.test(mimeType);
    const shouldParseJson = isJsonLines || mimeType.includes("json") || name.endsWith(".json");
    if (!shouldParseJson) {
        return [{ text }];
    }
    // Only JSON.parse runs inside the try, so the fallback is limited to
    // malformed input and cannot mask errors raised by later steps.
    const attempt = (parse) => {
        try {
            return parse();
        }
        catch {
            return null;
        }
    };
    const parseLines = () => text
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter(Boolean)
        .map((line) => JSON.parse(line));
    // trim() also drops a leading byte-order mark, which JSON.parse rejects.
    const parseDocument = () => [JSON.parse(text.trim())];
    const parsedValues = (isJsonLines ? attempt(parseLines) : null) ?? attempt(parseDocument);
    if (parsedValues === null) {
        return [{ text }];
    }
    return parsedValues.flatMap((value) => normalizeParsedTextRows(value));
}
|
|
44
|
+
/**
 * Converts an inline text source into rows and passes them to
 * `materializeRowsToDataset` under `targetDatasetId`.
 *
 * The dataset title falls back from `state.title` to `source.name` to the
 * dataset id itself. The source is recorded with kind `"text"` together with
 * its MIME type, name and description.
 *
 * @param {object} state - Materialization state; reads `runtime`, `sandboxId`,
 *   `title`, `instructions`, `outputSchema` and `first`.
 * @param {object} source - Text source; reads `mimeType`, `name` and
 *   `description` here, and is also passed to `materializeRawTextRows`.
 * @param {string} targetDatasetId - Id of the dataset to write.
 * @returns {Promise<string>} Resolves to `targetDatasetId` once the rows have
 *   been handed off.
 */
async function materializeRawTextSource(state, source, targetDatasetId) {
    const parsedRows = materializeRawTextRows(source);
    const { mimeType, name, description } = source;
    const textSourceDescriptor = { kind: "text", mimeType, name, description };
    const datasetPayload = {
        datasetId: targetDatasetId,
        sandboxId: state.sandboxId,
        title: state.title ?? name ?? targetDatasetId,
        instructions: state.instructions,
        sources: [textSourceDescriptor],
        sourceKinds: ["text"],
        rows: parsedRows,
        schema: state.outputSchema,
        first: state.first,
    };
    await materializeRowsToDataset(state.runtime, datasetPayload);
    return targetDatasetId;
}
|
|
13
66
|
async function resolveDatasetSandboxId(state, targetDatasetId) {
|
|
14
67
|
const sandboxId = String(state.sandboxId ?? "").trim();
|
|
15
68
|
if (sandboxId)
|
|
@@ -90,6 +143,16 @@ async function normalizeSourceToDatasetId(state, source, targetDatasetId, source
|
|
|
90
143
|
});
|
|
91
144
|
return intermediateDatasetId;
|
|
92
145
|
}
|
|
146
|
+
if (source.kind === "text") {
|
|
147
|
+
await materializeRawTextSource({
|
|
148
|
+
...state,
|
|
149
|
+
outputSchema: undefined,
|
|
150
|
+
first: false,
|
|
151
|
+
instructions: buildRawSourceInstructions(source.kind),
|
|
152
|
+
title: source.name ?? state.title,
|
|
153
|
+
}, source, intermediateDatasetId);
|
|
154
|
+
return intermediateDatasetId;
|
|
155
|
+
}
|
|
93
156
|
await materializeSingleFileLikeSource({
|
|
94
157
|
...state,
|
|
95
158
|
outputSchema: undefined,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.58-beta.development.0",
|
|
3
|
+
"version": "1.22.59-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.58-beta.development.0",
|
|
69
|
-
"@ekairos/events": "^1.22.58-beta.development.0",
|
|
70
|
-
"@ekairos/sandbox": "^1.22.58-beta.development.0",
|
|
68
|
+
"@ekairos/domain": "^1.22.59-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.59-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.59-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|