@ekairos/dataset 1.22.57-beta.development.0 → 1.22.59-beta.development.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/builder/materialize.js +64 -1
- package/dist/dataset.js +2 -2
- package/dist/file/file-dataset.agent.js +3 -3
- package/dist/id.d.ts +1 -0
- package/dist/id.js +10 -0
- package/dist/query/queryDomain.step.js +2 -2
- package/dist/service.d.ts +1 -1
- package/dist/service.js +4 -4
- package/dist/transform/transform-dataset.agent.js +3 -3
- package/package.json +4 -4
|
@@ -3,13 +3,66 @@ import { createTransformDatasetContext } from "../transform/transform-dataset.ag
|
|
|
3
3
|
import { datasetInferAndUpdateSchemaStep, datasetReadOneStep, } from "../dataset/steps.js";
|
|
4
4
|
import { registerDatasetAgentMaterializers } from "./agentMaterializers.js";
|
|
5
5
|
import { buildFileDefaultInstructions, buildRawSourceInstructions, buildTransformInstructions, } from "./instructions.js";
|
|
6
|
-
import { createOrUpdateDatasetMetadata, uploadInlineTextSource, } from "./persistence.js";
|
|
6
|
+
import { createOrUpdateDatasetMetadata, materializeRowsToDataset, uploadInlineTextSource, } from "./persistence.js";
|
|
7
7
|
import { getDomainDescriptor } from "./sourceRows.js";
|
|
8
8
|
import { materializeQuerySource } from "./materializeQuery.js";
|
|
9
9
|
import { createDatasetSandboxStep } from "../sandbox/steps.js";
|
|
10
10
|
/**
 * Composes the id of an intermediate dataset from the target dataset id,
 * the kind of the source and the position of that source.
 *
 * @param {string} targetDatasetId - Id of the dataset being built.
 * @param {string} sourceKind - Kind label of the source.
 * @param {number} index - Position of the source.
 * @returns {string} An id shaped like `<targetDatasetId>__<sourceKind>_<index>`.
 */
function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
    // String.prototype.concat applies the same string conversion to each
    // argument as template-literal interpolation does.
    return "".concat(targetDatasetId, "__", sourceKind, "_", index);
}
|
|
13
|
+
/**
 * Turns an already-parsed JSON value into a list of rows.
 *
 * - An array yields one row per element; elements that are not objects
 *   (primitives and `null`) are wrapped as `{ value: element }`.
 * - A single object yields a one-row list containing that object.
 * - Anything else (primitive, `null`, `undefined`) yields `[{ value }]`.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {Array<object>} The rows derived from `value`.
 */
function normalizeParsedTextRows(value) {
    const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
    if (!Array.isArray(value)) {
        return isObjectLike(value) ? [value] : [{ value }];
    }
    const toRow = (item) => {
        if (isObjectLike(item)) {
            return item;
        }
        return { value: item };
    };
    return value.map((item) => toRow(item));
}
|
|
21
|
+
/**
 * Converts an inline text source into dataset rows.
 *
 * JSON content is detected from the mime type (contains "json") or from the
 * file name (`.json`, `.jsonl`, `.ndjson`). Line-delimited JSON is detected
 * from a `.jsonl` / `.ndjson` name or from an NDJSON / JSON Lines mime type
 * (e.g. `application/x-ndjson`, `application/jsonl`), so such sources are
 * parsed line by line even when the name carries no extension. Every parsed
 * value is passed through `normalizeParsedTextRows`.
 *
 * Parsing is best-effort: anything that is not JSON, or fails to parse,
 * becomes a single `{ text }` row holding the original text.
 *
 * @param {{ text?: unknown, mimeType?: unknown, name?: unknown }} source - Inline text source.
 * @returns {Array<object>} Rows derived from the source text.
 */
function materializeRawTextRows(source) {
    const text = String(source.text ?? "");
    const mimeType = String(source.mimeType ?? "").toLowerCase();
    const name = String(source.name ?? "").toLowerCase();
    const isLineDelimited = name.endsWith(".jsonl") ||
        name.endsWith(".ndjson") ||
        /ndjson|jsonl|json-lines/.test(mimeType);
    const shouldParseJson = isLineDelimited || mimeType.includes("json") || name.endsWith(".json");
    if (!shouldParseJson) {
        return [{ text }];
    }
    // JSON.parse rejects a leading byte-order mark, so drop it for parsing only;
    // the fallback row below still carries the untouched text.
    const jsonText = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
    const parseDocument = () => normalizeParsedTextRows(JSON.parse(jsonText));
    const parseLines = () => jsonText
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter(Boolean)
        .map((line) => JSON.parse(line))
        .flatMap((row) => normalizeParsedTextRows(row));
    // Line-delimited sources fall back to whole-document parsing so that a
    // regular JSON document labelled as NDJSON/JSONL is still understood.
    const strategies = isLineDelimited ? [parseLines, parseDocument] : [parseDocument];
    for (const parse of strategies) {
        try {
            return parse();
        }
        catch {
            // Deliberate best-effort: try the next strategy, then fall back to raw text.
        }
    }
    return [{ text }];
}
|
|
44
|
+
/**
 * Materializes an inline text source into the dataset `targetDatasetId`.
 *
 * The source text is converted to rows with `materializeRawTextRows` and the
 * rows are handed to `materializeRowsToDataset` together with metadata taken
 * from `state` (sandbox id, title, instructions, output schema, `first`) and
 * a descriptor of the text source (mime type, name, description).
 *
 * @param {object} state - Build state; `runtime`, `sandboxId`, `title`,
 *   `instructions`, `outputSchema` and `first` are read from it.
 * @param {object} source - Text source; `text`, `mimeType`, `name` and
 *   `description` are read from it.
 * @param {string} targetDatasetId - Id of the dataset to write the rows to.
 * @returns {Promise<string>} Resolves to `targetDatasetId` once the rows are written.
 */
async function materializeRawTextSource(state, source, targetDatasetId) {
    const parsedRows = materializeRawTextRows(source);
    // Title preference: explicit state title, then the source name, then the dataset id.
    const datasetTitle = state.title ?? source.name ?? targetDatasetId;
    const textSourceDescriptor = {
        kind: "text",
        mimeType: source.mimeType,
        name: source.name,
        description: source.description,
    };
    const payload = {
        datasetId: targetDatasetId,
        sandboxId: state.sandboxId,
        title: datasetTitle,
        instructions: state.instructions,
        sources: [textSourceDescriptor],
        sourceKinds: ["text"],
        rows: parsedRows,
        schema: state.outputSchema,
        first: state.first,
    };
    await materializeRowsToDataset(state.runtime, payload);
    return targetDatasetId;
}
|
|
13
66
|
async function resolveDatasetSandboxId(state, targetDatasetId) {
|
|
14
67
|
const sandboxId = String(state.sandboxId ?? "").trim();
|
|
15
68
|
if (sandboxId)
|
|
@@ -90,6 +143,16 @@ async function normalizeSourceToDatasetId(state, source, targetDatasetId, source
|
|
|
90
143
|
});
|
|
91
144
|
return intermediateDatasetId;
|
|
92
145
|
}
|
|
146
|
+
if (source.kind === "text") {
|
|
147
|
+
await materializeRawTextSource({
|
|
148
|
+
...state,
|
|
149
|
+
outputSchema: undefined,
|
|
150
|
+
first: false,
|
|
151
|
+
instructions: buildRawSourceInstructions(source.kind),
|
|
152
|
+
title: source.name ?? state.title,
|
|
153
|
+
}, source, intermediateDatasetId);
|
|
154
|
+
return intermediateDatasetId;
|
|
155
|
+
}
|
|
93
156
|
await materializeSingleFileLikeSource({
|
|
94
157
|
...state,
|
|
95
158
|
outputSchema: undefined,
|
package/dist/dataset.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { id as newId } from "@instantdb/admin";
|
|
2
1
|
import { buildObjectOutputInstructions } from "./builder/instructions.js";
|
|
2
|
+
import { createDatasetId } from "./id.js";
|
|
3
3
|
import { materializeDerivedDataset, materializeSingleFileLikeSource, } from "./builder/materialize.js";
|
|
4
4
|
import { materializeQuerySource } from "./builder/materializeQuery.js";
|
|
5
5
|
import { finalizeBuildResult } from "./builder/persistence.js";
|
|
@@ -144,7 +144,7 @@ export function dataset(runtime, options = {}) {
|
|
|
144
144
|
return api;
|
|
145
145
|
}
|
|
146
146
|
function normalizeDatasetId(datasetId) {
|
|
147
|
-
const normalized = String(datasetId ?? newId()).trim();
|
|
147
|
+
const normalized = String(datasetId ?? createDatasetId()).trim();
|
|
148
148
|
if (!normalized) {
|
|
149
149
|
throw new Error("dataset_id_required");
|
|
150
150
|
}
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
|
|
2
|
-
import { id } from "@instantdb/admin";
|
|
3
2
|
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
4
3
|
import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
|
|
5
4
|
import { datasetGetByIdStep } from "../dataset/steps.js";
|
|
6
5
|
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
7
6
|
import { createGenerateSchemaTool } from "./generateSchema.tool.js";
|
|
8
7
|
import { buildFileDatasetPromptStep, generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
|
|
8
|
+
import { createDatasetId } from "../id.js";
|
|
9
9
|
async function awaitContextRun(run) {
|
|
10
10
|
if (!run)
|
|
11
11
|
return;
|
|
@@ -152,7 +152,7 @@ function createFileParseContextDefinition(params) {
|
|
|
152
152
|
return { datasetId: fallbackDatasetId ?? "", context };
|
|
153
153
|
}
|
|
154
154
|
export function createFileParseContext(fileId, opts) {
|
|
155
|
-
const datasetId = opts?.datasetId ?? id();
|
|
155
|
+
const datasetId = opts?.datasetId ?? createDatasetId();
|
|
156
156
|
const params = {
|
|
157
157
|
fileId,
|
|
158
158
|
instructions: opts?.instructions,
|
|
@@ -166,7 +166,7 @@ export function createFileParseContext(fileId, opts) {
|
|
|
166
166
|
datasetId,
|
|
167
167
|
async parse(runtime, options = {}) {
|
|
168
168
|
const triggerEvent = {
|
|
169
|
-
id: id(),
|
|
169
|
+
id: createDatasetId(),
|
|
170
170
|
type: INPUT_TEXT_ITEM_TYPE,
|
|
171
171
|
channel: WEB_CHANNEL,
|
|
172
172
|
createdAt: new Date().toISOString(),
|
package/dist/id.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function createDatasetId(): string;
|
package/dist/id.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Creates a random RFC 4122 version-4 UUID string.
 *
 * Random sources are tried from strongest to weakest:
 *   1. `crypto.randomUUID()` when the runtime exposes it;
 *   2. `crypto.getRandomValues()` when only that part of Web Crypto exists
 *      (for example runtimes or contexts that lack `randomUUID`);
 *   3. `Math.random()` as a last resort when no Web Crypto is available.
 *
 * @returns {string} A lowercase UUID such as `3b241101-e2bb-4255-8caf-4136c566a962`.
 */
export function createDatasetId() {
    const cryptoApi = globalThis.crypto;
    const uuid = cryptoApi?.randomUUID?.();
    if (typeof uuid === "string" && uuid.length > 0)
        return uuid;
    const bytes = new Uint8Array(16);
    if (typeof cryptoApi?.getRandomValues === "function") {
        cryptoApi.getRandomValues(bytes);
    }
    else {
        // Not cryptographically secure; only reached when Web Crypto is absent.
        for (let i = 0; i < bytes.length; i += 1) {
            bytes[i] = Math.floor(Math.random() * 256);
        }
    }
    // Stamp the version (4) and variant (10xx) bits required for a v4 UUID.
    bytes[6] = (bytes[6] & 0x0f) | 0x40;
    bytes[8] = (bytes[8] & 0x3f) | 0x80;
    const hex = Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
    return [
        hex.slice(0, 8),
        hex.slice(8, 12),
        hex.slice(12, 16),
        hex.slice(16, 20),
        hex.slice(20),
    ].join("-");
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { id as newId } from "@instantdb/admin";
|
|
2
1
|
import { DatasetService } from "../service.js";
|
|
2
|
+
import { createDatasetId } from "../id.js";
|
|
3
3
|
function normalizeRows(result) {
|
|
4
4
|
if (!result || typeof result !== "object")
|
|
5
5
|
return [];
|
|
@@ -56,7 +56,7 @@ export async function queryDomainStep(params) {
|
|
|
56
56
|
"use step";
|
|
57
57
|
const db = await getRuntimeDb(params.runtime);
|
|
58
58
|
const service = new DatasetService(db);
|
|
59
|
-
const datasetId = params.datasetId ?? newId();
|
|
59
|
+
const datasetId = params.datasetId ?? createDatasetId();
|
|
60
60
|
const queryResult = await db.query(params.query);
|
|
61
61
|
const rows = normalizeRows(queryResult);
|
|
62
62
|
const previewRows = rows.slice(0, 20);
|
package/dist/service.d.ts
CHANGED
package/dist/service.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { createDatasetId } from "./id.js";
|
|
2
2
|
export class DatasetService {
|
|
3
3
|
constructor(db) {
|
|
4
4
|
this.db = db;
|
|
@@ -27,9 +27,9 @@ export class DatasetService {
|
|
|
27
27
|
}
|
|
28
28
|
async createDataset(params) {
|
|
29
29
|
try {
|
|
30
|
-
const datasetId = params.id ??
|
|
30
|
+
const datasetId = params.id ?? createDatasetId();
|
|
31
31
|
const existing = await this.resolveDatasetEntityId(datasetId);
|
|
32
|
-
const entityId = existing.ok ? existing.data :
|
|
32
|
+
const entityId = existing.ok ? existing.data : createDatasetId();
|
|
33
33
|
const mutations = [];
|
|
34
34
|
mutations.push(this.db.tx.dataset_datasets[entityId].update({
|
|
35
35
|
datasetId,
|
|
@@ -73,7 +73,7 @@ export class DatasetService {
|
|
|
73
73
|
return resolved;
|
|
74
74
|
const mutations = [];
|
|
75
75
|
for (const record of params.records) {
|
|
76
|
-
const recordId =
|
|
76
|
+
const recordId = createDatasetId();
|
|
77
77
|
mutations.push(this.db.tx.dataset_records[recordId].update({
|
|
78
78
|
rowContent: record.rowContent,
|
|
79
79
|
order: record.order,
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
|
|
2
|
-
import { id } from "@instantdb/admin";
|
|
3
2
|
import { createClearDatasetTool } from "../clearDataset.tool.js";
|
|
4
3
|
import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
|
|
5
4
|
import { datasetUpdateSchemaStep } from "../dataset/steps.js";
|
|
6
5
|
import { createExecuteCommandTool } from "../executeCommand.tool.js";
|
|
7
6
|
import { buildTransformDatasetPromptStep, ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "./transform-dataset.steps.js";
|
|
7
|
+
import { createDatasetId } from "../id.js";
|
|
8
8
|
async function awaitContextRun(run) {
|
|
9
9
|
if (!run)
|
|
10
10
|
return;
|
|
@@ -146,7 +146,7 @@ function createTransformDatasetContextDefinition(params) {
|
|
|
146
146
|
return { datasetId: fallbackDatasetId ?? "", context };
|
|
147
147
|
}
|
|
148
148
|
export function createTransformDatasetContext(params) {
|
|
149
|
-
const datasetId = params.datasetId ?? id();
|
|
149
|
+
const datasetId = params.datasetId ?? createDatasetId();
|
|
150
150
|
const { context } = createTransformDatasetContextDefinition({
|
|
151
151
|
sourceDatasetIds: params.sourceDatasetIds,
|
|
152
152
|
outputSchema: params.outputSchema,
|
|
@@ -163,7 +163,7 @@ export function createTransformDatasetContext(params) {
|
|
|
163
163
|
? "the source dataset"
|
|
164
164
|
: `${params.sourceDatasetIds.length} source datasets`;
|
|
165
165
|
const triggerEvent = {
|
|
166
|
-
id: id(),
|
|
166
|
+
id: createDatasetId(),
|
|
167
167
|
type: INPUT_TEXT_ITEM_TYPE,
|
|
168
168
|
channel: WEB_CHANNEL,
|
|
169
169
|
createdAt: new Date().toISOString(),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ekairos/dataset",
|
|
3
|
-
"version": "1.22.57-beta.development.0",
|
|
3
|
+
"version": "1.22.59-beta.development.0",
|
|
4
4
|
"description": "Pulzar Dataset Tools",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,9 +65,9 @@
|
|
|
65
65
|
"test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@ekairos/domain": "^1.22.
|
|
69
|
-
"@ekairos/events": "^1.22.
|
|
70
|
-
"@ekairos/sandbox": "^1.22.
|
|
68
|
+
"@ekairos/domain": "^1.22.59-beta.development.0",
|
|
69
|
+
"@ekairos/events": "^1.22.59-beta.development.0",
|
|
70
|
+
"@ekairos/sandbox": "^1.22.59-beta.development.0",
|
|
71
71
|
"@instantdb/admin": "0.22.158",
|
|
72
72
|
"@instantdb/core": "0.22.142",
|
|
73
73
|
"ai": "^5.0.44",
|