@ekairos/dataset 1.22.57-beta.development.0 → 1.22.59-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,13 +3,66 @@ import { createTransformDatasetContext } from "../transform/transform-dataset.ag
3
3
  import { datasetInferAndUpdateSchemaStep, datasetReadOneStep, } from "../dataset/steps.js";
4
4
  import { registerDatasetAgentMaterializers } from "./agentMaterializers.js";
5
5
  import { buildFileDefaultInstructions, buildRawSourceInstructions, buildTransformInstructions, } from "./instructions.js";
6
- import { createOrUpdateDatasetMetadata, uploadInlineTextSource, } from "./persistence.js";
6
+ import { createOrUpdateDatasetMetadata, materializeRowsToDataset, uploadInlineTextSource, } from "./persistence.js";
7
7
  import { getDomainDescriptor } from "./sourceRows.js";
8
8
  import { materializeQuerySource } from "./materializeQuery.js";
9
9
  import { createDatasetSandboxStep } from "../sandbox/steps.js";
10
10
  function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
11
11
  return `${targetDatasetId}__${sourceKind}_${index}`;
12
12
  }
13
/**
 * Coerces an already-parsed JSON value into a list of row records.
 *
 * - An array yields one row per element.
 * - Any other value yields exactly one row.
 * - Plain (non-array) objects are used as rows as-is (same reference).
 * - Everything else (primitives, `null`, and nested arrays) is wrapped as
 *   `{ value: <item> }` so that every emitted row is a record.
 *
 * @param {unknown} value - Result of `JSON.parse` (or any other value).
 * @returns {Array<object>} The rows; an empty array when `value` is `[]`.
 */
function normalizeParsedTextRows(value) {
    // `typeof [] === "object"`, so arrays must be excluded explicitly;
    // otherwise a nested array would be emitted as a row without a wrapper.
    const isRecord = (candidate) => candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);
    const toRow = (item) => (isRecord(item) ? item : { value: item });
    if (Array.isArray(value)) {
        return value.map(toRow);
    }
    return [toRow(value)];
}
21
/**
 * Turns an inline text source into dataset rows.
 *
 * The source is treated as JSON when its MIME type contains "json" or its
 * name ends in `.json`, `.jsonl` or `.ndjson`. JSON Lines input (detected by
 * a `.jsonl`/`.ndjson` name or a MIME type containing "jsonl"/"ndjson") is
 * parsed one non-blank line at a time; other JSON input is parsed as a single
 * document. Parsed values are converted to rows by `normalizeParsedTextRows`.
 *
 * Anything that is not JSON, or that fails to parse, yields a single
 * `{ text }` row holding the original, unmodified text.
 *
 * @param {{ text?: unknown, mimeType?: unknown, name?: unknown }} source
 * @returns {Array<object>} Rows derived from the text.
 */
function materializeRawTextRows(source) {
    const text = String(source.text ?? "");
    const mimeType = String(source.mimeType ?? "").toLowerCase();
    const name = String(source.name ?? "").toLowerCase();
    // JSON Lines can be announced by the MIME type alone (for example
    // "application/x-ndjson"); relying only on the file name would send such
    // input down the single-document path, where it cannot parse.
    const isJsonLines = name.endsWith(".jsonl") ||
        name.endsWith(".ndjson") ||
        mimeType.includes("jsonl") ||
        mimeType.includes("ndjson");
    const isJson = isJsonLines || mimeType.includes("json") || name.endsWith(".json");
    if (!isJson) {
        return [{ text }];
    }
    // JSON.parse rejects a leading byte-order mark, so drop it for parsing
    // only; the fallback row below still carries the untouched text.
    const payload = text.startsWith("\uFEFF") ? text.slice(1) : text;
    try {
        if (isJsonLines) {
            return payload
                .split(/\r?\n/)
                .map((line) => line.trim())
                .filter(Boolean)
                .flatMap((line) => normalizeParsedTextRows(JSON.parse(line)));
        }
        return normalizeParsedTextRows(JSON.parse(payload));
    }
    catch {
        // Deliberate best effort: unparseable JSON is kept as raw text
        // instead of failing the whole materialization.
        return [{ text }];
    }
}
44
/**
 * Converts an inline text source into rows and hands them to
 * `materializeRowsToDataset` under `targetDatasetId`.
 *
 * The title falls back from `state.title` to `source.name` to the target
 * dataset id. The source is always described with kind "text".
 *
 * @param {object} state - Build state; reads `runtime`, `sandboxId`, `title`,
 *   `instructions`, `outputSchema` and `first`.
 * @param {object} source - Text source; reads `mimeType`, `name` and
 *   `description` here (its text is consumed by `materializeRawTextRows`).
 * @param {string} targetDatasetId - Id to materialize the rows under.
 * @returns {Promise<string>} Resolves to `targetDatasetId` once the call to
 *   `materializeRowsToDataset` has settled.
 */
async function materializeRawTextSource(state, source, targetDatasetId) {
    // Rows are derived first so a malformed source fails before anything else.
    const parsedRows = materializeRawTextRows(source);
    const { mimeType, name, description } = source;
    const sourceDescriptor = { kind: "text", mimeType, name, description };
    const request = {
        datasetId: targetDatasetId,
        sandboxId: state.sandboxId,
        title: state.title ?? name ?? targetDatasetId,
        instructions: state.instructions,
        sources: [sourceDescriptor],
        sourceKinds: ["text"],
        rows: parsedRows,
        schema: state.outputSchema,
        first: state.first,
    };
    await materializeRowsToDataset(state.runtime, request);
    return targetDatasetId;
}
13
66
  async function resolveDatasetSandboxId(state, targetDatasetId) {
14
67
  const sandboxId = String(state.sandboxId ?? "").trim();
15
68
  if (sandboxId)
@@ -90,6 +143,16 @@ async function normalizeSourceToDatasetId(state, source, targetDatasetId, source
90
143
  });
91
144
  return intermediateDatasetId;
92
145
  }
146
+ if (source.kind === "text") {
147
+ await materializeRawTextSource({
148
+ ...state,
149
+ outputSchema: undefined,
150
+ first: false,
151
+ instructions: buildRawSourceInstructions(source.kind),
152
+ title: source.name ?? state.title,
153
+ }, source, intermediateDatasetId);
154
+ return intermediateDatasetId;
155
+ }
93
156
  await materializeSingleFileLikeSource({
94
157
  ...state,
95
158
  outputSchema: undefined,
package/dist/dataset.js CHANGED
@@ -1,5 +1,5 @@
1
- import { id as newId } from "@instantdb/admin";
2
1
  import { buildObjectOutputInstructions } from "./builder/instructions.js";
2
+ import { createDatasetId } from "./id.js";
3
3
  import { materializeDerivedDataset, materializeSingleFileLikeSource, } from "./builder/materialize.js";
4
4
  import { materializeQuerySource } from "./builder/materializeQuery.js";
5
5
  import { finalizeBuildResult } from "./builder/persistence.js";
@@ -144,7 +144,7 @@ export function dataset(runtime, options = {}) {
144
144
  return api;
145
145
  }
146
146
  function normalizeDatasetId(datasetId) {
147
- const normalized = String(datasetId ?? newId()).trim();
147
+ const normalized = String(datasetId ?? createDatasetId()).trim();
148
148
  if (!normalized) {
149
149
  throw new Error("dataset_id_required");
150
150
  }
@@ -1,11 +1,11 @@
1
1
  import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
2
- import { id } from "@instantdb/admin";
3
2
  import { createClearDatasetTool } from "../clearDataset.tool.js";
4
3
  import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
5
4
  import { datasetGetByIdStep } from "../dataset/steps.js";
6
5
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
7
6
  import { createGenerateSchemaTool } from "./generateSchema.tool.js";
8
7
  import { buildFileDatasetPromptStep, generateFileParsePreviewStep, initializeFileParseSandboxStep, } from "./file-dataset.steps.js";
8
+ import { createDatasetId } from "../id.js";
9
9
  async function awaitContextRun(run) {
10
10
  if (!run)
11
11
  return;
@@ -152,7 +152,7 @@ function createFileParseContextDefinition(params) {
152
152
  return { datasetId: fallbackDatasetId ?? "", context };
153
153
  }
154
154
  export function createFileParseContext(fileId, opts) {
155
- const datasetId = opts?.datasetId ?? id();
155
+ const datasetId = opts?.datasetId ?? createDatasetId();
156
156
  const params = {
157
157
  fileId,
158
158
  instructions: opts?.instructions,
@@ -166,7 +166,7 @@ export function createFileParseContext(fileId, opts) {
166
166
  datasetId,
167
167
  async parse(runtime, options = {}) {
168
168
  const triggerEvent = {
169
- id: id(),
169
+ id: createDatasetId(),
170
170
  type: INPUT_TEXT_ITEM_TYPE,
171
171
  channel: WEB_CHANNEL,
172
172
  createdAt: new Date().toISOString(),
package/dist/id.d.ts ADDED
@@ -0,0 +1 @@
1
+ export declare function createDatasetId(): string;
package/dist/id.js ADDED
@@ -0,0 +1,10 @@
1
/**
 * Generates a random version-4 UUID string.
 *
 * Sources of randomness, in order of preference:
 *  1. `globalThis.crypto.randomUUID()` when it exists and returns a
 *     non-empty string.
 *  2. `globalThis.crypto.getRandomValues()` with the version/variant bits
 *     set by hand, for runtimes that expose Web Crypto without `randomUUID`.
 *  3. `Math.random()` as a last resort when no Web Crypto is present; this
 *     path is not cryptographically strong.
 *
 * @returns {string} A lowercase UUID of the form
 *   `xxxxxxxx-xxxx-4xxx-[89ab]xxx-xxxxxxxxxxxx`.
 */
export function createDatasetId() {
    const webCrypto = globalThis.crypto;
    const uuid = webCrypto?.randomUUID?.();
    if (typeof uuid === "string" && uuid.length > 0) {
        return uuid;
    }
    if (typeof webCrypto?.getRandomValues === "function") {
        const bytes = new Uint8Array(16);
        webCrypto.getRandomValues(bytes);
        bytes[6] = (bytes[6] & 0x0f) | 0x40; // version 4
        bytes[8] = (bytes[8] & 0x3f) | 0x80; // variant 10xx
        const hex = Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
        return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
    }
    return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
        const r = (Math.random() * 16) | 0;
        // "y" carries the variant bits, so it is constrained to 8, 9, a or b.
        const v = c === "x" ? r : (r & 0x3) | 0x8;
        return v.toString(16);
    });
}
@@ -1,5 +1,5 @@
1
- import { id as newId } from "@instantdb/admin";
2
1
  import { DatasetService } from "../service.js";
2
+ import { createDatasetId } from "../id.js";
3
3
  function normalizeRows(result) {
4
4
  if (!result || typeof result !== "object")
5
5
  return [];
@@ -56,7 +56,7 @@ export async function queryDomainStep(params) {
56
56
  "use step";
57
57
  const db = await getRuntimeDb(params.runtime);
58
58
  const service = new DatasetService(db);
59
- const datasetId = params.datasetId ?? newId();
59
+ const datasetId = params.datasetId ?? createDatasetId();
60
60
  const queryResult = await db.query(params.query);
61
61
  const rows = normalizeRows(queryResult);
62
62
  const previewRows = rows.slice(0, 20);
package/dist/service.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { InstantAdminDatabase } from "@instantdb/admin";
1
+ import type { InstantAdminDatabase } from "@instantdb/admin";
2
2
  import { SchemaOf } from "@ekairos/domain";
3
3
  import { datasetDomain } from "./schema.js";
4
4
  export type ServiceResult<T = any> = {
package/dist/service.js CHANGED
@@ -1,4 +1,4 @@
1
- import { id as newId } from "@instantdb/admin";
1
+ import { createDatasetId } from "./id.js";
2
2
  export class DatasetService {
3
3
  constructor(db) {
4
4
  this.db = db;
@@ -27,9 +27,9 @@ export class DatasetService {
27
27
  }
28
28
  async createDataset(params) {
29
29
  try {
30
- const datasetId = params.id ?? newId();
30
+ const datasetId = params.id ?? createDatasetId();
31
31
  const existing = await this.resolveDatasetEntityId(datasetId);
32
- const entityId = existing.ok ? existing.data : newId();
32
+ const entityId = existing.ok ? existing.data : createDatasetId();
33
33
  const mutations = [];
34
34
  mutations.push(this.db.tx.dataset_datasets[entityId].update({
35
35
  datasetId,
@@ -73,7 +73,7 @@ export class DatasetService {
73
73
  return resolved;
74
74
  const mutations = [];
75
75
  for (const record of params.records) {
76
- const recordId = newId();
76
+ const recordId = createDatasetId();
77
77
  mutations.push(this.db.tx.dataset_records[recordId].update({
78
78
  rowContent: record.rowContent,
79
79
  order: record.order,
@@ -1,10 +1,10 @@
1
1
  import { createContext, INPUT_TEXT_ITEM_TYPE, WEB_CHANNEL, } from "@ekairos/events";
2
- import { id } from "@instantdb/admin";
3
2
  import { createClearDatasetTool } from "../clearDataset.tool.js";
4
3
  import { createCompleteDatasetTool, didCompleteDatasetSucceed, getDatasetFatalFailure, } from "../completeDataset.tool.js";
5
4
  import { datasetUpdateSchemaStep } from "../dataset/steps.js";
6
5
  import { createExecuteCommandTool } from "../executeCommand.tool.js";
7
6
  import { buildTransformDatasetPromptStep, ensureTransformSourcesInSandboxStep, generateTransformSourcePreviewsStep, } from "./transform-dataset.steps.js";
7
+ import { createDatasetId } from "../id.js";
8
8
  async function awaitContextRun(run) {
9
9
  if (!run)
10
10
  return;
@@ -146,7 +146,7 @@ function createTransformDatasetContextDefinition(params) {
146
146
  return { datasetId: fallbackDatasetId ?? "", context };
147
147
  }
148
148
  export function createTransformDatasetContext(params) {
149
- const datasetId = params.datasetId ?? id();
149
+ const datasetId = params.datasetId ?? createDatasetId();
150
150
  const { context } = createTransformDatasetContextDefinition({
151
151
  sourceDatasetIds: params.sourceDatasetIds,
152
152
  outputSchema: params.outputSchema,
@@ -163,7 +163,7 @@ export function createTransformDatasetContext(params) {
163
163
  ? "the source dataset"
164
164
  : `${params.sourceDatasetIds.length} source datasets`;
165
165
  const triggerEvent = {
166
- id: id(),
166
+ id: createDatasetId(),
167
167
  type: INPUT_TEXT_ITEM_TYPE,
168
168
  channel: WEB_CHANNEL,
169
169
  createdAt: new Date().toISOString(),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.57-beta.development.0",
3
+ "version": "1.22.59-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.57-beta.development.0",
69
- "@ekairos/events": "^1.22.57-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.57-beta.development.0",
68
+ "@ekairos/domain": "^1.22.59-beta.development.0",
69
+ "@ekairos/events": "^1.22.59-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.59-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",