@ekairos/dataset 1.22.58-beta.development.0 → 1.22.59-beta.development.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,13 +3,66 @@ import { createTransformDatasetContext } from "../transform/transform-dataset.ag
3
3
  import { datasetInferAndUpdateSchemaStep, datasetReadOneStep, } from "../dataset/steps.js";
4
4
  import { registerDatasetAgentMaterializers } from "./agentMaterializers.js";
5
5
  import { buildFileDefaultInstructions, buildRawSourceInstructions, buildTransformInstructions, } from "./instructions.js";
6
- import { createOrUpdateDatasetMetadata, uploadInlineTextSource, } from "./persistence.js";
6
+ import { createOrUpdateDatasetMetadata, materializeRowsToDataset, uploadInlineTextSource, } from "./persistence.js";
7
7
  import { getDomainDescriptor } from "./sourceRows.js";
8
8
  import { materializeQuerySource } from "./materializeQuery.js";
9
9
  import { createDatasetSandboxStep } from "../sandbox/steps.js";
10
10
  function makeIntermediateDatasetId(targetDatasetId, sourceKind, index) {
11
11
  return `${targetDatasetId}__${sourceKind}_${index}`;
12
12
  }
13
/**
 * Coerces a parsed JSON value into a list of rows.
 *
 * - An array yields one row per element.
 * - Any other value yields a single row.
 * - Truthy values whose `typeof` is "object" are kept as-is; everything else
 *   (primitives and `null`) is wrapped as `{ value }`.
 *
 * NOTE(review): arrays nested inside an array also have `typeof` "object" and
 * therefore pass through unwrapped — confirm downstream accepts array rows.
 *
 * @param {unknown} value - Result of `JSON.parse`.
 * @returns {Array<unknown>} Rows derived from `value`.
 */
function normalizeParsedTextRows(value) {
    const toRow = (item) => (item && typeof item === "object" ? item : { value: item });
    if (Array.isArray(value)) {
        return value.map(toRow);
    }
    return [toRow(value)];
}

/**
 * Turns an inline text source into rows.
 *
 * JSON is attempted when the MIME type contains "json" or the name ends with
 * `.json`, `.jsonl` or `.ndjson`. A source is treated as line-delimited JSON
 * when the name ends with `.jsonl`/`.ndjson` or the MIME type mentions
 * "jsonl"/"ndjson" (e.g. `application/x-ndjson`, `application/jsonlines`).
 * Line-delimited sources are parsed one non-blank line at a time, and fall
 * back to a whole-document parse so a pretty-printed document carrying a
 * line-delimited label still yields structured rows. A leading byte-order
 * mark is ignored when parsing. Whenever parsing is not attempted or fails,
 * the result is the single raw row `[{ text }]`.
 *
 * @param {{ text?: unknown, mimeType?: unknown, name?: unknown }} source
 * @returns {Array<unknown>} Parsed rows, or `[{ text }]` as a fallback.
 */
function materializeRawTextRows(source) {
    const text = String(source.text ?? "");
    const mimeType = String(source.mimeType ?? "").toLowerCase();
    const name = String(source.name ?? "").toLowerCase();

    const isLineDelimited = name.endsWith(".jsonl") ||
        name.endsWith(".ndjson") ||
        mimeType.includes("jsonl") ||
        mimeType.includes("ndjson");
    const shouldParseJson = isLineDelimited || mimeType.includes("json") || name.endsWith(".json");
    if (!shouldParseJson) {
        return [{ text }];
    }

    const parseLines = () => text
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter(Boolean)
        .map((line) => JSON.parse(line))
        .flatMap((row) => normalizeParsedTextRows(row));
    const parseDocument = () => {
        // JSON.parse rejects a leading BOM, which editors commonly prepend.
        const body = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
        return normalizeParsedTextRows(JSON.parse(body));
    };

    const attempts = isLineDelimited ? [parseLines, parseDocument] : [parseDocument];
    for (const attempt of attempts) {
        try {
            return attempt();
        }
        catch {
            // Deliberate best-effort: try the next strategy, then fall back to raw text.
        }
    }
    return [{ text }];
}
44
/**
 * Converts an inline text source to rows via `materializeRawTextRows` and
 * hands them to `materializeRowsToDataset` under `targetDatasetId`.
 *
 * The dataset title falls back from `state.title` to `source.name` to the
 * target id. The source is recorded with kind "text" together with its MIME
 * type, name and description.
 *
 * @param {object} state - Reads `runtime`, `sandboxId`, `title`,
 *   `instructions`, `outputSchema` and `first`.
 * @param {object} source - Reads `text`, `mimeType`, `name` and `description`.
 * @param {string} targetDatasetId - Id of the dataset to write.
 * @returns {Promise<string>} `targetDatasetId`, once the rows are handed off.
 */
async function materializeRawTextSource(state, source, targetDatasetId) {
    const rows = materializeRawTextRows(source);
    const { runtime } = state;
    const title = state.title ?? source.name ?? targetDatasetId;
    const textSource = {
        kind: "text",
        mimeType: source.mimeType,
        name: source.name,
        description: source.description,
    };
    const request = {
        datasetId: targetDatasetId,
        sandboxId: state.sandboxId,
        title,
        instructions: state.instructions,
        sources: [textSource],
        sourceKinds: ["text"],
        rows,
        schema: state.outputSchema,
        first: state.first,
    };
    await materializeRowsToDataset(runtime, request);
    return targetDatasetId;
}
13
66
  async function resolveDatasetSandboxId(state, targetDatasetId) {
14
67
  const sandboxId = String(state.sandboxId ?? "").trim();
15
68
  if (sandboxId)
@@ -90,6 +143,16 @@ async function normalizeSourceToDatasetId(state, source, targetDatasetId, source
90
143
  });
91
144
  return intermediateDatasetId;
92
145
  }
146
+ if (source.kind === "text") {
147
+ await materializeRawTextSource({
148
+ ...state,
149
+ outputSchema: undefined,
150
+ first: false,
151
+ instructions: buildRawSourceInstructions(source.kind),
152
+ title: source.name ?? state.title,
153
+ }, source, intermediateDatasetId);
154
+ return intermediateDatasetId;
155
+ }
93
156
  await materializeSingleFileLikeSource({
94
157
  ...state,
95
158
  outputSchema: undefined,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekairos/dataset",
3
- "version": "1.22.58-beta.development.0",
3
+ "version": "1.22.59-beta.development.0",
4
4
  "description": "Pulzar Dataset Tools",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,9 +65,9 @@
65
65
  "test:ai-sdk:instant": "vitest run -c vitest.codex.config.mts src/tests/materializeDataset.ai-sdk.instant.test.ts"
66
66
  },
67
67
  "dependencies": {
68
- "@ekairos/domain": "^1.22.58-beta.development.0",
69
- "@ekairos/events": "^1.22.58-beta.development.0",
70
- "@ekairos/sandbox": "^1.22.58-beta.development.0",
68
+ "@ekairos/domain": "^1.22.59-beta.development.0",
69
+ "@ekairos/events": "^1.22.59-beta.development.0",
70
+ "@ekairos/sandbox": "^1.22.59-beta.development.0",
71
71
  "@instantdb/admin": "0.22.158",
72
72
  "@instantdb/core": "0.22.142",
73
73
  "ai": "^5.0.44",