@etohq/workflows-input-dataset 0.0.1-next-20260318155517

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # @etohq/workflows-input-dataset
2
+
3
+ Domain-agnostic dataset import spec for turning tabular sources (starting with CSV) into
4
+ `@etohq/workflows-input-schema` submissions.
5
+
6
+ This package contains types only. Runtime behavior (CSV parsing + mapping) lives in
7
+ `@etohq/workflows-input-dataset-runtime`.
8
+
9
+ ## What This Enables
10
+
11
+ - Different tenants can import any kind of tabular data (admission lists, addresses, tenant records, etc.)
12
+ - Mapping from arbitrary CSV headers to schema field keys is configured declaratively via specs, not written as code.
13
+
14
+ ## Core Type
15
+
16
+ `DatasetImportSpec`:
17
+
18
+ - `target.schema_id` / `target.schema_version`: which input schema (and version) to produce submissions for
19
+ - `mappings[]`: how CSV columns map to schema fields
20
+ - `behavior`: how to handle unknown columns (ignore, warn, or error), empty strings, and empty rows
21
+
@@ -0,0 +1,2 @@
1
+ export * from "./spec";
2
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,QAAQ,CAAA"}
package/dist/index.js ADDED
@@ -0,0 +1,18 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./spec"), exports);
18
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,yCAAsB"}
package/dist/spec.d.ts ADDED
@@ -0,0 +1,187 @@
1
+ import type { InputTransform, InputValueType, ISODateString } from "@etohq/workflows-input-schema";
2
+ /**
3
+ * Canonical tabular source kinds. We start with CSV because it's the most common onboarding format.
4
+ */
5
+ export type TabularSourceType = "csv";
6
+ /**
7
+ * Preprocessing steps applied to extracted text before attempting record extraction.
8
+ *
9
+ * Keep this declarative so specs are portable and safe.
10
+ */
11
+ export type TextPreprocessSpec = {
12
+ type: "trim";
13
+ } | {
14
+ type: "normalize_whitespace";
15
+ } | {
16
+ type: "remove_empty_lines";
17
+ } | {
18
+ type: "remove_lines_matching";
19
+ pattern: string;
20
+ flags?: string | null;
21
+ } | {
22
+ type: "regex_replace";
23
+ pattern: string;
24
+ replace_with: string;
25
+ flags?: string | null;
26
+ };
27
+ /**
28
+ * Extracts tabular-ish records from text using regular expressions or whitespace-delimited token positions.
29
+ *
30
+ * Recommended for the regex variants: use named capture groups (e.g. `(?<Appnum>\\w+)`) and map them to output columns via `column_map`.
31
+ */
32
+ export type TextRecordExtractSpec = {
33
+ type: "line_regex";
34
+ pattern: string;
35
+ flags?: string | null;
36
+ /** If provided, only these group names are emitted (after mapping). */
37
+ groups?: string[] | null;
38
+ /** Maps capture-group name -> output column name. */
39
+ column_map?: Record<string, string> | null;
40
+ } | {
41
+ type: "global_regex";
42
+ pattern: string;
43
+ flags?: string | null;
44
+ groups?: string[] | null;
45
+ column_map?: Record<string, string> | null;
46
+ } | {
47
+ /**
48
+ * Token-based extraction for fixed/whitespace-aligned PDF rows.
49
+ *
50
+ * After preprocessing (especially `normalize_whitespace`), each line is split by spaces.
51
+ * Column values are selected by token indices (supports negative indices).
52
+ */
53
+ type: "token_columns";
54
+ line_filter?: {
55
+ pattern: string;
56
+ flags?: string | null;
57
+ } | null;
58
+ columns: Array<{
59
+ column: string;
60
+ /** Inclusive token index. Negative indices are from the end (e.g. -1 is last token). */
61
+ from: number;
62
+ /** Inclusive token index. If omitted/null, defaults to `from`. */
63
+ to?: number | null;
64
+ }>;
65
+ };
66
+ /**
67
+ * PDF source extraction spec.
68
+ *
69
+ * The `pdf-parse` layer only yields plain text. This spec defines how to turn that text into
70
+ * `Record<string, string>[]` so existing mapping/import logic can run unchanged.
71
+ */
72
+ export type PdfExtractSpec = {
73
+ preprocess?: TextPreprocessSpec[] | null;
74
+ records: TextRecordExtractSpec;
75
+ };
76
+ export type DatasetRefSpec = {
77
+ id: string;
78
+ version: number;
79
+ };
80
+ /**
81
+ * Which input schema this dataset import spec targets.
82
+ */
83
+ export type DatasetTargetSchemaSpec = {
84
+ schema_id: string;
85
+ schema_version: number;
86
+ };
87
+ /**
88
+ * Controls how the importer behaves when the input doesn't match the spec.
89
+ */
90
+ export type DatasetImportBehaviorSpec = {
91
+ on_unknown_columns?: "ignore" | "warn" | "error" | null;
92
+ empty_string_as_null?: boolean | null;
93
+ skip_empty_rows?: boolean | null;
94
+ };
95
+ /**
96
+ * Identifies a column in a tabular source.
97
+ *
98
+ * This is intentionally friendly for end-users:
99
+ * - names can be listed as aliases (e.g. ["Email", "E-mail", "email_address"])
100
+ * - matching can be case-insensitive (set `case_insensitive` on a `by_header_any` selector)
101
+ */
102
+ export type ColumnSelectorSpec = {
103
+ type: "by_header";
104
+ header: string;
105
+ } | {
106
+ type: "by_header_any";
107
+ headers: string[];
108
+ case_insensitive?: boolean | null;
109
+ };
110
+ /**
111
+ * Parses a cell (string) into an `InputValue` shape that `validateSubmission` can further coerce/validate.
112
+ *
113
+ * Notes:
114
+ * - For richer types (date/enum/array/json), values are encoded as JSON-safe `{ type, value }` wrappers (see `ImportedCellValue`).
115
+ * - If you need more power, prefer adding a plugin layer rather than allowing functions in specs.
116
+ */
117
+ export type CellParseSpec = {
118
+ type: "as_string";
119
+ } | {
120
+ type: "as_number";
121
+ } | {
122
+ type: "as_boolean";
123
+ true_values?: string[] | null;
124
+ false_values?: string[] | null;
125
+ } | {
126
+ type: "as_date";
127
+ format?: string | null;
128
+ assume_iso?: boolean | null;
129
+ } | {
130
+ type: "as_enum";
131
+ } | {
132
+ type: "as_array";
133
+ delimiter: string;
134
+ item: Exclude<CellParseSpec, {
135
+ type: "as_array";
136
+ }>;
137
+ } | {
138
+ type: "as_json";
139
+ };
140
+ export type FieldMappingSpec = {
141
+ field_key: string;
142
+ source: ColumnSelectorSpec;
143
+ parse?: CellParseSpec | null;
144
+ transforms?: InputTransform[] | null;
145
+ required?: boolean | null;
146
+ default_value?: string | null;
147
+ };
148
+ /**
149
+ * Domain-agnostic import spec for turning a tabular source into `InputSubmission[]`.
150
+ */
151
+ export type DatasetImportSpec = {
152
+ id: string;
153
+ version: number;
154
+ name: string;
155
+ description?: string | null;
156
+ source_type: TabularSourceType;
157
+ target: DatasetTargetSchemaSpec;
158
+ behavior?: DatasetImportBehaviorSpec | null;
159
+ mappings: FieldMappingSpec[];
160
+ metadata?: Record<string, unknown> | null;
161
+ };
162
+ /**
163
+ * A normalized, JSON-safe intermediate cell value produced by the importer.
164
+ *
165
+ * This is intentionally more limited than `InputValue` so it stays stable for tabular inputs.
166
+ */
167
+ export type ImportedCellValue = null | string | number | boolean | {
168
+ type: "date";
169
+ value: ISODateString;
170
+ } | {
171
+ type: "enum";
172
+ value: string;
173
+ } | {
174
+ type: "array";
175
+ value: ImportedCellValue[];
176
+ } | {
177
+ type: "json";
178
+ value: unknown;
179
+ };
180
+ /**
181
+ * Used by importers to align to schema field value types without embedding business logic.
182
+ */
183
+ export type FieldTypeHint = {
184
+ field_key: string;
185
+ value_type: InputValueType;
186
+ };
187
+ //# sourceMappingURL=spec.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"spec.d.ts","sourceRoot":"","sources":["../src/spec.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAA;AAElG;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG,KAAK,CAAA;AAErC;;;;GAIG;AACH,MAAM,MAAM,kBAAkB,GAC1B;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAChB;IAAE,IAAI,EAAE,sBAAsB,CAAA;CAAE,GAChC;IAAE,IAAI,EAAE,oBAAoB,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,uBAAuB,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GACzE;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,CAAA;AAE3F;;;;GAIG;AACH,MAAM,MAAM,qBAAqB,GAC7B;IACE,IAAI,EAAE,YAAY,CAAA;IAClB,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,uEAAuE;IACvE,MAAM,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAA;IACxB,qDAAqD;IACrD,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAA;CAC3C,GACD;IACE,IAAI,EAAE,cAAc,CAAA;IACpB,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAA;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAA;CAC3C,GACD;IACE;;;;;OAKG;IACH,IAAI,EAAE,eAAe,CAAA;IACrB,WAAW,CAAC,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAAG,IAAI,CAAA;IAC/D,OAAO,EAAE,KAAK,CAAC;QACb,MAAM,EAAE,MAAM,CAAA;QACd,wFAAwF;QACxF,IAAI,EAAE,MAAM,CAAA;QACZ,kEAAkE;QAClE,EAAE,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KACnB,CAAC,CAAA;CACH,CAAA;AAEL;;;;;GAKG;AACH,MAAM,MAAM,cAAc,GAAG;IAC3B,UAAU,CAAC,EAAE,kBAAkB,EAAE,GAAG,IAAI,CAAA;IACxC,OAAO,EAAE,qBAAqB,CAAA;CAC/B,CAAA;AAED,MAAM,MAAM,cAAc,GAAG;IAC3B,EAAE,EAAE,MAAM,CAAA;IACV,OAAO,EAAE,MAAM,CAAA;CAChB,CAAA;AAED;;GAEG;AACH,MAAM,MAAM,uBAAuB,GAAG;IACpC,SAAS,EAAE,MAAM,CAAA;IACjB,cAAc,EAAE,MAAM,CAAA;CACvB,CAAA;AAED;;GAEG;AACH,MAAM,MAAM,yBAAyB,GAAG;IACtC,kBAAkB,CAAC,EAAE,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,CAAA;IACvD,oBAAoB,CAAC,EAAE,OAAO,GAAG,IAAI,CAAA;IACrC,eAAe,CAAC,EAAE,OAAO,GAAG,IAAI,CAAA;CACjC,CAAA;AAED;;;;;;GAMG;AACH,MAAM,MAAM,kBAAkB,GAC1B;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,MAAM,EAA
E,MAAM,CAAA;CAAE,GACrC;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,OAAO,EAAE,MAAM,EAAE,CAAC;IAAC,gBAAgB,CAAC,EAAE,OAAO,GAAG,IAAI,CAAA;CAAE,CAAA;AAEnF;;;;;;GAMG;AACH,MAAM,MAAM,aAAa,GACrB;IAAE,IAAI,EAAE,WAAW,CAAA;CAAE,GACrB;IAAE,IAAI,EAAE,WAAW,CAAA;CAAE,GACrB;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAA;CAAE,GACrF;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,UAAU,CAAC,EAAE,OAAO,GAAG,IAAI,CAAA;CAAE,GACxE;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,GACnB;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,OAAO,CAAC,aAAa,EAAE;QAAE,IAAI,EAAE,UAAU,CAAA;KAAE,CAAC,CAAA;CAAE,GAC3F;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,CAAA;AAEvB,MAAM,MAAM,gBAAgB,GAAG;IAC7B,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,kBAAkB,CAAA;IAC1B,KAAK,CAAC,EAAE,aAAa,GAAG,IAAI,CAAA;IAC5B,UAAU,CAAC,EAAE,cAAc,EAAE,GAAG,IAAI,CAAA;IACpC,QAAQ,CAAC,EAAE,OAAO,GAAG,IAAI,CAAA;IACzB,aAAa,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAC9B,CAAA;AAED;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG;IAC9B,EAAE,EAAE,MAAM,CAAA;IACV,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IAC3B,WAAW,EAAE,iBAAiB,CAAA;IAC9B,MAAM,EAAE,uBAAuB,CAAA;IAC/B,QAAQ,CAAC,EAAE,yBAAyB,GAAG,IAAI,CAAA;IAC3C,QAAQ,EAAE,gBAAgB,EAAE,CAAA;IAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAA;CAC1C,CAAA;AAED;;;;GAIG;AACH,MAAM,MAAM,iBAAiB,GACzB,IAAI,GACJ,MAAM,GACN,MAAM,GACN,OAAO,GACP;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,aAAa,CAAA;CAAE,GACtC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAC/B;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,iBAAiB,EAAE,CAAA;CAAE,GAC7C;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,OAAO,CAAA;CAAE,CAAA;AAEpC;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG;IAC1B,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,cAAc,CAAA;CAC3B,CAAA"}
package/dist/spec.js ADDED
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=spec.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"spec.js","sourceRoot":"","sources":["../src/spec.ts"],"names":[],"mappings":""}
@@ -0,0 +1 @@
1
+ {"root":["../src/index.ts","../src/spec.ts"],"version":"5.8.3"}
package/package.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "name": "@etohq/workflows-input-dataset",
3
+ "version": "0.0.1-next-20260318155517",
4
+ "description": "Domain-agnostic dataset import spec (CSV/tabular mapping) for workflows input schemas",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "exports": {
8
+ ".": {
9
+ "types": "./dist/index.d.ts",
10
+ "import": "./dist/index.js",
11
+ "require": "./dist/index.js"
12
+ }
13
+ },
14
+ "files": [
15
+ "dist"
16
+ ],
17
+ "dependencies": {
18
+ "@etohq/workflows-input-schema": "0.0.1-next-20260318155517"
19
+ },
20
+ "devDependencies": {
21
+ "@types/node": "22.10.5",
22
+ "rimraf": "5.0.2",
23
+ "typescript": "5.8.3"
24
+ },
25
+ "scripts": {
26
+ "build": "rimraf dist && tsc --build",
27
+ "watch": "tsc --build --watch",
28
+ "test": "exit 0"
29
+ }
30
+ }