hazo_collect 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGE_LOG.md +79 -1
- package/README.md +20 -1
- package/dist/chunk-6RFFJN6X.js +131 -0
- package/dist/chunk-NNLJFKX5.js +51 -0
- package/dist/{index-C47n5Xur.d.ts → index-Dls93tTu.d.ts} +3 -3
- package/dist/index.d.ts +5 -5
- package/dist/index.js +21 -135
- package/dist/run-envelope-BX10F3lB.d.ts +56 -0
- package/dist/run-result-yw8Iqipg.d.ts +39 -0
- package/dist/sdk/index.d.ts +2 -2
- package/dist/sdk/index.js +8 -39
- package/dist/server/index.d.ts +6 -6
- package/dist/server/index.js +20 -133
- package/package.json +11 -9
- package/dist/run-envelope-COvdsleR.d.ts +0 -129
- package/dist/run-result-qW7bJEZ-.d.ts +0 -88
package/CHANGE_LOG.md
CHANGED
|
@@ -1,6 +1,84 @@
|
|
|
1
1
|
# hazo_collect — Change Log
|
|
2
2
|
|
|
3
|
-
## 0.2.
|
|
3
|
+
## 0.2.3 — 2026-06-14
|
|
4
|
+
|
|
5
|
+
### Fixes — stop double-encoding JSON columns for PostgREST/JSONB adapters
|
|
6
|
+
|
|
7
|
+
Previously, `write-adapter.ts`, `registry/index.ts`, and `manager/index.ts` all called
|
|
8
|
+
`JSON.stringify()` on JSON-valued fields before handing them to the adapter. This was correct for
|
|
9
|
+
SQLite (which stores JSON as TEXT), but caused **double-encoding** for PostgREST/pg adapters whose
|
|
10
|
+
JSONB columns serialize objects natively — making `manifest->>'name'` and `payload->>'seq'`
|
|
11
|
+
return null in the downstream ocdata app.
|
|
12
|
+
|
|
13
|
+
Fix: pass objects/arrays through directly and declare the JSON columns via the new
|
|
14
|
+
`QueryBuilder.jsonColumns([...])` API (introduced in `hazo_connect@3.8.0`). The SQLite adapter
|
|
15
|
+
reads the declared columns and serializes them itself; the PostgREST adapter passes the object
|
|
16
|
+
through as-is.
|
|
17
|
+
|
|
18
|
+
**Changed write sites:**
|
|
19
|
+
- `src/server/write-adapter.ts` — `payload` no longer pre-stringified; `QueryBuilder` chain adds
|
|
20
|
+
`.jsonColumns(['payload'])`.
|
|
21
|
+
- `src/registry/index.ts` — `manifest` no longer pre-stringified; `RegistryRow.manifest` type
|
|
22
|
+
widened from `string` to `ManifestInput`; both `doUpdate` and `doNothing` QueryBuilders get
|
|
23
|
+
`.jsonColumns(['manifest'])`.
|
|
24
|
+
- `src/manager/index.ts` `insertRunRow` — `errors` changed from `'[]'` string to `[]` array;
|
|
25
|
+
QueryBuilder gets `.jsonColumns(['errors'])`.
|
|
26
|
+
- `src/manager/index.ts` `updateRunRow` — `errors` and `metrics` no longer pre-stringified;
|
|
27
|
+
QueryBuilder gets `.jsonColumns(['errors', 'metrics'])`.
|
|
28
|
+
|
|
29
|
+
**Requires:** `hazo_connect ^3.8.0`.
|
|
30
|
+
|
|
31
|
+
## 0.2.2 — 2026-06-14
|
|
32
|
+
|
|
33
|
+
### Fixes — CJS-host plugin discovery (empty-registry bug)
|
|
34
|
+
|
|
35
|
+
A downstream CommonJS app (ocdata) whose worker runs under `tsx` booted with an **empty plugin
|
|
36
|
+
registry** — `discover()` returned `[]`, so the operator UI (`/plugins/<name>`, `/health`,
|
|
37
|
+
`/explore`) showed nothing. Two packaging/runtime defects combined to make folder plugins
|
|
38
|
+
undiscoverable; both are now fixed (the first two items below), alongside a build-config hardening and a regression test.
|
|
39
|
+
|
|
40
|
+
- **Process-global registry singleton** (`src/sdk/index.ts`). The collector and sink registries
|
|
41
|
+
were module-level `const _collectors = new Map()` / `const _sinks = new Map()`. tsup inlined
|
|
42
|
+
`src/sdk/index.ts` into **both** the `sdk` and `server` bundles, and a CJS host loaded the ESM
|
|
43
|
+
build a second time under a different module format — yielding multiple independent Maps.
|
|
44
|
+
`defineCollector` (via `hazo_collect/sdk`) wrote to one Map while `discover()` →
|
|
45
|
+
`listCollectors()` (via `hazo_collect/server`) read another, which was never written to. The
|
|
46
|
+
registries are now keyed on `Symbol.for('hazo_collect.collectors')` /
|
|
47
|
+
`Symbol.for('hazo_collect.sinks')` and stashed on `globalThis`, so every copy of the module —
|
|
48
|
+
regardless of bundle or module format — coalesces onto a single store. Public API
|
|
49
|
+
(`defineCollector`, `defineSink`, `getCollector`, `getSink`, `listCollectors`,
|
|
50
|
+
`resetCollectorRegistry`) is unchanged.
|
|
51
|
+
- **`exports` map conditions** (`package.json`). `.` and `./server` declared only an `import`
|
|
52
|
+
condition, so a CommonJS host (which resolves with the `require`/`node` condition set) threw
|
|
53
|
+
`ERR_PACKAGE_PATH_NOT_EXPORTED` on `require('hazo_collect')` / `require('hazo_collect/server')`.
|
|
54
|
+
Both subpaths now also declare a `default` condition (mirroring `./sdk`), so CJS hosts resolve
|
|
55
|
+
them.
|
|
56
|
+
- **tsup `splitting: true`** (`tsup.config.ts`). Code-splitting extracts the shared registry module
|
|
57
|
+
into a common chunk that both the `sdk` and `server` bundles import, rather than inlining a
|
|
58
|
+
separate copy into each. The `globalThis` store is the real safety net; this keeps the emitted
|
|
59
|
+
bundles clean (verified: `dist/sdk/index.js` and `dist/server/index.js` import the same
|
|
60
|
+
registry chunk).
|
|
61
|
+
- **Regression test** (`src/__tests__/registry_singleton.test.ts`). Loads `src/sdk/index.ts` twice
|
|
62
|
+
as independent module instances (via `jest.isolateModulesAsync`) — reproducing the duplicated
|
|
63
|
+
bundle / dual-format condition — and asserts a collector registered through one instance is
|
|
64
|
+
visible through the other. 71/71 tests green.
|
|
65
|
+
|
|
66
|
+
## 0.2.1 — 2026-06-14
|
|
67
|
+
|
|
68
|
+
### Fixes — zod v4 compatibility + manifest input/output typing
|
|
69
|
+
|
|
70
|
+
- **zod v4 `z.record`** — all five record schemas migrated to the two-arg form
|
|
71
|
+
(`z.record(keyType, valueType)`) required by zod v4: `ManifestSchema.inputs`/`labels`,
|
|
72
|
+
`RunEnvelopeSchema.inputs`/`secrets`, `RunResultSchema.metrics`. The single-arg form left the
|
|
73
|
+
schema modules with a `tsc` error, which silently collapsed `Manifest` to a loose type and masked
|
|
74
|
+
a cascade of downstream type errors. `type-check` is now green for the first time under zod v4.
|
|
75
|
+
- **`ManifestInput` type** — new `z.input<typeof ManifestSchema>` export. The SDK
|
|
76
|
+
(`defineCollector` / `defineSink`), `RegistryEntry`, `persistRegistry`, and `validateInputs` now
|
|
77
|
+
accept this author-input shape, so `.default()` fields (`timezone`, `timeout_sec`, `concurrency`,
|
|
78
|
+
`retry.*`) may be **omitted** when authoring a collector — the registry runs `parseManifest` to
|
|
79
|
+
fill defaults before the runtime reads them. No runtime behavior change.
|
|
80
|
+
- **Tests** — jest mock generics tightened in `runtime.test.ts` / `registry.test.ts` so the strict
|
|
81
|
+
manifest types type-check. 68/68 tests still green.
|
|
4
82
|
|
|
5
83
|
### Test-app improvements (no API changes)
|
|
6
84
|
|
package/README.md
CHANGED
|
@@ -10,6 +10,15 @@ npm install hazo_collect
|
|
|
10
10
|
|
|
11
11
|
Peer deps: `hazo_core`, `hazo_connect`. Optional: `hazo_secure` (for secrets injection).
|
|
12
12
|
|
|
13
|
+
### Module formats
|
|
14
|
+
|
|
15
|
+
`hazo_collect` ships as ESM, but every subpath (`.`, `./server`, `./sdk`) is resolvable from a
|
|
16
|
+
**CommonJS** host as well — including a Next.js project (`"type": "commonjs"`) whose worker runs
|
|
17
|
+
under `tsx`. Both `import('hazo_collect/server')` and `require('hazo_collect/server')` resolve, and a
|
|
18
|
+
plugin that registers via `import { defineCollector } from 'hazo_collect/sdk'` is discovered by a
|
|
19
|
+
`discover()` call made from the CJS worker — the registry is shared process-wide regardless of how
|
|
20
|
+
each side is loaded.
|
|
21
|
+
|
|
13
22
|
## Quick start
|
|
14
23
|
|
|
15
24
|
### 1. Define a collector (SDK)
|
|
@@ -77,13 +86,23 @@ const result = await manager.runNow({ plugin: 'my_source' });
|
|
|
77
86
|
|
|
78
87
|
| Export | Description |
|
|
79
88
|
|---|---|
|
|
80
|
-
| `defineCollector(def)` | Register a collector in the
|
|
89
|
+
| `defineCollector(def)` | Register a collector in the process-global registry |
|
|
81
90
|
| `getCollector(name)` | Look up a registered collector by name |
|
|
82
91
|
| `listCollectors()` | Return all registered collectors |
|
|
83
92
|
| `defineSink(def)` | Register a sink (data destination) |
|
|
84
93
|
| `getSink(name)` | Look up a registered sink |
|
|
85
94
|
| `resetCollectorRegistry()` | Clear all registrations (test hygiene) |
|
|
86
95
|
|
|
96
|
+
The registry is a **process-global singleton**: collectors are stored at `globalThis[Symbol.for('hazo_collect.collectors')]` and sinks at `globalThis[Symbol.for('hazo_collect.sinks')]`.
|
|
97
|
+
This guarantees that a collector registered through `hazo_collect/sdk` is visible to `discover()` /
|
|
98
|
+
`listCollectors()` in `hazo_collect/server`, even when the two entry points are bundled separately or
|
|
99
|
+
loaded under different module formats (see [Module formats](#module-formats) above).
|
|
100
|
+
|
|
101
|
+
`defineCollector` / `defineSink` accept the manifest **input** shape (`ManifestInput`): fields with
|
|
102
|
+
defaults — `timezone`, `timeout_sec`, `concurrency`, `retry.*` — may be omitted. The registry runs
|
|
103
|
+
`parseManifest` to fill defaults before the runtime reads them, so wrapping the literal in
|
|
104
|
+
`parseManifest(...)` (as in the quick start above) is optional.
|
|
105
|
+
|
|
87
106
|
## Database tables
|
|
88
107
|
|
|
89
108
|
Run migrations from `hazo_collect/ddl/sqlite.sql` (SQLite) or `hazo_collect/ddl/postgres.sql` (PostgreSQL):
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
// src/contract-version.ts
|
|
2
|
+
var CONTRACT_VERSION = "1.0.0";
|
|
3
|
+
|
|
4
|
+
// src/schema/manifest.ts
|
|
5
|
+
import { z } from "zod";
|
|
6
|
+
import { fromZodValidation } from "hazo_core";
|
|
7
|
+
var RetrySchema = z.object({
|
|
8
|
+
max: z.number().int().min(0).default(3),
|
|
9
|
+
backoff: z.enum(["exponential", "linear", "constant"]).default("exponential"),
|
|
10
|
+
base_ms: z.number().int().min(0).default(1e3),
|
|
11
|
+
jitter: z.boolean().default(true)
|
|
12
|
+
});
|
|
13
|
+
var ManifestSchema = z.object({
|
|
14
|
+
name: z.string().min(1),
|
|
15
|
+
kind: z.enum(["source", "sink"]),
|
|
16
|
+
version: z.string().min(1),
|
|
17
|
+
runtime: z.enum(["node", "python"]),
|
|
18
|
+
entry: z.string().min(1),
|
|
19
|
+
schedule: z.string().optional(),
|
|
20
|
+
timezone: z.string().default("UTC"),
|
|
21
|
+
timeout_sec: z.number().int().min(1).default(600),
|
|
22
|
+
concurrency: z.number().int().min(1).default(1),
|
|
23
|
+
retry: RetrySchema.optional(),
|
|
24
|
+
inputs: z.record(z.string(), z.unknown()).optional(),
|
|
25
|
+
// JSON-Schema object
|
|
26
|
+
produces: z.array(z.string()).optional(),
|
|
27
|
+
consumes: z.array(z.string()).optional(),
|
|
28
|
+
secrets: z.array(z.string()).optional(),
|
|
29
|
+
idempotency_key: z.array(z.string()).min(1),
|
|
30
|
+
labels: z.record(z.string(), z.string()).optional()
|
|
31
|
+
});
|
|
32
|
+
function parseManifest(input) {
|
|
33
|
+
const result = ManifestSchema.safeParse(input);
|
|
34
|
+
if (!result.success) {
|
|
35
|
+
throw fromZodValidation(result.error, {
|
|
36
|
+
pkg: "hazo_collect",
|
|
37
|
+
code: "HAZO_COLLECT_INVALID_MANIFEST"
|
|
38
|
+
});
|
|
39
|
+
}
|
|
40
|
+
return result.data;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// src/schema/run-result.ts
|
|
44
|
+
import { z as z2 } from "zod";
|
|
45
|
+
import { fromZodValidation as fromZodValidation2 } from "hazo_core";
|
|
46
|
+
var RunErrorSchema = z2.object({
|
|
47
|
+
code: z2.string(),
|
|
48
|
+
message: z2.string(),
|
|
49
|
+
retryable: z2.boolean().optional(),
|
|
50
|
+
context: z2.unknown().optional()
|
|
51
|
+
});
|
|
52
|
+
var RunResultSchema = z2.object({
|
|
53
|
+
plugin: z2.string(),
|
|
54
|
+
run_id: z2.string(),
|
|
55
|
+
kind: z2.enum(["source", "sink"]),
|
|
56
|
+
started_at: z2.string(),
|
|
57
|
+
completed_at: z2.string(),
|
|
58
|
+
status: z2.enum(["success", "partial", "failed"]),
|
|
59
|
+
records_fetched: z2.number().int().min(0),
|
|
60
|
+
records_written: z2.number().int().min(0),
|
|
61
|
+
watermark: z2.string().optional(),
|
|
62
|
+
errors: z2.array(RunErrorSchema).default([]),
|
|
63
|
+
metrics: z2.record(z2.string(), z2.number()).optional(),
|
|
64
|
+
contract_version: z2.string().default(CONTRACT_VERSION)
|
|
65
|
+
});
|
|
66
|
+
function parseRunResult(input) {
|
|
67
|
+
const result = RunResultSchema.safeParse(input);
|
|
68
|
+
if (!result.success) {
|
|
69
|
+
throw fromZodValidation2(result.error, {
|
|
70
|
+
pkg: "hazo_collect",
|
|
71
|
+
code: "HAZO_COLLECT_INVALID_RUN_RESULT"
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
return result.data;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// src/validate/inputs.ts
|
|
78
|
+
import { Ajv } from "ajv";
|
|
79
|
+
import { HazoValidationError } from "hazo_core";
|
|
80
|
+
var ajv = new Ajv({ allErrors: true });
|
|
81
|
+
function validateInputs(manifest, payload) {
|
|
82
|
+
if (!manifest.inputs) {
|
|
83
|
+
return;
|
|
84
|
+
}
|
|
85
|
+
const schemaIsValid = ajv.validateSchema(manifest.inputs);
|
|
86
|
+
if (!schemaIsValid) {
|
|
87
|
+
const issues = (ajv.errors ?? []).map((e) => ({
|
|
88
|
+
path: [e.instancePath || e.schemaPath],
|
|
89
|
+
message: e.message ?? "invalid schema"
|
|
90
|
+
}));
|
|
91
|
+
throw new HazoValidationError({
|
|
92
|
+
code: "HAZO_COLLECT_MALFORMED_MANIFEST",
|
|
93
|
+
pkg: "hazo_collect",
|
|
94
|
+
message: `manifest "${manifest.name}" has an invalid inputs JSON-Schema`,
|
|
95
|
+
issues
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
let validate;
|
|
99
|
+
try {
|
|
100
|
+
validate = ajv.compile(manifest.inputs);
|
|
101
|
+
} catch (err) {
|
|
102
|
+
throw new HazoValidationError({
|
|
103
|
+
code: "HAZO_COLLECT_MALFORMED_MANIFEST",
|
|
104
|
+
pkg: "hazo_collect",
|
|
105
|
+
message: `manifest "${manifest.name}" inputs schema could not be compiled: ${String(err)}`,
|
|
106
|
+
issues: []
|
|
107
|
+
});
|
|
108
|
+
}
|
|
109
|
+
const valid = validate(payload);
|
|
110
|
+
if (!valid) {
|
|
111
|
+
const issues = (validate.errors ?? []).map((e) => ({
|
|
112
|
+
path: [e.instancePath],
|
|
113
|
+
message: e.message ?? "validation failed"
|
|
114
|
+
}));
|
|
115
|
+
throw new HazoValidationError({
|
|
116
|
+
code: "HAZO_COLLECT_INVALID_INPUTS",
|
|
117
|
+
pkg: "hazo_collect",
|
|
118
|
+
message: `inputs for plugin "${manifest.name}" failed validation`,
|
|
119
|
+
issues
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
export {
|
|
125
|
+
CONTRACT_VERSION,
|
|
126
|
+
ManifestSchema,
|
|
127
|
+
parseManifest,
|
|
128
|
+
RunResultSchema,
|
|
129
|
+
parseRunResult,
|
|
130
|
+
validateInputs
|
|
131
|
+
};
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// src/sdk/index.ts
|
|
2
|
+
import { HazoConfigError } from "hazo_core";
|
|
3
|
+
var COLLECTOR_REGISTRY_KEY = /* @__PURE__ */ Symbol.for("hazo_collect.collectors");
|
|
4
|
+
var SINK_REGISTRY_KEY = /* @__PURE__ */ Symbol.for("hazo_collect.sinks");
|
|
5
|
+
var _g = globalThis;
|
|
6
|
+
var _collectors = _g[COLLECTOR_REGISTRY_KEY] ?? (_g[COLLECTOR_REGISTRY_KEY] = /* @__PURE__ */ new Map());
|
|
7
|
+
var _sinks = _g[SINK_REGISTRY_KEY] ?? (_g[SINK_REGISTRY_KEY] = /* @__PURE__ */ new Map());
|
|
8
|
+
function defineCollector(def) {
|
|
9
|
+
if (_collectors.has(def.manifest.name)) {
|
|
10
|
+
throw new HazoConfigError({
|
|
11
|
+
code: "HAZO_COLLECT_DUPLICATE_COLLECTOR",
|
|
12
|
+
pkg: "hazo_collect",
|
|
13
|
+
message: `Collector "${def.manifest.name}" is already registered`
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
_collectors.set(def.manifest.name, def);
|
|
17
|
+
return def;
|
|
18
|
+
}
|
|
19
|
+
function getCollector(name) {
|
|
20
|
+
return _collectors.get(name);
|
|
21
|
+
}
|
|
22
|
+
function listCollectors() {
|
|
23
|
+
return Array.from(_collectors.values());
|
|
24
|
+
}
|
|
25
|
+
function defineSink(def) {
|
|
26
|
+
if (_sinks.has(def.manifest.name)) {
|
|
27
|
+
throw new HazoConfigError({
|
|
28
|
+
code: "HAZO_COLLECT_DUPLICATE_SINK",
|
|
29
|
+
pkg: "hazo_collect",
|
|
30
|
+
message: `Sink "${def.manifest.name}" is already registered`
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
_sinks.set(def.manifest.name, def);
|
|
34
|
+
return def;
|
|
35
|
+
}
|
|
36
|
+
function getSink(name) {
|
|
37
|
+
return _sinks.get(name);
|
|
38
|
+
}
|
|
39
|
+
function resetCollectorRegistry() {
|
|
40
|
+
_collectors.clear();
|
|
41
|
+
_sinks.clear();
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
export {
|
|
45
|
+
defineCollector,
|
|
46
|
+
getCollector,
|
|
47
|
+
listCollectors,
|
|
48
|
+
defineSink,
|
|
49
|
+
getSink,
|
|
50
|
+
resetCollectorRegistry
|
|
51
|
+
};
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { HazoCoreLogger } from 'hazo_core';
|
|
2
|
-
import {
|
|
2
|
+
import { a as ManifestInput, R as RunEnvelope } from './run-envelope-BX10F3lB.js';
|
|
3
3
|
import { DbResult, HazoConnectAdapter } from 'hazo_connect/server';
|
|
4
4
|
|
|
5
5
|
interface LandingRow {
|
|
@@ -44,11 +44,11 @@ type CollectorOutcome = {
|
|
|
44
44
|
metrics?: Record<string, number>;
|
|
45
45
|
};
|
|
46
46
|
type CollectorDefinition = {
|
|
47
|
-
manifest:
|
|
47
|
+
manifest: ManifestInput;
|
|
48
48
|
run(ctx: CollectorContext): Promise<CollectorOutcome>;
|
|
49
49
|
};
|
|
50
50
|
type SinkDefinition = {
|
|
51
|
-
manifest:
|
|
51
|
+
manifest: ManifestInput;
|
|
52
52
|
};
|
|
53
53
|
declare function defineCollector(def: CollectorDefinition): CollectorDefinition;
|
|
54
54
|
declare function getCollector(name: string): CollectorDefinition | undefined;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import {
|
|
2
|
-
export {
|
|
3
|
-
export { R as RunError, a as RunResult, b as RunResultSchema, p as parseRunResult } from './run-result-
|
|
1
|
+
import { a as ManifestInput } from './run-envelope-BX10F3lB.js';
|
|
2
|
+
export { M as Manifest, b as ManifestSchema, R as RunEnvelope, c as RunEnvelopeSchema, p as parseManifest, d as parseRunEnvelope } from './run-envelope-BX10F3lB.js';
|
|
3
|
+
export { R as RunError, a as RunResult, b as RunResultSchema, p as parseRunResult } from './run-result-yw8Iqipg.js';
|
|
4
4
|
import 'zod';
|
|
5
5
|
|
|
6
6
|
declare const CONTRACT_VERSION = "1.0.0";
|
|
7
7
|
|
|
8
|
-
declare function validateInputs(manifest:
|
|
8
|
+
declare function validateInputs(manifest: ManifestInput, payload: unknown): void;
|
|
9
9
|
|
|
10
|
-
export { CONTRACT_VERSION,
|
|
10
|
+
export { CONTRACT_VERSION, ManifestInput, validateInputs };
|
package/dist/index.js
CHANGED
|
@@ -1,152 +1,38 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import {
|
|
2
|
+
CONTRACT_VERSION,
|
|
3
|
+
ManifestSchema,
|
|
4
|
+
RunResultSchema,
|
|
5
|
+
parseManifest,
|
|
6
|
+
parseRunResult,
|
|
7
|
+
validateInputs
|
|
8
|
+
} from "./chunk-6RFFJN6X.js";
|
|
3
9
|
|
|
4
|
-
// src/schema/
|
|
10
|
+
// src/schema/run-envelope.ts
|
|
5
11
|
import { z } from "zod";
|
|
6
12
|
import { fromZodValidation } from "hazo_core";
|
|
7
|
-
var
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
})
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
version: z.string().min(1),
|
|
17
|
-
runtime: z.enum(["node", "python"]),
|
|
18
|
-
entry: z.string().min(1),
|
|
19
|
-
schedule: z.string().optional(),
|
|
20
|
-
timezone: z.string().default("UTC"),
|
|
21
|
-
timeout_sec: z.number().int().min(1).default(600),
|
|
22
|
-
concurrency: z.number().int().min(1).default(1),
|
|
23
|
-
retry: RetrySchema.optional(),
|
|
24
|
-
inputs: z.record(z.unknown()).optional(),
|
|
25
|
-
// JSON-Schema object
|
|
26
|
-
produces: z.array(z.string()).optional(),
|
|
27
|
-
consumes: z.array(z.string()).optional(),
|
|
28
|
-
secrets: z.array(z.string()).optional(),
|
|
29
|
-
idempotency_key: z.array(z.string()).min(1),
|
|
30
|
-
labels: z.record(z.string()).optional()
|
|
31
|
-
});
|
|
32
|
-
function parseManifest(input) {
|
|
33
|
-
const result = ManifestSchema.safeParse(input);
|
|
34
|
-
if (!result.success) {
|
|
35
|
-
throw fromZodValidation(result.error, {
|
|
36
|
-
pkg: "hazo_collect",
|
|
37
|
-
code: "HAZO_COLLECT_INVALID_MANIFEST"
|
|
38
|
-
});
|
|
39
|
-
}
|
|
40
|
-
return result.data;
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
// src/schema/run-result.ts
|
|
44
|
-
import { z as z2 } from "zod";
|
|
45
|
-
import { fromZodValidation as fromZodValidation2 } from "hazo_core";
|
|
46
|
-
var RunErrorSchema = z2.object({
|
|
47
|
-
code: z2.string(),
|
|
48
|
-
message: z2.string(),
|
|
49
|
-
retryable: z2.boolean().optional(),
|
|
50
|
-
context: z2.unknown().optional()
|
|
51
|
-
});
|
|
52
|
-
var RunResultSchema = z2.object({
|
|
53
|
-
plugin: z2.string(),
|
|
54
|
-
run_id: z2.string(),
|
|
55
|
-
kind: z2.enum(["source", "sink"]),
|
|
56
|
-
started_at: z2.string(),
|
|
57
|
-
completed_at: z2.string(),
|
|
58
|
-
status: z2.enum(["success", "partial", "failed"]),
|
|
59
|
-
records_fetched: z2.number().int().min(0),
|
|
60
|
-
records_written: z2.number().int().min(0),
|
|
61
|
-
watermark: z2.string().optional(),
|
|
62
|
-
errors: z2.array(RunErrorSchema).default([]),
|
|
63
|
-
metrics: z2.record(z2.number()).optional(),
|
|
64
|
-
contract_version: z2.string().default(CONTRACT_VERSION)
|
|
65
|
-
});
|
|
66
|
-
function parseRunResult(input) {
|
|
67
|
-
const result = RunResultSchema.safeParse(input);
|
|
68
|
-
if (!result.success) {
|
|
69
|
-
throw fromZodValidation2(result.error, {
|
|
70
|
-
pkg: "hazo_collect",
|
|
71
|
-
code: "HAZO_COLLECT_INVALID_RUN_RESULT"
|
|
72
|
-
});
|
|
73
|
-
}
|
|
74
|
-
return result.data;
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
// src/schema/run-envelope.ts
|
|
78
|
-
import { z as z3 } from "zod";
|
|
79
|
-
import { fromZodValidation as fromZodValidation3 } from "hazo_core";
|
|
80
|
-
var RunEnvelopeSchema = z3.object({
|
|
81
|
-
run_id: z3.string(),
|
|
82
|
-
correlation_id: z3.string(),
|
|
83
|
-
plugin: z3.string(),
|
|
84
|
-
inputs: z3.record(z3.unknown()).default({}),
|
|
85
|
-
secrets: z3.record(z3.string()).default({}),
|
|
86
|
-
window: z3.object({
|
|
87
|
-
since: z3.string().optional(),
|
|
88
|
-
until: z3.string().optional()
|
|
13
|
+
var RunEnvelopeSchema = z.object({
|
|
14
|
+
run_id: z.string(),
|
|
15
|
+
correlation_id: z.string(),
|
|
16
|
+
plugin: z.string(),
|
|
17
|
+
inputs: z.record(z.string(), z.unknown()).default({}),
|
|
18
|
+
secrets: z.record(z.string(), z.string()).default({}),
|
|
19
|
+
window: z.object({
|
|
20
|
+
since: z.string().optional(),
|
|
21
|
+
until: z.string().optional()
|
|
89
22
|
}).optional(),
|
|
90
|
-
attempt:
|
|
91
|
-
contract_version:
|
|
23
|
+
attempt: z.number().int().min(1).default(1),
|
|
24
|
+
contract_version: z.string().default(CONTRACT_VERSION)
|
|
92
25
|
});
|
|
93
26
|
function parseRunEnvelope(input) {
|
|
94
27
|
const result = RunEnvelopeSchema.safeParse(input);
|
|
95
28
|
if (!result.success) {
|
|
96
|
-
throw
|
|
29
|
+
throw fromZodValidation(result.error, {
|
|
97
30
|
pkg: "hazo_collect",
|
|
98
31
|
code: "HAZO_COLLECT_INVALID_RUN_ENVELOPE"
|
|
99
32
|
});
|
|
100
33
|
}
|
|
101
34
|
return result.data;
|
|
102
35
|
}
|
|
103
|
-
|
|
104
|
-
// src/validate/inputs.ts
|
|
105
|
-
import { Ajv } from "ajv";
|
|
106
|
-
import { HazoValidationError } from "hazo_core";
|
|
107
|
-
var ajv = new Ajv({ allErrors: true });
|
|
108
|
-
function validateInputs(manifest, payload) {
|
|
109
|
-
if (!manifest.inputs) {
|
|
110
|
-
return;
|
|
111
|
-
}
|
|
112
|
-
const schemaIsValid = ajv.validateSchema(manifest.inputs);
|
|
113
|
-
if (!schemaIsValid) {
|
|
114
|
-
const issues = (ajv.errors ?? []).map((e) => ({
|
|
115
|
-
path: [e.instancePath || e.schemaPath],
|
|
116
|
-
message: e.message ?? "invalid schema"
|
|
117
|
-
}));
|
|
118
|
-
throw new HazoValidationError({
|
|
119
|
-
code: "HAZO_COLLECT_MALFORMED_MANIFEST",
|
|
120
|
-
pkg: "hazo_collect",
|
|
121
|
-
message: `manifest "${manifest.name}" has an invalid inputs JSON-Schema`,
|
|
122
|
-
issues
|
|
123
|
-
});
|
|
124
|
-
}
|
|
125
|
-
let validate;
|
|
126
|
-
try {
|
|
127
|
-
validate = ajv.compile(manifest.inputs);
|
|
128
|
-
} catch (err) {
|
|
129
|
-
throw new HazoValidationError({
|
|
130
|
-
code: "HAZO_COLLECT_MALFORMED_MANIFEST",
|
|
131
|
-
pkg: "hazo_collect",
|
|
132
|
-
message: `manifest "${manifest.name}" inputs schema could not be compiled: ${String(err)}`,
|
|
133
|
-
issues: []
|
|
134
|
-
});
|
|
135
|
-
}
|
|
136
|
-
const valid = validate(payload);
|
|
137
|
-
if (!valid) {
|
|
138
|
-
const issues = (validate.errors ?? []).map((e) => ({
|
|
139
|
-
path: [e.instancePath],
|
|
140
|
-
message: e.message ?? "validation failed"
|
|
141
|
-
}));
|
|
142
|
-
throw new HazoValidationError({
|
|
143
|
-
code: "HAZO_COLLECT_INVALID_INPUTS",
|
|
144
|
-
pkg: "hazo_collect",
|
|
145
|
-
message: `inputs for plugin "${manifest.name}" failed validation`,
|
|
146
|
-
issues
|
|
147
|
-
});
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
36
|
export {
|
|
151
37
|
CONTRACT_VERSION,
|
|
152
38
|
ManifestSchema,
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
declare const ManifestSchema: z.ZodObject<{
|
|
4
|
+
name: z.ZodString;
|
|
5
|
+
kind: z.ZodEnum<{
|
|
6
|
+
source: "source";
|
|
7
|
+
sink: "sink";
|
|
8
|
+
}>;
|
|
9
|
+
version: z.ZodString;
|
|
10
|
+
runtime: z.ZodEnum<{
|
|
11
|
+
node: "node";
|
|
12
|
+
python: "python";
|
|
13
|
+
}>;
|
|
14
|
+
entry: z.ZodString;
|
|
15
|
+
schedule: z.ZodOptional<z.ZodString>;
|
|
16
|
+
timezone: z.ZodDefault<z.ZodString>;
|
|
17
|
+
timeout_sec: z.ZodDefault<z.ZodNumber>;
|
|
18
|
+
concurrency: z.ZodDefault<z.ZodNumber>;
|
|
19
|
+
retry: z.ZodOptional<z.ZodObject<{
|
|
20
|
+
max: z.ZodDefault<z.ZodNumber>;
|
|
21
|
+
backoff: z.ZodDefault<z.ZodEnum<{
|
|
22
|
+
exponential: "exponential";
|
|
23
|
+
linear: "linear";
|
|
24
|
+
constant: "constant";
|
|
25
|
+
}>>;
|
|
26
|
+
base_ms: z.ZodDefault<z.ZodNumber>;
|
|
27
|
+
jitter: z.ZodDefault<z.ZodBoolean>;
|
|
28
|
+
}, z.core.$strip>>;
|
|
29
|
+
inputs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
30
|
+
produces: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
31
|
+
consumes: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
32
|
+
secrets: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
33
|
+
idempotency_key: z.ZodArray<z.ZodString>;
|
|
34
|
+
labels: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
35
|
+
}, z.core.$strip>;
|
|
36
|
+
type Manifest = z.infer<typeof ManifestSchema>;
|
|
37
|
+
type ManifestInput = z.input<typeof ManifestSchema>;
|
|
38
|
+
declare function parseManifest(input: unknown): Manifest;
|
|
39
|
+
|
|
40
|
+
declare const RunEnvelopeSchema: z.ZodObject<{
|
|
41
|
+
run_id: z.ZodString;
|
|
42
|
+
correlation_id: z.ZodString;
|
|
43
|
+
plugin: z.ZodString;
|
|
44
|
+
inputs: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
45
|
+
secrets: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
46
|
+
window: z.ZodOptional<z.ZodObject<{
|
|
47
|
+
since: z.ZodOptional<z.ZodString>;
|
|
48
|
+
until: z.ZodOptional<z.ZodString>;
|
|
49
|
+
}, z.core.$strip>>;
|
|
50
|
+
attempt: z.ZodDefault<z.ZodNumber>;
|
|
51
|
+
contract_version: z.ZodDefault<z.ZodString>;
|
|
52
|
+
}, z.core.$strip>;
|
|
53
|
+
type RunEnvelope = z.infer<typeof RunEnvelopeSchema>;
|
|
54
|
+
declare function parseRunEnvelope(input: unknown): RunEnvelope;
|
|
55
|
+
|
|
56
|
+
export { type Manifest as M, type RunEnvelope as R, type ManifestInput as a, ManifestSchema as b, RunEnvelopeSchema as c, parseRunEnvelope as d, parseManifest as p };
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
declare const RunErrorSchema: z.ZodObject<{
|
|
4
|
+
code: z.ZodString;
|
|
5
|
+
message: z.ZodString;
|
|
6
|
+
retryable: z.ZodOptional<z.ZodBoolean>;
|
|
7
|
+
context: z.ZodOptional<z.ZodUnknown>;
|
|
8
|
+
}, z.core.$strip>;
|
|
9
|
+
declare const RunResultSchema: z.ZodObject<{
|
|
10
|
+
plugin: z.ZodString;
|
|
11
|
+
run_id: z.ZodString;
|
|
12
|
+
kind: z.ZodEnum<{
|
|
13
|
+
source: "source";
|
|
14
|
+
sink: "sink";
|
|
15
|
+
}>;
|
|
16
|
+
started_at: z.ZodString;
|
|
17
|
+
completed_at: z.ZodString;
|
|
18
|
+
status: z.ZodEnum<{
|
|
19
|
+
success: "success";
|
|
20
|
+
partial: "partial";
|
|
21
|
+
failed: "failed";
|
|
22
|
+
}>;
|
|
23
|
+
records_fetched: z.ZodNumber;
|
|
24
|
+
records_written: z.ZodNumber;
|
|
25
|
+
watermark: z.ZodOptional<z.ZodString>;
|
|
26
|
+
errors: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
27
|
+
code: z.ZodString;
|
|
28
|
+
message: z.ZodString;
|
|
29
|
+
retryable: z.ZodOptional<z.ZodBoolean>;
|
|
30
|
+
context: z.ZodOptional<z.ZodUnknown>;
|
|
31
|
+
}, z.core.$strip>>>;
|
|
32
|
+
metrics: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
|
|
33
|
+
contract_version: z.ZodDefault<z.ZodString>;
|
|
34
|
+
}, z.core.$strip>;
|
|
35
|
+
type RunResult = z.infer<typeof RunResultSchema>;
|
|
36
|
+
type RunError = z.infer<typeof RunErrorSchema>;
|
|
37
|
+
declare function parseRunResult(input: unknown): RunResult;
|
|
38
|
+
|
|
39
|
+
export { type RunError as R, type RunResult as a, RunResultSchema as b, parseRunResult as p };
|
package/dist/sdk/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import 'hazo_core';
|
|
2
|
-
import '../run-envelope-
|
|
3
|
-
export { a as CollectorContext, b as CollectorDefinition, c as CollectorOutcome, S as SinkDefinition, g as defineCollector, h as defineSink, i as getCollector, j as getSink, l as listCollectors, r as resetCollectorRegistry } from '../index-
|
|
2
|
+
import '../run-envelope-BX10F3lB.js';
|
|
3
|
+
export { a as CollectorContext, b as CollectorDefinition, c as CollectorOutcome, S as SinkDefinition, g as defineCollector, h as defineSink, i as getCollector, j as getSink, l as listCollectors, r as resetCollectorRegistry } from '../index-Dls93tTu.js';
|
|
4
4
|
import 'zod';
|
|
5
5
|
import 'hazo_connect/server';
|
package/dist/sdk/index.js
CHANGED
|
@@ -1,42 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
pkg: "hazo_collect",
|
|
10
|
-
message: `Collector "${def.manifest.name}" is already registered`
|
|
11
|
-
});
|
|
12
|
-
}
|
|
13
|
-
_collectors.set(def.manifest.name, def);
|
|
14
|
-
return def;
|
|
15
|
-
}
|
|
16
|
-
function getCollector(name) {
|
|
17
|
-
return _collectors.get(name);
|
|
18
|
-
}
|
|
19
|
-
function listCollectors() {
|
|
20
|
-
return Array.from(_collectors.values());
|
|
21
|
-
}
|
|
22
|
-
function defineSink(def) {
|
|
23
|
-
if (_sinks.has(def.manifest.name)) {
|
|
24
|
-
throw new HazoConfigError({
|
|
25
|
-
code: "HAZO_COLLECT_DUPLICATE_SINK",
|
|
26
|
-
pkg: "hazo_collect",
|
|
27
|
-
message: `Sink "${def.manifest.name}" is already registered`
|
|
28
|
-
});
|
|
29
|
-
}
|
|
30
|
-
_sinks.set(def.manifest.name, def);
|
|
31
|
-
return def;
|
|
32
|
-
}
|
|
33
|
-
function getSink(name) {
|
|
34
|
-
return _sinks.get(name);
|
|
35
|
-
}
|
|
36
|
-
function resetCollectorRegistry() {
|
|
37
|
-
_collectors.clear();
|
|
38
|
-
_sinks.clear();
|
|
39
|
-
}
|
|
1
|
+
import {
|
|
2
|
+
defineCollector,
|
|
3
|
+
defineSink,
|
|
4
|
+
getCollector,
|
|
5
|
+
getSink,
|
|
6
|
+
listCollectors,
|
|
7
|
+
resetCollectorRegistry
|
|
8
|
+
} from "../chunk-NNLJFKX5.js";
|
|
40
9
|
export {
|
|
41
10
|
defineCollector,
|
|
42
11
|
defineSink,
|
package/dist/server/index.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { b as CollectorDefinition, c as CollectorOutcome } from '../index-
|
|
2
|
-
export { C as CanonicalTarget, L as LandingRow, W as WriteAdapter, d as WriteArgs, e as WriteResult, f as createWriteAdapter } from '../index-
|
|
1
|
+
import { b as CollectorDefinition, c as CollectorOutcome } from '../index-Dls93tTu.js';
|
|
2
|
+
export { C as CanonicalTarget, L as LandingRow, W as WriteAdapter, d as WriteArgs, e as WriteResult, f as createWriteAdapter } from '../index-Dls93tTu.js';
|
|
3
3
|
import { HazoConnectAdapter, DbResult } from 'hazo_connect/server';
|
|
4
|
-
import { a as RunResult } from '../run-result-qW7bJEZ-.js';
|
|
5
|
-
import { M as Manifest, R as RunEnvelope } from '../run-envelope-COvdsleR.js';
|
|
4
|
+
import { a as RunResult } from '../run-result-yw8Iqipg.js';
|
|
5
|
+
import { a as ManifestInput, R as RunEnvelope } from '../run-envelope-BX10F3lB.js';
|
|
6
6
|
import 'hazo_core';
|
|
7
7
|
import 'zod';
|
|
8
8
|
|
|
@@ -14,7 +14,7 @@ interface DiscoverOptions {
|
|
|
14
14
|
declare function discover(opts: DiscoverOptions): Promise<Registry>;
|
|
15
15
|
|
|
16
16
|
interface RegistryEntry {
|
|
17
|
-
manifest: Manifest;
|
|
17
|
+
manifest: ManifestInput;
|
|
18
18
|
worker: CollectorDefinition;
|
|
19
19
|
}
|
|
20
20
|
interface Registry {
|
|
@@ -28,7 +28,7 @@ declare function createInMemoryRegistry(): Registry;
|
|
|
28
28
|
* Invalid manifests (parse failure) are quarantined: valid=0, quarantine_reason=error message.
|
|
29
29
|
*/
|
|
30
30
|
declare function persistRegistry(adapter: HazoConnectAdapter, entries: Array<{
|
|
31
|
-
manifest: Manifest;
|
|
31
|
+
manifest: ManifestInput;
|
|
32
32
|
source?: string;
|
|
33
33
|
}>, opts?: {
|
|
34
34
|
reseed?: boolean;
|
package/dist/server/index.js
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
import {
|
|
2
|
+
CONTRACT_VERSION,
|
|
3
|
+
RunResultSchema,
|
|
4
|
+
parseManifest,
|
|
5
|
+
validateInputs
|
|
6
|
+
} from "../chunk-6RFFJN6X.js";
|
|
7
|
+
import {
|
|
8
|
+
getCollector,
|
|
9
|
+
listCollectors
|
|
10
|
+
} from "../chunk-NNLJFKX5.js";
|
|
11
|
+
|
|
1
12
|
// src/server/write-adapter.ts
|
|
2
13
|
import { QueryBuilder, wrapResult } from "hazo_connect/server";
|
|
3
14
|
import { generateRequestId } from "hazo_core";
|
|
@@ -11,17 +22,16 @@ function createWriteAdapter(adapter) {
|
|
|
11
22
|
let landingWritten = 0;
|
|
12
23
|
let canonicalWritten = 0;
|
|
13
24
|
for (const row of landing) {
|
|
14
|
-
const payload = typeof row.payload === "string" ? row.payload : JSON.stringify(row.payload);
|
|
15
25
|
const record = {
|
|
16
26
|
id: generateRequestId().slice(4),
|
|
17
27
|
plugin,
|
|
18
28
|
run_id: row.run_id,
|
|
19
29
|
idempotency_key: row.idempotency_key,
|
|
20
|
-
payload,
|
|
30
|
+
payload: row.payload,
|
|
21
31
|
...row.window?.since != null ? { window_since: row.window.since } : {},
|
|
22
32
|
...row.window?.until != null ? { window_until: row.window.until } : {}
|
|
23
33
|
};
|
|
24
|
-
const qb = new QueryBuilder().from("hazo_collect_landing").onConflict(["plugin", "idempotency_key"]).doUpdate();
|
|
34
|
+
const qb = new QueryBuilder().from("hazo_collect_landing").onConflict(["plugin", "idempotency_key"]).doUpdate().jsonColumns(["payload"]);
|
|
25
35
|
await tx.query(qb, "POST", record);
|
|
26
36
|
landingWritten++;
|
|
27
37
|
}
|
|
@@ -43,80 +53,6 @@ function createWriteAdapter(adapter) {
|
|
|
43
53
|
import { QueryBuilder as QueryBuilder2, wrapResult as wrapResult2 } from "hazo_connect/server";
|
|
44
54
|
import { generateRequestId as generateRequestId2, getCorrelationId, withContext } from "hazo_core";
|
|
45
55
|
|
|
46
|
-
// src/validate/inputs.ts
|
|
47
|
-
import { Ajv } from "ajv";
|
|
48
|
-
import { HazoValidationError } from "hazo_core";
|
|
49
|
-
var ajv = new Ajv({ allErrors: true });
|
|
50
|
-
function validateInputs(manifest, payload) {
|
|
51
|
-
if (!manifest.inputs) {
|
|
52
|
-
return;
|
|
53
|
-
}
|
|
54
|
-
const schemaIsValid = ajv.validateSchema(manifest.inputs);
|
|
55
|
-
if (!schemaIsValid) {
|
|
56
|
-
const issues = (ajv.errors ?? []).map((e) => ({
|
|
57
|
-
path: [e.instancePath || e.schemaPath],
|
|
58
|
-
message: e.message ?? "invalid schema"
|
|
59
|
-
}));
|
|
60
|
-
throw new HazoValidationError({
|
|
61
|
-
code: "HAZO_COLLECT_MALFORMED_MANIFEST",
|
|
62
|
-
pkg: "hazo_collect",
|
|
63
|
-
message: `manifest "${manifest.name}" has an invalid inputs JSON-Schema`,
|
|
64
|
-
issues
|
|
65
|
-
});
|
|
66
|
-
}
|
|
67
|
-
let validate;
|
|
68
|
-
try {
|
|
69
|
-
validate = ajv.compile(manifest.inputs);
|
|
70
|
-
} catch (err) {
|
|
71
|
-
throw new HazoValidationError({
|
|
72
|
-
code: "HAZO_COLLECT_MALFORMED_MANIFEST",
|
|
73
|
-
pkg: "hazo_collect",
|
|
74
|
-
message: `manifest "${manifest.name}" inputs schema could not be compiled: ${String(err)}`,
|
|
75
|
-
issues: []
|
|
76
|
-
});
|
|
77
|
-
}
|
|
78
|
-
const valid = validate(payload);
|
|
79
|
-
if (!valid) {
|
|
80
|
-
const issues = (validate.errors ?? []).map((e) => ({
|
|
81
|
-
path: [e.instancePath],
|
|
82
|
-
message: e.message ?? "validation failed"
|
|
83
|
-
}));
|
|
84
|
-
throw new HazoValidationError({
|
|
85
|
-
code: "HAZO_COLLECT_INVALID_INPUTS",
|
|
86
|
-
pkg: "hazo_collect",
|
|
87
|
-
message: `inputs for plugin "${manifest.name}" failed validation`,
|
|
88
|
-
issues
|
|
89
|
-
});
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
// src/contract-version.ts
|
|
94
|
-
var CONTRACT_VERSION = "1.0.0";
|
|
95
|
-
|
|
96
|
-
// src/schema/run-result.ts
|
|
97
|
-
import { z } from "zod";
|
|
98
|
-
import { fromZodValidation } from "hazo_core";
|
|
99
|
-
var RunErrorSchema = z.object({
|
|
100
|
-
code: z.string(),
|
|
101
|
-
message: z.string(),
|
|
102
|
-
retryable: z.boolean().optional(),
|
|
103
|
-
context: z.unknown().optional()
|
|
104
|
-
});
|
|
105
|
-
var RunResultSchema = z.object({
|
|
106
|
-
plugin: z.string(),
|
|
107
|
-
run_id: z.string(),
|
|
108
|
-
kind: z.enum(["source", "sink"]),
|
|
109
|
-
started_at: z.string(),
|
|
110
|
-
completed_at: z.string(),
|
|
111
|
-
status: z.enum(["success", "partial", "failed"]),
|
|
112
|
-
records_fetched: z.number().int().min(0),
|
|
113
|
-
records_written: z.number().int().min(0),
|
|
114
|
-
watermark: z.string().optional(),
|
|
115
|
-
errors: z.array(RunErrorSchema).default([]),
|
|
116
|
-
metrics: z.record(z.number()).optional(),
|
|
117
|
-
contract_version: z.string().default(CONTRACT_VERSION)
|
|
118
|
-
});
|
|
119
|
-
|
|
120
56
|
// src/runtime/node.ts
|
|
121
57
|
import { createLogger } from "hazo_core";
|
|
122
58
|
|
|
@@ -230,22 +166,22 @@ async function insertRunRow(adapter, params) {
|
|
|
230
166
|
correlation_id: params.correlation_id,
|
|
231
167
|
records_fetched: 0,
|
|
232
168
|
records_written: 0,
|
|
233
|
-
errors:
|
|
169
|
+
errors: [],
|
|
234
170
|
contract_version: CONTRACT_VERSION
|
|
235
171
|
};
|
|
236
|
-
const qb = new QueryBuilder2().from("hazo_collect_plugin_runs");
|
|
172
|
+
const qb = new QueryBuilder2().from("hazo_collect_plugin_runs").jsonColumns(["errors"]);
|
|
237
173
|
await adapter.query(qb, "POST", row);
|
|
238
174
|
}
|
|
239
175
|
async function updateRunRow(adapter, run_id, result) {
|
|
240
|
-
const qb = new QueryBuilder2().from("hazo_collect_plugin_runs").where("run_id", "eq", run_id);
|
|
176
|
+
const qb = new QueryBuilder2().from("hazo_collect_plugin_runs").where("run_id", "eq", run_id).jsonColumns(["errors", "metrics"]);
|
|
241
177
|
await adapter.query(qb, "PATCH", {
|
|
242
178
|
status: result.status,
|
|
243
179
|
completed_at: result.completed_at,
|
|
244
180
|
records_fetched: result.records_fetched,
|
|
245
181
|
records_written: result.records_written,
|
|
246
182
|
watermark: result.watermark ?? null,
|
|
247
|
-
errors:
|
|
248
|
-
metrics: result.metrics
|
|
183
|
+
errors: result.errors,
|
|
184
|
+
metrics: result.metrics ?? null,
|
|
249
185
|
contract_version: result.contract_version ?? null
|
|
250
186
|
});
|
|
251
187
|
}
|
|
@@ -432,55 +368,6 @@ function createManager(opts) {
|
|
|
432
368
|
// src/registry/index.ts
|
|
433
369
|
import { QueryBuilder as QueryBuilder4, wrapResult as wrapResult3 } from "hazo_connect/server";
|
|
434
370
|
|
|
435
|
-
// src/schema/manifest.ts
|
|
436
|
-
import { z as z2 } from "zod";
|
|
437
|
-
import { fromZodValidation as fromZodValidation2 } from "hazo_core";
|
|
438
|
-
var RetrySchema = z2.object({
|
|
439
|
-
max: z2.number().int().min(0).default(3),
|
|
440
|
-
backoff: z2.enum(["exponential", "linear", "constant"]).default("exponential"),
|
|
441
|
-
base_ms: z2.number().int().min(0).default(1e3),
|
|
442
|
-
jitter: z2.boolean().default(true)
|
|
443
|
-
});
|
|
444
|
-
var ManifestSchema = z2.object({
|
|
445
|
-
name: z2.string().min(1),
|
|
446
|
-
kind: z2.enum(["source", "sink"]),
|
|
447
|
-
version: z2.string().min(1),
|
|
448
|
-
runtime: z2.enum(["node", "python"]),
|
|
449
|
-
entry: z2.string().min(1),
|
|
450
|
-
schedule: z2.string().optional(),
|
|
451
|
-
timezone: z2.string().default("UTC"),
|
|
452
|
-
timeout_sec: z2.number().int().min(1).default(600),
|
|
453
|
-
concurrency: z2.number().int().min(1).default(1),
|
|
454
|
-
retry: RetrySchema.optional(),
|
|
455
|
-
inputs: z2.record(z2.unknown()).optional(),
|
|
456
|
-
// JSON-Schema object
|
|
457
|
-
produces: z2.array(z2.string()).optional(),
|
|
458
|
-
consumes: z2.array(z2.string()).optional(),
|
|
459
|
-
secrets: z2.array(z2.string()).optional(),
|
|
460
|
-
idempotency_key: z2.array(z2.string()).min(1),
|
|
461
|
-
labels: z2.record(z2.string()).optional()
|
|
462
|
-
});
|
|
463
|
-
function parseManifest(input) {
|
|
464
|
-
const result = ManifestSchema.safeParse(input);
|
|
465
|
-
if (!result.success) {
|
|
466
|
-
throw fromZodValidation2(result.error, {
|
|
467
|
-
pkg: "hazo_collect",
|
|
468
|
-
code: "HAZO_COLLECT_INVALID_MANIFEST"
|
|
469
|
-
});
|
|
470
|
-
}
|
|
471
|
-
return result.data;
|
|
472
|
-
}
|
|
473
|
-
|
|
474
|
-
// src/sdk/index.ts
|
|
475
|
-
import { HazoConfigError } from "hazo_core";
|
|
476
|
-
var _collectors = /* @__PURE__ */ new Map();
|
|
477
|
-
function getCollector(name) {
|
|
478
|
-
return _collectors.get(name);
|
|
479
|
-
}
|
|
480
|
-
function listCollectors() {
|
|
481
|
-
return Array.from(_collectors.values());
|
|
482
|
-
}
|
|
483
|
-
|
|
484
371
|
// src/registry/discovery.ts
|
|
485
372
|
import { readFileSync, readdirSync } from "fs";
|
|
486
373
|
import { resolve } from "path";
|
|
@@ -572,12 +459,12 @@ async function persistRegistry(adapter, entries, opts) {
|
|
|
572
459
|
kind: entry.manifest.kind,
|
|
573
460
|
version: entry.manifest.version,
|
|
574
461
|
runtime: entry.manifest.runtime,
|
|
575
|
-
manifest: JSON.stringify(entry.manifest),
|
|
462
|
+
manifest: entry.manifest,
|
|
576
463
|
source: entry.source ?? "folder",
|
|
577
464
|
valid,
|
|
578
465
|
quarantine_reason: quarantineReason
|
|
579
466
|
};
|
|
580
|
-
const qb = opts?.reseed ? new QueryBuilder4().from("hazo_collect_plugin_registry").onConflict(["name"]).doUpdate() : new QueryBuilder4().from("hazo_collect_plugin_registry").onConflict(["name"]).doNothing();
|
|
467
|
+
const qb = opts?.reseed ? new QueryBuilder4().from("hazo_collect_plugin_registry").onConflict(["name"]).doUpdate().jsonColumns(["manifest"]) : new QueryBuilder4().from("hazo_collect_plugin_registry").onConflict(["name"]).doNothing().jsonColumns(["manifest"]);
|
|
581
468
|
await adapter.query(qb, "POST", row);
|
|
582
469
|
}
|
|
583
470
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hazo_collect",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.2.3",
|
|
4
4
|
"description": "Collector-manager engine for the Ocdata platform",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -8,11 +8,13 @@
|
|
|
8
8
|
"exports": {
|
|
9
9
|
".": {
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
11
|
-
"import": "./dist/index.js"
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"default": "./dist/index.js"
|
|
12
13
|
},
|
|
13
14
|
"./server": {
|
|
14
15
|
"types": "./dist/server/index.d.ts",
|
|
15
|
-
"import": "./dist/server/index.js"
|
|
16
|
+
"import": "./dist/server/index.js",
|
|
17
|
+
"default": "./dist/server/index.js"
|
|
16
18
|
},
|
|
17
19
|
"./sdk": {
|
|
18
20
|
"types": "./dist/sdk/index.d.ts",
|
|
@@ -37,16 +39,16 @@
|
|
|
37
39
|
"build:test-app": "npm run build && cd test-app && npm run build"
|
|
38
40
|
},
|
|
39
41
|
"dependencies": {
|
|
40
|
-
"zod": "^
|
|
42
|
+
"zod": "^4.1.12",
|
|
41
43
|
"ajv": "^8.17.1"
|
|
42
44
|
},
|
|
43
45
|
"peerDependencies": {
|
|
44
|
-
"hazo_core": "^1.
|
|
45
|
-
"hazo_connect": "^3.
|
|
46
|
+
"hazo_core": "^1.2.0",
|
|
47
|
+
"hazo_connect": "^3.8.0",
|
|
46
48
|
"hazo_secure": "^1.3.0",
|
|
47
49
|
"react": "^18.0.0 || ^19.0.0",
|
|
48
50
|
"react-dom": "^18.0.0 || ^19.0.0",
|
|
49
|
-
"hazo_ui": "^
|
|
51
|
+
"hazo_ui": "^4.0.0"
|
|
50
52
|
},
|
|
51
53
|
"peerDependenciesMeta": {
|
|
52
54
|
"hazo_secure": {
|
|
@@ -72,8 +74,8 @@
|
|
|
72
74
|
"@types/node": "^22.10.0",
|
|
73
75
|
"@types/react": "^19.0.0",
|
|
74
76
|
"@types/react-dom": "^19.0.0",
|
|
75
|
-
"hazo_core": "^1.
|
|
76
|
-
"hazo_connect": "^3.
|
|
77
|
+
"hazo_core": "^1.2.0",
|
|
78
|
+
"hazo_connect": "^3.8.0",
|
|
77
79
|
"hazo_secure": "^1.3.0"
|
|
78
80
|
},
|
|
79
81
|
"keywords": [],
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
|
|
3
|
-
declare const ManifestSchema: z.ZodObject<{
|
|
4
|
-
name: z.ZodString;
|
|
5
|
-
kind: z.ZodEnum<["source", "sink"]>;
|
|
6
|
-
version: z.ZodString;
|
|
7
|
-
runtime: z.ZodEnum<["node", "python"]>;
|
|
8
|
-
entry: z.ZodString;
|
|
9
|
-
schedule: z.ZodOptional<z.ZodString>;
|
|
10
|
-
timezone: z.ZodDefault<z.ZodString>;
|
|
11
|
-
timeout_sec: z.ZodDefault<z.ZodNumber>;
|
|
12
|
-
concurrency: z.ZodDefault<z.ZodNumber>;
|
|
13
|
-
retry: z.ZodOptional<z.ZodObject<{
|
|
14
|
-
max: z.ZodDefault<z.ZodNumber>;
|
|
15
|
-
backoff: z.ZodDefault<z.ZodEnum<["exponential", "linear", "constant"]>>;
|
|
16
|
-
base_ms: z.ZodDefault<z.ZodNumber>;
|
|
17
|
-
jitter: z.ZodDefault<z.ZodBoolean>;
|
|
18
|
-
}, "strip", z.ZodTypeAny, {
|
|
19
|
-
max: number;
|
|
20
|
-
backoff: "exponential" | "linear" | "constant";
|
|
21
|
-
base_ms: number;
|
|
22
|
-
jitter: boolean;
|
|
23
|
-
}, {
|
|
24
|
-
max?: number | undefined;
|
|
25
|
-
backoff?: "exponential" | "linear" | "constant" | undefined;
|
|
26
|
-
base_ms?: number | undefined;
|
|
27
|
-
jitter?: boolean | undefined;
|
|
28
|
-
}>>;
|
|
29
|
-
inputs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
30
|
-
produces: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
31
|
-
consumes: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
32
|
-
secrets: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
33
|
-
idempotency_key: z.ZodArray<z.ZodString, "many">;
|
|
34
|
-
labels: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
35
|
-
}, "strip", z.ZodTypeAny, {
|
|
36
|
-
name: string;
|
|
37
|
-
kind: "source" | "sink";
|
|
38
|
-
version: string;
|
|
39
|
-
runtime: "node" | "python";
|
|
40
|
-
entry: string;
|
|
41
|
-
timezone: string;
|
|
42
|
-
timeout_sec: number;
|
|
43
|
-
concurrency: number;
|
|
44
|
-
idempotency_key: string[];
|
|
45
|
-
schedule?: string | undefined;
|
|
46
|
-
retry?: {
|
|
47
|
-
max: number;
|
|
48
|
-
backoff: "exponential" | "linear" | "constant";
|
|
49
|
-
base_ms: number;
|
|
50
|
-
jitter: boolean;
|
|
51
|
-
} | undefined;
|
|
52
|
-
inputs?: Record<string, unknown> | undefined;
|
|
53
|
-
produces?: string[] | undefined;
|
|
54
|
-
consumes?: string[] | undefined;
|
|
55
|
-
secrets?: string[] | undefined;
|
|
56
|
-
labels?: Record<string, string> | undefined;
|
|
57
|
-
}, {
|
|
58
|
-
name: string;
|
|
59
|
-
kind: "source" | "sink";
|
|
60
|
-
version: string;
|
|
61
|
-
runtime: "node" | "python";
|
|
62
|
-
entry: string;
|
|
63
|
-
idempotency_key: string[];
|
|
64
|
-
schedule?: string | undefined;
|
|
65
|
-
timezone?: string | undefined;
|
|
66
|
-
timeout_sec?: number | undefined;
|
|
67
|
-
concurrency?: number | undefined;
|
|
68
|
-
retry?: {
|
|
69
|
-
max?: number | undefined;
|
|
70
|
-
backoff?: "exponential" | "linear" | "constant" | undefined;
|
|
71
|
-
base_ms?: number | undefined;
|
|
72
|
-
jitter?: boolean | undefined;
|
|
73
|
-
} | undefined;
|
|
74
|
-
inputs?: Record<string, unknown> | undefined;
|
|
75
|
-
produces?: string[] | undefined;
|
|
76
|
-
consumes?: string[] | undefined;
|
|
77
|
-
secrets?: string[] | undefined;
|
|
78
|
-
labels?: Record<string, string> | undefined;
|
|
79
|
-
}>;
|
|
80
|
-
type Manifest = z.infer<typeof ManifestSchema>;
|
|
81
|
-
declare function parseManifest(input: unknown): Manifest;
|
|
82
|
-
|
|
83
|
-
declare const RunEnvelopeSchema: z.ZodObject<{
|
|
84
|
-
run_id: z.ZodString;
|
|
85
|
-
correlation_id: z.ZodString;
|
|
86
|
-
plugin: z.ZodString;
|
|
87
|
-
inputs: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
88
|
-
secrets: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
89
|
-
window: z.ZodOptional<z.ZodObject<{
|
|
90
|
-
since: z.ZodOptional<z.ZodString>;
|
|
91
|
-
until: z.ZodOptional<z.ZodString>;
|
|
92
|
-
}, "strip", z.ZodTypeAny, {
|
|
93
|
-
since?: string | undefined;
|
|
94
|
-
until?: string | undefined;
|
|
95
|
-
}, {
|
|
96
|
-
since?: string | undefined;
|
|
97
|
-
until?: string | undefined;
|
|
98
|
-
}>>;
|
|
99
|
-
attempt: z.ZodDefault<z.ZodNumber>;
|
|
100
|
-
contract_version: z.ZodDefault<z.ZodString>;
|
|
101
|
-
}, "strip", z.ZodTypeAny, {
|
|
102
|
-
inputs: Record<string, unknown>;
|
|
103
|
-
secrets: Record<string, string>;
|
|
104
|
-
plugin: string;
|
|
105
|
-
run_id: string;
|
|
106
|
-
contract_version: string;
|
|
107
|
-
correlation_id: string;
|
|
108
|
-
attempt: number;
|
|
109
|
-
window?: {
|
|
110
|
-
since?: string | undefined;
|
|
111
|
-
until?: string | undefined;
|
|
112
|
-
} | undefined;
|
|
113
|
-
}, {
|
|
114
|
-
plugin: string;
|
|
115
|
-
run_id: string;
|
|
116
|
-
correlation_id: string;
|
|
117
|
-
inputs?: Record<string, unknown> | undefined;
|
|
118
|
-
secrets?: Record<string, string> | undefined;
|
|
119
|
-
contract_version?: string | undefined;
|
|
120
|
-
window?: {
|
|
121
|
-
since?: string | undefined;
|
|
122
|
-
until?: string | undefined;
|
|
123
|
-
} | undefined;
|
|
124
|
-
attempt?: number | undefined;
|
|
125
|
-
}>;
|
|
126
|
-
type RunEnvelope = z.infer<typeof RunEnvelopeSchema>;
|
|
127
|
-
declare function parseRunEnvelope(input: unknown): RunEnvelope;
|
|
128
|
-
|
|
129
|
-
export { type Manifest as M, type RunEnvelope as R, ManifestSchema as a, RunEnvelopeSchema as b, parseRunEnvelope as c, parseManifest as p };
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
|
|
3
|
-
declare const RunErrorSchema: z.ZodObject<{
|
|
4
|
-
code: z.ZodString;
|
|
5
|
-
message: z.ZodString;
|
|
6
|
-
retryable: z.ZodOptional<z.ZodBoolean>;
|
|
7
|
-
context: z.ZodOptional<z.ZodUnknown>;
|
|
8
|
-
}, "strip", z.ZodTypeAny, {
|
|
9
|
-
code: string;
|
|
10
|
-
message: string;
|
|
11
|
-
retryable?: boolean | undefined;
|
|
12
|
-
context?: unknown;
|
|
13
|
-
}, {
|
|
14
|
-
code: string;
|
|
15
|
-
message: string;
|
|
16
|
-
retryable?: boolean | undefined;
|
|
17
|
-
context?: unknown;
|
|
18
|
-
}>;
|
|
19
|
-
declare const RunResultSchema: z.ZodObject<{
|
|
20
|
-
plugin: z.ZodString;
|
|
21
|
-
run_id: z.ZodString;
|
|
22
|
-
kind: z.ZodEnum<["source", "sink"]>;
|
|
23
|
-
started_at: z.ZodString;
|
|
24
|
-
completed_at: z.ZodString;
|
|
25
|
-
status: z.ZodEnum<["success", "partial", "failed"]>;
|
|
26
|
-
records_fetched: z.ZodNumber;
|
|
27
|
-
records_written: z.ZodNumber;
|
|
28
|
-
watermark: z.ZodOptional<z.ZodString>;
|
|
29
|
-
errors: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
30
|
-
code: z.ZodString;
|
|
31
|
-
message: z.ZodString;
|
|
32
|
-
retryable: z.ZodOptional<z.ZodBoolean>;
|
|
33
|
-
context: z.ZodOptional<z.ZodUnknown>;
|
|
34
|
-
}, "strip", z.ZodTypeAny, {
|
|
35
|
-
code: string;
|
|
36
|
-
message: string;
|
|
37
|
-
retryable?: boolean | undefined;
|
|
38
|
-
context?: unknown;
|
|
39
|
-
}, {
|
|
40
|
-
code: string;
|
|
41
|
-
message: string;
|
|
42
|
-
retryable?: boolean | undefined;
|
|
43
|
-
context?: unknown;
|
|
44
|
-
}>, "many">>;
|
|
45
|
-
metrics: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
|
|
46
|
-
contract_version: z.ZodDefault<z.ZodString>;
|
|
47
|
-
}, "strip", z.ZodTypeAny, {
|
|
48
|
-
status: "success" | "partial" | "failed";
|
|
49
|
-
kind: "source" | "sink";
|
|
50
|
-
plugin: string;
|
|
51
|
-
run_id: string;
|
|
52
|
-
started_at: string;
|
|
53
|
-
completed_at: string;
|
|
54
|
-
records_fetched: number;
|
|
55
|
-
records_written: number;
|
|
56
|
-
errors: {
|
|
57
|
-
code: string;
|
|
58
|
-
message: string;
|
|
59
|
-
retryable?: boolean | undefined;
|
|
60
|
-
context?: unknown;
|
|
61
|
-
}[];
|
|
62
|
-
contract_version: string;
|
|
63
|
-
watermark?: string | undefined;
|
|
64
|
-
metrics?: Record<string, number> | undefined;
|
|
65
|
-
}, {
|
|
66
|
-
status: "success" | "partial" | "failed";
|
|
67
|
-
kind: "source" | "sink";
|
|
68
|
-
plugin: string;
|
|
69
|
-
run_id: string;
|
|
70
|
-
started_at: string;
|
|
71
|
-
completed_at: string;
|
|
72
|
-
records_fetched: number;
|
|
73
|
-
records_written: number;
|
|
74
|
-
watermark?: string | undefined;
|
|
75
|
-
errors?: {
|
|
76
|
-
code: string;
|
|
77
|
-
message: string;
|
|
78
|
-
retryable?: boolean | undefined;
|
|
79
|
-
context?: unknown;
|
|
80
|
-
}[] | undefined;
|
|
81
|
-
metrics?: Record<string, number> | undefined;
|
|
82
|
-
contract_version?: string | undefined;
|
|
83
|
-
}>;
|
|
84
|
-
type RunResult = z.infer<typeof RunResultSchema>;
|
|
85
|
-
type RunError = z.infer<typeof RunErrorSchema>;
|
|
86
|
-
declare function parseRunResult(input: unknown): RunResult;
|
|
87
|
-
|
|
88
|
-
export { type RunError as R, type RunResult as a, RunResultSchema as b, parseRunResult as p };
|