hazo_collect 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGE_LOG.md +44 -0
- package/README.md +104 -0
- package/SETUP_CHECKLIST.md +112 -0
- package/dist/ddl/postgres.sql +78 -0
- package/dist/ddl/sqlite.sql +75 -0
- package/dist/index-C47n5Xur.d.ts +60 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +159 -0
- package/dist/run-envelope-COvdsleR.d.ts +129 -0
- package/dist/run-result-qW7bJEZ-.d.ts +88 -0
- package/dist/sdk/index.d.ts +5 -0
- package/dist/sdk/index.js +47 -0
- package/dist/server/index.d.ts +66 -0
- package/dist/server/index.js +592 -0
- package/package.json +82 -0
package/CHANGE_LOG.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# hazo_collect — Change Log
|
|
2
|
+
|
|
3
|
+
## 0.2.1 — 2026-06-12
|
|
4
|
+
|
|
5
|
+
### Test-app improvements (no API changes)
|
|
6
|
+
|
|
7
|
+
- **Shadcn UI** — test-app upgraded from hand-rolled HTML to hazo_ui shadcn components: collapsible `SidebarLayout` (hazo_debug pattern), `Button`, `Table`, `Select`, `Card` across all pages
|
|
8
|
+
- **Registry page** — `Button` (outline/sm + sm), `Table` family, `StatusBadge` for valid column
|
|
9
|
+
- **Run page** — `Select` for plugin picker, `Card`/`CardContent` for RunResult, `Table` for recent runs, `StatusBadge` for status
|
|
10
|
+
- **status_badge.tsx** — new shared pill helper (success / partial / failed / valid / invalid → green / yellow / red)
|
|
11
|
+
- **Token wiring** — `globals.css` now imports `hazo_ui/dist/styles.css` at the correct depth and maps all shadcn HSL tokens via `@theme inline`
|
|
12
|
+
- **Fix** — `assertThrows` callbacks in autotest scenarios made synchronous (3 callsites)
|
|
13
|
+
|
|
14
|
+
## 0.2.0 — 2026-06-12
|
|
15
|
+
|
|
16
|
+
### Phase 1 Core: Manager Vertical + Secrets + SDK
|
|
17
|
+
|
|
18
|
+
First fully-functional vertical slice: discover a Node collector → run it in-process → write its
|
|
19
|
+
own data via the shared adapter → persist the RunResult scorecard, health, and watermark.
|
|
20
|
+
|
|
21
|
+
- **Contract versioning** — `src/contract-version.ts` exports `CONTRACT_VERSION = '1.0.0'`;
|
|
22
|
+
`contract_version` field stamped into `RunEnvelopeSchema` and `RunResultSchema`.
|
|
23
|
+
- **hazo_secure/secrets** — new `SecretsProvider` interface + `EnvSecretsProvider` /
|
|
24
|
+
`StaticSecretsProvider` / `LookupSecretsProvider` impls + `SecretsError`
|
|
25
|
+
(`HAZO_SECURE_SECRET_NOT_FOUND`). `hazo_secure` bumped to v1.3.0.
|
|
26
|
+
- **Node author SDK** (`hazo_collect/sdk`) — `defineCollector` / `defineSink` / `getCollector` /
|
|
27
|
+
`resetCollectorRegistry` Map-registry; `createHttpHelper` with exponential back-off + jitter.
|
|
28
|
+
- **Node runtime adapter** (`src/runtime/node.ts`) — `nodeRuntime` builds `CollectorContext`
|
|
29
|
+
(`ctx.write` / `ctx.http` / `ctx.log` / `ctx.envelope`) and dispatches to the worker; worker
|
|
30
|
+
throws are captured, never re-thrown.
|
|
31
|
+
- **Registry** (`src/registry/`) — `createInMemoryRegistry()` backed by SDK Map; `persistRegistry()`
|
|
32
|
+
upserts `hazo_collect_plugin_registry`, quarantines invalid manifests.
|
|
33
|
+
- **Manager** (`src/manager/`) — `createManager()` 12-step pipeline: registry → validate → secrets →
|
|
34
|
+
watermark → envelope → run row → dispatch → assemble → update → watermark advance (success only)
|
|
35
|
+
→ health upsert → return `DbResult<RunResult>`. Plugin-level failures never cause a rejected
|
|
36
|
+
promise. `hazo_secure` wired as optional peer dep `^1.3.0`.
|
|
37
|
+
- **Exports** — `./server` now re-exports `createManager`, `nodeRuntime`, `createInMemoryRegistry`,
|
|
38
|
+
`persistRegistry`.
|
|
39
|
+
- **Tests** — 68 tests green: 8 manager integration, 7 registry, 3 runtime, 17 SDK, 21 schema/
|
|
40
|
+
validation/write-adapter, 12 autotest scenarios (server + discover + runs_db).
|
|
41
|
+
|
|
42
|
+
## 0.1.0 — 2026-06-11
|
|
43
|
+
|
|
44
|
+
- Initial release.
|
package/README.md
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# hazo_collect
|
|
2
|
+
|
|
3
|
+
Collector-manager engine for the Ocdata platform. Handles plugin discovery, contract validation, execution orchestration, result persistence, and watermark/health tracking.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install hazo_collect
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Peer deps: `hazo_core`, `hazo_connect`. Optional: `hazo_secure` (for secrets injection).
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
### 1. Define a collector (SDK)
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
import { defineCollector } from 'hazo_collect/sdk';
|
|
19
|
+
import { parseManifest } from 'hazo_collect';
|
|
20
|
+
|
|
21
|
+
defineCollector({
|
|
22
|
+
manifest: parseManifest({
|
|
23
|
+
name: 'my_source',
|
|
24
|
+
kind: 'source',
|
|
25
|
+
version: '1.0.0',
|
|
26
|
+
runtime: 'node',
|
|
27
|
+
entry: './my_source.js',
|
|
28
|
+
idempotency_key: ['record_id'],
|
|
29
|
+
}),
|
|
30
|
+
async run(ctx) {
|
|
31
|
+
const rows = [{ record_id: 'r1', data: 'hello' }];
|
|
32
|
+
await ctx.write({ plugin: 'my_source', landing: rows.map(row => ({ idempotency_key: row.record_id, payload: row, run_id: ctx.envelope.run_id })) });
|
|
33
|
+
return { records_fetched: rows.length, records_written: rows.length };
|
|
34
|
+
},
|
|
35
|
+
});
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### 2. Create a manager and run the collector
|
|
39
|
+
|
|
40
|
+
```ts
|
|
41
|
+
import { createManager, nodeRuntime, createInMemoryRegistry } from 'hazo_collect/server';
|
|
42
|
+
|
|
43
|
+
const registry = createInMemoryRegistry();
const manager = createManager({
|
|
44
|
+
getHazoConnect: () => adapter, // hazo_connect adapter
|
|
45
|
+
getCollector: name => registry.get(name),
|
|
46
|
+
runtime: nodeRuntime,
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
const result = await manager.runNow({ plugin: 'my_source' });
|
|
50
|
+
// result.data: RunResult — status, records_fetched, records_written, errors, watermark
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## API
|
|
54
|
+
|
|
55
|
+
### `hazo_collect` (client-safe)
|
|
56
|
+
|
|
57
|
+
| Export | Description |
|
|
58
|
+
|---|---|
|
|
59
|
+
| `CONTRACT_VERSION` | Current contract version string (`"1.0.0"`) |
|
|
60
|
+
| `parseManifest(obj)` | Parse + validate a plugin manifest; throws `HazoValidationError` on invalid input |
|
|
61
|
+
| `parseRunResult(obj)` | Parse + validate a `RunResult` object |
|
|
62
|
+
| `parseRunEnvelope(obj)` | Parse + validate a `RunEnvelope`; applies defaults |
|
|
63
|
+
| `validateInputs(manifest, payload)` | Validate run inputs against a manifest's JSON schema |
|
|
64
|
+
|
|
65
|
+
### `hazo_collect/server`
|
|
66
|
+
|
|
67
|
+
| Export | Description |
|
|
68
|
+
|---|---|
|
|
69
|
+
| `createManager(opts)` | Main orchestration pipeline — resolve → validate → run → persist |
|
|
70
|
+
| `nodeRuntime` | Execution adapter for in-process Node.js collectors |
|
|
71
|
+
| `createInMemoryRegistry()` | In-memory registry backed by the SDK Map |
|
|
72
|
+
| `persistRegistry(adapter, manifests)` | Upsert manifests into `hazo_collect_plugin_registry`; quarantines invalid entries |
|
|
73
|
+
| `discover(opts)` | Scan directories for `manifest.json` files and return parsed entries |
|
|
74
|
+
| `createWriteAdapter(adapter)` | Create a `WriteAdapter` whose `write(args)` persists landing (and optional canonical) rows; exposed to collectors as `ctx.write` |
|
|
75
|
+
|
|
76
|
+
### `hazo_collect/sdk`
|
|
77
|
+
|
|
78
|
+
| Export | Description |
|
|
79
|
+
|---|---|
|
|
80
|
+
| `defineCollector(def)` | Register a collector in the module-level registry |
|
|
81
|
+
| `getCollector(name)` | Look up a registered collector by name |
|
|
82
|
+
| `listCollectors()` | Return all registered collectors |
|
|
83
|
+
| `defineSink(def)` | Register a sink (data destination) |
|
|
84
|
+
| `getSink(name)` | Look up a registered sink |
|
|
85
|
+
| `resetCollectorRegistry()` | Clear all registrations (test hygiene) |
|
|
86
|
+
|
|
87
|
+
## Database tables
|
|
88
|
+
|
|
89
|
+
Run migrations from `hazo_collect/ddl/sqlite.sql` (SQLite) or `hazo_collect/ddl/postgres.sql` (PostgreSQL):
|
|
90
|
+
|
|
91
|
+
- `hazo_collect_plugin_registry` — discovered manifest catalogue with validity + quarantine info
|
|
92
|
+
- `hazo_collect_plugin_runs` — per-run scorecard (status, records, contract_version, watermark)
|
|
93
|
+
- `hazo_collect_plugin_health` — rolling health state (consecutive failures, last status)
|
|
94
|
+
- `hazo_collect_watermarks` — per-plugin incremental watermark
- `hazo_collect_landing` — idempotent landing zone for collector output rows
|
|
95
|
+
|
|
96
|
+
## Tailwind v4 (`@source` required)
|
|
97
|
+
|
|
98
|
+
```css
|
|
99
|
+
@source "../node_modules/hazo_collect/dist";
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## License
|
|
103
|
+
|
|
104
|
+
MIT
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# hazo_collect — Setup Checklist
|
|
2
|
+
|
|
3
|
+
Follow these steps when adding hazo_collect to a consuming application.
|
|
4
|
+
|
|
5
|
+
## 1. Install the package
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install hazo_collect
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Peer dependencies (install separately):
|
|
12
|
+
```bash
|
|
13
|
+
npm install hazo_core hazo_connect
|
|
14
|
+
# Optional — needed for secrets injection:
|
|
15
|
+
npm install hazo_secure
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## 2. Run database migrations
|
|
19
|
+
|
|
20
|
+
Apply the DDL for your database engine:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# SQLite
|
|
24
|
+
sqlite3 your.db < node_modules/hazo_collect/dist/ddl/sqlite.sql
|
|
25
|
+
|
|
26
|
+
# PostgreSQL
|
|
27
|
+
psql "$DATABASE_URL" -f node_modules/hazo_collect/dist/ddl/postgres.sql
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Tables created:
|
|
31
|
+
- `hazo_collect_plugin_registry`
|
|
32
|
+
- `hazo_collect_plugin_runs`
|
|
33
|
+
- `hazo_collect_plugin_health`
|
|
34
|
+
- `hazo_collect_watermarks`
- `hazo_collect_landing`
|
|
35
|
+
|
|
36
|
+
## 3. Register your collectors
|
|
37
|
+
|
|
38
|
+
In your app's startup code (runs once, before the manager is called):
|
|
39
|
+
|
|
40
|
+
```ts
|
|
41
|
+
import { defineCollector } from 'hazo_collect/sdk';
|
|
42
|
+
import { parseManifest } from 'hazo_collect';
|
|
43
|
+
|
|
44
|
+
defineCollector({
|
|
45
|
+
manifest: parseManifest({
|
|
46
|
+
name: 'my_source',
|
|
47
|
+
kind: 'source',
|
|
48
|
+
version: '1.0.0',
|
|
49
|
+
runtime: 'node',
|
|
50
|
+
entry: './my_source.js',
|
|
51
|
+
idempotency_key: ['record_id'],
|
|
52
|
+
}),
|
|
53
|
+
async run(ctx) {
|
|
54
|
+
// ... fetch data and write via ctx.write(...)
|
|
55
|
+
return { records_fetched: 0, records_written: 0 };
|
|
56
|
+
},
|
|
57
|
+
});
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## 4. Wire up the manager
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
import { createManager, nodeRuntime, createInMemoryRegistry } from 'hazo_collect/server';
|
|
64
|
+
|
|
65
|
+
const registry = createInMemoryRegistry();
|
|
66
|
+
const manager = createManager({
|
|
67
|
+
getHazoConnect: () => hazoConnectAdapter,
|
|
68
|
+
getCollector: name => registry.get(name),
|
|
69
|
+
runtime: nodeRuntime,
|
|
70
|
+
});
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## 5. Expose API routes (Next.js example)
|
|
74
|
+
|
|
75
|
+
```ts
|
|
76
|
+
// app/api/collect/manager/run/route.ts
|
|
77
|
+
import { createRunHandler } from 'hazo_collect/server';
|
|
78
|
+
|
|
79
|
+
export const POST = createRunHandler({ getManager: () => manager });
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Or call directly:
|
|
83
|
+
|
|
84
|
+
```ts
|
|
85
|
+
const result = await manager.runNow({ plugin: 'my_source' });
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## 6. Optional: wire secrets (hazo_secure)
|
|
89
|
+
|
|
90
|
+
```ts
|
|
91
|
+
import { EnvSecretsProvider } from 'hazo_secure';
|
|
92
|
+
|
|
93
|
+
const manager = createManager({
|
|
94
|
+
...opts,
|
|
95
|
+
getSecretsProvider: () => new EnvSecretsProvider(),
|
|
96
|
+
});
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## 7. Tailwind v4: add `@source`
|
|
100
|
+
|
|
101
|
+
In your CSS entry point:
|
|
102
|
+
|
|
103
|
+
```css
|
|
104
|
+
@source "../node_modules/hazo_collect/dist";
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## 8. Verify
|
|
108
|
+
|
|
109
|
+
```ts
|
|
110
|
+
const result = await manager.runNow({ plugin: 'your_plugin_name' });
|
|
111
|
+
console.log(result.data.status); // 'success' | 'partial' | 'failed'
|
|
112
|
+
```
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
-- hazo_collect — PostgreSQL DDL
|
|
2
|
+
-- Consumers MAY copy this verbatim or wrap with their own role grants.
|
|
3
|
+
-- Idempotent — safe to re-run.
|
|
4
|
+
--
|
|
5
|
+
-- Tables:
|
|
6
|
+
-- hazo_collect_plugin_runs — one row per plugin run (RunResult scorecard)
|
|
7
|
+
-- hazo_collect_plugin_health — latest health snapshot per plugin
|
|
8
|
+
-- hazo_collect_plugin_registry — discovered manifest catalogue
|
|
9
|
+
-- hazo_collect_watermarks — per-plugin incremental watermark
|
|
10
|
+
-- hazo_collect_landing — raw as-fetched payloads (idempotent on plugin+key)
|
|
11
|
+
|
|
12
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_plugin_runs (
|
|
13
|
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
14
|
+
plugin TEXT NOT NULL,
|
|
15
|
+
run_id TEXT NOT NULL UNIQUE,
|
|
16
|
+
kind TEXT NOT NULL CHECK (kind IN ('source', 'sink')),
|
|
17
|
+
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
18
|
+
completed_at TIMESTAMPTZ,
|
|
19
|
+
status TEXT NOT NULL CHECK (status IN ('success', 'partial', 'failed', 'running')),
|
|
20
|
+
records_fetched INT NOT NULL DEFAULT 0,
|
|
21
|
+
records_written INT NOT NULL DEFAULT 0,
|
|
22
|
+
watermark TEXT,
|
|
23
|
+
errors JSONB NOT NULL DEFAULT '[]'::jsonb,
|
|
24
|
+
metrics JSONB,
|
|
25
|
+
correlation_id TEXT,
|
|
26
|
+
env TEXT,
|
|
27
|
+
contract_version TEXT
|
|
28
|
+
);
|
|
29
|
+
|
|
30
|
+
CREATE INDEX IF NOT EXISTS hazo_collect_plugin_runs_plugin_idx
|
|
31
|
+
ON hazo_collect_plugin_runs (plugin, started_at DESC);
|
|
32
|
+
|
|
33
|
+
-- NOTE(review): run_id is declared UNIQUE in the table above, and PostgreSQL
-- backs a UNIQUE constraint with its own index, so this extra index looks
-- redundant (sqlite.sql does not create it) — confirm before removing.
CREATE INDEX IF NOT EXISTS hazo_collect_plugin_runs_run_id_idx
|
|
34
|
+
ON hazo_collect_plugin_runs (run_id);
|
|
35
|
+
|
|
36
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_plugin_health (
|
|
37
|
+
plugin TEXT PRIMARY KEY,
|
|
38
|
+
last_success_at TIMESTAMPTZ,
|
|
39
|
+
last_run_at TIMESTAMPTZ,
|
|
40
|
+
last_status TEXT,
|
|
41
|
+
state TEXT NOT NULL DEFAULT 'ok' CHECK (state IN ('ok', 'stale', 'failed')),
|
|
42
|
+
consecutive_failures INT NOT NULL DEFAULT 0
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_plugin_registry (
|
|
46
|
+
name TEXT PRIMARY KEY,
|
|
47
|
+
kind TEXT NOT NULL CHECK (kind IN ('source', 'sink')),
|
|
48
|
+
version TEXT NOT NULL,
|
|
49
|
+
runtime TEXT NOT NULL CHECK (runtime IN ('node', 'python')),
|
|
50
|
+
manifest JSONB NOT NULL,
|
|
51
|
+
source TEXT NOT NULL DEFAULT 'folder' CHECK (source IN ('folder', 'db')),
|
|
52
|
+
discovered_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
53
|
+
valid BOOLEAN NOT NULL DEFAULT TRUE,
|
|
54
|
+
quarantine_reason TEXT
|
|
55
|
+
);
|
|
56
|
+
|
|
57
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_watermarks (
|
|
58
|
+
plugin TEXT PRIMARY KEY,
|
|
59
|
+
watermark TEXT NOT NULL,
|
|
60
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
61
|
+
);
|
|
62
|
+
|
|
63
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_landing (
|
|
64
|
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
65
|
+
plugin TEXT NOT NULL,
|
|
66
|
+
run_id TEXT NOT NULL,
|
|
67
|
+
idempotency_key TEXT NOT NULL,
|
|
68
|
+
payload JSONB NOT NULL,
|
|
69
|
+
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
70
|
+
window_since TEXT,
|
|
71
|
+
window_until TEXT,
|
|
72
|
+
CONSTRAINT hazo_collect_landing_plugin_key UNIQUE (plugin, idempotency_key)
|
|
73
|
+
);
|
|
74
|
+
|
|
75
|
+
CREATE INDEX IF NOT EXISTS hazo_collect_landing_plugin_idx
|
|
76
|
+
ON hazo_collect_landing (plugin, fetched_at DESC);
|
|
77
|
+
|
|
78
|
+
NOTIFY pgrst, 'reload schema';
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
-- hazo_collect — SQLite DDL
|
|
2
|
+
-- Idempotent — safe to re-run.
|
|
3
|
+
--
|
|
4
|
+
-- SQLite companion to postgres.sql.
|
|
5
|
+
-- Type mapping: UUID→TEXT, BOOLEAN→INTEGER(0/1), TIMESTAMPTZ→TEXT, JSONB→TEXT.
|
|
6
|
+
--
|
|
7
|
+
-- Tables:
|
|
8
|
+
-- hazo_collect_plugin_runs
|
|
9
|
+
-- hazo_collect_plugin_health
|
|
10
|
+
-- hazo_collect_plugin_registry
|
|
11
|
+
-- hazo_collect_watermarks
|
|
12
|
+
-- hazo_collect_landing
|
|
13
|
+
|
|
14
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_plugin_runs (
|
|
15
|
+
id TEXT PRIMARY KEY,
|
|
16
|
+
plugin TEXT NOT NULL,
|
|
17
|
+
run_id TEXT NOT NULL UNIQUE,
|
|
18
|
+
kind TEXT NOT NULL CHECK (kind IN ('source', 'sink')),
|
|
19
|
+
started_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
20
|
+
completed_at TEXT,
|
|
21
|
+
status TEXT NOT NULL CHECK (status IN ('success', 'partial', 'failed', 'running')),
|
|
22
|
+
records_fetched INTEGER NOT NULL DEFAULT 0,
|
|
23
|
+
records_written INTEGER NOT NULL DEFAULT 0,
|
|
24
|
+
watermark TEXT,
|
|
25
|
+
errors TEXT NOT NULL DEFAULT '[]',
|
|
26
|
+
metrics TEXT,
|
|
27
|
+
correlation_id TEXT,
|
|
28
|
+
env TEXT,
|
|
29
|
+
contract_version TEXT
|
|
30
|
+
);
|
|
31
|
+
|
|
32
|
+
CREATE INDEX IF NOT EXISTS hazo_collect_plugin_runs_plugin_idx
|
|
33
|
+
ON hazo_collect_plugin_runs (plugin, started_at DESC);
|
|
34
|
+
|
|
35
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_plugin_health (
|
|
36
|
+
plugin TEXT PRIMARY KEY,
|
|
37
|
+
last_success_at TEXT,
|
|
38
|
+
last_run_at TEXT,
|
|
39
|
+
last_status TEXT,
|
|
40
|
+
state TEXT NOT NULL DEFAULT 'ok' CHECK (state IN ('ok', 'stale', 'failed')),
|
|
41
|
+
consecutive_failures INTEGER NOT NULL DEFAULT 0
|
|
42
|
+
);
|
|
43
|
+
|
|
44
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_plugin_registry (
|
|
45
|
+
name TEXT PRIMARY KEY,
|
|
46
|
+
kind TEXT NOT NULL CHECK (kind IN ('source', 'sink')),
|
|
47
|
+
version TEXT NOT NULL,
|
|
48
|
+
runtime TEXT NOT NULL CHECK (runtime IN ('node', 'python')),
|
|
49
|
+
manifest TEXT NOT NULL,
|
|
50
|
+
source TEXT NOT NULL DEFAULT 'folder' CHECK (source IN ('folder', 'db')),
|
|
51
|
+
discovered_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
52
|
+
valid INTEGER NOT NULL DEFAULT 1,
|
|
53
|
+
quarantine_reason TEXT
|
|
54
|
+
);
|
|
55
|
+
|
|
56
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_watermarks (
|
|
57
|
+
plugin TEXT PRIMARY KEY,
|
|
58
|
+
watermark TEXT NOT NULL,
|
|
59
|
+
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
60
|
+
);
|
|
61
|
+
|
|
62
|
+
CREATE TABLE IF NOT EXISTS hazo_collect_landing (
|
|
63
|
+
id TEXT PRIMARY KEY,
|
|
64
|
+
plugin TEXT NOT NULL,
|
|
65
|
+
run_id TEXT NOT NULL,
|
|
66
|
+
idempotency_key TEXT NOT NULL,
|
|
67
|
+
payload TEXT NOT NULL,
|
|
68
|
+
fetched_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
69
|
+
window_since TEXT,
|
|
70
|
+
window_until TEXT,
|
|
71
|
+
CONSTRAINT hazo_collect_landing_plugin_key UNIQUE (plugin, idempotency_key)
|
|
72
|
+
);
|
|
73
|
+
|
|
74
|
+
CREATE INDEX IF NOT EXISTS hazo_collect_landing_plugin_idx
|
|
75
|
+
ON hazo_collect_landing (plugin, fetched_at DESC);
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { HazoCoreLogger } from 'hazo_core';
|
|
2
|
+
import { M as Manifest, R as RunEnvelope } from './run-envelope-COvdsleR.js';
|
|
3
|
+
import { DbResult, HazoConnectAdapter } from 'hazo_connect/server';
|
|
4
|
+
|
|
5
|
+
interface LandingRow {
|
|
6
|
+
idempotency_key: string;
|
|
7
|
+
payload: unknown;
|
|
8
|
+
run_id: string;
|
|
9
|
+
window?: {
|
|
10
|
+
since?: string;
|
|
11
|
+
until?: string;
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
interface CanonicalTarget {
|
|
15
|
+
table: string;
|
|
16
|
+
keyCols: string[];
|
|
17
|
+
rows: Record<string, unknown>[];
|
|
18
|
+
}
|
|
19
|
+
interface WriteArgs {
|
|
20
|
+
plugin: string;
|
|
21
|
+
landing: LandingRow[];
|
|
22
|
+
canonical?: CanonicalTarget;
|
|
23
|
+
}
|
|
24
|
+
interface WriteResult {
|
|
25
|
+
landingWritten: number;
|
|
26
|
+
canonicalWritten: number;
|
|
27
|
+
}
|
|
28
|
+
interface WriteAdapter {
|
|
29
|
+
write(args: WriteArgs): Promise<DbResult<WriteResult>>;
|
|
30
|
+
}
|
|
31
|
+
declare function createWriteAdapter(adapter: HazoConnectAdapter): WriteAdapter;
|
|
32
|
+
|
|
33
|
+
interface CollectorContext {
|
|
34
|
+
envelope: RunEnvelope;
|
|
35
|
+
write: WriteAdapter['write'];
|
|
36
|
+
http: (url: string | URL, init?: RequestInit) => Promise<Response>;
|
|
37
|
+
log: HazoCoreLogger;
|
|
38
|
+
}
|
|
39
|
+
type CollectorOutcome = {
|
|
40
|
+
status?: 'success' | 'partial' | 'failed';
|
|
41
|
+
records_fetched: number;
|
|
42
|
+
records_written: number;
|
|
43
|
+
watermark?: string;
|
|
44
|
+
metrics?: Record<string, number>;
|
|
45
|
+
};
|
|
46
|
+
type CollectorDefinition = {
|
|
47
|
+
manifest: Manifest;
|
|
48
|
+
run(ctx: CollectorContext): Promise<CollectorOutcome>;
|
|
49
|
+
};
|
|
50
|
+
type SinkDefinition = {
|
|
51
|
+
manifest: Manifest;
|
|
52
|
+
};
|
|
53
|
+
declare function defineCollector(def: CollectorDefinition): CollectorDefinition;
|
|
54
|
+
declare function getCollector(name: string): CollectorDefinition | undefined;
|
|
55
|
+
declare function listCollectors(): CollectorDefinition[];
|
|
56
|
+
declare function defineSink(def: SinkDefinition): SinkDefinition;
|
|
57
|
+
declare function getSink(name: string): SinkDefinition | undefined;
|
|
58
|
+
declare function resetCollectorRegistry(): void;
|
|
59
|
+
|
|
60
|
+
export { type CanonicalTarget as C, type LandingRow as L, type SinkDefinition as S, type WriteAdapter as W, type CollectorContext as a, type CollectorDefinition as b, type CollectorOutcome as c, type WriteArgs as d, type WriteResult as e, createWriteAdapter as f, defineCollector as g, defineSink as h, getCollector as i, getSink as j, listCollectors as l, resetCollectorRegistry as r };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { M as Manifest } from './run-envelope-COvdsleR.js';
|
|
2
|
+
export { a as ManifestSchema, R as RunEnvelope, b as RunEnvelopeSchema, p as parseManifest, c as parseRunEnvelope } from './run-envelope-COvdsleR.js';
|
|
3
|
+
export { R as RunError, a as RunResult, b as RunResultSchema, p as parseRunResult } from './run-result-qW7bJEZ-.js';
|
|
4
|
+
import 'zod';
|
|
5
|
+
|
|
6
|
+
declare const CONTRACT_VERSION = "1.0.0";
|
|
7
|
+
|
|
8
|
+
declare function validateInputs(manifest: Manifest, payload: unknown): void;
|
|
9
|
+
|
|
10
|
+
export { CONTRACT_VERSION, Manifest, validateInputs };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
// src/contract-version.ts
|
|
2
|
+
var CONTRACT_VERSION = "1.0.0";
|
|
3
|
+
|
|
4
|
+
// src/schema/manifest.ts
|
|
5
|
+
import { z } from "zod";
|
|
6
|
+
import { fromZodValidation } from "hazo_core";
|
|
7
|
+
/**
 * Retry policy that a manifest may declare under `retry`.
 * Every field carries a default, so a partially specified policy is filled in
 * during parsing.
 */
var RetrySchema = (() => {
  // Shared building block: an integer that may be zero but never negative.
  const nonNegativeInt = () => z.number().int().min(0);
  const backoffKinds = ["exponential", "linear", "constant"];

  return z.object({
    max: nonNegativeInt().default(3),
    backoff: z.enum(backoffKinds).default("exponential"),
    base_ms: nonNegativeInt().default(1000),
    jitter: z.boolean().default(true)
  });
})();
|
|
13
|
+
/**
 * Zod schema for a plugin manifest.
 * Required: name, kind, version, runtime, entry and a non-empty
 * `idempotency_key` list. `timezone`, `timeout_sec` and `concurrency` fall
 * back to defaults; everything else is optional.
 */
var ManifestSchema = (() => {
  // Small factories so each key gets its own schema instance.
  const requiredText = () => z.string().min(1);
  const positiveInt = () => z.number().int().min(1);
  const optionalNames = () => z.array(z.string()).optional();

  return z.object({
    name: requiredText(),
    kind: z.enum(["source", "sink"]),
    version: requiredText(),
    runtime: z.enum(["node", "python"]),
    entry: requiredText(),
    schedule: z.string().optional(),
    timezone: z.string().default("UTC"),
    timeout_sec: positiveInt().default(600),
    concurrency: positiveInt().default(1),
    retry: RetrySchema.optional(),
    // JSON-Schema object
    inputs: z.record(z.unknown()).optional(),
    produces: optionalNames(),
    consumes: optionalNames(),
    secrets: optionalNames(),
    idempotency_key: z.array(z.string()).min(1),
    labels: z.record(z.string()).optional()
  });
})();
|
|
32
|
+
/**
 * Parse and validate a plugin manifest.
 *
 * @param {unknown} input - Raw manifest object.
 * @returns {object} The parsed manifest as produced by `ManifestSchema`.
 * @throws The error built by `fromZodValidation` with code
 *   `HAZO_COLLECT_INVALID_MANIFEST` when `input` does not match the schema.
 */
function parseManifest(input) {
  const parsed = ManifestSchema.safeParse(input);
  if (parsed.success) {
    return parsed.data;
  }
  const errorMeta = {
    pkg: "hazo_collect",
    code: "HAZO_COLLECT_INVALID_MANIFEST"
  };
  throw fromZodValidation(parsed.error, errorMeta);
}
|
|
42
|
+
|
|
43
|
+
// src/schema/run-result.ts
|
|
44
|
+
import { z as z2 } from "zod";
|
|
45
|
+
import { fromZodValidation as fromZodValidation2 } from "hazo_core";
|
|
46
|
+
/**
 * Shape of a single error entry reported in a run result:
 * a `code` and `message`, plus optional `retryable` flag and free-form `context`.
 */
var RunErrorSchema = (() => {
  const text = () => z2.string();

  return z2.object({
    code: text(),
    message: text(),
    retryable: z2.boolean().optional(),
    context: z2.unknown().optional()
  });
})();
|
|
52
|
+
/**
 * Zod schema for a run result.
 * `errors` defaults to an empty list and `contract_version` defaults to the
 * module's `CONTRACT_VERSION`; record counters must be non-negative integers.
 */
var RunResultSchema = (() => {
  const text = () => z2.string();
  const count = () => z2.number().int().min(0);

  return z2.object({
    plugin: text(),
    run_id: text(),
    kind: z2.enum(["source", "sink"]),
    started_at: text(),
    completed_at: text(),
    status: z2.enum(["success", "partial", "failed"]),
    records_fetched: count(),
    records_written: count(),
    watermark: text().optional(),
    errors: z2.array(RunErrorSchema).default([]),
    metrics: z2.record(z2.number()).optional(),
    contract_version: text().default(CONTRACT_VERSION)
  });
})();
|
|
66
|
+
/**
 * Parse and validate a run result object.
 *
 * @param {unknown} input - Raw run result.
 * @returns {object} The parsed result as produced by `RunResultSchema`.
 * @throws The error built by `fromZodValidation2` with code
 *   `HAZO_COLLECT_INVALID_RUN_RESULT` when `input` does not match the schema.
 */
function parseRunResult(input) {
  const parsed = RunResultSchema.safeParse(input);
  if (parsed.success) {
    return parsed.data;
  }
  const errorMeta = {
    pkg: "hazo_collect",
    code: "HAZO_COLLECT_INVALID_RUN_RESULT"
  };
  throw fromZodValidation2(parsed.error, errorMeta);
}
|
|
76
|
+
|
|
77
|
+
// src/schema/run-envelope.ts
|
|
78
|
+
import { z as z3 } from "zod";
|
|
79
|
+
import { fromZodValidation as fromZodValidation3 } from "hazo_core";
|
|
80
|
+
/**
 * Zod schema for the envelope handed to a run.
 * `inputs` and `secrets` default to empty objects, `attempt` defaults to 1 and
 * `contract_version` defaults to the module's `CONTRACT_VERSION`.
 */
var RunEnvelopeSchema = (() => {
  const text = () => z3.string();
  // Optional time window; either bound may be omitted.
  const windowBounds = z3.object({
    since: text().optional(),
    until: text().optional()
  });

  return z3.object({
    run_id: text(),
    correlation_id: text(),
    plugin: text(),
    inputs: z3.record(z3.unknown()).default({}),
    secrets: z3.record(text()).default({}),
    window: windowBounds.optional(),
    attempt: z3.number().int().min(1).default(1),
    contract_version: text().default(CONTRACT_VERSION)
  });
})();
|
|
93
|
+
/**
 * Parse and validate a run envelope, applying the schema's defaults.
 *
 * @param {unknown} input - Raw run envelope.
 * @returns {object} The parsed envelope as produced by `RunEnvelopeSchema`.
 * @throws The error built by `fromZodValidation3` with code
 *   `HAZO_COLLECT_INVALID_RUN_ENVELOPE` when `input` does not match the schema.
 */
function parseRunEnvelope(input) {
  const parsed = RunEnvelopeSchema.safeParse(input);
  if (parsed.success) {
    return parsed.data;
  }
  const errorMeta = {
    pkg: "hazo_collect",
    code: "HAZO_COLLECT_INVALID_RUN_ENVELOPE"
  };
  throw fromZodValidation3(parsed.error, errorMeta);
}
|
|
103
|
+
|
|
104
|
+
// src/validate/inputs.ts
|
|
105
|
+
import { Ajv } from "ajv";
|
|
106
|
+
import { HazoValidationError } from "hazo_core";
|
|
107
|
+
var ajv = new Ajv({ allErrors: true });
|
|
108
|
+
/**
 * Validate a run's input payload against the JSON-Schema declared in
 * `manifest.inputs`.
 *
 * A manifest without an `inputs` schema accepts any payload. Otherwise the
 * schema is checked against its meta-schema, compiled, and then applied to
 * the payload.
 *
 * @param {object} manifest - Parsed plugin manifest; `name` is used in error
 *   messages and the optional `inputs` is the JSON-Schema to enforce.
 * @param {unknown} payload - Candidate run inputs.
 * @returns {void}
 * @throws {HazoValidationError} `HAZO_COLLECT_MALFORMED_MANIFEST` when the
 *   schema itself is unusable (fails meta-validation, or cannot be validated
 *   or compiled); `HAZO_COLLECT_INVALID_INPUTS` when the payload does not
 *   satisfy the schema.
 */
function validateInputs(manifest, payload) {
  const schema = manifest.inputs;
  if (!schema) {
    return;
  }

  let schemaIsValid;
  try {
    // validateSchema() does not only return false: it throws for a non-string
    // `$schema` or a `$schema` naming a meta-schema this Ajv instance does not
    // know. Report that as a typed manifest error instead of a raw Error.
    schemaIsValid = ajv.validateSchema(schema);
  } catch (err) {
    throw new HazoValidationError({
      code: "HAZO_COLLECT_MALFORMED_MANIFEST",
      pkg: "hazo_collect",
      message: `manifest "${manifest.name}" inputs schema could not be validated: ${String(err)}`,
      issues: []
    });
  }

  if (!schemaIsValid) {
    // Meta-validation errors live on the Ajv instance, not on a validator.
    const issues = (ajv.errors ?? []).map((e) => ({
      path: [e.instancePath || e.schemaPath],
      message: e.message ?? "invalid schema"
    }));
    throw new HazoValidationError({
      code: "HAZO_COLLECT_MALFORMED_MANIFEST",
      pkg: "hazo_collect",
      message: `manifest "${manifest.name}" has an invalid inputs JSON-Schema`,
      issues
    });
  }

  let validate;
  try {
    validate = ajv.compile(schema);
  } catch (err) {
    throw new HazoValidationError({
      code: "HAZO_COLLECT_MALFORMED_MANIFEST",
      pkg: "hazo_collect",
      message: `manifest "${manifest.name}" inputs schema could not be compiled: ${String(err)}`,
      issues: []
    });
  }

  const valid = validate(payload);
  if (!valid) {
    // Payload errors are attached to the compiled validator function.
    const issues = (validate.errors ?? []).map((e) => ({
      path: [e.instancePath],
      message: e.message ?? "validation failed"
    }));
    throw new HazoValidationError({
      code: "HAZO_COLLECT_INVALID_INPUTS",
      pkg: "hazo_collect",
      message: `inputs for plugin "${manifest.name}" failed validation`,
      issues
    });
  }
}
|
|
150
|
+
export {
|
|
151
|
+
CONTRACT_VERSION,
|
|
152
|
+
ManifestSchema,
|
|
153
|
+
RunEnvelopeSchema,
|
|
154
|
+
RunResultSchema,
|
|
155
|
+
parseManifest,
|
|
156
|
+
parseRunEnvelope,
|
|
157
|
+
parseRunResult,
|
|
158
|
+
validateInputs
|
|
159
|
+
};
|