hazo_collect 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGE_LOG.md +44 -0
- package/README.md +104 -0
- package/SETUP_CHECKLIST.md +112 -0
- package/dist/ddl/postgres.sql +78 -0
- package/dist/ddl/sqlite.sql +75 -0
- package/dist/index-C47n5Xur.d.ts +60 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +159 -0
- package/dist/run-envelope-COvdsleR.d.ts +129 -0
- package/dist/run-result-qW7bJEZ-.d.ts +88 -0
- package/dist/sdk/index.d.ts +5 -0
- package/dist/sdk/index.js +47 -0
- package/dist/server/index.d.ts +66 -0
- package/dist/server/index.js +592 -0
- package/package.json +82 -0
|
@@ -0,0 +1,592 @@
|
|
|
1
|
+
// src/server/write-adapter.ts
|
|
2
|
+
import { QueryBuilder, wrapResult } from "hazo_connect/server";
|
|
3
|
+
import { generateRequestId } from "hazo_core";
|
|
4
|
+
/**
 * Wraps a database adapter in the `write()` helper that collectors receive.
 *
 * `write()` upserts every landing row into `hazo_collect_landing` (conflict
 * target: `plugin` + `idempotency_key`) and, when `canonical` is supplied,
 * upserts each canonical row into `canonical.table` using `canonical.keyCols`
 * as the conflict target.
 *
 * @param {object} adapter - Object exposing `query(qb, method, row)` and,
 *   optionally, `transaction(fn)`.
 * @returns {{ write: Function }} Object whose `write({ plugin, landing, canonical })`
 *   returns whatever `wrapResult` produces around
 *   `{ landingWritten, canonicalWritten }`.
 */
function createWriteAdapter(adapter) {
  // Use the adapter's transaction when it offers one; otherwise the callback
  // runs directly against the adapter, so the writes are not grouped.
  const runInTransaction = adapter.transaction
    ? (fn) => adapter.transaction(fn)
    : (fn) => fn(adapter);

  return {
    async write({ plugin, landing, canonical }) {
      return wrapResult(async () => {
        // Treat a missing `landing` list or `canonical.rows` as empty instead of
        // failing with "... is not iterable".
        const landingRows = landing ?? [];
        const canonicalRows = canonical?.rows ?? [];

        return runInTransaction(async (tx) => {
          let landingWritten = 0;
          let canonicalWritten = 0;

          for (const row of landingRows) {
            // Payloads are stored as text: strings pass through, anything else is serialized.
            const payload = typeof row.payload === "string" ? row.payload : JSON.stringify(row.payload);
            const record = {
              id: generateRequestId().slice(4),
              plugin,
              run_id: row.run_id,
              idempotency_key: row.idempotency_key,
              payload,
              // Window bounds are only written when present (neither null nor undefined).
              ...(row.window?.since != null ? { window_since: row.window.since } : {}),
              ...(row.window?.until != null ? { window_until: row.window.until } : {})
            };
            const qb = new QueryBuilder()
              .from("hazo_collect_landing")
              .onConflict(["plugin", "idempotency_key"])
              .doUpdate();
            await tx.query(qb, "POST", record);
            landingWritten++;
          }

          for (const row of canonicalRows) {
            const qb = new QueryBuilder()
              .from(canonical.table)
              .onConflict(canonical.keyCols)
              .doUpdate();
            await tx.query(qb, "POST", row);
            canonicalWritten++;
          }

          return { landingWritten, canonicalWritten };
        });
      });
    }
  };
}
|
|
41
|
+
|
|
42
|
+
// src/manager/index.ts
|
|
43
|
+
import { QueryBuilder as QueryBuilder2, wrapResult as wrapResult2 } from "hazo_connect/server";
|
|
44
|
+
import { generateRequestId as generateRequestId2, getCorrelationId, withContext } from "hazo_core";
|
|
45
|
+
|
|
46
|
+
// src/validate/inputs.ts
|
|
47
|
+
import { Ajv } from "ajv";
|
|
48
|
+
import { HazoValidationError } from "hazo_core";
|
|
49
|
+
var ajv = new Ajv({ allErrors: true });
|
|
50
|
+
/**
 * Validates a run's `inputs` payload against the manifest's `inputs` JSON-Schema.
 *
 * Does nothing when the manifest declares no `inputs` schema.
 *
 * @param {object} manifest - Plugin manifest; `manifest.inputs` is the JSON-Schema
 *   and `manifest.name` is used in error messages.
 * @param {unknown} payload - The inputs to validate.
 * @throws {HazoValidationError} `HAZO_COLLECT_MALFORMED_MANIFEST` when the schema
 *   itself is invalid or cannot be compiled; `HAZO_COLLECT_INVALID_INPUTS` when
 *   the payload does not satisfy the schema.
 */
function validateInputs(manifest, payload) {
  const schema = manifest.inputs;
  if (!schema) {
    return;
  }
  try {
    const schemaIsValid = ajv.validateSchema(schema);
    if (!schemaIsValid) {
      const issues = (ajv.errors ?? []).map((e) => ({
        path: [e.instancePath || e.schemaPath],
        message: e.message ?? "invalid schema"
      }));
      throw new HazoValidationError({
        code: "HAZO_COLLECT_MALFORMED_MANIFEST",
        pkg: "hazo_collect",
        message: `manifest "${manifest.name}" has an invalid inputs JSON-Schema`,
        issues
      });
    }
    let validate;
    try {
      validate = ajv.compile(schema);
    } catch (err) {
      throw new HazoValidationError({
        code: "HAZO_COLLECT_MALFORMED_MANIFEST",
        pkg: "hazo_collect",
        message: `manifest "${manifest.name}" inputs schema could not be compiled: ${String(err)}`,
        issues: []
      });
    }
    const valid = validate(payload);
    if (!valid) {
      const issues = (validate.errors ?? []).map((e) => ({
        path: [e.instancePath],
        message: e.message ?? "validation failed"
      }));
      throw new HazoValidationError({
        code: "HAZO_COLLECT_INVALID_INPUTS",
        pkg: "hazo_collect",
        message: `inputs for plugin "${manifest.name}" failed validation`,
        issues
      });
    }
  } finally {
    // The Ajv instance is shared module state. It remembers each compiled schema
    // object and registers its `$id`; compiling a different object that carries
    // the same `$id` (e.g. a manifest re-parsed by a registry refresh) throws
    // "schema with key or id ... already exists", and the cache grows with every
    // distinct object. Releasing the schema keeps each call independent, at the
    // cost of recompiling on the next call.
    if (typeof schema === "object") {
      ajv.removeSchema(schema);
    }
  }
}
|
|
92
|
+
|
|
93
|
+
// src/contract-version.ts
|
|
94
|
+
var CONTRACT_VERSION = "1.0.0";
|
|
95
|
+
|
|
96
|
+
// src/schema/run-result.ts
|
|
97
|
+
import { z } from "zod";
|
|
98
|
+
import { fromZodValidation } from "hazo_core";
|
|
99
|
+
// Shape of one entry in a run result's `errors` array.
var RunErrorSchema = z.object({
  // Machine-readable error code.
  code: z.string(),
  // Human-readable description.
  message: z.string(),
  // Optional retry hint.
  retryable: z.boolean().optional(),
  // Optional free-form extra detail; not validated.
  context: z.unknown().optional()
});
|
|
105
|
+
// Record counters must be integers >= 0.
var NonNegativeCountSchema = z.number().int().min(0);

// Shape of the result produced for every run, successful or not.
var RunResultSchema = z.object({
  plugin: z.string(),
  run_id: z.string(),
  kind: z.enum(["source", "sink"]),
  started_at: z.string(),
  completed_at: z.string(),
  status: z.enum(["success", "partial", "failed"]),
  records_fetched: NonNegativeCountSchema,
  records_written: NonNegativeCountSchema,
  watermark: z.string().optional(),
  // Defaults to an empty list when omitted.
  errors: z.array(RunErrorSchema).default([]),
  // Optional map of metric name -> number.
  metrics: z.record(z.number()).optional(),
  // Defaults to the contract version of this build when omitted.
  contract_version: z.string().default(CONTRACT_VERSION)
});
|
|
119
|
+
|
|
120
|
+
// src/runtime/node.ts
|
|
121
|
+
import { createLogger } from "hazo_core";
|
|
122
|
+
|
|
123
|
+
// src/sdk/http.ts
|
|
124
|
+
import { fetchWithRequestId, sleep } from "hazo_core";
|
|
125
|
+
// Retry policy of the HTTP helper: up to MAX_RETRIES re-sends after the first
// attempt; the wait before re-send k is BASE_DELAY_MS * 2^(k-1) plus up to
// 100 ms of random jitter.
var MAX_RETRIES = 3;
var BASE_DELAY_MS = 500;

/**
 * Releases the body of a response that is about to be dropped.
 * Best-effort: a missing, locked, or non-standard body must never turn a retry
 * into a failure, so errors from `cancel()` are deliberately ignored.
 */
async function discardBody(response) {
  try {
    await response.body?.cancel();
  } catch {
    // Intentionally ignored; see the function comment.
  }
}

/**
 * Builds the `http(url, init)` helper handed to collectors.
 *
 * The helper retries when the fetch function throws (unless the thrown value
 * has `retryable === false`) and when the response status is 429 or >= 500.
 * Any other response is returned as-is. After the last attempt the last
 * response is returned, or the last error is rethrown.
 *
 * @param {string} _requestId - Currently unused.
 * @param {Function} [fetchFn=fetchWithRequestId] - `(url, init) => Promise<Response>`.
 * @returns {(url: any, init?: any) => Promise<any>} The retrying fetch helper.
 */
function createHttpHelper(_requestId, fetchFn = fetchWithRequestId) {
  return async (url, init) => {
    // Every path on the final attempt returns or throws, so the loop needs no
    // upper bound of its own.
    for (let attempt = 0; ; attempt++) {
      if (attempt > 0) {
        const delay = BASE_DELAY_MS * Math.pow(2, attempt - 1) + Math.random() * 100;
        await sleep(delay);
      }
      const isLastAttempt = attempt === MAX_RETRIES;

      let response;
      try {
        response = await fetchFn(url, init);
      } catch (err) {
        const explicitlyFatal =
          err && typeof err === "object" && "retryable" in err && err.retryable === false;
        if (explicitlyFatal || isLastAttempt) {
          throw err;
        }
        continue;
      }

      const shouldRetry = !response.ok && (response.status === 429 || response.status >= 500);
      if (!shouldRetry || isLastAttempt) {
        return response;
      }
      // This response is being dropped in favour of a retry; release its body so
      // the underlying connection is not held until garbage collection.
      await discardBody(response);
    }
  };
}
|
|
154
|
+
|
|
155
|
+
// src/runtime/node.ts
|
|
156
|
+
/**
 * Runtime that executes a collector definition in the current process.
 *
 * `run(def, envelope, adapter)` builds the collector context (`envelope`,
 * `write`, `http`, `log`) and calls `def.run(ctx)`. It never throws for a
 * collector failure: it resolves to `{ ok: true, outcome }` or
 * `{ ok: false, error }`.
 */
var nodeRuntime = {
  async run(def, envelope, adapter) {
    const writer = createWriteAdapter(adapter);
    const logger = createLogger("hazo_collect").child({
      collector: def.manifest.name,
      runId: envelope.run_id
    });
    const httpHelper = createHttpHelper(envelope.run_id);

    const ctx = {
      envelope,
      write: (...args) => writer.write(...args),
      http: httpHelper,
      log: logger
    };

    try {
      return { ok: true, outcome: await def.run(ctx) };
    } catch (error) {
      return { ok: false, error };
    }
  }
};
|
|
178
|
+
|
|
179
|
+
// src/manager/index.ts
|
|
180
|
+
/**
 * Builds the secrets provider used when `createManager` is not given one.
 *
 * Tries to load `EnvSecretsProvider` from the optional `hazo_secure/secrets`
 * module, constructed with the `"HAZO_COLLECT_SECRET"` argument. If loading or
 * construction fails, a fallback provider is returned instead: it resolves an
 * empty name list to `{}` and throws for anything else.
 *
 * @returns {Promise<{ resolve: Function, get: Function }>} A secrets provider.
 */
async function buildDefaultSecretsProvider() {
  let loadError;
  try {
    const { EnvSecretsProvider } = await import("hazo_secure/secrets");
    return new EnvSecretsProvider("HAZO_COLLECT_SECRET");
  } catch (err) {
    // Keep the real reason (module missing, broken install, constructor failure)
    // so the fallback errors below can expose it as `cause`.
    loadError = err;
  }

  return {
    async resolve(names) {
      if (names.length === 0) return {};
      throw new Error(
        "hazo_secure is not installed; provide a SecretsProvider option to createManager",
        { cause: loadError }
      );
    },
    async get(name) {
      throw new Error(
        `hazo_secure not installed; cannot resolve secret "${name}"`,
        { cause: loadError }
      );
    }
  };
}
|
|
200
|
+
/** Returns the current time formatted by `Date.prototype.toISOString()`. */
function nowISO() {
  const now = new Date();
  return now.toISOString();
}
|
|
203
|
+
/**
 * Builds a schema-validated run result with status `"failed"`, zero record
 * counts, and a single error entry.
 *
 * @param {string} plugin - Plugin name.
 * @param {string} run_id - Run identifier.
 * @param {string} kind - Plugin kind.
 * @param {string} started_at - Start timestamp.
 * @param {{ code: string, message: string, retryable?: boolean }} error - The failure to record.
 * @returns {object} The result as parsed by `RunResultSchema`.
 */
function makeFailedResult(plugin, run_id, kind, started_at, error) {
  const failure = {
    code: error.code,
    message: error.message,
    retryable: error.retryable
  };
  const candidate = {
    plugin,
    run_id,
    kind,
    started_at,
    completed_at: nowISO(),
    status: "failed",
    records_fetched: 0,
    records_written: 0,
    errors: [failure],
    contract_version: CONTRACT_VERSION
  };
  return RunResultSchema.parse(candidate);
}
|
|
217
|
+
/**
 * Reads the stored watermark for a plugin from `hazo_collect_watermarks`.
 *
 * @returns {Promise<any>} The `watermark` of the first matching row, or
 *   `undefined` when no row is returned.
 */
async function readWatermark(adapter, plugin) {
  const lookup = new QueryBuilder2()
    .from("hazo_collect_watermarks")
    .where("plugin", "eq", plugin)
    .limit(1);
  const matches = await adapter.query(lookup);
  const latest = matches[0];
  return latest?.watermark;
}
|
|
222
|
+
/**
 * Inserts the initial `"running"` row for a run into `hazo_collect_plugin_runs`.
 *
 * @param {object} adapter - Object exposing `query(qb, method, row)`.
 * @param {{ run_id: string, plugin: string, kind: string, started_at: string, correlation_id: string }} params
 */
async function insertRunRow(adapter, params) {
  const runsTable = new QueryBuilder2().from("hazo_collect_plugin_runs");
  await adapter.query(runsTable, "POST", {
    id: generateRequestId2().slice(4),
    run_id: params.run_id,
    plugin: params.plugin,
    kind: params.kind,
    status: "running",
    started_at: params.started_at,
    correlation_id: params.correlation_id,
    // Counters and the error list start empty; updateRunRow fills them in later.
    records_fetched: 0,
    records_written: 0,
    errors: "[]",
    contract_version: CONTRACT_VERSION
  });
}
|
|
239
|
+
/**
 * Writes a finished run's result onto its `hazo_collect_plugin_runs` row,
 * matched by `run_id`. `errors` and `metrics` are stored as JSON text; absent
 * optional values are written as `null`.
 */
async function updateRunRow(adapter, run_id, result) {
  const { status, completed_at, records_fetched, records_written } = result;
  const serializedMetrics = result.metrics ? JSON.stringify(result.metrics) : null;
  const patch = {
    status,
    completed_at,
    records_fetched,
    records_written,
    watermark: result.watermark ?? null,
    errors: JSON.stringify(result.errors),
    metrics: serializedMetrics,
    contract_version: result.contract_version ?? null
  };
  const target = new QueryBuilder2()
    .from("hazo_collect_plugin_runs")
    .where("run_id", "eq", run_id);
  await adapter.query(target, "PATCH", patch);
}
|
|
252
|
+
/**
 * Stores the watermark for a plugin in `hazo_collect_watermarks`, updating the
 * existing row on a `plugin` conflict, and stamps `updated_at` with the current time.
 */
async function upsertWatermark(adapter, plugin, watermark) {
  const upsert = new QueryBuilder2()
    .from("hazo_collect_watermarks")
    .onConflict(["plugin"])
    .doUpdate();
  const row = { plugin, watermark, updated_at: nowISO() };
  await adapter.query(upsert, "POST", row);
}
|
|
260
|
+
/**
 * Records the outcome of a run in `hazo_collect_plugin_health` (one row per plugin).
 *
 * - `"success"`: resets `consecutive_failures` to 0 and sets `state` to `"ok"`.
 * - any other status: reads the current `consecutive_failures` (0 when no row
 *   exists), stores it incremented by one, and sets `state` to `"failed"` only
 *   when the status is `"failed"`.
 *
 * @param {object} adapter - Object exposing `query(qb, method?, row?)`.
 * @param {string} plugin - Plugin name (conflict key).
 * @param {string} status - Run status.
 * @param {string} now - Timestamp to record.
 */
async function upsertHealth(adapter, plugin, status, now) {
  const healthTable = "hazo_collect_plugin_health";
  const upsert = (row) =>
    adapter.query(
      new QueryBuilder2().from(healthTable).onConflict(["plugin"]).doUpdate(),
      "POST",
      row
    );

  if (status === "success") {
    await upsert({
      plugin,
      last_success_at: now,
      last_run_at: now,
      last_status: "success",
      consecutive_failures: 0,
      state: "ok"
    });
    return;
  }

  // Non-success: the counter is read first and written back incremented.
  const lookup = new QueryBuilder2().from(healthTable).where("plugin", "eq", plugin).limit(1);
  const existing = await adapter.query(lookup);
  const priorFailures = existing[0]?.consecutive_failures ?? 0;

  await upsert({
    plugin,
    last_run_at: now,
    last_status: status,
    consecutive_failures: priorFailures + 1,
    ...(status === "failed" ? { state: "failed" } : {})
  });
}
|
|
288
|
+
/**
 * Creates the collector manager.
 *
 * @param {object} opts
 * @param {object} opts.adapter - Database adapter passed to the run bookkeeping helpers and the runtime.
 * @param {{ get(name: string): any }} opts.registry - Plugin lookup; entries expose `manifest` and `worker`.
 * @param {object} [opts.runtime=nodeRuntime] - Object with `run(worker, envelope, adapter)`.
 * @param {object} [opts.secrets] - Secrets provider; when omitted a default one is built lazily.
 * @returns {{ runNow: Function, runEnvelope: Function }}
 *   `runNow(pluginName, runOpts?)` validates inputs, resolves secrets, builds an
 *   envelope and runs it. `runEnvelope(envelope)` runs a prepared envelope.
 *   Failures detected before dispatch are reported as `{ ok: true, data }` where
 *   `data` is a run result with status `"failed"`.
 */
function createManager(opts) {
  const { adapter, registry } = opts;
  const runtime = opts.runtime ?? nodeRuntime;
  let _secretsProviderPromise;

  // Returns the caller-supplied provider, or lazily builds (once) the default one.
  function getSecretsProvider() {
    if (opts.secrets) return Promise.resolve(opts.secrets);
    if (!_secretsProviderPromise) {
      _secretsProviderPromise = buildDefaultSecretsProvider();
    }
    return _secretsProviderPromise;
  }

  // The result schema requires a string `code`; anything else falls back.
  const errorCode = (err, fallback) => (typeof err?.code === "string" ? err.code : fallback);
  const errorMessage = (err) => (err instanceof Error ? err.message : String(err));
  // The result schema requires a boolean `retryable` when present.
  const errorRetryable = (err) => (typeof err?.retryable === "boolean" ? err.retryable : undefined);

  // Failure detected before any run row exists: report it under a fresh run id.
  const failedBeforeDispatch = (plugin, kind, error) => ({
    ok: true,
    data: makeFailedResult(plugin, generateRequestId2(), kind, nowISO(), error)
  });

  // Inserts the run row, dispatches to the runtime, then records the result
  // (run row, watermark on success, health).
  async function _executeRun(entry, envelope) {
    return wrapResult2(async () => {
      const started_at = nowISO();
      const kind = entry.manifest.kind;
      await insertRunRow(adapter, {
        run_id: envelope.run_id,
        plugin: envelope.plugin,
        kind,
        correlation_id: envelope.correlation_id,
        started_at
      });

      const dispatchResult = await runtime.run(entry.worker, envelope, adapter);
      let result;
      if (dispatchResult.ok) {
        try {
          const outcome = dispatchResult.outcome;
          result = RunResultSchema.parse({
            plugin: envelope.plugin,
            run_id: envelope.run_id,
            kind,
            started_at,
            completed_at: nowISO(),
            status: outcome.status ?? "success",
            records_fetched: outcome.records_fetched,
            records_written: outcome.records_written,
            watermark: outcome.watermark,
            errors: [],
            metrics: outcome.metrics,
            contract_version: CONTRACT_VERSION
          });
        } catch (err) {
          // The worker returned something that is not a valid outcome. Record it
          // as a failed run; otherwise the row inserted above would stay
          // "running" and health would never be updated.
          result = makeFailedResult(envelope.plugin, envelope.run_id, kind, started_at, {
            code: "HAZO_COLLECT_INVALID_OUTCOME",
            message: `worker returned an invalid outcome: ${errorMessage(err)}`
          });
        }
      } else {
        const err = dispatchResult.error;
        // `code` / `retryable` come from arbitrary thrown values; they are
        // sanitised so building the failed result cannot itself fail validation.
        result = makeFailedResult(envelope.plugin, envelope.run_id, kind, started_at, {
          code: errorCode(err, "HAZO_COLLECT_WORKER_ERROR"),
          message: errorMessage(err),
          retryable: errorRetryable(err)
        });
      }

      await updateRunRow(adapter, envelope.run_id, result);
      // The watermark only advances on a fully successful run.
      if (result.status === "success" && result.watermark) {
        await upsertWatermark(adapter, envelope.plugin, result.watermark);
      }
      await upsertHealth(adapter, envelope.plugin, result.status, nowISO());
      return result;
    });
  }

  async function runNow(pluginName, runOpts) {
    const entry = registry.get(pluginName);
    if (!entry) {
      // The kind is unknown for an unregistered plugin; "source" is reported.
      return failedBeforeDispatch(pluginName, "source", {
        code: "HAZO_COLLECT_PLUGIN_NOT_FOUND",
        message: `Plugin "${pluginName}" not found in registry`
      });
    }

    const { manifest } = entry;
    const inputs = runOpts?.inputs ?? {};
    try {
      validateInputs(manifest, inputs);
    } catch (err) {
      return failedBeforeDispatch(pluginName, manifest.kind, {
        code: errorCode(err, "HAZO_COLLECT_INVALID_INPUTS"),
        message: errorMessage(err)
      });
    }

    let secrets = {};
    if (manifest.secrets && manifest.secrets.length > 0) {
      const provider = await getSecretsProvider();
      try {
        secrets = await provider.resolve(manifest.secrets);
      } catch (err) {
        return failedBeforeDispatch(pluginName, manifest.kind, {
          code: errorCode(err, "HAZO_SECURE_SECRET_NOT_FOUND"),
          message: errorMessage(err)
        });
      }
    }

    const run_id = generateRequestId2();
    // Reuse the ambient correlation id when there is one; otherwise the run id.
    const correlation_id = getCorrelationId() ?? run_id;
    return withContext({ correlationId: correlation_id }, async () => {
      const watermark = await readWatermark(adapter, pluginName);
      const envelope = {
        run_id,
        correlation_id,
        plugin: pluginName,
        inputs,
        secrets,
        window: { since: watermark, until: nowISO() },
        attempt: 1,
        contract_version: CONTRACT_VERSION
      };
      return _executeRun(entry, envelope);
    });
  }

  async function runEnvelope(envelope) {
    const entry = registry.get(envelope.plugin);
    if (!entry) {
      return {
        ok: true,
        data: makeFailedResult(envelope.plugin, envelope.run_id, "source", nowISO(), {
          code: "HAZO_COLLECT_PLUGIN_NOT_FOUND",
          message: `Plugin "${envelope.plugin}" not found in registry`
        })
      };
    }
    return withContext(
      { correlationId: envelope.correlation_id },
      () => _executeRun(entry, envelope)
    );
  }

  return { runNow, runEnvelope };
}
|
|
431
|
+
|
|
432
|
+
// src/registry/index.ts
|
|
433
|
+
import { QueryBuilder as QueryBuilder4, wrapResult as wrapResult3 } from "hazo_connect/server";
|
|
434
|
+
|
|
435
|
+
// src/schema/manifest.ts
|
|
436
|
+
import { z as z2 } from "zod";
|
|
437
|
+
import { fromZodValidation as fromZodValidation2 } from "hazo_core";
|
|
438
|
+
// Retry settings a manifest may declare; every field has a default.
var RetrySchema = z2.object({
  // Integer >= 0; defaults to 3.
  max: z2.number().int().min(0).default(3),
  backoff: z2.enum(["exponential", "linear", "constant"]).default("exponential"),
  // Integer >= 0; defaults to 1000.
  base_ms: z2.number().int().min(0).default(1000),
  jitter: z2.boolean().default(true)
});
|
|
444
|
+
// Base schema for the manifest's string-list fields.
var StringListSchema = z2.array(z2.string());

// Schema of a plugin manifest.
var ManifestSchema = z2.object({
  name: z2.string().min(1),
  kind: z2.enum(["source", "sink"]),
  version: z2.string().min(1),
  runtime: z2.enum(["node", "python"]),
  entry: z2.string().min(1),
  schedule: z2.string().optional(),
  timezone: z2.string().default("UTC"),
  // Integer >= 1; defaults to 600.
  timeout_sec: z2.number().int().min(1).default(600),
  // Integer >= 1; defaults to 1.
  concurrency: z2.number().int().min(1).default(1),
  retry: RetrySchema.optional(),
  // JSON-Schema object
  inputs: z2.record(z2.unknown()).optional(),
  produces: StringListSchema.optional(),
  consumes: StringListSchema.optional(),
  secrets: StringListSchema.optional(),
  // Required, with at least one entry.
  idempotency_key: StringListSchema.min(1),
  labels: z2.record(z2.string()).optional()
});
|
|
463
|
+
/**
 * Validates raw manifest data against `ManifestSchema`.
 *
 * @param {unknown} input - Untrusted manifest data.
 * @returns {object} The parsed manifest (defaults applied by the schema).
 * @throws The error built by `fromZodValidation` with code
 *   `HAZO_COLLECT_INVALID_MANIFEST` when validation fails.
 */
function parseManifest(input) {
  const parsed = ManifestSchema.safeParse(input);
  if (parsed.success) {
    return parsed.data;
  }
  throw fromZodValidation2(parsed.error, {
    pkg: "hazo_collect",
    code: "HAZO_COLLECT_INVALID_MANIFEST"
  });
}
|
|
473
|
+
|
|
474
|
+
// src/sdk/index.ts
|
|
475
|
+
import { HazoConfigError } from "hazo_core";
|
|
476
|
+
// Collector definitions keyed by name.
// NOTE(review): nothing visible in this bundle adds entries to this map —
// confirm that collector registration writes to this same instance.
var _collectors = /* @__PURE__ */ new Map();

/** Returns the collector definition stored under `name`, or `undefined`. */
function getCollector(name) {
  const found = _collectors.get(name);
  return found;
}

/** Returns a new array holding every stored collector definition. */
function listCollectors() {
  return [..._collectors.values()];
}
|
|
483
|
+
|
|
484
|
+
// src/registry/discovery.ts
|
|
485
|
+
import { readFileSync, readdirSync } from "fs";
|
|
486
|
+
import { resolve } from "path";
|
|
487
|
+
import { pathToFileURL } from "url";
|
|
488
|
+
import { QueryBuilder as QueryBuilder3 } from "hazo_connect/server";
|
|
489
|
+
/**
 * Imports the entry module of every node "source" plugin found under `pluginsDir`.
 *
 * Each immediate sub-directory containing a `manifest.json` is treated as a
 * plugin. Sub-directories without a `manifest.json` are skipped. Manifests whose
 * `runtime` is not `"node"` or whose `kind` is not `"source"` are skipped without
 * importing anything.
 *
 * @param {string} pluginsDir - Directory holding one sub-directory per plugin.
 * @returns {Promise<void>}
 * @throws When the directory cannot be read, a manifest is unreadable (other than
 *   missing), is not valid JSON, fails `parseManifest`, or the entry import fails.
 */
async function scanFolder(pluginsDir) {
  const subdirs = readdirSync(pluginsDir, { withFileTypes: true }).filter((e) => e.isDirectory());

  for (const subdir of subdirs) {
    const pluginRoot = resolve(pluginsDir, subdir.name);

    let raw;
    try {
      raw = readFileSync(resolve(pluginRoot, "manifest.json"), "utf-8");
    } catch (err) {
      // A folder without a manifest is not a plugin; one stray directory must
      // not abort discovery of all the others.
      if (err?.code === "ENOENT") {
        continue;
      }
      throw err;
    }

    const manifest = parseManifest(JSON.parse(raw));
    if (manifest.runtime !== "node" || manifest.kind !== "source") {
      continue;
    }

    const entryPath = resolve(pluginRoot, manifest.entry);
    await import(pathToFileURL(entryPath).href);
  }
}
|
|
504
|
+
/**
 * Discovers plugins on disk, persists their manifests, and returns a registry
 * snapshot built from the rows marked valid in `hazo_collect_plugin_registry`.
 *
 * A row is included only when a collector with the same name is currently
 * loaded; rows without a loaded collector are skipped.
 *
 * @param {object} opts
 * @param {object} opts.adapter - Database adapter exposing `query(qb, ...)`.
 * @param {string} [opts.pluginsDir="./plugins"] - Folder scanned for plugins.
 * @param {boolean} [opts.reseed] - Forwarded to `persistRegistry`.
 * @returns {Promise<{ get(name: string): any, list(): any[], refresh(): Promise<void> }>}
 *   `refresh()` re-runs discovery and replaces the snapshot.
 * @throws {Error} When persisting fails, or when a stored manifest's version
 *   differs from the loaded collector's version.
 */
async function discover(opts) {
  const pluginsDir = opts.pluginsDir ?? "./plugins";
  await scanFolder(pluginsDir);

  const defs = listCollectors();
  const persisted = await persistRegistry(
    opts.adapter,
    defs.map((def) => ({ manifest: def.manifest, source: "folder" })),
    { reseed: opts.reseed }
  );
  // NOTE(review): assumes wrapResult reports failures as `{ ok: false, error }`
  // rather than throwing — confirm against hazo_connect. Without this check a
  // failed persist would go unnoticed and the snapshot would be built from
  // whatever rows were already stored.
  if (persisted && persisted.ok === false) {
    throw new Error("hazo_collect: failed to persist plugin registry", { cause: persisted.error });
  }

  const qb = new QueryBuilder3().from("hazo_collect_plugin_registry").where("valid", "eq", 1);
  const rows = await opts.adapter.query(qb);

  const snapshotEntries = [];
  for (const row of rows) {
    // The manifest is written as JSON text; accept an already-decoded value too.
    const storedManifest = typeof row.manifest === "string" ? JSON.parse(row.manifest) : row.manifest;
    const parsedManifest = parseManifest(storedManifest);
    const worker = getCollector(parsedManifest.name);
    if (!worker) {
      continue;
    }
    if (parsedManifest.version !== worker.manifest.version) {
      throw new Error(
        `Version skew for plugin "${parsedManifest.name}": DB has "${parsedManifest.version}", worker has "${worker.manifest.version}"`
      );
    }
    snapshotEntries.push({ manifest: parsedManifest, worker });
  }

  let snapshot = new Map(
    snapshotEntries.map((e) => [e.manifest.name, e])
  );
  return {
    get(name) {
      return snapshot.get(name);
    },
    list() {
      return Array.from(snapshot.values());
    },
    async refresh() {
      const refreshed = await discover(opts);
      snapshot = new Map(refreshed.list().map((e) => [e.manifest.name, e]));
    }
  };
}
|
|
545
|
+
|
|
546
|
+
// src/registry/index.ts
|
|
547
|
+
/**
 * Creates a registry view backed directly by the loaded collectors, with no
 * database involved. Each entry is `{ manifest, worker }`, where `worker` is the
 * collector definition itself.
 *
 * @returns {{ get(name: string): object | undefined, list(): object[] }}
 */
function createInMemoryRegistry() {
  const toEntry = (def) => ({ manifest: def.manifest, worker: def });

  return {
    get(name) {
      const def = getCollector(name);
      return def ? toEntry(def) : void 0;
    },
    list() {
      return listCollectors().map((def) => toEntry(def));
    }
  };
}
|
|
559
|
+
/**
 * Writes one row per entry into `hazo_collect_plugin_registry`.
 *
 * A manifest that fails `parseManifest` is still written, with `valid` set to 0
 * and the failure message in `quarantine_reason`. On a `name` conflict the
 * existing row is updated when `opts.reseed` is truthy and left untouched otherwise.
 *
 * @param {object} adapter - Object exposing `query(qb, method, row)`.
 * @param {Array<{ manifest: object, source?: string }>} entries - Manifests to persist.
 * @param {{ reseed?: boolean }} [opts]
 * @returns {Promise<any>} Whatever `wrapResult` produces around the write loop.
 */
async function persistRegistry(adapter, entries, opts) {
  return wrapResult3(async () => {
    for (const entry of entries) {
      const { manifest } = entry;

      let valid = 1;
      let quarantineReason = null;
      try {
        parseManifest(manifest);
      } catch (err) {
        valid = 0;
        quarantineReason = err instanceof Error ? err.message : String(err);
      }

      const row = {
        name: manifest.name,
        kind: manifest.kind,
        version: manifest.version,
        runtime: manifest.runtime,
        manifest: JSON.stringify(manifest),
        source: entry.source ?? "folder",
        valid,
        quarantine_reason: quarantineReason
      };

      const onNameConflict = new QueryBuilder4()
        .from("hazo_collect_plugin_registry")
        .onConflict(["name"]);
      const qb = opts?.reseed ? onNameConflict.doUpdate() : onNameConflict.doNothing();
      await adapter.query(qb, "POST", row);
    }
  });
}
|
|
585
|
+
export {
|
|
586
|
+
createInMemoryRegistry,
|
|
587
|
+
createManager,
|
|
588
|
+
createWriteAdapter,
|
|
589
|
+
discover,
|
|
590
|
+
nodeRuntime,
|
|
591
|
+
persistRegistry
|
|
592
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "hazo_collect",
|
|
3
|
+
"version": "0.2.1",
|
|
4
|
+
"description": "Collector-manager engine for the Ocdata platform",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"module": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
},
|
|
13
|
+
"./server": {
|
|
14
|
+
"types": "./dist/server/index.d.ts",
|
|
15
|
+
"import": "./dist/server/index.js"
|
|
16
|
+
},
|
|
17
|
+
"./sdk": {
|
|
18
|
+
"types": "./dist/sdk/index.d.ts",
|
|
19
|
+
"import": "./dist/sdk/index.js",
|
|
20
|
+
"default": "./dist/sdk/index.js"
|
|
21
|
+
},
|
|
22
|
+
"./ddl/sqlite.sql": "./dist/ddl/sqlite.sql",
|
|
23
|
+
"./ddl/postgres.sql": "./dist/ddl/postgres.sql",
|
|
24
|
+
"./package.json": "./package.json"
|
|
25
|
+
},
|
|
26
|
+
"files": [
|
|
27
|
+
"dist",
|
|
28
|
+
"README.md",
|
|
29
|
+
"CHANGE_LOG.md",
|
|
30
|
+
"SETUP_CHECKLIST.md"
|
|
31
|
+
],
|
|
32
|
+
"scripts": {
|
|
33
|
+
"build": "tsup",
|
|
34
|
+
"type-check": "tsc --noEmit",
|
|
35
|
+
"test": "node --experimental-vm-modules ../node_modules/jest/bin/jest.js --config jest.config.cjs",
|
|
36
|
+
"dev:test-app": "npm run build && cd test-app && npm run dev",
|
|
37
|
+
"build:test-app": "npm run build && cd test-app && npm run build"
|
|
38
|
+
},
|
|
39
|
+
"dependencies": {
|
|
40
|
+
"zod": "^3.23.8",
|
|
41
|
+
"ajv": "^8.17.1"
|
|
42
|
+
},
|
|
43
|
+
"peerDependencies": {
|
|
44
|
+
"hazo_core": "^1.1.0",
|
|
45
|
+
"hazo_connect": "^3.5.1",
|
|
46
|
+
"hazo_secure": "^1.3.0",
|
|
47
|
+
"react": "^18.0.0 || ^19.0.0",
|
|
48
|
+
"react-dom": "^18.0.0 || ^19.0.0",
|
|
49
|
+
"hazo_ui": "^3.4.1"
|
|
50
|
+
},
|
|
51
|
+
"peerDependenciesMeta": {
|
|
52
|
+
"hazo_secure": {
|
|
53
|
+
"optional": true
|
|
54
|
+
},
|
|
55
|
+
"react": {
|
|
56
|
+
"optional": true
|
|
57
|
+
},
|
|
58
|
+
"react-dom": {
|
|
59
|
+
"optional": true
|
|
60
|
+
},
|
|
61
|
+
"hazo_ui": {
|
|
62
|
+
"optional": true
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"devDependencies": {
|
|
66
|
+
"tsup": "^8.3.0",
|
|
67
|
+
"typescript": "^5.7.2",
|
|
68
|
+
"jest": "^30.2.0",
|
|
69
|
+
"ts-jest": "^29.4.5",
|
|
70
|
+
"@types/jest": "^30.0.0",
|
|
71
|
+
"jest-environment-node": "^30.2.0",
|
|
72
|
+
"@types/node": "^22.10.0",
|
|
73
|
+
"@types/react": "^19.0.0",
|
|
74
|
+
"@types/react-dom": "^19.0.0",
|
|
75
|
+
"hazo_core": "^1.1.0",
|
|
76
|
+
"hazo_connect": "^3.5.1",
|
|
77
|
+
"hazo_secure": "^1.3.0"
|
|
78
|
+
},
|
|
79
|
+
"keywords": [],
|
|
80
|
+
"author": "Pubs Abayasiri",
|
|
81
|
+
"license": "MIT"
|
|
82
|
+
}
|