@hatk/hatk 0.0.1-alpha.4 → 0.0.1-alpha.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapter.d.ts +19 -0
- package/dist/adapter.d.ts.map +1 -0
- package/dist/adapter.js +107 -0
- package/dist/backfill.d.ts +60 -1
- package/dist/backfill.d.ts.map +1 -1
- package/dist/backfill.js +167 -33
- package/dist/car.d.ts +59 -1
- package/dist/car.d.ts.map +1 -1
- package/dist/car.js +179 -7
- package/dist/cbor.d.ts +37 -0
- package/dist/cbor.d.ts.map +1 -1
- package/dist/cbor.js +36 -3
- package/dist/cid.d.ts +37 -0
- package/dist/cid.d.ts.map +1 -1
- package/dist/cid.js +38 -3
- package/dist/cli.js +417 -133
- package/dist/cloudflare/container.d.ts +73 -0
- package/dist/cloudflare/container.d.ts.map +1 -0
- package/dist/cloudflare/container.js +232 -0
- package/dist/cloudflare/hooks.d.ts +33 -0
- package/dist/cloudflare/hooks.d.ts.map +1 -0
- package/dist/cloudflare/hooks.js +40 -0
- package/dist/cloudflare/init.d.ts +27 -0
- package/dist/cloudflare/init.d.ts.map +1 -0
- package/dist/cloudflare/init.js +103 -0
- package/dist/cloudflare/worker.d.ts +27 -0
- package/dist/cloudflare/worker.d.ts.map +1 -0
- package/dist/cloudflare/worker.js +54 -0
- package/dist/config.d.ts +12 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +36 -9
- package/dist/database/adapter-factory.d.ts +6 -0
- package/dist/database/adapter-factory.d.ts.map +1 -0
- package/dist/database/adapter-factory.js +20 -0
- package/dist/database/adapters/d1.d.ts +56 -0
- package/dist/database/adapters/d1.d.ts.map +1 -0
- package/dist/database/adapters/d1.js +108 -0
- package/dist/database/adapters/duckdb-search.d.ts +12 -0
- package/dist/database/adapters/duckdb-search.d.ts.map +1 -0
- package/dist/database/adapters/duckdb-search.js +27 -0
- package/dist/database/adapters/duckdb.d.ts +25 -0
- package/dist/database/adapters/duckdb.d.ts.map +1 -0
- package/dist/database/adapters/duckdb.js +161 -0
- package/dist/database/adapters/sqlite-search.d.ts +23 -0
- package/dist/database/adapters/sqlite-search.d.ts.map +1 -0
- package/dist/database/adapters/sqlite-search.js +74 -0
- package/dist/database/adapters/sqlite.d.ts +18 -0
- package/dist/database/adapters/sqlite.d.ts.map +1 -0
- package/dist/database/adapters/sqlite.js +87 -0
- package/dist/database/db.d.ts +159 -0
- package/dist/database/db.d.ts.map +1 -0
- package/dist/database/db.js +1445 -0
- package/dist/database/dialect.d.ts +45 -0
- package/dist/database/dialect.d.ts.map +1 -0
- package/dist/database/dialect.js +72 -0
- package/dist/database/fts.d.ts +27 -0
- package/dist/database/fts.d.ts.map +1 -0
- package/dist/database/fts.js +846 -0
- package/dist/database/index.d.ts +7 -0
- package/dist/database/index.d.ts.map +1 -0
- package/dist/database/index.js +6 -0
- package/dist/database/ports.d.ts +50 -0
- package/dist/database/ports.d.ts.map +1 -0
- package/dist/database/ports.js +1 -0
- package/dist/database/schema.d.ts +61 -0
- package/dist/database/schema.d.ts.map +1 -0
- package/dist/database/schema.js +394 -0
- package/dist/db.d.ts +1 -1
- package/dist/db.d.ts.map +1 -1
- package/dist/db.js +4 -38
- package/dist/dev-entry.d.ts +8 -0
- package/dist/dev-entry.d.ts.map +1 -0
- package/dist/dev-entry.js +111 -0
- package/dist/feeds.d.ts +12 -8
- package/dist/feeds.d.ts.map +1 -1
- package/dist/feeds.js +45 -6
- package/dist/fts.d.ts.map +1 -1
- package/dist/fts.js +5 -0
- package/dist/hooks.d.ts +43 -0
- package/dist/hooks.d.ts.map +1 -0
- package/dist/hooks.js +102 -0
- package/dist/hydrate.d.ts +6 -5
- package/dist/hydrate.d.ts.map +1 -1
- package/dist/hydrate.js +4 -16
- package/dist/indexer.d.ts +22 -0
- package/dist/indexer.d.ts.map +1 -1
- package/dist/indexer.js +70 -7
- package/dist/labels.d.ts +34 -0
- package/dist/labels.d.ts.map +1 -1
- package/dist/labels.js +66 -6
- package/dist/logger.d.ts +29 -0
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +29 -0
- package/dist/main.js +135 -67
- package/dist/mst.d.ts +18 -1
- package/dist/mst.d.ts.map +1 -1
- package/dist/mst.js +19 -8
- package/dist/oauth/db.d.ts.map +1 -1
- package/dist/oauth/db.js +43 -17
- package/dist/oauth/server.d.ts +2 -0
- package/dist/oauth/server.d.ts.map +1 -1
- package/dist/oauth/server.js +103 -8
- package/dist/oauth/session.d.ts +11 -0
- package/dist/oauth/session.d.ts.map +1 -0
- package/dist/oauth/session.js +65 -0
- package/dist/opengraph.d.ts +10 -0
- package/dist/opengraph.d.ts.map +1 -1
- package/dist/opengraph.js +73 -39
- package/dist/pds-proxy.d.ts +42 -0
- package/dist/pds-proxy.d.ts.map +1 -0
- package/dist/pds-proxy.js +189 -0
- package/dist/renderer.d.ts +27 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +46 -0
- package/dist/resolve-hatk.d.ts +6 -0
- package/dist/resolve-hatk.d.ts.map +1 -0
- package/dist/resolve-hatk.js +20 -0
- package/dist/response.d.ts +16 -0
- package/dist/response.d.ts.map +1 -0
- package/dist/response.js +69 -0
- package/dist/scanner.d.ts +21 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +88 -0
- package/dist/schema.d.ts +8 -0
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +29 -0
- package/dist/seed.d.ts +19 -0
- package/dist/seed.d.ts.map +1 -1
- package/dist/seed.js +43 -4
- package/dist/server-init.d.ts +8 -0
- package/dist/server-init.d.ts.map +1 -0
- package/dist/server-init.js +61 -0
- package/dist/server.d.ts +26 -3
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +528 -635
- package/dist/setup.d.ts +28 -1
- package/dist/setup.d.ts.map +1 -1
- package/dist/setup.js +50 -3
- package/dist/test.d.ts +1 -1
- package/dist/test.d.ts.map +1 -1
- package/dist/test.js +38 -32
- package/dist/views.js +1 -1
- package/dist/vite-plugin.d.ts +1 -1
- package/dist/vite-plugin.d.ts.map +1 -1
- package/dist/vite-plugin.js +254 -66
- package/dist/xrpc.d.ts +60 -10
- package/dist/xrpc.d.ts.map +1 -1
- package/dist/xrpc.js +155 -39
- package/package.json +13 -6
- package/public/admin.html +0 -54
package/dist/indexer.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { cborDecode } from "./cbor.js";
|
|
2
2
|
import { parseCarFrame } from "./car.js";
|
|
3
|
-
import { insertRecord, deleteRecord, setCursor, setRepoStatus, getRepoRetryInfo, listAllRepoStatuses } from "./db.js";
|
|
3
|
+
import { insertRecord, deleteRecord, setCursor, setRepoStatus, getRepoRetryInfo, listAllRepoStatuses, getDatabasePort, } from "./database/db.js";
|
|
4
4
|
import { backfillRepo } from "./backfill.js";
|
|
5
|
-
import { rebuildAllIndexes } from "./fts.js";
|
|
5
|
+
import { rebuildAllIndexes } from "./database/fts.js";
|
|
6
6
|
import { log, emit, timer } from "./logger.js";
|
|
7
7
|
import { runLabelRules } from "./labels.js";
|
|
8
|
-
import { getLexiconArray } from "./schema.js";
|
|
8
|
+
import { getLexiconArray } from "./database/schema.js";
|
|
9
9
|
import { validateRecord } from '@bigmoves/lexicon';
|
|
10
10
|
let buffer = [];
|
|
11
11
|
let flushTimer = null;
|
|
@@ -18,7 +18,8 @@ let ftsRebuildInterval = 500;
|
|
|
18
18
|
const pendingBuffers = new Map();
|
|
19
19
|
// Track in-flight backfills to avoid duplicates
|
|
20
20
|
const backfillInFlight = new Set();
|
|
21
|
-
const
|
|
21
|
+
const backfillPromises = new Map();
|
|
22
|
+
const pendingReschedule = new Set();
|
|
22
23
|
// In-memory cache of repo status to avoid flooding the DB read queue
|
|
23
24
|
const repoStatusCache = new Map();
|
|
24
25
|
// Set by startIndexer
|
|
@@ -27,6 +28,12 @@ let indexerSignalCollections;
|
|
|
27
28
|
let indexerPinnedRepos = null;
|
|
28
29
|
let indexerFetchTimeout;
|
|
29
30
|
let indexerMaxRetries;
|
|
31
|
+
let maxConcurrentBackfills = 3;
|
|
32
|
+
/**
|
|
33
|
+
* Flush the write buffer — insert all buffered records, update the relay cursor,
|
|
34
|
+
* run label rules on inserted records, and trigger FTS rebuilds when the write
|
|
35
|
+
* threshold is reached. Emits a wide event with batch stats.
|
|
36
|
+
*/
|
|
30
37
|
async function flushBuffer() {
|
|
31
38
|
if (buffer.length === 0)
|
|
32
39
|
return;
|
|
@@ -86,9 +93,14 @@ async function flushBuffer() {
|
|
|
86
93
|
writesSinceRebuild += batch.length;
|
|
87
94
|
if (writesSinceRebuild >= ftsRebuildInterval) {
|
|
88
95
|
writesSinceRebuild = 0;
|
|
89
|
-
|
|
96
|
+
// Skip periodic full rebuild for SQLite — it uses incremental FTS updates
|
|
97
|
+
const port = getDatabasePort();
|
|
98
|
+
if (port.dialect !== 'sqlite') {
|
|
99
|
+
rebuildAllIndexes([...indexerCollections]).catch(() => { });
|
|
100
|
+
}
|
|
90
101
|
}
|
|
91
102
|
}
|
|
103
|
+
/** Schedule a flush after FLUSH_INTERVAL_MS if one isn't already pending. */
|
|
92
104
|
function scheduleFlush() {
|
|
93
105
|
if (flushTimer)
|
|
94
106
|
return;
|
|
@@ -97,6 +109,7 @@ function scheduleFlush() {
|
|
|
97
109
|
await flushBuffer();
|
|
98
110
|
}, FLUSH_INTERVAL_MS);
|
|
99
111
|
}
|
|
112
|
+
/** Add a record to the write buffer. Flushes immediately if BATCH_SIZE is reached. */
|
|
100
113
|
function bufferWrite(item) {
|
|
101
114
|
buffer.push(item);
|
|
102
115
|
if (buffer.length >= BATCH_SIZE) {
|
|
@@ -110,11 +123,39 @@ function bufferWrite(item) {
|
|
|
110
123
|
scheduleFlush();
|
|
111
124
|
}
|
|
112
125
|
}
|
|
126
|
+
/**
|
|
127
|
+
* Auto-backfill a DID's repo when first seen on the firehose.
|
|
128
|
+
*
|
|
129
|
+
* Fetches the full repo via CAR export, inserts all records, then replays any
|
|
130
|
+
* firehose events that arrived during the backfill. Concurrency is capped at
|
|
131
|
+
* `maxConcurrentBackfills`. Failed backfills retry with exponential delay up
|
|
132
|
+
* to `maxRetries`.
|
|
133
|
+
*/
|
|
134
|
+
/** Wait for a DID's backfill to complete if one is in flight. */
|
|
135
|
+
export function awaitBackfill(did) {
|
|
136
|
+
const entry = backfillPromises.get(did);
|
|
137
|
+
return entry ? entry.promise : Promise.resolve();
|
|
138
|
+
}
|
|
113
139
|
export async function triggerAutoBackfill(did, attempt = 0) {
|
|
114
140
|
if (backfillInFlight.has(did))
|
|
115
141
|
return;
|
|
142
|
+
if (backfillInFlight.size >= maxConcurrentBackfills) {
|
|
143
|
+
if (!pendingReschedule.has(did)) {
|
|
144
|
+
pendingReschedule.add(did);
|
|
145
|
+
setTimeout(() => {
|
|
146
|
+
pendingReschedule.delete(did);
|
|
147
|
+
triggerAutoBackfill(did, attempt);
|
|
148
|
+
}, 10_000);
|
|
149
|
+
}
|
|
150
|
+
return;
|
|
151
|
+
}
|
|
116
152
|
backfillInFlight.add(did);
|
|
117
153
|
pendingBuffers.set(did, []);
|
|
154
|
+
if (!backfillPromises.has(did)) {
|
|
155
|
+
let resolveBackfill;
|
|
156
|
+
const promise = new Promise((r) => { resolveBackfill = r; });
|
|
157
|
+
backfillPromises.set(did, { promise, resolve: resolveBackfill });
|
|
158
|
+
}
|
|
118
159
|
if (attempt === 0)
|
|
119
160
|
await setRepoStatus(did, 'pending');
|
|
120
161
|
const elapsed = timer();
|
|
@@ -154,6 +195,12 @@ export async function triggerAutoBackfill(did, attempt = 0) {
|
|
|
154
195
|
error,
|
|
155
196
|
retry_count: currentRetryCount,
|
|
156
197
|
});
|
|
198
|
+
// Resolve awaiting callers (e.g. on-login hooks)
|
|
199
|
+
const entry = backfillPromises.get(did);
|
|
200
|
+
if (entry) {
|
|
201
|
+
entry.resolve();
|
|
202
|
+
backfillPromises.delete(did);
|
|
203
|
+
}
|
|
157
204
|
if (status === 'error' && currentRetryCount < indexerMaxRetries) {
|
|
158
205
|
const delaySecs = Math.min(currentRetryCount * 60, 3600);
|
|
159
206
|
const delayMs = Math.max(delaySecs, 60) * 1000;
|
|
@@ -162,7 +209,7 @@ export async function triggerAutoBackfill(did, attempt = 0) {
|
|
|
162
209
|
}, delayMs);
|
|
163
210
|
}
|
|
164
211
|
}
|
|
165
|
-
|
|
212
|
+
/** Emit a memory diagnostics wide event every 30s for observability. */
|
|
166
213
|
function startMemoryDiagnostics() {
|
|
167
214
|
setInterval(() => {
|
|
168
215
|
const mem = process.memoryUsage();
|
|
@@ -184,6 +231,16 @@ function startMemoryDiagnostics() {
|
|
|
184
231
|
});
|
|
185
232
|
}, 30_000);
|
|
186
233
|
}
|
|
234
|
+
/**
|
|
235
|
+
* Connect to the AT Protocol relay firehose and begin indexing.
|
|
236
|
+
*
|
|
237
|
+
* Opens a WebSocket to `subscribeRepos`, processes commit messages synchronously
|
|
238
|
+
* on the event loop to minimize backpressure, and batches writes through
|
|
239
|
+
* {@link flushBuffer}. New DIDs trigger auto-backfill via {@link triggerAutoBackfill}.
|
|
240
|
+
* Reconnects automatically on disconnect after a 3s delay.
|
|
241
|
+
*
|
|
242
|
+
* @returns The WebSocket connection (for shutdown coordination)
|
|
243
|
+
*/
|
|
187
244
|
export async function startIndexer(opts) {
|
|
188
245
|
const { relayUrl, collections, cursor, fetchTimeout } = opts;
|
|
189
246
|
if (opts.ftsRebuildInterval != null)
|
|
@@ -193,6 +250,7 @@ export async function startIndexer(opts) {
|
|
|
193
250
|
indexerPinnedRepos = opts.pinnedRepos || null;
|
|
194
251
|
indexerFetchTimeout = fetchTimeout;
|
|
195
252
|
indexerMaxRetries = opts.maxRetries;
|
|
253
|
+
maxConcurrentBackfills = opts.parallelism ?? 3;
|
|
196
254
|
// Pre-populate repo status cache from DB so non-signal updates
|
|
197
255
|
// (e.g. profile changes) are processed for already-tracked DIDs
|
|
198
256
|
if (repoStatusCache.size === 0) {
|
|
@@ -231,6 +289,11 @@ export async function startIndexer(opts) {
|
|
|
231
289
|
});
|
|
232
290
|
return ws;
|
|
233
291
|
}
|
|
292
|
+
/**
|
|
293
|
+
* Process a single firehose message. Decodes the CBOR header/body, filters
|
|
294
|
+
* for relevant collections, validates records against lexicons, and routes
|
|
295
|
+
* writes to the buffer (or pending buffer if the DID is mid-backfill).
|
|
296
|
+
*/
|
|
234
297
|
function processMessage(bytes, collections) {
|
|
235
298
|
const header = cborDecode(bytes, 0);
|
|
236
299
|
const body = cborDecode(bytes, header.offset);
|
|
@@ -264,7 +327,7 @@ function processMessage(bytes, collections) {
|
|
|
264
327
|
repoStatusCache.set(did, 'unknown');
|
|
265
328
|
}
|
|
266
329
|
if (hasSignalOp && (!indexerPinnedRepos || indexerPinnedRepos.has(did))) {
|
|
267
|
-
if (repoStatus === null && backfillInFlight.size <
|
|
330
|
+
if (repoStatus === null && backfillInFlight.size < maxConcurrentBackfills) {
|
|
268
331
|
repoStatusCache.set(did, 'pending');
|
|
269
332
|
triggerAutoBackfill(did);
|
|
270
333
|
}
|
package/dist/labels.d.ts
CHANGED
|
@@ -13,7 +13,34 @@ export interface LabelRuleContext {
|
|
|
13
13
|
value: Record<string, any>;
|
|
14
14
|
};
|
|
15
15
|
}
|
|
16
|
+
export interface LabelModule {
|
|
17
|
+
definition?: LabelDefinition;
|
|
18
|
+
evaluate?: (ctx: LabelRuleContext) => Promise<string[]>;
|
|
19
|
+
}
|
|
20
|
+
export declare function defineLabel(module: LabelModule): {
|
|
21
|
+
definition?: LabelDefinition;
|
|
22
|
+
evaluate?: (ctx: LabelRuleContext) => Promise<string[]>;
|
|
23
|
+
__type: "labels";
|
|
24
|
+
};
|
|
25
|
+
/**
|
|
26
|
+
* Discover and load label rule modules from the `labels/` directory.
|
|
27
|
+
*
|
|
28
|
+
* Each module should default-export an object with an optional `definition`
|
|
29
|
+
* (label metadata like severity and blur behavior) and an optional `evaluate`
|
|
30
|
+
* function that returns label values to apply to a record.
|
|
31
|
+
*
|
|
32
|
+
* @param labelsDir - Absolute path to the `labels/` directory
|
|
33
|
+
*/
|
|
16
34
|
export declare function initLabels(labelsDir: string): Promise<void>;
|
|
35
|
+
/** Register a single label module from a scanned server/ module. */
|
|
36
|
+
export declare function registerLabelModule(name: string, labelMod: {
|
|
37
|
+
definition?: LabelDefinition;
|
|
38
|
+
evaluate?: (ctx: LabelRuleContext) => Promise<string[]>;
|
|
39
|
+
}): void;
|
|
40
|
+
/**
|
|
41
|
+
* Evaluate all loaded label rules against a record and persist any resulting labels.
|
|
42
|
+
* Called after each record is indexed. Rule errors are logged but never block indexing.
|
|
43
|
+
*/
|
|
17
44
|
export declare function runLabelRules(record: {
|
|
18
45
|
uri: string;
|
|
19
46
|
cid: string;
|
|
@@ -21,9 +48,16 @@ export declare function runLabelRules(record: {
|
|
|
21
48
|
collection: string;
|
|
22
49
|
value: Record<string, any>;
|
|
23
50
|
}): Promise<void>;
|
|
51
|
+
/**
|
|
52
|
+
* Re-evaluate all label rules against every existing record in the given collections.
|
|
53
|
+
* Used by `/admin/rescan-labels` to apply new or updated rules retroactively.
|
|
54
|
+
*
|
|
55
|
+
* @returns Count of records scanned and new labels applied
|
|
56
|
+
*/
|
|
24
57
|
export declare function rescanLabels(collections: string[]): Promise<{
|
|
25
58
|
scanned: number;
|
|
26
59
|
labeled: number;
|
|
27
60
|
}>;
|
|
61
|
+
/** Return all label definitions discovered during {@link initLabels}. */
|
|
28
62
|
export declare function getLabelDefinitions(): LabelDefinition[];
|
|
29
63
|
//# sourceMappingURL=labels.d.ts.map
|
package/dist/labels.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../src/labels.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../src/labels.ts"],"names":[],"mappings":"AA8BA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAA;AAIlD,wDAAwD;AACxD,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE;QACF,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,GAAG,EAAE,CAAC,CAAA;QACtD,GAAG,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAA;KACtD,CAAA;IACD,MAAM,EAAE;QACN,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,UAAU,EAAE,MAAM,CAAA;QAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;KAC3B,CAAA;CACF;AAED,MAAM,WAAW,WAAW;IAC1B,UAAU,CAAC,EAAE,eAAe,CAAA;IAC5B,QAAQ,CAAC,EAAE,CAAC,GAAG,EAAE,gBAAgB,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;CACxD;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,WAAW;iBAJhC,eAAe;eACjB,CAAC,GAAG,EAAE,gBAAgB,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC;;EAKxD;AAYD;;;;;;;;GAQG;AACH,wBAAsB,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAmCjE;AAED,oEAAoE;AACpE,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE;IAAE,UAAU,CAAC,EAAE,eAAe,CAAC;IAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,EAAE,gBAAgB,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;CAAE,GAClG,IAAI,CAON;AAED;;;GAGG;AACH,wBAAsB,aAAa,CAAC,MAAM,EAAE;IAC1C,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;CAC3B,GAAG,OAAO,CAAC,IAAI,CAAC,CAyBhB;AAED;;;;;GAKG;AACH,wBAAsB,YAAY,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAuCvG;AAED,yEAAyE;AACzE,wBAAgB,mBAAmB,IAAI,eAAe,EAAE,CAEvD"}
|
package/dist/labels.js
CHANGED
|
@@ -6,13 +6,53 @@ var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExte
|
|
|
6
6
|
}
|
|
7
7
|
return path;
|
|
8
8
|
};
|
|
9
|
+
/**
|
|
10
|
+
* Label system for applying moderation labels to records as they are indexed.
|
|
11
|
+
*
|
|
12
|
+
* Place label modules in the `labels/` directory. Each module default-exports
|
|
13
|
+
* an object with a `definition` (label metadata) and/or an `evaluate` function
|
|
14
|
+
* (rule that returns label values for a given record).
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```ts
|
|
18
|
+
* // labels/nsfw.ts
|
|
19
|
+
* import type { LabelRuleContext } from '@hatk/hatk/labels'
|
|
20
|
+
*
|
|
21
|
+
* export default {
|
|
22
|
+
* definition: {
|
|
23
|
+
* identifier: 'nsfw',
|
|
24
|
+
* severity: 'alert',
|
|
25
|
+
* blurs: 'media',
|
|
26
|
+
* defaultSetting: 'warn',
|
|
27
|
+
* locales: [{ lang: 'en', name: 'NSFW', description: 'Not safe for work' }],
|
|
28
|
+
* },
|
|
29
|
+
*
|
|
30
|
+
* async evaluate(ctx: LabelRuleContext): Promise<string[]> {
|
|
31
|
+
* if (ctx.record.value.nsfw === true) return ['nsfw']
|
|
32
|
+
* return []
|
|
33
|
+
* },
|
|
34
|
+
* }
|
|
35
|
+
* ```
|
|
36
|
+
*/
|
|
9
37
|
import { resolve } from 'node:path';
|
|
10
38
|
import { readdirSync } from 'node:fs';
|
|
11
|
-
import { querySQL, runSQL, insertLabels, getSchema } from "./db.js";
|
|
39
|
+
import { querySQL, runSQL, insertLabels, getSchema } from "./database/db.js";
|
|
12
40
|
import { log, emit } from "./logger.js";
|
|
41
|
+
export function defineLabel(module) {
|
|
42
|
+
return { __type: 'labels', ...module };
|
|
43
|
+
}
|
|
13
44
|
const rules = [];
|
|
14
45
|
let labelDefs = [];
|
|
15
46
|
let labelSrc = 'self';
|
|
47
|
+
/**
|
|
48
|
+
* Discover and load label rule modules from the `labels/` directory.
|
|
49
|
+
*
|
|
50
|
+
* Each module should default-export an object with an optional `definition`
|
|
51
|
+
* (label metadata like severity and blur behavior) and an optional `evaluate`
|
|
52
|
+
* function that returns label values to apply to a record.
|
|
53
|
+
*
|
|
54
|
+
* @param labelsDir - Absolute path to the `labels/` directory
|
|
55
|
+
*/
|
|
16
56
|
export async function initLabels(labelsDir) {
|
|
17
57
|
let files;
|
|
18
58
|
try {
|
|
@@ -26,7 +66,7 @@ export async function initLabels(labelsDir) {
|
|
|
26
66
|
for (const file of files) {
|
|
27
67
|
const name = file.replace(/\.(ts|js)$/, '');
|
|
28
68
|
const scriptPath = resolve(labelsDir, file);
|
|
29
|
-
const mod = await import(__rewriteRelativeImportExtension(scriptPath));
|
|
69
|
+
const mod = await import(__rewriteRelativeImportExtension(/* @vite-ignore */ `${scriptPath}?t=${Date.now()}`));
|
|
30
70
|
const handler = mod.default;
|
|
31
71
|
if (handler.definition) {
|
|
32
72
|
labelDefs.push(handler.definition);
|
|
@@ -45,6 +85,19 @@ export async function initLabels(labelsDir) {
|
|
|
45
85
|
log(`[labels] ${labelDefs.length} label definitions loaded`);
|
|
46
86
|
}
|
|
47
87
|
}
|
|
88
|
+
/** Register a single label module from a scanned server/ module. */
|
|
89
|
+
export function registerLabelModule(name, labelMod) {
|
|
90
|
+
if (labelMod.definition) {
|
|
91
|
+
labelDefs.push(labelMod.definition);
|
|
92
|
+
}
|
|
93
|
+
if (labelMod.evaluate) {
|
|
94
|
+
rules.push({ name, evaluate: labelMod.evaluate });
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Evaluate all loaded label rules against a record and persist any resulting labels.
|
|
99
|
+
* Called after each record is indexed. Rule errors are logged but never block indexing.
|
|
100
|
+
*/
|
|
48
101
|
export async function runLabelRules(record) {
|
|
49
102
|
if (rules.length === 0)
|
|
50
103
|
return;
|
|
@@ -69,15 +122,21 @@ export async function runLabelRules(record) {
|
|
|
69
122
|
emit('labels', 'applied', { count: allLabels.length, uri: record.uri, vals: allLabels.map((l) => l.val) });
|
|
70
123
|
}
|
|
71
124
|
}
|
|
125
|
+
/**
|
|
126
|
+
* Re-evaluate all label rules against every existing record in the given collections.
|
|
127
|
+
* Used by `/admin/rescan-labels` to apply new or updated rules retroactively.
|
|
128
|
+
*
|
|
129
|
+
* @returns Count of records scanned and new labels applied
|
|
130
|
+
*/
|
|
72
131
|
export async function rescanLabels(collections) {
|
|
73
|
-
const beforeRows = await querySQL(`SELECT COUNT(*) as count FROM _labels`);
|
|
132
|
+
const beforeRows = (await querySQL(`SELECT COUNT(*) as count FROM _labels`));
|
|
74
133
|
const beforeCount = Number(beforeRows[0]?.count || 0);
|
|
75
134
|
let scanned = 0;
|
|
76
135
|
for (const collection of collections) {
|
|
77
136
|
const schema = getSchema(collection);
|
|
78
137
|
if (!schema)
|
|
79
138
|
continue;
|
|
80
|
-
const rows = await querySQL(`SELECT * FROM ${schema.tableName}`);
|
|
139
|
+
const rows = (await querySQL(`SELECT * FROM ${schema.tableName}`));
|
|
81
140
|
for (const row of rows) {
|
|
82
141
|
scanned++;
|
|
83
142
|
const value = {};
|
|
@@ -85,7 +144,7 @@ export async function rescanLabels(collections) {
|
|
|
85
144
|
let v = row[col.name];
|
|
86
145
|
if (v === null || v === undefined)
|
|
87
146
|
continue;
|
|
88
|
-
if (col.
|
|
147
|
+
if (col.isJson && typeof v === 'string') {
|
|
89
148
|
try {
|
|
90
149
|
v = JSON.parse(v);
|
|
91
150
|
}
|
|
@@ -102,10 +161,11 @@ export async function rescanLabels(collections) {
|
|
|
102
161
|
});
|
|
103
162
|
}
|
|
104
163
|
}
|
|
105
|
-
const afterRows = await querySQL(`SELECT COUNT(*) as count FROM _labels`);
|
|
164
|
+
const afterRows = (await querySQL(`SELECT COUNT(*) as count FROM _labels`));
|
|
106
165
|
const afterCount = Number(afterRows[0]?.count || 0);
|
|
107
166
|
return { scanned, labeled: afterCount - beforeCount };
|
|
108
167
|
}
|
|
168
|
+
/** Return all label definitions discovered during {@link initLabels}. */
|
|
109
169
|
export function getLabelDefinitions() {
|
|
110
170
|
return labelDefs;
|
|
111
171
|
}
|
package/dist/logger.d.ts
CHANGED
|
@@ -1,4 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unstructured debug log — use sparingly for human-readable dev output.
|
|
3
|
+
* Prefer {@link emit} for anything that should be queryable in production.
|
|
4
|
+
* Disabled when `DEBUG=0`.
|
|
5
|
+
*/
|
|
1
6
|
export declare function log(...args: unknown[]): void;
|
|
7
|
+
/**
|
|
8
|
+
* Emit a structured wide event as a single JSON line to stdout.
|
|
9
|
+
*
|
|
10
|
+
* Each call produces one canonical log line with a timestamp, module, operation,
|
|
11
|
+
* and arbitrary key-value fields — designed for columnar search and aggregation,
|
|
12
|
+
* not string grep. Pack as much context as possible into `fields` (request IDs,
|
|
13
|
+
* durations, status codes, user DIDs, counts) so a single event tells the full
|
|
14
|
+
* story. See https://loggingsucks.com for the philosophy behind this approach.
|
|
15
|
+
*
|
|
16
|
+
* Disabled when `DEBUG=0`.
|
|
17
|
+
*
|
|
18
|
+
* @param module - Subsystem emitting the event (e.g. "server", "indexer", "backfill")
|
|
19
|
+
* @param op - Operation name (e.g. "request", "commit", "memory")
|
|
20
|
+
* @param fields - High-cardinality key-value context — include everything relevant
|
|
21
|
+
*/
|
|
2
22
|
export declare function emit(module: string, op: string, fields: Record<string, unknown>): void;
|
|
23
|
+
/**
|
|
24
|
+
* Start a millisecond timer. Call the returned function to get elapsed ms.
|
|
25
|
+
* Use with {@link emit} to add `duration_ms` to wide events.
|
|
26
|
+
*
|
|
27
|
+
* @example
|
|
28
|
+
* const elapsed = timer()
|
|
29
|
+
* await doWork()
|
|
30
|
+
* emit('server', 'request', { path, status_code, duration_ms: elapsed() })
|
|
31
|
+
*/
|
|
3
32
|
export declare function timer(): () => number;
|
|
4
33
|
//# sourceMappingURL=logger.d.ts.map
|
package/dist/logger.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA,wBAAgB,GAAG,CAAC,GAAG,IAAI,EAAE,OAAO,EAAE,GAAG,IAAI,CAG5C;AAED,wBAAgB,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAWtF;AAED,wBAAgB,KAAK,IAAI,MAAM,MAAM,CAGpC"}
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,wBAAgB,GAAG,CAAC,GAAG,IAAI,EAAE,OAAO,EAAE,GAAG,IAAI,CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAWtF;AAED;;;;;;;;GAQG;AACH,wBAAgB,KAAK,IAAI,MAAM,MAAM,CAGpC"}
|
package/dist/logger.js
CHANGED
|
@@ -1,8 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unstructured debug log — use sparingly for human-readable dev output.
|
|
3
|
+
* Prefer {@link emit} for anything that should be queryable in production.
|
|
4
|
+
* Disabled when `DEBUG=0`.
|
|
5
|
+
*/
|
|
1
6
|
export function log(...args) {
|
|
2
7
|
if (process.env.DEBUG === '0')
|
|
3
8
|
return;
|
|
4
9
|
console.log(...args);
|
|
5
10
|
}
|
|
11
|
+
/**
|
|
12
|
+
* Emit a structured wide event as a single JSON line to stdout.
|
|
13
|
+
*
|
|
14
|
+
* Each call produces one canonical log line with a timestamp, module, operation,
|
|
15
|
+
* and arbitrary key-value fields — designed for columnar search and aggregation,
|
|
16
|
+
* not string grep. Pack as much context as possible into `fields` (request IDs,
|
|
17
|
+
* durations, status codes, user DIDs, counts) so a single event tells the full
|
|
18
|
+
* story. See https://loggingsucks.com for the philosophy behind this approach.
|
|
19
|
+
*
|
|
20
|
+
* Disabled when `DEBUG=0`.
|
|
21
|
+
*
|
|
22
|
+
* @param module - Subsystem emitting the event (e.g. "server", "indexer", "backfill")
|
|
23
|
+
* @param op - Operation name (e.g. "request", "commit", "memory")
|
|
24
|
+
* @param fields - High-cardinality key-value context — include everything relevant
|
|
25
|
+
*/
|
|
6
26
|
export function emit(module, op, fields) {
|
|
7
27
|
if (process.env.DEBUG === '0')
|
|
8
28
|
return;
|
|
@@ -17,6 +37,15 @@ export function emit(module, op, fields) {
|
|
|
17
37
|
}
|
|
18
38
|
process.stdout.write(JSON.stringify(entry) + '\n');
|
|
19
39
|
}
|
|
40
|
+
/**
|
|
41
|
+
* Start a millisecond timer. Call the returned function to get elapsed ms.
|
|
42
|
+
* Use with {@link emit} to add `duration_ms` to wide events.
|
|
43
|
+
*
|
|
44
|
+
* @example
|
|
45
|
+
* const elapsed = timer()
|
|
46
|
+
* await doWork()
|
|
47
|
+
* emit('server', 'request', { path, status_code, duration_ms: elapsed() })
|
|
48
|
+
*/
|
|
20
49
|
export function timer() {
|
|
21
50
|
const start = performance.now();
|
|
22
51
|
return () => Math.round(performance.now() - start);
|