@hatk/hatk 0.0.1-alpha.4 → 0.0.1-alpha.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/adapter.d.ts +19 -0
  2. package/dist/adapter.d.ts.map +1 -0
  3. package/dist/adapter.js +107 -0
  4. package/dist/backfill.d.ts +60 -1
  5. package/dist/backfill.d.ts.map +1 -1
  6. package/dist/backfill.js +167 -33
  7. package/dist/car.d.ts +59 -1
  8. package/dist/car.d.ts.map +1 -1
  9. package/dist/car.js +179 -7
  10. package/dist/cbor.d.ts +37 -0
  11. package/dist/cbor.d.ts.map +1 -1
  12. package/dist/cbor.js +36 -3
  13. package/dist/cid.d.ts +37 -0
  14. package/dist/cid.d.ts.map +1 -1
  15. package/dist/cid.js +38 -3
  16. package/dist/cli.js +417 -133
  17. package/dist/cloudflare/container.d.ts +73 -0
  18. package/dist/cloudflare/container.d.ts.map +1 -0
  19. package/dist/cloudflare/container.js +232 -0
  20. package/dist/cloudflare/hooks.d.ts +33 -0
  21. package/dist/cloudflare/hooks.d.ts.map +1 -0
  22. package/dist/cloudflare/hooks.js +40 -0
  23. package/dist/cloudflare/init.d.ts +27 -0
  24. package/dist/cloudflare/init.d.ts.map +1 -0
  25. package/dist/cloudflare/init.js +103 -0
  26. package/dist/cloudflare/worker.d.ts +27 -0
  27. package/dist/cloudflare/worker.d.ts.map +1 -0
  28. package/dist/cloudflare/worker.js +54 -0
  29. package/dist/config.d.ts +12 -1
  30. package/dist/config.d.ts.map +1 -1
  31. package/dist/config.js +36 -9
  32. package/dist/database/adapter-factory.d.ts +6 -0
  33. package/dist/database/adapter-factory.d.ts.map +1 -0
  34. package/dist/database/adapter-factory.js +20 -0
  35. package/dist/database/adapters/d1.d.ts +56 -0
  36. package/dist/database/adapters/d1.d.ts.map +1 -0
  37. package/dist/database/adapters/d1.js +108 -0
  38. package/dist/database/adapters/duckdb-search.d.ts +12 -0
  39. package/dist/database/adapters/duckdb-search.d.ts.map +1 -0
  40. package/dist/database/adapters/duckdb-search.js +27 -0
  41. package/dist/database/adapters/duckdb.d.ts +25 -0
  42. package/dist/database/adapters/duckdb.d.ts.map +1 -0
  43. package/dist/database/adapters/duckdb.js +161 -0
  44. package/dist/database/adapters/sqlite-search.d.ts +23 -0
  45. package/dist/database/adapters/sqlite-search.d.ts.map +1 -0
  46. package/dist/database/adapters/sqlite-search.js +74 -0
  47. package/dist/database/adapters/sqlite.d.ts +18 -0
  48. package/dist/database/adapters/sqlite.d.ts.map +1 -0
  49. package/dist/database/adapters/sqlite.js +87 -0
  50. package/dist/database/db.d.ts +159 -0
  51. package/dist/database/db.d.ts.map +1 -0
  52. package/dist/database/db.js +1445 -0
  53. package/dist/database/dialect.d.ts +45 -0
  54. package/dist/database/dialect.d.ts.map +1 -0
  55. package/dist/database/dialect.js +72 -0
  56. package/dist/database/fts.d.ts +27 -0
  57. package/dist/database/fts.d.ts.map +1 -0
  58. package/dist/database/fts.js +846 -0
  59. package/dist/database/index.d.ts +7 -0
  60. package/dist/database/index.d.ts.map +1 -0
  61. package/dist/database/index.js +6 -0
  62. package/dist/database/ports.d.ts +50 -0
  63. package/dist/database/ports.d.ts.map +1 -0
  64. package/dist/database/ports.js +1 -0
  65. package/dist/database/schema.d.ts +61 -0
  66. package/dist/database/schema.d.ts.map +1 -0
  67. package/dist/database/schema.js +394 -0
  68. package/dist/db.d.ts +1 -1
  69. package/dist/db.d.ts.map +1 -1
  70. package/dist/db.js +4 -38
  71. package/dist/dev-entry.d.ts +8 -0
  72. package/dist/dev-entry.d.ts.map +1 -0
  73. package/dist/dev-entry.js +111 -0
  74. package/dist/feeds.d.ts +12 -8
  75. package/dist/feeds.d.ts.map +1 -1
  76. package/dist/feeds.js +45 -6
  77. package/dist/fts.d.ts.map +1 -1
  78. package/dist/fts.js +5 -0
  79. package/dist/hooks.d.ts +43 -0
  80. package/dist/hooks.d.ts.map +1 -0
  81. package/dist/hooks.js +102 -0
  82. package/dist/hydrate.d.ts +6 -5
  83. package/dist/hydrate.d.ts.map +1 -1
  84. package/dist/hydrate.js +4 -16
  85. package/dist/indexer.d.ts +22 -0
  86. package/dist/indexer.d.ts.map +1 -1
  87. package/dist/indexer.js +70 -7
  88. package/dist/labels.d.ts +34 -0
  89. package/dist/labels.d.ts.map +1 -1
  90. package/dist/labels.js +66 -6
  91. package/dist/logger.d.ts +29 -0
  92. package/dist/logger.d.ts.map +1 -1
  93. package/dist/logger.js +29 -0
  94. package/dist/main.js +135 -67
  95. package/dist/mst.d.ts +18 -1
  96. package/dist/mst.d.ts.map +1 -1
  97. package/dist/mst.js +19 -8
  98. package/dist/oauth/db.d.ts.map +1 -1
  99. package/dist/oauth/db.js +43 -17
  100. package/dist/oauth/server.d.ts +2 -0
  101. package/dist/oauth/server.d.ts.map +1 -1
  102. package/dist/oauth/server.js +103 -8
  103. package/dist/oauth/session.d.ts +11 -0
  104. package/dist/oauth/session.d.ts.map +1 -0
  105. package/dist/oauth/session.js +65 -0
  106. package/dist/opengraph.d.ts +10 -0
  107. package/dist/opengraph.d.ts.map +1 -1
  108. package/dist/opengraph.js +73 -39
  109. package/dist/pds-proxy.d.ts +42 -0
  110. package/dist/pds-proxy.d.ts.map +1 -0
  111. package/dist/pds-proxy.js +189 -0
  112. package/dist/renderer.d.ts +27 -0
  113. package/dist/renderer.d.ts.map +1 -0
  114. package/dist/renderer.js +46 -0
  115. package/dist/resolve-hatk.d.ts +6 -0
  116. package/dist/resolve-hatk.d.ts.map +1 -0
  117. package/dist/resolve-hatk.js +20 -0
  118. package/dist/response.d.ts +16 -0
  119. package/dist/response.d.ts.map +1 -0
  120. package/dist/response.js +69 -0
  121. package/dist/scanner.d.ts +21 -0
  122. package/dist/scanner.d.ts.map +1 -0
  123. package/dist/scanner.js +88 -0
  124. package/dist/schema.d.ts +8 -0
  125. package/dist/schema.d.ts.map +1 -1
  126. package/dist/schema.js +29 -0
  127. package/dist/seed.d.ts +19 -0
  128. package/dist/seed.d.ts.map +1 -1
  129. package/dist/seed.js +43 -4
  130. package/dist/server-init.d.ts +8 -0
  131. package/dist/server-init.d.ts.map +1 -0
  132. package/dist/server-init.js +61 -0
  133. package/dist/server.d.ts +26 -3
  134. package/dist/server.d.ts.map +1 -1
  135. package/dist/server.js +528 -635
  136. package/dist/setup.d.ts +28 -1
  137. package/dist/setup.d.ts.map +1 -1
  138. package/dist/setup.js +50 -3
  139. package/dist/test.d.ts +1 -1
  140. package/dist/test.d.ts.map +1 -1
  141. package/dist/test.js +38 -32
  142. package/dist/views.js +1 -1
  143. package/dist/vite-plugin.d.ts +1 -1
  144. package/dist/vite-plugin.d.ts.map +1 -1
  145. package/dist/vite-plugin.js +254 -66
  146. package/dist/xrpc.d.ts +60 -10
  147. package/dist/xrpc.d.ts.map +1 -1
  148. package/dist/xrpc.js +155 -39
  149. package/package.json +13 -6
  150. package/public/admin.html +0 -54
package/dist/indexer.js CHANGED
@@ -1,11 +1,11 @@
1
1
  import { cborDecode } from "./cbor.js";
2
2
  import { parseCarFrame } from "./car.js";
3
- import { insertRecord, deleteRecord, setCursor, setRepoStatus, getRepoRetryInfo, listAllRepoStatuses } from "./db.js";
3
+ import { insertRecord, deleteRecord, setCursor, setRepoStatus, getRepoRetryInfo, listAllRepoStatuses, getDatabasePort, } from "./database/db.js";
4
4
  import { backfillRepo } from "./backfill.js";
5
- import { rebuildAllIndexes } from "./fts.js";
5
+ import { rebuildAllIndexes } from "./database/fts.js";
6
6
  import { log, emit, timer } from "./logger.js";
7
7
  import { runLabelRules } from "./labels.js";
8
- import { getLexiconArray } from "./schema.js";
8
+ import { getLexiconArray } from "./database/schema.js";
9
9
  import { validateRecord } from '@bigmoves/lexicon';
10
10
  let buffer = [];
11
11
  let flushTimer = null;
@@ -18,7 +18,8 @@ let ftsRebuildInterval = 500;
18
18
  const pendingBuffers = new Map();
19
19
  // Track in-flight backfills to avoid duplicates
20
20
  const backfillInFlight = new Set();
21
- const MAX_CONCURRENT_BACKFILLS = 5;
21
+ const backfillPromises = new Map();
22
+ const pendingReschedule = new Set();
22
23
  // In-memory cache of repo status to avoid flooding the DB read queue
23
24
  const repoStatusCache = new Map();
24
25
  // Set by startIndexer
@@ -27,6 +28,12 @@ let indexerSignalCollections;
27
28
  let indexerPinnedRepos = null;
28
29
  let indexerFetchTimeout;
29
30
  let indexerMaxRetries;
31
+ let maxConcurrentBackfills = 3;
32
+ /**
33
+ * Flush the write buffer — insert all buffered records, update the relay cursor,
34
+ * run label rules on inserted records, and trigger FTS rebuilds when the write
35
+ * threshold is reached. Emits a wide event with batch stats.
36
+ */
30
37
  async function flushBuffer() {
31
38
  if (buffer.length === 0)
32
39
  return;
@@ -86,9 +93,14 @@ async function flushBuffer() {
86
93
  writesSinceRebuild += batch.length;
87
94
  if (writesSinceRebuild >= ftsRebuildInterval) {
88
95
  writesSinceRebuild = 0;
89
- rebuildAllIndexes([...indexerCollections]).catch(() => { });
96
+ // Skip periodic full rebuild for SQLite — it uses incremental FTS updates
97
+ const port = getDatabasePort();
98
+ if (port.dialect !== 'sqlite') {
99
+ rebuildAllIndexes([...indexerCollections]).catch(() => { });
100
+ }
90
101
  }
91
102
  }
103
+ /** Schedule a flush after FLUSH_INTERVAL_MS if one isn't already pending. */
92
104
  function scheduleFlush() {
93
105
  if (flushTimer)
94
106
  return;
@@ -97,6 +109,7 @@ function scheduleFlush() {
97
109
  await flushBuffer();
98
110
  }, FLUSH_INTERVAL_MS);
99
111
  }
112
+ /** Add a record to the write buffer. Flushes immediately if BATCH_SIZE is reached. */
100
113
  function bufferWrite(item) {
101
114
  buffer.push(item);
102
115
  if (buffer.length >= BATCH_SIZE) {
@@ -110,11 +123,39 @@ function bufferWrite(item) {
110
123
  scheduleFlush();
111
124
  }
112
125
  }
126
+ /**
127
+ * Auto-backfill a DID's repo when first seen on the firehose.
128
+ *
129
+ * Fetches the full repo via CAR export, inserts all records, then replays any
130
+ * firehose events that arrived during the backfill. Concurrency is capped at
131
+ * `maxConcurrentBackfills`. Failed backfills retry with a linearly increasing delay (capped at one hour) up
132
+ * to `maxRetries`.
133
+ */
134
+ /** Wait for a DID's backfill to complete if one is in flight. */
135
+ export function awaitBackfill(did) {
136
+ const entry = backfillPromises.get(did);
137
+ return entry ? entry.promise : Promise.resolve();
138
+ }
113
139
  export async function triggerAutoBackfill(did, attempt = 0) {
114
140
  if (backfillInFlight.has(did))
115
141
  return;
142
+ if (backfillInFlight.size >= maxConcurrentBackfills) {
143
+ if (!pendingReschedule.has(did)) {
144
+ pendingReschedule.add(did);
145
+ setTimeout(() => {
146
+ pendingReschedule.delete(did);
147
+ triggerAutoBackfill(did, attempt);
148
+ }, 10_000);
149
+ }
150
+ return;
151
+ }
116
152
  backfillInFlight.add(did);
117
153
  pendingBuffers.set(did, []);
154
+ if (!backfillPromises.has(did)) {
155
+ let resolveBackfill;
156
+ const promise = new Promise((r) => { resolveBackfill = r; });
157
+ backfillPromises.set(did, { promise, resolve: resolveBackfill });
158
+ }
118
159
  if (attempt === 0)
119
160
  await setRepoStatus(did, 'pending');
120
161
  const elapsed = timer();
@@ -154,6 +195,12 @@ export async function triggerAutoBackfill(did, attempt = 0) {
154
195
  error,
155
196
  retry_count: currentRetryCount,
156
197
  });
198
+ // Resolve awaiting callers (e.g. on-login hooks)
199
+ const entry = backfillPromises.get(did);
200
+ if (entry) {
201
+ entry.resolve();
202
+ backfillPromises.delete(did);
203
+ }
157
204
  if (status === 'error' && currentRetryCount < indexerMaxRetries) {
158
205
  const delaySecs = Math.min(currentRetryCount * 60, 3600);
159
206
  const delayMs = Math.max(delaySecs, 60) * 1000;
@@ -162,7 +209,7 @@ export async function triggerAutoBackfill(did, attempt = 0) {
162
209
  }, delayMs);
163
210
  }
164
211
  }
165
- // Periodic memory diagnostics
212
+ /** Emit a memory diagnostics wide event every 30s for observability. */
166
213
  function startMemoryDiagnostics() {
167
214
  setInterval(() => {
168
215
  const mem = process.memoryUsage();
@@ -184,6 +231,16 @@ function startMemoryDiagnostics() {
184
231
  });
185
232
  }, 30_000);
186
233
  }
234
+ /**
235
+ * Connect to the AT Protocol relay firehose and begin indexing.
236
+ *
237
+ * Opens a WebSocket to `subscribeRepos`, processes commit messages synchronously
238
+ * on the event loop to minimize backpressure, and batches writes through
239
+ * {@link flushBuffer}. New DIDs trigger auto-backfill via {@link triggerAutoBackfill}.
240
+ * Reconnects automatically on disconnect after a 3s delay.
241
+ *
242
+ * @returns The WebSocket connection (for shutdown coordination)
243
+ */
187
244
  export async function startIndexer(opts) {
188
245
  const { relayUrl, collections, cursor, fetchTimeout } = opts;
189
246
  if (opts.ftsRebuildInterval != null)
@@ -193,6 +250,7 @@ export async function startIndexer(opts) {
193
250
  indexerPinnedRepos = opts.pinnedRepos || null;
194
251
  indexerFetchTimeout = fetchTimeout;
195
252
  indexerMaxRetries = opts.maxRetries;
253
+ maxConcurrentBackfills = opts.parallelism ?? 3;
196
254
  // Pre-populate repo status cache from DB so non-signal updates
197
255
  // (e.g. profile changes) are processed for already-tracked DIDs
198
256
  if (repoStatusCache.size === 0) {
@@ -231,6 +289,11 @@ export async function startIndexer(opts) {
231
289
  });
232
290
  return ws;
233
291
  }
292
+ /**
293
+ * Process a single firehose message. Decodes the CBOR header/body, filters
294
+ * for relevant collections, validates records against lexicons, and routes
295
+ * writes to the buffer (or pending buffer if the DID is mid-backfill).
296
+ */
234
297
  function processMessage(bytes, collections) {
235
298
  const header = cborDecode(bytes, 0);
236
299
  const body = cborDecode(bytes, header.offset);
@@ -264,7 +327,7 @@ function processMessage(bytes, collections) {
264
327
  repoStatusCache.set(did, 'unknown');
265
328
  }
266
329
  if (hasSignalOp && (!indexerPinnedRepos || indexerPinnedRepos.has(did))) {
267
- if (repoStatus === null && backfillInFlight.size < MAX_CONCURRENT_BACKFILLS) {
330
+ if (repoStatus === null && backfillInFlight.size < maxConcurrentBackfills) {
268
331
  repoStatusCache.set(did, 'pending');
269
332
  triggerAutoBackfill(did);
270
333
  }
package/dist/labels.d.ts CHANGED
@@ -13,7 +13,34 @@ export interface LabelRuleContext {
13
13
  value: Record<string, any>;
14
14
  };
15
15
  }
16
+ export interface LabelModule {
17
+ definition?: LabelDefinition;
18
+ evaluate?: (ctx: LabelRuleContext) => Promise<string[]>;
19
+ }
20
+ export declare function defineLabel(module: LabelModule): {
21
+ definition?: LabelDefinition;
22
+ evaluate?: (ctx: LabelRuleContext) => Promise<string[]>;
23
+ __type: "labels";
24
+ };
25
+ /**
26
+ * Discover and load label rule modules from the `labels/` directory.
27
+ *
28
+ * Each module should default-export an object with an optional `definition`
29
+ * (label metadata like severity and blur behavior) and an optional `evaluate`
30
+ * function that returns label values to apply to a record.
31
+ *
32
+ * @param labelsDir - Absolute path to the `labels/` directory
33
+ */
16
34
  export declare function initLabels(labelsDir: string): Promise<void>;
35
+ /** Register a single label module from a scanned server/ module. */
36
+ export declare function registerLabelModule(name: string, labelMod: {
37
+ definition?: LabelDefinition;
38
+ evaluate?: (ctx: LabelRuleContext) => Promise<string[]>;
39
+ }): void;
40
+ /**
41
+ * Evaluate all loaded label rules against a record and persist any resulting labels.
42
+ * Called after each record is indexed. Rule errors are logged but never block indexing.
43
+ */
17
44
  export declare function runLabelRules(record: {
18
45
  uri: string;
19
46
  cid: string;
@@ -21,9 +48,16 @@ export declare function runLabelRules(record: {
21
48
  collection: string;
22
49
  value: Record<string, any>;
23
50
  }): Promise<void>;
51
+ /**
52
+ * Re-evaluate all label rules against every existing record in the given collections.
53
+ * Used by `/admin/rescan-labels` to apply new or updated rules retroactively.
54
+ *
55
+ * @returns Count of records scanned and new labels applied
56
+ */
24
57
  export declare function rescanLabels(collections: string[]): Promise<{
25
58
  scanned: number;
26
59
  labeled: number;
27
60
  }>;
61
+ /** Return all label definitions registered via {@link initLabels} or {@link registerLabelModule}. */
28
62
  export declare function getLabelDefinitions(): LabelDefinition[];
29
63
  //# sourceMappingURL=labels.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../src/labels.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAA;AAIlD,wDAAwD;AACxD,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE;QACF,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,GAAG,EAAE,CAAC,CAAA;QACtD,GAAG,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAA;KACtD,CAAA;IACD,MAAM,EAAE;QACN,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,UAAU,EAAE,MAAM,CAAA;QAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;KAC3B,CAAA;CACF;AAWD,wBAAsB,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAmCjE;AAED,wBAAsB,aAAa,CAAC,MAAM,EAAE;IAC1C,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;CAC3B,GAAG,OAAO,CAAC,IAAI,CAAC,CAyBhB;AAED,wBAAsB,YAAY,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAuCvG;AAED,wBAAgB,mBAAmB,IAAI,eAAe,EAAE,CAEvD"}
1
+ {"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../src/labels.ts"],"names":[],"mappings":"AA8BA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAA;AAIlD,wDAAwD;AACxD,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE;QACF,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,GAAG,EAAE,CAAC,CAAA;QACtD,GAAG,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAA;KACtD,CAAA;IACD,MAAM,EAAE;QACN,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,UAAU,EAAE,MAAM,CAAA;QAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;KAC3B,CAAA;CACF;AAED,MAAM,WAAW,WAAW;IAC1B,UAAU,CAAC,EAAE,eAAe,CAAA;IAC5B,QAAQ,CAAC,EAAE,CAAC,GAAG,EAAE,gBAAgB,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;CACxD;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,WAAW;iBAJhC,eAAe;eACjB,CAAC,GAAG,EAAE,gBAAgB,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC;;EAKxD;AAYD;;;;;;;;GAQG;AACH,wBAAsB,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAmCjE;AAED,oEAAoE;AACpE,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE;IAAE,UAAU,CAAC,EAAE,eAAe,CAAC;IAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,EAAE,gBAAgB,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;CAAE,GAClG,IAAI,CAON;AAED;;;GAGG;AACH,wBAAsB,aAAa,CAAC,MAAM,EAAE;IAC1C,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;CAC3B,GAAG,OAAO,CAAC,IAAI,CAAC,CAyBhB;AAED;;;;;GAKG;AACH,wBAAsB,YAAY,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAuCvG;AAED,yEAAyE;AACzE,wBAAgB,mBAAmB,IAAI,eAAe,EAAE,CAEvD"}
package/dist/labels.js CHANGED
@@ -6,13 +6,53 @@ var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExte
6
6
  }
7
7
  return path;
8
8
  };
9
+ /**
10
+ * Label system for applying moderation labels to records as they are indexed.
11
+ *
12
+ * Place label modules in the `labels/` directory. Each module default-exports
13
+ * an object with a `definition` (label metadata) and/or an `evaluate` function
14
+ * (rule that returns label values for a given record).
15
+ *
16
+ * @example
17
+ * ```ts
18
+ * // labels/nsfw.ts
19
+ * import type { LabelRuleContext } from '@hatk/hatk/labels'
20
+ *
21
+ * export default {
22
+ * definition: {
23
+ * identifier: 'nsfw',
24
+ * severity: 'alert',
25
+ * blurs: 'media',
26
+ * defaultSetting: 'warn',
27
+ * locales: [{ lang: 'en', name: 'NSFW', description: 'Not safe for work' }],
28
+ * },
29
+ *
30
+ * async evaluate(ctx: LabelRuleContext): Promise<string[]> {
31
+ * if (ctx.record.value.nsfw === true) return ['nsfw']
32
+ * return []
33
+ * },
34
+ * }
35
+ * ```
36
+ */
9
37
  import { resolve } from 'node:path';
10
38
  import { readdirSync } from 'node:fs';
11
- import { querySQL, runSQL, insertLabels, getSchema } from "./db.js";
39
+ import { querySQL, runSQL, insertLabels, getSchema } from "./database/db.js";
12
40
  import { log, emit } from "./logger.js";
41
+ export function defineLabel(module) {
42
+ return { __type: 'labels', ...module };
43
+ }
13
44
  const rules = [];
14
45
  let labelDefs = [];
15
46
  let labelSrc = 'self';
47
+ /**
48
+ * Discover and load label rule modules from the `labels/` directory.
49
+ *
50
+ * Each module should default-export an object with an optional `definition`
51
+ * (label metadata like severity and blur behavior) and an optional `evaluate`
52
+ * function that returns label values to apply to a record.
53
+ *
54
+ * @param labelsDir - Absolute path to the `labels/` directory
55
+ */
16
56
  export async function initLabels(labelsDir) {
17
57
  let files;
18
58
  try {
@@ -26,7 +66,7 @@ export async function initLabels(labelsDir) {
26
66
  for (const file of files) {
27
67
  const name = file.replace(/\.(ts|js)$/, '');
28
68
  const scriptPath = resolve(labelsDir, file);
29
- const mod = await import(__rewriteRelativeImportExtension(scriptPath));
69
+ const mod = await import(__rewriteRelativeImportExtension(/* @vite-ignore */ `${scriptPath}?t=${Date.now()}`));
30
70
  const handler = mod.default;
31
71
  if (handler.definition) {
32
72
  labelDefs.push(handler.definition);
@@ -45,6 +85,19 @@ export async function initLabels(labelsDir) {
45
85
  log(`[labels] ${labelDefs.length} label definitions loaded`);
46
86
  }
47
87
  }
88
+ /** Register a single label module from a scanned server/ module. */
89
+ export function registerLabelModule(name, labelMod) {
90
+ if (labelMod.definition) {
91
+ labelDefs.push(labelMod.definition);
92
+ }
93
+ if (labelMod.evaluate) {
94
+ rules.push({ name, evaluate: labelMod.evaluate });
95
+ }
96
+ }
97
+ /**
98
+ * Evaluate all loaded label rules against a record and persist any resulting labels.
99
+ * Called after each record is indexed. Rule errors are logged but never block indexing.
100
+ */
48
101
  export async function runLabelRules(record) {
49
102
  if (rules.length === 0)
50
103
  return;
@@ -69,15 +122,21 @@ export async function runLabelRules(record) {
69
122
  emit('labels', 'applied', { count: allLabels.length, uri: record.uri, vals: allLabels.map((l) => l.val) });
70
123
  }
71
124
  }
125
+ /**
126
+ * Re-evaluate all label rules against every existing record in the given collections.
127
+ * Used by `/admin/rescan-labels` to apply new or updated rules retroactively.
128
+ *
129
+ * @returns Count of records scanned and new labels applied
130
+ */
72
131
  export async function rescanLabels(collections) {
73
- const beforeRows = await querySQL(`SELECT COUNT(*) as count FROM _labels`);
132
+ const beforeRows = (await querySQL(`SELECT COUNT(*) as count FROM _labels`));
74
133
  const beforeCount = Number(beforeRows[0]?.count || 0);
75
134
  let scanned = 0;
76
135
  for (const collection of collections) {
77
136
  const schema = getSchema(collection);
78
137
  if (!schema)
79
138
  continue;
80
- const rows = await querySQL(`SELECT * FROM ${schema.tableName}`);
139
+ const rows = (await querySQL(`SELECT * FROM ${schema.tableName}`));
81
140
  for (const row of rows) {
82
141
  scanned++;
83
142
  const value = {};
@@ -85,7 +144,7 @@ export async function rescanLabels(collections) {
85
144
  let v = row[col.name];
86
145
  if (v === null || v === undefined)
87
146
  continue;
88
- if (col.duckdbType === 'JSON' && typeof v === 'string') {
147
+ if (col.isJson && typeof v === 'string') {
89
148
  try {
90
149
  v = JSON.parse(v);
91
150
  }
@@ -102,10 +161,11 @@ export async function rescanLabels(collections) {
102
161
  });
103
162
  }
104
163
  }
105
- const afterRows = await querySQL(`SELECT COUNT(*) as count FROM _labels`);
164
+ const afterRows = (await querySQL(`SELECT COUNT(*) as count FROM _labels`));
106
165
  const afterCount = Number(afterRows[0]?.count || 0);
107
166
  return { scanned, labeled: afterCount - beforeCount };
108
167
  }
168
+ /** Return all label definitions registered via {@link initLabels} or {@link registerLabelModule}. */
109
169
  export function getLabelDefinitions() {
110
170
  return labelDefs;
111
171
  }
package/dist/logger.d.ts CHANGED
@@ -1,4 +1,33 @@
1
+ /**
2
+ * Unstructured debug log — use sparingly for human-readable dev output.
3
+ * Prefer {@link emit} for anything that should be queryable in production.
4
+ * Disabled when `DEBUG=0`.
5
+ */
1
6
  export declare function log(...args: unknown[]): void;
7
+ /**
8
+ * Emit a structured wide event as a single JSON line to stdout.
9
+ *
10
+ * Each call produces one canonical log line with a timestamp, module, operation,
11
+ * and arbitrary key-value fields — designed for columnar search and aggregation,
12
+ * not string grep. Pack as much context as possible into `fields` (request IDs,
13
+ * durations, status codes, user DIDs, counts) so a single event tells the full
14
+ * story. See https://loggingsucks.com for the philosophy behind this approach.
15
+ *
16
+ * Disabled when `DEBUG=0`.
17
+ *
18
+ * @param module - Subsystem emitting the event (e.g. "server", "indexer", "backfill")
19
+ * @param op - Operation name (e.g. "request", "commit", "memory")
20
+ * @param fields - High-cardinality key-value context — include everything relevant
21
+ */
2
22
  export declare function emit(module: string, op: string, fields: Record<string, unknown>): void;
23
+ /**
24
+ * Start a millisecond timer. Call the returned function to get elapsed ms.
25
+ * Use with {@link emit} to add `duration_ms` to wide events.
26
+ *
27
+ * @example
28
+ * const elapsed = timer()
29
+ * await doWork()
30
+ * emit('server', 'request', { path, status_code, duration_ms: elapsed() })
31
+ */
3
32
  export declare function timer(): () => number;
4
33
  //# sourceMappingURL=logger.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA,wBAAgB,GAAG,CAAC,GAAG,IAAI,EAAE,OAAO,EAAE,GAAG,IAAI,CAG5C;AAED,wBAAgB,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAWtF;AAED,wBAAgB,KAAK,IAAI,MAAM,MAAM,CAGpC"}
1
+ {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,wBAAgB,GAAG,CAAC,GAAG,IAAI,EAAE,OAAO,EAAE,GAAG,IAAI,CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAWtF;AAED;;;;;;;;GAQG;AACH,wBAAgB,KAAK,IAAI,MAAM,MAAM,CAGpC"}
package/dist/logger.js CHANGED
@@ -1,8 +1,28 @@
1
+ /**
2
+ * Unstructured debug log — use sparingly for human-readable dev output.
3
+ * Prefer {@link emit} for anything that should be queryable in production.
4
+ * Disabled when `DEBUG=0`.
5
+ */
1
6
  export function log(...args) {
2
7
  if (process.env.DEBUG === '0')
3
8
  return;
4
9
  console.log(...args);
5
10
  }
11
+ /**
12
+ * Emit a structured wide event as a single JSON line to stdout.
13
+ *
14
+ * Each call produces one canonical log line with a timestamp, module, operation,
15
+ * and arbitrary key-value fields — designed for columnar search and aggregation,
16
+ * not string grep. Pack as much context as possible into `fields` (request IDs,
17
+ * durations, status codes, user DIDs, counts) so a single event tells the full
18
+ * story. See https://loggingsucks.com for the philosophy behind this approach.
19
+ *
20
+ * Disabled when `DEBUG=0`.
21
+ *
22
+ * @param module - Subsystem emitting the event (e.g. "server", "indexer", "backfill")
23
+ * @param op - Operation name (e.g. "request", "commit", "memory")
24
+ * @param fields - High-cardinality key-value context — include everything relevant
25
+ */
6
26
  export function emit(module, op, fields) {
7
27
  if (process.env.DEBUG === '0')
8
28
  return;
@@ -17,6 +37,15 @@ export function emit(module, op, fields) {
17
37
  }
18
38
  process.stdout.write(JSON.stringify(entry) + '\n');
19
39
  }
40
+ /**
41
+ * Start a millisecond timer. Call the returned function to get elapsed ms.
42
+ * Use with {@link emit} to add `duration_ms` to wide events.
43
+ *
44
+ * @example
45
+ * const elapsed = timer()
46
+ * await doWork()
47
+ * emit('server', 'request', { path, status_code, duration_ms: elapsed() })
48
+ */
20
49
  export function timer() {
21
50
  const start = performance.now();
22
51
  return () => Math.round(performance.now() - start);