@hatk/hatk 0.0.1-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/dist/backfill.d.ts +11 -0
  2. package/dist/backfill.d.ts.map +1 -0
  3. package/dist/backfill.js +328 -0
  4. package/dist/car.d.ts +5 -0
  5. package/dist/car.d.ts.map +1 -0
  6. package/dist/car.js +52 -0
  7. package/dist/cbor.d.ts +7 -0
  8. package/dist/cbor.d.ts.map +1 -0
  9. package/dist/cbor.js +89 -0
  10. package/dist/cid.d.ts +4 -0
  11. package/dist/cid.d.ts.map +1 -0
  12. package/dist/cid.js +39 -0
  13. package/dist/cli.d.ts +3 -0
  14. package/dist/cli.d.ts.map +1 -0
  15. package/dist/cli.js +1663 -0
  16. package/dist/config.d.ts +47 -0
  17. package/dist/config.d.ts.map +1 -0
  18. package/dist/config.js +43 -0
  19. package/dist/db.d.ts +134 -0
  20. package/dist/db.d.ts.map +1 -0
  21. package/dist/db.js +1361 -0
  22. package/dist/feeds.d.ts +95 -0
  23. package/dist/feeds.d.ts.map +1 -0
  24. package/dist/feeds.js +144 -0
  25. package/dist/fts.d.ts +20 -0
  26. package/dist/fts.d.ts.map +1 -0
  27. package/dist/fts.js +762 -0
  28. package/dist/hydrate.d.ts +23 -0
  29. package/dist/hydrate.d.ts.map +1 -0
  30. package/dist/hydrate.js +75 -0
  31. package/dist/indexer.d.ts +14 -0
  32. package/dist/indexer.d.ts.map +1 -0
  33. package/dist/indexer.js +316 -0
  34. package/dist/labels.d.ts +29 -0
  35. package/dist/labels.d.ts.map +1 -0
  36. package/dist/labels.js +111 -0
  37. package/dist/lex-types.d.ts +401 -0
  38. package/dist/lex-types.d.ts.map +1 -0
  39. package/dist/lex-types.js +4 -0
  40. package/dist/lexicon-resolve.d.ts +14 -0
  41. package/dist/lexicon-resolve.d.ts.map +1 -0
  42. package/dist/lexicon-resolve.js +280 -0
  43. package/dist/logger.d.ts +4 -0
  44. package/dist/logger.d.ts.map +1 -0
  45. package/dist/logger.js +23 -0
  46. package/dist/main.d.ts +3 -0
  47. package/dist/main.d.ts.map +1 -0
  48. package/dist/main.js +148 -0
  49. package/dist/mst.d.ts +6 -0
  50. package/dist/mst.d.ts.map +1 -0
  51. package/dist/mst.js +30 -0
  52. package/dist/oauth/client.d.ts +16 -0
  53. package/dist/oauth/client.d.ts.map +1 -0
  54. package/dist/oauth/client.js +54 -0
  55. package/dist/oauth/crypto.d.ts +28 -0
  56. package/dist/oauth/crypto.d.ts.map +1 -0
  57. package/dist/oauth/crypto.js +101 -0
  58. package/dist/oauth/db.d.ts +47 -0
  59. package/dist/oauth/db.d.ts.map +1 -0
  60. package/dist/oauth/db.js +139 -0
  61. package/dist/oauth/discovery.d.ts +22 -0
  62. package/dist/oauth/discovery.d.ts.map +1 -0
  63. package/dist/oauth/discovery.js +50 -0
  64. package/dist/oauth/dpop.d.ts +11 -0
  65. package/dist/oauth/dpop.d.ts.map +1 -0
  66. package/dist/oauth/dpop.js +56 -0
  67. package/dist/oauth/hooks.d.ts +10 -0
  68. package/dist/oauth/hooks.d.ts.map +1 -0
  69. package/dist/oauth/hooks.js +40 -0
  70. package/dist/oauth/server.d.ts +86 -0
  71. package/dist/oauth/server.d.ts.map +1 -0
  72. package/dist/oauth/server.js +572 -0
  73. package/dist/opengraph.d.ts +34 -0
  74. package/dist/opengraph.d.ts.map +1 -0
  75. package/dist/opengraph.js +198 -0
  76. package/dist/schema.d.ts +51 -0
  77. package/dist/schema.d.ts.map +1 -0
  78. package/dist/schema.js +358 -0
  79. package/dist/seed.d.ts +29 -0
  80. package/dist/seed.d.ts.map +1 -0
  81. package/dist/seed.js +86 -0
  82. package/dist/server.d.ts +6 -0
  83. package/dist/server.d.ts.map +1 -0
  84. package/dist/server.js +1024 -0
  85. package/dist/setup.d.ts +8 -0
  86. package/dist/setup.d.ts.map +1 -0
  87. package/dist/setup.js +48 -0
  88. package/dist/test-browser.d.ts +14 -0
  89. package/dist/test-browser.d.ts.map +1 -0
  90. package/dist/test-browser.js +26 -0
  91. package/dist/test.d.ts +47 -0
  92. package/dist/test.d.ts.map +1 -0
  93. package/dist/test.js +256 -0
  94. package/dist/views.d.ts +40 -0
  95. package/dist/views.d.ts.map +1 -0
  96. package/dist/views.js +178 -0
  97. package/dist/vite-plugin.d.ts +5 -0
  98. package/dist/vite-plugin.d.ts.map +1 -0
  99. package/dist/vite-plugin.js +86 -0
  100. package/dist/xrpc-client.d.ts +18 -0
  101. package/dist/xrpc-client.d.ts.map +1 -0
  102. package/dist/xrpc-client.js +54 -0
  103. package/dist/xrpc.d.ts +53 -0
  104. package/dist/xrpc.d.ts.map +1 -0
  105. package/dist/xrpc.js +139 -0
  106. package/fonts/Inter-Regular.woff +0 -0
  107. package/package.json +41 -0
  108. package/public/admin-auth.js +320 -0
  109. package/public/admin.html +2166 -0
@@ -0,0 +1,23 @@
1
import type { Row } from './lex-types.ts';
export type { Row };
/**
 * Context handed to a feed's hydrate function: the page of primary rows
 * plus batched lookup helpers backed by the local database (see hydrate.js).
 */
export interface HydrateContext<T = unknown> {
    // Primary rows for the current page.
    items: Row<T>[];
    // Authenticated viewer, or null for anonymous requests.
    viewer: {
        did: string;
    } | null;
    // Raw parameterized SQL access.
    db: {
        query: (sql: string, params?: unknown[]) => Promise<unknown[]>;
    };
    // Fetch records of one collection by AT-URI; the resulting Map is keyed by record URI.
    getRecords: <R = unknown>(collection: string, uris: string[]) => Promise<Map<string, Row<R>>>;
    // Batched lookup of records matching a field value (input values are deduplicated).
    lookup: <R = unknown>(collection: string, field: string, values: string[]) => Promise<Map<string, Row<R>>>;
    // Batched count of records grouped by a field value (input values are deduplicated).
    count: (collection: string, field: string, values: string[]) => Promise<Map<string, number>>;
    // Labels applied to the given URIs; presumably keyed by URI — verify against db.js.
    labels: (uris: string[]) => Promise<Map<string, unknown[]>>;
    // Build a URL for a blob ref; undefined when no URL can be produced.
    blobUrl: (did: string, ref: unknown, preset?: 'avatar' | 'banner' | 'feed_thumbnail' | 'feed_fullsize') => string | undefined;
}
/** Fetch records for URIs, reshape them, and filter out taken-down DIDs. */
export declare function resolveRecords(uris: string[]): Promise<Row<unknown>[]>;
/** Build a HydrateContext for a feed's hydrate function. */
export declare function buildHydrateContext(items: Row<unknown>[], viewer: {
    did: string;
} | null): HydrateContext;
//# sourceMappingURL=hydrate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hydrate.d.ts","sourceRoot":"","sources":["../src/hydrate.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,gBAAgB,CAAA;AAEzC,YAAY,EAAE,GAAG,EAAE,CAAA;AAInB,MAAM,WAAW,cAAc,CAAC,CAAC,GAAG,OAAO;IACzC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,CAAA;IACf,MAAM,EAAE;QAAE,GAAG,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAA;IAC9B,EAAE,EAAE;QAAE,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,EAAE,KAAK,OAAO,CAAC,OAAO,EAAE,CAAC,CAAA;KAAE,CAAA;IACtE,UAAU,EAAE,CAAC,CAAC,GAAG,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAC7F,MAAM,EAAE,CAAC,CAAC,GAAG,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAC1G,KAAK,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;IAC5F,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,CAAA;IAC3D,OAAO,EAAE,CACP,GAAG,EAAE,MAAM,EACX,GAAG,EAAE,OAAO,EACZ,MAAM,CAAC,EAAE,QAAQ,GAAG,QAAQ,GAAG,gBAAgB,GAAG,eAAe,KAC9D,MAAM,GAAG,SAAS,CAAA;CACxB;AAID,4EAA4E;AAC5E,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAoC5E;AAID,4DAA4D;AAC5D,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,MAAM,EAAE;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,GAAG,cAAc,CA4BzG"}
@@ -0,0 +1,75 @@
1
+ import { getRecordsByUris, countByFieldBatch, lookupByFieldBatch, querySQL, reshapeRow, queryLabelsForUris, filterTakendownDids, } from "./db.js";
2
+ import { blobUrl } from "./xrpc.js";
3
+ // --- Record Resolution ---
4
+ /** Fetch records for URIs, reshape them, and filter out taken-down DIDs. */
5
/** Fetch records for URIs, reshape them, and filter out taken-down DIDs. */
export async function resolveRecords(uris) {
    if (uris.length === 0)
        return [];
    // Bucket URIs by their collection segment so each collection is
    // fetched with one batched query.
    const urisPerCollection = new Map();
    for (const uri of uris) {
        const collection = uri.replace('at://', '').split('/')[1];
        const bucket = urisPerCollection.get(collection);
        if (bucket) {
            bucket.push(uri);
        }
        else {
            urisPerCollection.set(collection, [uri]);
        }
    }
    // Fetch each bucket and index results by URI.
    const fetched = new Map();
    for (const [collection, bucket] of urisPerCollection) {
        for (const record of await getRecordsByUris(collection, bucket)) {
            fetched.set(record.uri, record);
        }
    }
    // Drop anything authored by a taken-down DID.
    const authorDids = [...new Set([...fetched.values()].map((r) => r.did).filter(Boolean))];
    const takendown = await filterTakendownDids(authorDids);
    if (takendown.size > 0) {
        for (const [uri, record] of fetched) {
            if (takendown.has(record.did))
                fetched.delete(uri);
        }
    }
    // Preserve the caller's URI ordering; reshape and discard misses.
    const shaped = [];
    for (const uri of uris) {
        const row = fetched.get(uri);
        const reshaped = reshapeRow(row, row?.__childData, row?.__unionData);
        if (reshaped != null)
            shaped.push(reshaped);
    }
    return shaped;
}
41
+ // --- Context Builder ---
42
+ /** Build a HydrateContext for a feed's hydrate function. */
43
/** Build a HydrateContext for a feed's hydrate function. */
export function buildHydrateContext(items, viewer) {
    // Shared helper: drop falsy values and duplicates before batch queries.
    const dedupe = (values) => [...new Set(values.filter(Boolean))];
    const getRecords = async (collection, uris) => {
        const byUri = new Map();
        if (uris.length === 0)
            return byUri;
        for (const record of await getRecordsByUris(collection, uris)) {
            const shaped = reshapeRow(record, record?.__childData, record?.__unionData);
            if (shaped)
                byUri.set(shaped.uri, shaped);
        }
        return byUri;
    };
    const lookup = async (collection, field, values) =>
        values.length === 0 ? new Map() : lookupByFieldBatch(collection, field, dedupe(values));
    const count = async (collection, field, values) =>
        values.length === 0 ? new Map() : countByFieldBatch(collection, field, dedupe(values));
    return {
        items,
        viewer,
        db: { query: querySQL },
        getRecords,
        lookup,
        count,
        labels: queryLabelsForUris,
        blobUrl,
    };
}
@@ -0,0 +1,14 @@
1
/** Kick off a full repo backfill for a DID (retries pass a non-zero attempt). */
export declare function triggerAutoBackfill(did: string, attempt?: number): Promise<void>;
/** Configuration for the firehose indexer (see indexer.js). */
interface IndexerOpts {
    // Relay base URL; `/xrpc/com.atproto.sync.subscribeRepos` is appended.
    relayUrl: string;
    // Collection NSIDs to index.
    collections: Set<string>;
    // Collections whose activity triggers auto-backfill; defaults to `collections`.
    signalCollections?: Set<string>;
    // When set, only events from these DIDs are processed.
    pinnedRepos?: Set<string>;
    // Firehose sequence cursor to resume from.
    cursor?: string | null;
    // Timeout passed through to repo backfill fetches.
    fetchTimeout: number;
    // Maximum automatic backfill retries per repo.
    maxRetries: number;
    // Rebuild FTS indexes after this many buffered writes.
    ftsRebuildInterval?: number;
}
/** Connect to the relay firehose and start indexing; resolves with the socket. */
export declare function startIndexer(opts: IndexerOpts): Promise<WebSocket>;
export {};
//# sourceMappingURL=indexer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../src/indexer.ts"],"names":[],"mappings":"AAkIA,wBAAsB,mBAAmB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,SAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAsDjF;AAED,UAAU,WAAW;IACnB,QAAQ,EAAE,MAAM,CAAA;IAChB,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IAC/B,WAAW,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACzB,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACtB,YAAY,EAAE,MAAM,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;IAClB,kBAAkB,CAAC,EAAE,MAAM,CAAA;CAC5B;AAyBD,wBAAsB,YAAY,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,SAAS,CAAC,CAkDxE"}
@@ -0,0 +1,316 @@
1
+ import { cborDecode } from "./cbor.js";
2
+ import { parseCarFrame } from "./car.js";
3
+ import { insertRecord, deleteRecord, setCursor, setRepoStatus, getRepoRetryInfo, listAllRepoStatuses } from "./db.js";
4
+ import { backfillRepo } from "./backfill.js";
5
+ import { rebuildAllIndexes } from "./fts.js";
6
+ import { log, emit, timer } from "./logger.js";
7
+ import { runLabelRules } from "./labels.js";
8
+ import { getLexiconArray } from "./schema.js";
9
+ import { validateRecord } from '@bigmoves/lexicon';
10
+ let buffer = [];
11
+ let flushTimer = null;
12
+ let lastSeq = null;
13
+ const BATCH_SIZE = 100;
14
+ const FLUSH_INTERVAL_MS = 500;
15
+ let writesSinceRebuild = 0;
16
+ let ftsRebuildInterval = 500;
17
+ // Event buffer for DIDs mid-backfill
18
+ const pendingBuffers = new Map();
19
+ // Track in-flight backfills to avoid duplicates
20
+ const backfillInFlight = new Set();
21
+ const MAX_CONCURRENT_BACKFILLS = 5;
22
+ // In-memory cache of repo status to avoid flooding the DB read queue
23
+ const repoStatusCache = new Map();
24
+ // Set by startIndexer
25
+ let indexerCollections;
26
+ let indexerSignalCollections;
27
+ let indexerPinnedRepos = null;
28
+ let indexerFetchTimeout;
29
+ let indexerMaxRetries;
30
/**
 * Drain the in-memory write buffer in one pass:
 *  1. insert every buffered record (individual failures are collected, not fatal),
 *  2. persist the newest relay sequence as the resume cursor,
 *  3. fire label rules for successful inserts (fire-and-forget),
 *  4. emit one wide "flush" event summarizing the batch,
 *  5. after enough writes, schedule an FTS index rebuild (fire-and-forget).
 */
async function flushBuffer() {
    if (buffer.length === 0)
        return;
    const elapsed = timer();
    // splice(0) empties `buffer` in place, so writes arriving while this
    // flush is in flight accumulate into the next batch.
    const batch = buffer.splice(0);
    let insertedCount = 0;
    const errors = [];
    let cursorError;
    const inserted = [];
    for (const item of batch) {
        try {
            await insertRecord(item.collection, item.uri, item.cid, item.authorDid, item.record);
            insertedCount++;
            inserted.push(item);
        }
        catch (err) {
            errors.push(err.message);
        }
    }
    // Persist the latest seen firehose sequence so a restart can resume.
    if (lastSeq !== null) {
        try {
            await setCursor('relay', String(lastSeq));
        }
        catch (err) {
            cursorError = err.message;
        }
    }
    // Run label rules on successfully inserted records (async, non-blocking)
    for (const item of inserted) {
        runLabelRules({
            uri: item.uri,
            cid: item.cid,
            did: item.authorDid,
            collection: item.collection,
            value: item.record,
        }).catch(() => { });
    }
    // Aggregate collection counts and unique DIDs for wide event
    const collections = {};
    const dids = new Set();
    for (const item of batch) {
        collections[item.collection] = (collections[item.collection] || 0) + 1;
        dids.add(item.authorDid);
    }
    emit('indexer', 'flush', {
        batch_size: batch.length,
        inserted_count: insertedCount,
        error_count: errors.length,
        cursor_seq: lastSeq,
        duration_ms: elapsed(),
        collections,
        unique_dids: dids.size,
        sample_dids: [...dids].slice(0, 5),
        cursor_error: cursorError,
        sample_errors: errors.length > 0 ? errors.slice(0, 3) : undefined,
    });
    // Rebuild full-text indexes every ftsRebuildInterval writes.
    writesSinceRebuild += batch.length;
    if (writesSinceRebuild >= ftsRebuildInterval) {
        writesSinceRebuild = 0;
        rebuildAllIndexes([...indexerCollections]).catch(() => { });
    }
}
92
/** Arm the debounce timer so the buffer flushes within FLUSH_INTERVAL_MS. */
function scheduleFlush() {
    if (!flushTimer) {
        flushTimer = setTimeout(() => {
            flushTimer = null;
            void flushBuffer();
        }, FLUSH_INTERVAL_MS);
    }
}
100
/** Queue one record write; flush immediately at BATCH_SIZE, else debounce. */
function bufferWrite(item) {
    buffer.push(item);
    if (buffer.length < BATCH_SIZE) {
        scheduleFlush();
        return;
    }
    // Batch is full: cancel any pending debounce and flush right away.
    if (flushTimer) {
        clearTimeout(flushTimer);
        flushTimer = null;
    }
    void flushBuffer();
}
113
/**
 * Backfill all configured collections for a repo (DID).
 * While the backfill runs, live firehose events for this DID are parked in
 * `pendingBuffers` (processMessage diverts them there) and replayed once the
 * backfill finishes, so live writes never land before the historical data.
 * On failure, schedules a retry with linear backoff (retryCount minutes,
 * capped at 1h, floor 60s) until `indexerMaxRetries` is exhausted.
 * @param did repo DID to backfill
 * @param attempt 0 for the initial call; retries pass the prior retry count
 */
export async function triggerAutoBackfill(did, attempt = 0) {
    if (backfillInFlight.has(did))
        return;
    backfillInFlight.add(did);
    // From here on, live events for this DID are buffered, not written.
    pendingBuffers.set(did, []);
    if (attempt === 0)
        await setRepoStatus(did, 'pending');
    const elapsed = timer();
    let recordCount = 0;
    let status = 'success';
    let error;
    let replayErrors = 0;
    try {
        recordCount = await backfillRepo(did, indexerCollections, indexerFetchTimeout);
    }
    catch (err) {
        status = 'error';
        error = err.message;
    }
    // Replay buffered events
    const buffered = pendingBuffers.get(did) || [];
    pendingBuffers.delete(did);
    backfillInFlight.delete(did);
    for (const item of buffered) {
        try {
            await insertRecord(item.collection, item.uri, item.cid, item.authorDid, item.record);
        }
        catch {
            replayErrors++;
        }
    }
    // Schedule retry if failed and under maxRetries
    const retryInfo = status === 'error' ? await getRepoRetryInfo(did) : null;
    const currentRetryCount = retryInfo?.retryCount ?? 0;
    emit('indexer', 'auto_backfill', {
        did,
        record_count: recordCount,
        buffered_events: buffered.length,
        replay_errors: replayErrors,
        duration_ms: elapsed(),
        status,
        error,
        retry_count: currentRetryCount,
    });
    if (status === 'error' && currentRetryCount < indexerMaxRetries) {
        // Linear backoff: retryCount minutes, capped at 1 hour, floor 60s.
        const delaySecs = Math.min(currentRetryCount * 60, 3600);
        const delayMs = Math.max(delaySecs, 60) * 1000;
        setTimeout(() => {
            triggerAutoBackfill(did, currentRetryCount);
        }, delayMs);
    }
}
165
+ // Periodic memory diagnostics
166
// Emit a memory/queue snapshot every 30 seconds for diagnostics.
function startMemoryDiagnostics() {
    const toMb = (bytes) => Math.round(bytes / 1024 / 1024);
    setInterval(() => {
        const mem = process.memoryUsage();
        let parkedItems = 0;
        for (const items of pendingBuffers.values()) {
            parkedItems += items.length;
        }
        emit('diagnostics', 'memory', {
            heap_used_mb: toMb(mem.heapUsed),
            heap_total_mb: toMb(mem.heapTotal),
            rss_mb: toMb(mem.rss),
            external_mb: toMb(mem.external),
            array_buffers_mb: toMb(mem.arrayBuffers),
            write_buffer_len: buffer.length,
            pending_buffer_dids: pendingBuffers.size,
            pending_buffer_items: parkedItems,
            backfill_in_flight: backfillInFlight.size,
            repo_status_cache_size: repoStatusCache.size,
        });
    }, 30_000);
}
187
/**
 * Connect to the relay firehose and start indexing.
 * Warms the repo-status cache from the DB, starts memory diagnostics (once
 * per process), then subscribes to com.atproto.sync.subscribeRepos. On
 * disconnect, reconnects after 3s, resuming from the most recently seen
 * sequence number rather than the original (now stale) startup cursor.
 * @param opts IndexerOpts configuration
 * @returns the live WebSocket
 */
export async function startIndexer(opts) {
    const { relayUrl, collections, cursor, fetchTimeout } = opts;
    if (opts.ftsRebuildInterval != null)
        ftsRebuildInterval = opts.ftsRebuildInterval;
    indexerCollections = collections;
    indexerSignalCollections = opts.signalCollections || collections;
    indexerPinnedRepos = opts.pinnedRepos || null;
    indexerFetchTimeout = fetchTimeout;
    indexerMaxRetries = opts.maxRetries;
    // Pre-populate repo status cache from DB so non-signal updates
    // (e.g. profile changes) are processed for already-tracked DIDs
    if (repoStatusCache.size === 0) {
        const statuses = await listAllRepoStatuses();
        for (const { did, status } of statuses) {
            repoStatusCache.set(did, status);
        }
        log(`[indexer] Warmed repo status cache with ${statuses.length} entries`);
    }
    // Start diagnostics once per process. The reconnect path re-enters
    // startIndexer; without this guard every reconnect would leak an
    // additional 30s setInterval.
    if (!startIndexer._diagnosticsStarted) {
        startIndexer._diagnosticsStarted = true;
        startMemoryDiagnostics();
    }
    let wsUrl = `${relayUrl}/xrpc/com.atproto.sync.subscribeRepos`;
    if (cursor) {
        // Encode defensively; cursors are normally numeric sequence strings.
        wsUrl += `?cursor=${encodeURIComponent(cursor)}`;
        log(`[indexer] Resuming from cursor ${cursor}`);
    }
    log(`[indexer] Connecting to ${relayUrl}...`);
    const ws = new WebSocket(wsUrl);
    ws.binaryType = 'arraybuffer';
    ws.addEventListener('message', (event) => {
        try {
            // Process synchronously to drain the event queue as fast as possible.
            // Each pending event holds its ArrayBuffer; async handlers let them pile up.
            if (!(event.data instanceof ArrayBuffer))
                return;
            const bytes = new Uint8Array(event.data);
            processMessage(bytes, collections);
        }
        catch {
            // Skip unparseable firehose messages silently
        }
    });
    ws.addEventListener('open', () => log('[indexer] Connected to relay'));
    ws.addEventListener('close', () => {
        log('[indexer] Disconnected, reconnecting in 3s...');
        // Resume from the last sequence we actually processed; reusing the
        // original opts.cursor would replay everything since startup.
        const resumeCursor = lastSeq != null ? String(lastSeq) : opts.cursor;
        setTimeout(() => startIndexer({ ...opts, cursor: resumeCursor }), 3000);
    });
    return ws;
}
234
/**
 * Handle one raw firehose frame (called synchronously from the message
 * handler). Decodes the CBOR header and body, filters to #commit events that
 * touch relevant collections, tracks the sequence cursor, decides whether the
 * DID needs an auto-backfill, then inserts/deletes records for each op.
 */
function processMessage(bytes, collections) {
    // A frame is a CBOR header immediately followed by a CBOR body.
    const header = cborDecode(bytes, 0);
    const body = cborDecode(bytes, header.offset);
    if (header.value.op !== 1 || header.value.t !== '#commit')
        return;
    if (!body.value.blocks || !body.value.ops)
        return;
    // Track sequence number for cursor
    if (body.value.seq)
        lastSeq = body.value.seq;
    const did = body.value.repo;
    if (!did)
        return;
    // When repos are pinned, only process events from those DIDs
    if (indexerPinnedRepos && !indexerPinnedRepos.has(did))
        return;
    // Check if any ops in this commit are for collections we care about
    const relevantOps = body.value.ops.filter((op) => collections.has(op.path.split('/')[0]));
    if (relevantOps.length === 0)
        return;
    // Copy blocks out of the original buffer before it can be GC'd
    const { blocks } = parseCarFrame(new Uint8Array(body.value.blocks));
    // Only auto-backfill when we see activity in a signal collection
    const hasSignalOp = relevantOps.some((op) => indexerSignalCollections.has(op.path.split('/')[0]));
    // Use in-memory cache only — never hit DB from the hot path.
    // Unknown DIDs stay unknown until backfill or auto-backfill discovers them.
    // The cache is populated by triggerAutoBackfill and setRepoStatus calls.
    const cachedStatus = repoStatusCache.get(did);
    const repoStatus = cachedStatus === undefined || cachedStatus === 'unknown' ? null : cachedStatus;
    if (cachedStatus === undefined) {
        repoStatusCache.set(did, 'unknown');
    }
    if (hasSignalOp && (!indexerPinnedRepos || indexerPinnedRepos.has(did))) {
        if (repoStatus === null && backfillInFlight.size < MAX_CONCURRENT_BACKFILLS) {
            repoStatusCache.set(did, 'pending');
            triggerAutoBackfill(did);
        }
        else if (repoStatus === null) {
            // Concurrency limit hit: record 'pending' (cache + DB, fire-and-forget)
            // without starting a backfill now.
            repoStatusCache.set(did, 'pending');
            setRepoStatus(did, 'pending');
        }
    }
    // For non-signal ops (e.g. profile updates), only process if this DID is already tracked
    if (!hasSignalOp) {
        if (repoStatus === null)
            return;
    }
    for (const op of relevantOps) {
        const collection = op.path.split('/')[0];
        const uri = `at://${did}/${op.path}`;
        if (op.action === 'delete') {
            deleteRecord(collection, uri);
            continue;
        }
        // Scan the commit's CAR blocks for a record whose $type matches this
        // op's collection; stop at the first match.
        for (const [cid, data] of blocks) {
            try {
                const { value: record } = cborDecode(data);
                if (record?.$type === collection) {
                    // Skip records that fail lexicon validation (emit a wide event).
                    const validationError = validateRecord(getLexiconArray(), collection, record);
                    if (validationError) {
                        emit('indexer', 'validation_skip', {
                            uri,
                            collection,
                            path: validationError.path,
                            error: validationError.message,
                        });
                        break;
                    }
                    const item = { collection, uri, cid, authorDid: did, record };
                    // If DID is mid-backfill, buffer instead of writing directly
                    if (pendingBuffers.has(did)) {
                        pendingBuffers.get(did).push(item);
                    }
                    else {
                        bufferWrite(item);
                    }
                    break;
                }
            }
            catch { }
        }
    }
}
@@ -0,0 +1,29 @@
1
import type { LabelDefinition } from './config.ts';
/** Context passed to label rule evaluate() functions */
export interface LabelRuleContext {
    // Raw database access: parameterized query (rows) and run (no result).
    db: {
        query: (sql: string, params?: any[]) => Promise<any[]>;
        run: (sql: string, ...params: any[]) => Promise<void>;
    };
    // The record under evaluation.
    record: {
        uri: string;
        cid: string;
        did: string;
        collection: string;
        value: Record<string, any>;
    };
}
/** Load label modules (rules and definitions) from a directory; missing dir is a no-op. */
export declare function initLabels(labelsDir: string): Promise<void>;
/** Run every registered label rule against one record and persist resulting labels. */
export declare function runLabelRules(record: {
    uri: string;
    cid: string;
    did: string;
    collection: string;
    value: Record<string, any>;
}): Promise<void>;
/** Re-run all rules over every stored record of the given collections. */
export declare function rescanLabels(collections: string[]): Promise<{
    scanned: number;
    labeled: number;
}>;
/** Label definitions collected during initLabels. */
export declare function getLabelDefinitions(): LabelDefinition[];
//# sourceMappingURL=labels.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../src/labels.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAA;AAIlD,wDAAwD;AACxD,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE;QACF,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,GAAG,EAAE,CAAC,CAAA;QACtD,GAAG,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAA;KACtD,CAAA;IACD,MAAM,EAAE;QACN,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,UAAU,EAAE,MAAM,CAAA;QAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;KAC3B,CAAA;CACF;AAWD,wBAAsB,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAmCjE;AAED,wBAAsB,aAAa,CAAC,MAAM,EAAE;IAC1C,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;CAC3B,GAAG,OAAO,CAAC,IAAI,CAAC,CAyBhB;AAED,wBAAsB,YAAY,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAuCvG;AAED,wBAAgB,mBAAmB,IAAI,eAAe,EAAE,CAEvD"}
package/dist/labels.js ADDED
@@ -0,0 +1,111 @@
1
// TypeScript-emitted helper: map a relative ".ts"/".tsx"/".mts"/".cts" import
// specifier onto its emitted ".js"/".jsx"/".mjs"/".cjs" equivalent.
// Bare ".d.ts" declaration specifiers and non-relative paths pass through
// unchanged.
var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExtension) || function (path, preserveJsx) {
    const isRelative = typeof path === "string" && /^\.\.?\//.test(path);
    if (!isRelative) {
        return path;
    }
    const rewrite = function (m, tsx, d, ext, cm) {
        if (tsx) {
            return preserveJsx ? ".jsx" : ".js";
        }
        if (d && (!ext || !cm)) {
            return m;
        }
        return d + ext + "." + cm.toLowerCase() + "js";
    };
    return path.replace(/\.(tsx)$|((?:\.d)?)((?:\.[^./]+?)?)\.([cm]?)ts$/i, rewrite);
};
9
+ import { resolve } from 'node:path';
10
+ import { readdirSync } from 'node:fs';
11
+ import { querySQL, runSQL, insertLabels, getSchema } from "./db.js";
12
+ import { log, emit } from "./logger.js";
13
+ const rules = [];
14
+ let labelDefs = [];
15
+ let labelSrc = 'self';
16
/**
 * Discover and load label modules from `labelsDir`.
 * Each .ts/.js file whose name does not start with "_" is dynamically
 * imported (sorted order); its default export may provide `definition`
 * (collected for getLabelDefinitions) and/or `evaluate` (registered as a
 * rule for runLabelRules). A missing or unreadable directory is treated as
 * "no labels" and silently ignored.
 */
export async function initLabels(labelsDir) {
    let files;
    try {
        files = readdirSync(labelsDir)
            .filter((f) => (f.endsWith('.ts') || f.endsWith('.js')) && !f.startsWith('_'))
            .sort();
    }
    catch {
        // Directory absent or unreadable — labels are optional.
        return;
    }
    for (const file of files) {
        const name = file.replace(/\.(ts|js)$/, '');
        const scriptPath = resolve(labelsDir, file);
        // NOTE(review): __rewriteRelativeImportExtension only rewrites
        // relative ("./", "../") specifiers; `scriptPath` is absolute and
        // passes through unchanged — confirm .ts modules load at runtime.
        const mod = await import(__rewriteRelativeImportExtension(scriptPath));
        const handler = mod.default;
        if (handler.definition) {
            labelDefs.push(handler.definition);
        }
        if (handler.evaluate) {
            rules.push({
                name,
                evaluate: async (ctx) => {
                    return handler.evaluate(ctx);
                },
            });
        }
        log(`[labels] discovered: ${name}${handler.evaluate ? ' (rule)' : ''}`);
    }
    if (labelDefs.length > 0) {
        log(`[labels] ${labelDefs.length} label definitions loaded`);
    }
}
48
/**
 * Evaluate every registered label rule against one record, collect the
 * emitted label values, and persist them in a single batch. Rule failures
 * are reported via a 'rule_error' event and do not stop other rules.
 */
export async function runLabelRules(record) {
    if (!rules.length)
        return;
    const context = {
        db: { query: querySQL, run: runSQL },
        record,
    };
    const collected = [];
    for (const rule of rules) {
        try {
            const values = await rule.evaluate(context);
            for (const val of values) {
                collected.push({ src: labelSrc, uri: record.uri, val });
            }
        }
        catch (err) {
            emit('labels', 'rule_error', { rule: rule.name, error: err.message });
        }
    }
    if (collected.length === 0)
        return;
    await insertLabels(collected);
    emit('labels', 'applied', { count: collected.length, uri: record.uri, vals: collected.map((l) => l.val) });
}
72
/**
 * Re-run all label rules over every stored record of the given collections.
 * @returns {{scanned: number, labeled: number}} rows scanned and the net
 *   change in the _labels row count across the rescan.
 */
export async function rescanLabels(collections) {
    const countLabels = async () => {
        const rows = await querySQL(`SELECT COUNT(*) as count FROM _labels`);
        return Number(rows[0]?.count || 0);
    };
    const beforeCount = await countLabels();
    let scanned = 0;
    for (const collection of collections) {
        const schema = getSchema(collection);
        if (!schema)
            continue;
        for (const row of await querySQL(`SELECT * FROM ${schema.tableName}`)) {
            scanned++;
            // Rebuild the original record value from the flattened row,
            // re-inflating JSON columns stored as strings.
            const value = {};
            for (const col of schema.columns) {
                let v = row[col.name];
                if (v == null)
                    continue;
                if (col.duckdbType === 'JSON' && typeof v === 'string') {
                    try {
                        v = JSON.parse(v);
                    }
                    catch { }
                }
                value[col.originalName] = v;
            }
            await runLabelRules({
                uri: row.uri,
                cid: row.cid,
                did: row.did,
                collection,
                value,
            });
        }
    }
    const afterCount = await countLabels();
    return { scanned, labeled: afterCount - beforeCount };
}
109
// Returns the label definitions collected by initLabels.
// Note: this is the internal array itself, not a copy.
export function getLabelDefinitions() {
    return labelDefs;
}