@hatk/hatk 0.0.1-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backfill.d.ts +11 -0
- package/dist/backfill.d.ts.map +1 -0
- package/dist/backfill.js +328 -0
- package/dist/car.d.ts +5 -0
- package/dist/car.d.ts.map +1 -0
- package/dist/car.js +52 -0
- package/dist/cbor.d.ts +7 -0
- package/dist/cbor.d.ts.map +1 -0
- package/dist/cbor.js +89 -0
- package/dist/cid.d.ts +4 -0
- package/dist/cid.d.ts.map +1 -0
- package/dist/cid.js +39 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +1663 -0
- package/dist/config.d.ts +47 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +43 -0
- package/dist/db.d.ts +134 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +1361 -0
- package/dist/feeds.d.ts +95 -0
- package/dist/feeds.d.ts.map +1 -0
- package/dist/feeds.js +144 -0
- package/dist/fts.d.ts +20 -0
- package/dist/fts.d.ts.map +1 -0
- package/dist/fts.js +762 -0
- package/dist/hydrate.d.ts +23 -0
- package/dist/hydrate.d.ts.map +1 -0
- package/dist/hydrate.js +75 -0
- package/dist/indexer.d.ts +14 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +316 -0
- package/dist/labels.d.ts +29 -0
- package/dist/labels.d.ts.map +1 -0
- package/dist/labels.js +111 -0
- package/dist/lex-types.d.ts +401 -0
- package/dist/lex-types.d.ts.map +1 -0
- package/dist/lex-types.js +4 -0
- package/dist/lexicon-resolve.d.ts +14 -0
- package/dist/lexicon-resolve.d.ts.map +1 -0
- package/dist/lexicon-resolve.js +280 -0
- package/dist/logger.d.ts +4 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +23 -0
- package/dist/main.d.ts +3 -0
- package/dist/main.d.ts.map +1 -0
- package/dist/main.js +148 -0
- package/dist/mst.d.ts +6 -0
- package/dist/mst.d.ts.map +1 -0
- package/dist/mst.js +30 -0
- package/dist/oauth/client.d.ts +16 -0
- package/dist/oauth/client.d.ts.map +1 -0
- package/dist/oauth/client.js +54 -0
- package/dist/oauth/crypto.d.ts +28 -0
- package/dist/oauth/crypto.d.ts.map +1 -0
- package/dist/oauth/crypto.js +101 -0
- package/dist/oauth/db.d.ts +47 -0
- package/dist/oauth/db.d.ts.map +1 -0
- package/dist/oauth/db.js +139 -0
- package/dist/oauth/discovery.d.ts +22 -0
- package/dist/oauth/discovery.d.ts.map +1 -0
- package/dist/oauth/discovery.js +50 -0
- package/dist/oauth/dpop.d.ts +11 -0
- package/dist/oauth/dpop.d.ts.map +1 -0
- package/dist/oauth/dpop.js +56 -0
- package/dist/oauth/hooks.d.ts +10 -0
- package/dist/oauth/hooks.d.ts.map +1 -0
- package/dist/oauth/hooks.js +40 -0
- package/dist/oauth/server.d.ts +86 -0
- package/dist/oauth/server.d.ts.map +1 -0
- package/dist/oauth/server.js +572 -0
- package/dist/opengraph.d.ts +34 -0
- package/dist/opengraph.d.ts.map +1 -0
- package/dist/opengraph.js +198 -0
- package/dist/schema.d.ts +51 -0
- package/dist/schema.d.ts.map +1 -0
- package/dist/schema.js +358 -0
- package/dist/seed.d.ts +29 -0
- package/dist/seed.d.ts.map +1 -0
- package/dist/seed.js +86 -0
- package/dist/server.d.ts +6 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +1024 -0
- package/dist/setup.d.ts +8 -0
- package/dist/setup.d.ts.map +1 -0
- package/dist/setup.js +48 -0
- package/dist/test-browser.d.ts +14 -0
- package/dist/test-browser.d.ts.map +1 -0
- package/dist/test-browser.js +26 -0
- package/dist/test.d.ts +47 -0
- package/dist/test.d.ts.map +1 -0
- package/dist/test.js +256 -0
- package/dist/views.d.ts +40 -0
- package/dist/views.d.ts.map +1 -0
- package/dist/views.js +178 -0
- package/dist/vite-plugin.d.ts +5 -0
- package/dist/vite-plugin.d.ts.map +1 -0
- package/dist/vite-plugin.js +86 -0
- package/dist/xrpc-client.d.ts +18 -0
- package/dist/xrpc-client.d.ts.map +1 -0
- package/dist/xrpc-client.js +54 -0
- package/dist/xrpc.d.ts +53 -0
- package/dist/xrpc.d.ts.map +1 -0
- package/dist/xrpc.js +139 -0
- package/fonts/Inter-Regular.woff +0 -0
- package/package.json +41 -0
- package/public/admin-auth.js +320 -0
- package/public/admin.html +2166 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { Row } from './lex-types.ts';
export type { Row };
/**
 * Context handed to a feed's hydrate() function: the items being hydrated,
 * the viewer identity, and batched data-access helpers.
 */
export interface HydrateContext<T = unknown> {
    // Records to hydrate (order as supplied by the caller).
    items: Row<T>[];
    // Authenticated viewer, or null when the request is anonymous.
    viewer: {
        did: string;
    } | null;
    // Raw parameterized SQL access.
    db: {
        query: (sql: string, params?: unknown[]) => Promise<unknown[]>;
    };
    // Batch-fetch records by AT-URI; result map is keyed by URI.
    getRecords: <R = unknown>(collection: string, uris: string[]) => Promise<Map<string, Row<R>>>;
    // Batch lookup of records by an arbitrary field; keyed by field value.
    lookup: <R = unknown>(collection: string, field: string, values: string[]) => Promise<Map<string, Row<R>>>;
    // Batch count of records grouped by field value.
    count: (collection: string, field: string, values: string[]) => Promise<Map<string, number>>;
    // Labels for the given URIs, keyed by URI.
    labels: (uris: string[]) => Promise<Map<string, unknown[]>>;
    // Build a URL for a blob ref, or undefined when one cannot be built.
    // NOTE(review): preset values suggest image-size variants — confirm against xrpc.ts.
    blobUrl: (did: string, ref: unknown, preset?: 'avatar' | 'banner' | 'feed_thumbnail' | 'feed_fullsize') => string | undefined;
}
/** Fetch records for URIs, reshape them, and filter out taken-down DIDs. */
export declare function resolveRecords(uris: string[]): Promise<Row<unknown>[]>;
/** Build a HydrateContext for a feed's hydrate function. */
export declare function buildHydrateContext(items: Row<unknown>[], viewer: {
    did: string;
} | null): HydrateContext;
//# sourceMappingURL=hydrate.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hydrate.d.ts","sourceRoot":"","sources":["../src/hydrate.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,gBAAgB,CAAA;AAEzC,YAAY,EAAE,GAAG,EAAE,CAAA;AAInB,MAAM,WAAW,cAAc,CAAC,CAAC,GAAG,OAAO;IACzC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,CAAA;IACf,MAAM,EAAE;QAAE,GAAG,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAA;IAC9B,EAAE,EAAE;QAAE,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,EAAE,KAAK,OAAO,CAAC,OAAO,EAAE,CAAC,CAAA;KAAE,CAAA;IACtE,UAAU,EAAE,CAAC,CAAC,GAAG,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAC7F,MAAM,EAAE,CAAC,CAAC,GAAG,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAC1G,KAAK,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;IAC5F,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,CAAA;IAC3D,OAAO,EAAE,CACP,GAAG,EAAE,MAAM,EACX,GAAG,EAAE,OAAO,EACZ,MAAM,CAAC,EAAE,QAAQ,GAAG,QAAQ,GAAG,gBAAgB,GAAG,eAAe,KAC9D,MAAM,GAAG,SAAS,CAAA;CACxB;AAID,4EAA4E;AAC5E,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAoC5E;AAID,4DAA4D;AAC5D,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,MAAM,EAAE;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,GAAG,cAAc,CA4BzG"}
|
package/dist/hydrate.js
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { getRecordsByUris, countByFieldBatch, lookupByFieldBatch, querySQL, reshapeRow, queryLabelsForUris, filterTakendownDids, } from "./db.js";
|
|
2
|
+
import { blobUrl } from "./xrpc.js";
|
|
3
|
+
// --- Record Resolution ---
|
|
4
|
+
/** Fetch records for URIs, reshape them, and filter out taken-down DIDs. */
export async function resolveRecords(uris) {
    if (uris.length === 0)
        return [];
    // Bucket URIs by collection (at://<did>/<collection>/<rkey>) so each
    // collection is fetched in a single batched call.
    const urisByCollection = new Map();
    for (const uri of uris) {
        const collection = uri.replace('at://', '').split('/')[1];
        const bucket = urisByCollection.get(collection);
        if (bucket) {
            bucket.push(uri);
        }
        else {
            urisByCollection.set(collection, [uri]);
        }
    }
    // Fetch every collection's records and key them by URI.
    const recordsByUri = new Map();
    for (const [collection, bucket] of urisByCollection) {
        for (const record of await getRecordsByUris(collection, bucket)) {
            recordsByUri.set(record.uri, record);
        }
    }
    // Drop records authored by taken-down DIDs.
    const authorDids = [...new Set([...recordsByUri.values()].map((record) => record.did).filter(Boolean))];
    const takendown = await filterTakendownDids(authorDids);
    if (takendown.size > 0) {
        for (const [uri, record] of recordsByUri) {
            if (takendown.has(record.did))
                recordsByUri.delete(uri);
        }
    }
    // Preserve the caller's URI order; reshape each hit and skip misses.
    const shaped = [];
    for (const uri of uris) {
        const row = recordsByUri.get(uri);
        const reshaped = reshapeRow(row, row?.__childData, row?.__unionData);
        if (reshaped != null)
            shaped.push(reshaped);
    }
    return shaped;
}
|
|
41
|
+
// --- Context Builder ---
|
|
42
|
+
/** Build a HydrateContext for a feed's hydrate function. */
export function buildHydrateContext(items, viewer) {
    // Batch-fetch records by URI, reshaped and keyed by their URI.
    const getRecords = async (collection, uris) => {
        const shapedByUri = new Map();
        if (uris.length === 0)
            return shapedByUri;
        for (const record of await getRecordsByUris(collection, uris)) {
            const shaped = reshapeRow(record, record?.__childData, record?.__unionData);
            if (shaped)
                shapedByUri.set(shaped.uri, shaped);
        }
        return shapedByUri;
    };
    // Batch lookup by field value; falsy and duplicate values are dropped.
    const lookup = async (collection, field, values) => {
        if (values.length === 0)
            return new Map();
        return lookupByFieldBatch(collection, field, [...new Set(values.filter(Boolean))]);
    };
    // Batch count by field value; falsy and duplicate values are dropped.
    const count = async (collection, field, values) => {
        if (values.length === 0)
            return new Map();
        return countByFieldBatch(collection, field, [...new Set(values.filter(Boolean))]);
    };
    return {
        items,
        viewer,
        db: { query: querySQL },
        getRecords,
        lookup,
        count,
        labels: queryLabelsForUris,
        blobUrl,
    };
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** Backfill one repo's records; failures schedule retries internally. */
export declare function triggerAutoBackfill(did: string, attempt?: number): Promise<void>;
/** Options accepted by startIndexer(). */
interface IndexerOpts {
    // Base URL of the relay firehose to subscribe to.
    relayUrl: string;
    // Collections (NSIDs) whose records should be indexed.
    collections: Set<string>;
    // Collections whose activity triggers auto-backfill; defaults to `collections`.
    signalCollections?: Set<string>;
    // When set, only events from these DIDs are processed.
    pinnedRepos?: Set<string>;
    // Firehose cursor (sequence number) to resume from.
    cursor?: string | null;
    // Timeout passed to repo backfill fetches — presumably milliseconds; confirm in backfill.ts.
    fetchTimeout: number;
    // Maximum backfill retry attempts per repo.
    maxRetries: number;
    // Rebuild FTS indexes after this many buffered writes.
    ftsRebuildInterval?: number;
}
/** Connect to the relay firehose and start indexing; resolves with the socket. */
export declare function startIndexer(opts: IndexerOpts): Promise<WebSocket>;
export {};
//# sourceMappingURL=indexer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../src/indexer.ts"],"names":[],"mappings":"AAkIA,wBAAsB,mBAAmB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,SAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAsDjF;AAED,UAAU,WAAW;IACnB,QAAQ,EAAE,MAAM,CAAA;IAChB,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IAC/B,WAAW,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACzB,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACtB,YAAY,EAAE,MAAM,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;IAClB,kBAAkB,CAAC,EAAE,MAAM,CAAA;CAC5B;AAyBD,wBAAsB,YAAY,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,SAAS,CAAC,CAkDxE"}
|
package/dist/indexer.js
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
import { cborDecode } from "./cbor.js";
|
|
2
|
+
import { parseCarFrame } from "./car.js";
|
|
3
|
+
import { insertRecord, deleteRecord, setCursor, setRepoStatus, getRepoRetryInfo, listAllRepoStatuses } from "./db.js";
|
|
4
|
+
import { backfillRepo } from "./backfill.js";
|
|
5
|
+
import { rebuildAllIndexes } from "./fts.js";
|
|
6
|
+
import { log, emit, timer } from "./logger.js";
|
|
7
|
+
import { runLabelRules } from "./labels.js";
|
|
8
|
+
import { getLexiconArray } from "./schema.js";
|
|
9
|
+
import { validateRecord } from '@bigmoves/lexicon';
|
|
10
|
+
// Pending record writes awaiting a batched flush.
let buffer = [];
// Debounce timer armed by scheduleFlush(); null when no flush is scheduled.
let flushTimer = null;
// Most recent firehose sequence number seen; persisted as the relay cursor on flush.
let lastSeq = null;
// Flush immediately once this many items are buffered.
const BATCH_SIZE = 100;
// Otherwise flush at most this often.
const FLUSH_INTERVAL_MS = 500;
// Writes since the last FTS rebuild; compared against ftsRebuildInterval.
let writesSinceRebuild = 0;
// Rebuild FTS indexes after this many writes (overridable via startIndexer opts).
let ftsRebuildInterval = 500;
// Event buffer for DIDs mid-backfill
const pendingBuffers = new Map();
// Track in-flight backfills to avoid duplicates
const backfillInFlight = new Set();
const MAX_CONCURRENT_BACKFILLS = 5;
// In-memory cache of repo status to avoid flooding the DB read queue
const repoStatusCache = new Map();
// Set by startIndexer
let indexerCollections;
let indexerSignalCollections;
let indexerPinnedRepos = null;
let indexerFetchTimeout;
let indexerMaxRetries;
|
|
30
|
+
/**
 * Drain the write buffer: insert each buffered record, persist the relay
 * cursor, fire label rules for inserted records, emit one summary event,
 * and periodically kick off an FTS rebuild.
 * Insert failures are collected per-item and reported; they do not abort the batch.
 * NOTE(review): the cursor advances even when some inserts failed, so failed
 * items are not replayed on restart — confirm this is intended (at-most-once).
 */
async function flushBuffer() {
    if (buffer.length === 0)
        return;
    const elapsed = timer();
    // splice(0) atomically drains the buffer; concurrent bufferWrite() calls
    // append to the now-empty array while this batch is written.
    const batch = buffer.splice(0);
    let insertedCount = 0;
    const errors = [];
    let cursorError;
    const inserted = [];
    for (const item of batch) {
        try {
            await insertRecord(item.collection, item.uri, item.cid, item.authorDid, item.record);
            insertedCount++;
            inserted.push(item);
        }
        catch (err) {
            errors.push(err.message);
        }
    }
    // Persist the newest firehose sequence number as the resume cursor.
    if (lastSeq !== null) {
        try {
            await setCursor('relay', String(lastSeq));
        }
        catch (err) {
            cursorError = err.message;
        }
    }
    // Run label rules on successfully inserted records (async, non-blocking)
    for (const item of inserted) {
        runLabelRules({
            uri: item.uri,
            cid: item.cid,
            did: item.authorDid,
            collection: item.collection,
            value: item.record,
        }).catch(() => { });
    }
    // Aggregate collection counts and unique DIDs for wide event
    const collections = {};
    const dids = new Set();
    for (const item of batch) {
        collections[item.collection] = (collections[item.collection] || 0) + 1;
        dids.add(item.authorDid);
    }
    emit('indexer', 'flush', {
        batch_size: batch.length,
        inserted_count: insertedCount,
        error_count: errors.length,
        cursor_seq: lastSeq,
        duration_ms: elapsed(),
        collections,
        unique_dids: dids.size,
        sample_dids: [...dids].slice(0, 5),
        cursor_error: cursorError,
        sample_errors: errors.length > 0 ? errors.slice(0, 3) : undefined,
    });
    // Trigger a best-effort FTS rebuild every ftsRebuildInterval writes.
    writesSinceRebuild += batch.length;
    if (writesSinceRebuild >= ftsRebuildInterval) {
        writesSinceRebuild = 0;
        rebuildAllIndexes([...indexerCollections]).catch(() => { });
    }
}
|
|
92
|
+
/** Arm the debounce timer so the write buffer flushes after FLUSH_INTERVAL_MS. */
function scheduleFlush() {
    // A flush is already scheduled — nothing to do.
    if (flushTimer)
        return;
    flushTimer = setTimeout(() => {
        flushTimer = null;
        void flushBuffer();
    }, FLUSH_INTERVAL_MS);
}
|
|
100
|
+
/**
 * Queue a record write. Once BATCH_SIZE items are queued the buffer is
 * flushed immediately; otherwise a debounced flush is scheduled via
 * scheduleFlush().
 */
function bufferWrite(item) {
    buffer.push(item);
    if (buffer.length >= BATCH_SIZE) {
        // Cancel any pending debounce — we are flushing right now.
        if (flushTimer) {
            clearTimeout(flushTimer);
            flushTimer = null;
        }
        // Fire-and-forget, but swallow rejections: an unhandled promise
        // rejection here could otherwise crash the process (flushBuffer's
        // emit()/timer() calls are outside its internal try/catch blocks).
        flushBuffer().catch(() => { });
    }
    else {
        scheduleFlush();
    }
}
|
|
113
|
+
/**
 * Backfill one repo's records, buffering live firehose events for that DID
 * while the backfill runs and replaying them afterwards. Deduplicates
 * in-flight backfills per DID and schedules a delayed retry on failure
 * (up to indexerMaxRetries).
 * @param did     repo DID to backfill
 * @param attempt 0 for the first attempt; retries pass the current retry count
 */
export async function triggerAutoBackfill(did, attempt = 0) {
    // A backfill for this DID is already running.
    if (backfillInFlight.has(did))
        return;
    backfillInFlight.add(did);
    // From here on, processMessage() diverts this DID's events into this buffer.
    pendingBuffers.set(did, []);
    if (attempt === 0)
        await setRepoStatus(did, 'pending');
    const elapsed = timer();
    let recordCount = 0;
    let status = 'success';
    let error;
    let replayErrors = 0;
    try {
        recordCount = await backfillRepo(did, indexerCollections, indexerFetchTimeout);
    }
    catch (err) {
        status = 'error';
        error = err.message;
    }
    // Replay buffered events
    const buffered = pendingBuffers.get(did) || [];
    // Clear the diversion before replaying so new events go to the main buffer.
    pendingBuffers.delete(did);
    backfillInFlight.delete(did);
    for (const item of buffered) {
        try {
            await insertRecord(item.collection, item.uri, item.cid, item.authorDid, item.record);
        }
        catch {
            replayErrors++;
        }
    }
    // Schedule retry if failed and under maxRetries
    const retryInfo = status === 'error' ? await getRepoRetryInfo(did) : null;
    const currentRetryCount = retryInfo?.retryCount ?? 0;
    emit('indexer', 'auto_backfill', {
        did,
        record_count: recordCount,
        buffered_events: buffered.length,
        replay_errors: replayErrors,
        duration_ms: elapsed(),
        status,
        error,
        retry_count: currentRetryCount,
    });
    if (status === 'error' && currentRetryCount < indexerMaxRetries) {
        // Linear backoff: retryCount minutes, clamped to [60s, 3600s].
        const delaySecs = Math.min(currentRetryCount * 60, 3600);
        const delayMs = Math.max(delaySecs, 60) * 1000;
        setTimeout(() => {
            triggerAutoBackfill(did, currentRetryCount);
        }, delayMs);
    }
}
|
|
165
|
+
// Periodic memory diagnostics
/**
 * Emit a memory/buffer diagnostics event every 30 seconds.
 * Idempotent: startIndexer() calls this again on every reconnect, and
 * without a guard each reconnect would stack another interval, leaking
 * timers and emitting duplicate diagnostics events.
 */
function startMemoryDiagnostics() {
    if (startMemoryDiagnostics.started)
        return;
    startMemoryDiagnostics.started = true;
    const toMb = (bytes) => Math.round(bytes / 1024 / 1024);
    setInterval(() => {
        const usage = process.memoryUsage();
        // Total events currently diverted for DIDs mid-backfill.
        let queuedItems = 0;
        for (const items of pendingBuffers.values()) {
            queuedItems += items.length;
        }
        emit('diagnostics', 'memory', {
            heap_used_mb: toMb(usage.heapUsed),
            heap_total_mb: toMb(usage.heapTotal),
            rss_mb: toMb(usage.rss),
            external_mb: toMb(usage.external),
            array_buffers_mb: toMb(usage.arrayBuffers),
            write_buffer_len: buffer.length,
            pending_buffer_dids: pendingBuffers.size,
            pending_buffer_items: queuedItems,
            backfill_in_flight: backfillInFlight.size,
            repo_status_cache_size: repoStatusCache.size,
        });
    }, 30_000);
}
|
|
187
|
+
/**
 * Connect to the relay firehose and start indexing commits.
 * Stores the options in module state, warms the repo-status cache from the
 * DB on first call, then subscribes via WebSocket. On disconnect it
 * reconnects by recursively calling itself after 3 seconds.
 * @returns the live WebSocket (callers may also receive it after a reconnect)
 */
export async function startIndexer(opts) {
    const { relayUrl, collections, cursor, fetchTimeout } = opts;
    if (opts.ftsRebuildInterval != null)
        ftsRebuildInterval = opts.ftsRebuildInterval;
    indexerCollections = collections;
    // Fall back to indexing collections as the auto-backfill signal set.
    indexerSignalCollections = opts.signalCollections || collections;
    indexerPinnedRepos = opts.pinnedRepos || null;
    indexerFetchTimeout = fetchTimeout;
    indexerMaxRetries = opts.maxRetries;
    // Pre-populate repo status cache from DB so non-signal updates
    // (e.g. profile changes) are processed for already-tracked DIDs.
    // Guarded by size so reconnects skip the warm-up.
    if (repoStatusCache.size === 0) {
        const statuses = await listAllRepoStatuses();
        for (const { did, status } of statuses) {
            repoStatusCache.set(did, status);
        }
        log(`[indexer] Warmed repo status cache with ${statuses.length} entries`);
    }
    // NOTE(review): invoked again on every reconnect — confirm it is
    // idempotent, otherwise diagnostic intervals accumulate.
    startMemoryDiagnostics();
    let wsUrl = `${relayUrl}/xrpc/com.atproto.sync.subscribeRepos`;
    if (cursor) {
        wsUrl += `?cursor=${cursor}`;
        log(`[indexer] Resuming from cursor ${cursor}`);
    }
    log(`[indexer] Connecting to ${relayUrl}...`);
    const ws = new WebSocket(wsUrl);
    ws.binaryType = 'arraybuffer';
    ws.addEventListener('message', (event) => {
        try {
            // Process synchronously to drain the event queue as fast as possible.
            // Each pending event holds its ArrayBuffer; async handlers let them pile up.
            if (!(event.data instanceof ArrayBuffer))
                return;
            const bytes = new Uint8Array(event.data);
            processMessage(bytes, collections);
        }
        catch {
            // Skip unparseable firehose messages silently
        }
    });
    ws.addEventListener('open', () => log('[indexer] Connected to relay'));
    ws.addEventListener('close', () => {
        log('[indexer] Disconnected, reconnecting in 3s...');
        setTimeout(() => startIndexer(opts), 3000);
    });
    return ws;
}
|
|
234
|
+
/**
 * Decode one firehose frame and index its relevant ops.
 * Hot path: runs synchronously per WebSocket message; consults only the
 * in-memory repo-status cache, never the DB. Records from DIDs that are
 * mid-backfill are diverted into pendingBuffers for later replay.
 * @param bytes       raw CBOR frame (header + body)
 * @param collections NSIDs we index
 */
function processMessage(bytes, collections) {
    const header = cborDecode(bytes, 0);
    const body = cborDecode(bytes, header.offset);
    // Only repo #commit messages carry records.
    if (header.value.op !== 1 || header.value.t !== '#commit')
        return;
    if (!body.value.blocks || !body.value.ops)
        return;
    // Track sequence number for cursor
    if (body.value.seq)
        lastSeq = body.value.seq;
    const did = body.value.repo;
    if (!did)
        return;
    // When repos are pinned, only process events from those DIDs
    if (indexerPinnedRepos && !indexerPinnedRepos.has(did))
        return;
    // Check if any ops in this commit are for collections we care about
    const relevantOps = body.value.ops.filter((op) => collections.has(op.path.split('/')[0]));
    if (relevantOps.length === 0)
        return;
    // Copy blocks out of the original buffer before it can be GC'd
    const { blocks } = parseCarFrame(new Uint8Array(body.value.blocks));
    // Only auto-backfill when we see activity in a signal collection
    const hasSignalOp = relevantOps.some((op) => indexerSignalCollections.has(op.path.split('/')[0]));
    // Use in-memory cache only — never hit DB from the hot path.
    // Unknown DIDs stay unknown until backfill or auto-backfill discovers them.
    // The cache is populated by triggerAutoBackfill and setRepoStatus calls.
    const cachedStatus = repoStatusCache.get(did);
    const repoStatus = cachedStatus === undefined || cachedStatus === 'unknown' ? null : cachedStatus;
    if (cachedStatus === undefined) {
        repoStatusCache.set(did, 'unknown');
    }
    if (hasSignalOp && (!indexerPinnedRepos || indexerPinnedRepos.has(did))) {
        if (repoStatus === null && backfillInFlight.size < MAX_CONCURRENT_BACKFILLS) {
            // New DID with capacity: start an auto-backfill (fire-and-forget).
            repoStatusCache.set(did, 'pending');
            triggerAutoBackfill(did);
        }
        else if (repoStatus === null) {
            // New DID but at backfill capacity: mark pending for later pickup.
            repoStatusCache.set(did, 'pending');
            setRepoStatus(did, 'pending');
        }
    }
    // For non-signal ops (e.g. profile updates), only process if this DID is already tracked
    if (!hasSignalOp) {
        if (repoStatus === null)
            return;
    }
    for (const op of relevantOps) {
        const collection = op.path.split('/')[0];
        const uri = `at://${did}/${op.path}`;
        if (op.action === 'delete') {
            deleteRecord(collection, uri);
            continue;
        }
        // Scan the commit's blocks for a record matching this op's collection.
        // NOTE(review): matches by $type only, taking the first matching block —
        // with multiple same-collection creates in one commit, each op would pick
        // the same block. Confirm ops are matched to blocks via op.cid upstream.
        for (const [cid, data] of blocks) {
            try {
                const { value: record } = cborDecode(data);
                if (record?.$type === collection) {
                    // Records failing lexicon validation are skipped (not indexed).
                    const validationError = validateRecord(getLexiconArray(), collection, record);
                    if (validationError) {
                        emit('indexer', 'validation_skip', {
                            uri,
                            collection,
                            path: validationError.path,
                            error: validationError.message,
                        });
                        break;
                    }
                    const item = { collection, uri, cid, authorDid: did, record };
                    // If DID is mid-backfill, buffer instead of writing directly
                    if (pendingBuffers.has(did)) {
                        pendingBuffers.get(did).push(item);
                    }
                    else {
                        bufferWrite(item);
                    }
                    break;
                }
            }
            catch { }
        }
    }
}
|
package/dist/labels.d.ts
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { LabelDefinition } from './config.ts';
/** Context passed to label rule evaluate() functions */
export interface LabelRuleContext {
    // Raw SQL access for rules that need to query surrounding data.
    db: {
        query: (sql: string, params?: any[]) => Promise<any[]>;
        run: (sql: string, ...params: any[]) => Promise<void>;
    };
    // The record under evaluation.
    record: {
        uri: string;
        cid: string;
        did: string;
        collection: string;
        value: Record<string, any>;
    };
}
/** Load label rule modules from labelsDir; missing dir is a silent no-op. */
export declare function initLabels(labelsDir: string): Promise<void>;
/** Evaluate all loaded rules against one record and persist resulting labels. */
export declare function runLabelRules(record: {
    uri: string;
    cid: string;
    did: string;
    collection: string;
    value: Record<string, any>;
}): Promise<void>;
/** Re-run label rules over every stored record of the given collections. */
export declare function rescanLabels(collections: string[]): Promise<{
    scanned: number;
    labeled: number;
}>;
/** Label definitions collected by initLabels(). */
export declare function getLabelDefinitions(): LabelDefinition[];
//# sourceMappingURL=labels.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../src/labels.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAA;AAIlD,wDAAwD;AACxD,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE;QACF,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,GAAG,EAAE,CAAC,CAAA;QACtD,GAAG,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,GAAG,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAA;KACtD,CAAA;IACD,MAAM,EAAE;QACN,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,GAAG,EAAE,MAAM,CAAA;QACX,UAAU,EAAE,MAAM,CAAA;QAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;KAC3B,CAAA;CACF;AAWD,wBAAsB,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAmCjE;AAED,wBAAsB,aAAa,CAAC,MAAM,EAAE;IAC1C,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;CAC3B,GAAG,OAAO,CAAC,IAAI,CAAC,CAyBhB;AAED,wBAAsB,YAAY,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAuCvG;AAED,wBAAgB,mBAAmB,IAAI,eAAe,EAAE,CAEvD"}
|
package/dist/labels.js
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// TypeScript emit helper: rewrite relative module specifiers ending in a TS
// extension (.ts/.mts/.cts/.tsx, case-insensitive) to their JS equivalents.
var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExtension) || function (path, preserveJsx) {
    // Only relative specifiers ("./x", "../x") are rewritten.
    if (typeof path !== "string" || !/^\.\.?\//.test(path)) {
        return path;
    }
    return path.replace(/\.(tsx)$|((?:\.d)?)((?:\.[^./]+?)?)\.([cm]?)ts$/i, function (match, tsx, dts, ext, cm) {
        if (tsx) {
            return preserveJsx ? ".jsx" : ".js";
        }
        // Declaration files (.d.ts) are left untouched unless they carry both an
        // extra extension segment and a c/m modifier (e.g. .d.css.cts).
        if (dts && (!ext || !cm)) {
            return match;
        }
        return dts + ext + "." + cm.toLowerCase() + "js";
    });
};
|
|
9
|
+
import { resolve } from 'node:path';
|
|
10
|
+
import { readdirSync } from 'node:fs';
|
|
11
|
+
import { querySQL, runSQL, insertLabels, getSchema } from "./db.js";
|
|
12
|
+
import { log, emit } from "./logger.js";
|
|
13
|
+
// Label rules discovered by initLabels(); each entry has a name and evaluate().
const rules = [];
// Label definitions exported by rule modules, served via getLabelDefinitions().
let labelDefs = [];
// `src` value attached to every emitted label row.
let labelSrc = 'self';
|
|
16
|
+
/**
 * Discover and load label rule modules from labelsDir.
 * Each .ts/.js file (not starting with "_") is dynamically imported; its
 * default export may provide a `definition` (collected for
 * getLabelDefinitions()) and/or an `evaluate` function (registered as a rule).
 * A missing or unreadable directory is a silent no-op.
 */
export async function initLabels(labelsDir) {
    let files;
    try {
        files = readdirSync(labelsDir)
            .filter((f) => (f.endsWith('.ts') || f.endsWith('.js')) && !f.startsWith('_'))
            .sort();
    }
    catch {
        // Directory absent/unreadable: run with no label rules.
        return;
    }
    for (const file of files) {
        const name = file.replace(/\.(ts|js)$/, '');
        const scriptPath = resolve(labelsDir, file);
        // Rewrite .ts extensions so the dynamic import targets emitted .js.
        const mod = await import(__rewriteRelativeImportExtension(scriptPath));
        const handler = mod.default;
        if (handler.definition) {
            labelDefs.push(handler.definition);
        }
        if (handler.evaluate) {
            rules.push({
                name,
                // Wrap so the stored rule always returns a promise.
                evaluate: async (ctx) => {
                    return handler.evaluate(ctx);
                },
            });
        }
        log(`[labels] discovered: ${name}${handler.evaluate ? ' (rule)' : ''}`);
    }
    if (labelDefs.length > 0) {
        log(`[labels] ${labelDefs.length} label definitions loaded`);
    }
}
|
|
48
|
+
/** Evaluate every loaded label rule against a record and persist any labels. */
export async function runLabelRules(record) {
    if (rules.length === 0)
        return;
    const context = {
        db: { query: querySQL, run: runSQL },
        record,
    };
    const toInsert = [];
    for (const rule of rules) {
        try {
            // A rule returns the label values it wants applied to this record.
            for (const val of await rule.evaluate(context)) {
                toInsert.push({ src: labelSrc, uri: record.uri, val });
            }
        }
        catch (err) {
            // One failing rule must not block the others.
            emit('labels', 'rule_error', { rule: rule.name, error: err.message });
        }
    }
    if (toInsert.length === 0)
        return;
    await insertLabels(toInsert);
    emit('labels', 'applied', { count: toInsert.length, uri: record.uri, vals: toInsert.map((l) => l.val) });
}
|
|
72
|
+
/**
 * Re-run label rules over every stored record of the given collections.
 * Reconstructs each record's original value from its table row using the
 * schema's column mapping, then feeds it through runLabelRules().
 * @returns counts of rows scanned and net labels added (after - before)
 */
export async function rescanLabels(collections) {
    // Label count before the scan; the delta is reported as `labeled`.
    const beforeRows = await querySQL(`SELECT COUNT(*) as count FROM _labels`);
    const beforeCount = Number(beforeRows[0]?.count || 0);
    let scanned = 0;
    for (const collection of collections) {
        const schema = getSchema(collection);
        if (!schema)
            continue;
        // Table name comes from the internal schema registry, not user input.
        // NOTE(review): still interpolated into SQL — confirm getSchema sanitizes it.
        const rows = await querySQL(`SELECT * FROM ${schema.tableName}`);
        for (const row of rows) {
            scanned++;
            // Rebuild the record value: map column names back to original field
            // names and re-parse JSON columns stored as strings.
            const value = {};
            for (const col of schema.columns) {
                let v = row[col.name];
                if (v === null || v === undefined)
                    continue;
                if (col.duckdbType === 'JSON' && typeof v === 'string') {
                    try {
                        v = JSON.parse(v);
                    }
                    catch { }
                }
                value[col.originalName] = v;
            }
            await runLabelRules({
                uri: row.uri,
                cid: row.cid,
                did: row.did,
                collection,
                value,
            });
        }
    }
    const afterRows = await querySQL(`SELECT COUNT(*) as count FROM _labels`);
    const afterCount = Number(afterRows[0]?.count || 0);
    return { scanned, labeled: afterCount - beforeCount };
}
|
|
109
|
+
/** Label definitions collected by initLabels() (live array, not a copy). */
export function getLabelDefinitions() {
    return labelDefs;
}
|