pi-simocracy 0.5.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lookup.ts ADDED
@@ -0,0 +1,537 @@
1
+ /**
2
+ * Record lookup + comment-thread fetch with sim-attribution join.
3
+ *
4
+ * Powers the `simocracy_lookup_record` tool. One entry point
5
+ * (`lookupRecord`) handles every kind the LLM might want to inspect:
6
+ * sims, proposals, gatherings, decisions, and individual comments.
7
+ *
8
+ * Two indexers are queried (Simocracy + Hyperindexer) plus the
9
+ * owner's PDS for direct AT-URI lookups. Sim-attribution for
10
+ * comments is joined client-side from `org.simocracy.history`
11
+ * records — same pattern simocracy-v2's notifications system uses.
12
+ * See `docs/SIM_AUTHORED_COMMENTS.md` for the full design.
13
+ */
14
+
15
+ import {
16
+ fetchBlob,
17
+ getRecordFromPds,
18
+ parseAtUri,
19
+ resolveHandle,
20
+ searchSimsByName,
21
+ SIMOCRACY_INDEXER_URL,
22
+ } from "./simocracy.ts";
23
+
24
/** Base URL of the Hyperindexer, which serves `org.hypercerts.*` and `org.impactindexer.*` records. */
const HYPERINDEXER_URL = "https://api.hi.gainforest.app";

// Collection NSIDs read by this module.
const COLLECTION_SIM = "org.simocracy.sim";
const COLLECTION_PROPOSAL = "org.hypercerts.claim.activity";
const COLLECTION_GATHERING = "org.simocracy.gathering";
const COLLECTION_DECISION = "org.simocracy.decision";
const COLLECTION_COMMENT = "org.impactindexer.review.comment";
const COLLECTION_HISTORY = "org.simocracy.history";

/** Record kinds a caller can request; `"auto"` is the "no specific kind" value. */
export type LookupKind =
  | "sim"
  | "proposal"
  | "gathering"
  | "decision"
  | "comment"
  | "auto";

/** Concrete kind → collection NSID. */
const COLLECTION_BY_KIND: Record<Exclude<LookupKind, "auto">, string> = {
  sim: COLLECTION_SIM,
  proposal: COLLECTION_PROPOSAL,
  gathering: COLLECTION_GATHERING,
  decision: COLLECTION_DECISION,
  comment: COLLECTION_COMMENT,
};

/**
 * Collection NSID → concrete kind. Built by inverting `COLLECTION_BY_KIND`
 * so the two tables cannot drift apart; collections without a kind
 * (e.g. the history collection) are simply absent.
 */
const KIND_BY_COLLECTION: Record<string, Exclude<LookupKind, "auto">> =
  Object.fromEntries(
    (Object.keys(COLLECTION_BY_KIND) as Exclude<LookupKind, "auto">[]).map(
      (kind): [string, Exclude<LookupKind, "auto">] => [
        COLLECTION_BY_KIND[kind],
        kind,
      ],
    ),
  );
57
+
58
/**
 * Choose the indexer base URL for a collection: NSIDs under the
 * `org.simocracy.` prefix go to the Simocracy indexer, every other
 * collection goes to the Hyperindexer.
 */
function indexerForCollection(collection: string): string {
  const isSimocracyCollection = collection.startsWith("org.simocracy.");
  return isSimocracyCollection ? SIMOCRACY_INDEXER_URL : HYPERINDEXER_URL;
}
63
+
64
/**
 * One record as selected by `RECORDS_QUERY` from an indexer's GraphQL
 * endpoint. `value` is the untyped record body; callers must narrow
 * individual fields before use.
 */
interface GraphQLNode {
  /** Record URI as reported by the indexer. */
  uri: string;
  /** Record CID as reported by the indexer. */
  cid: string;
  /** DID reported for the record. */
  did: string;
  /** Record key reported for the record. */
  rkey: string;
  /** Collection NSID the record belongs to. */
  collection: string;
  /** Raw record body. */
  value: Record<string, unknown>;
}
72
+
73
/**
 * GraphQL document used for every indexer read: one page of records from a
 * single collection, selecting the fields declared on `GraphQLNode`.
 * The query has no filter argument — only `collection` and `first`.
 */
const RECORDS_QUERY = `
query FetchRecords($collection: String!, $first: Int) {
records(collection: $collection, first: $first) {
edges { node { uri cid did rkey collection value } }
}
}
`;
80
+
81
/**
 * POST `RECORDS_QUERY` to the indexer that hosts `collection` and return
 * the record nodes from the response.
 *
 * @param collection Collection NSID to read; also selects the indexer.
 * @param first Page size passed to the query's `first` variable.
 * @returns The nodes of `data.records.edges`, or `[]` when that path is absent.
 * @throws Error when the HTTP status is not OK or the response carries GraphQL errors.
 */
async function fetchRecordsFromIndexer(
  collection: string,
  first: number,
): Promise<GraphQLNode[]> {
  // Strip trailing slashes so the endpoint never contains "//graphql".
  const baseUrl = indexerForCollection(collection).replace(/\/+$/, "");
  const url = `${baseUrl}/graphql`;
  const payload = JSON.stringify({
    query: RECORDS_QUERY,
    variables: { collection, first },
  });

  const res = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });
  if (!res.ok) {
    throw new Error(`Indexer ${url} returned ${res.status}`);
  }

  type RecordsResponse = {
    data?: { records?: { edges?: Array<{ node: GraphQLNode }> } };
    errors?: Array<{ message: string }>;
  };
  const json = (await res.json()) as RecordsResponse;

  // Only the first GraphQL error is surfaced.
  const errors = json.errors ?? [];
  if (errors.length > 0) {
    throw new Error(`Indexer GraphQL error: ${errors[0]?.message}`);
  }

  const edges = json.data?.records?.edges;
  if (edges == null) return [];
  return edges.map((edge) => edge.node);
}
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Search (by name) — kind-specific entry points
107
+ // ---------------------------------------------------------------------------
108
+
109
/** One candidate produced by a name search, carrying enough to rank and re-fetch it. */
interface SearchHit {
  /** Concrete record kind the hit belongs to (never `"auto"`). */
  kind: Exclude<LookupKind, "auto">;
  uri: string;
  cid: string;
  did: string;
  rkey: string;
  /** Display name extracted from the record body; which field it comes from varies by kind. */
  name: string;
  /** The record's `shortDescription`, when present. */
  shortDescription?: string;
  /** Match quality — smaller means a better match. */
  score: number;
  /** Raw record body the hit was built from. */
  value: Record<string, unknown>;
}
122
+
123
/**
 * Rank how well a display name matches a search query. Lower is better;
 * `Number.POSITIVE_INFINITY` means "no match".
 *
 * Both sides are lower-cased and trimmed first. Tiers, best to worst:
 *   - 0       exact match
 *   - 1       equal once all whitespace is removed
 *   - 2       name starts with the query
 *   - 3..90   name contains the query; grows by one per extra character
 *             in the name, capped at 90
 *   - 91..99  at least one whitespace-separated query token occurs in the
 *             name; every matched token lowers the score by one
 *
 * The cap / floor keep the tiers disjoint: without them a long name that
 * contains the whole query (e.g. a description used as a fallback name)
 * scored above 100 and ranked below a name matching a single token.
 *
 * @param name Candidate display name; empty or blank never matches.
 * @param query User query; empty or blank never matches (otherwise every
 *   name would "start with" the empty string and score 2).
 */
function scoreNameAgainstQuery(name: string, query: string): number {
  const substringCeiling = 90;
  const tokenFloor = substringCeiling + 1;

  const a = (name || "").toLowerCase().trim();
  const b = (query || "").toLowerCase().trim();
  if (!a || !b) return Number.POSITIVE_INFINITY;

  if (a === b) return 0;
  if (a.replace(/\s+/g, "") === b.replace(/\s+/g, "")) return 1;
  if (a.startsWith(b)) return 2;
  if (a.includes(b)) {
    return Math.min(3 + (a.length - b.length), substringCeiling);
  }

  const tokens = b.split(/\s+/).filter(Boolean);
  const matched = tokens.filter((t) => a.includes(t)).length;
  if (matched > 0) return Math.max(100 - matched, tokenFloor);
  return Number.POSITIVE_INFINITY;
}
136
+
137
/**
 * Best-effort display name for a record body.
 *
 * Checks `name`, `title`, `proposalTitle`, then `shortDescription` and
 * returns the first one that is a non-blank string (returned as stored,
 * not trimmed). Returns `""` when none qualifies.
 *
 * Record bodies are untyped indexer / PDS data, so each field is checked
 * with `typeof` rather than cast: a non-string `name` (object, number, …)
 * is skipped instead of being handed to string-only callers.
 */
function nameFromValue(value: Record<string, unknown>): string {
  const fields = ["name", "title", "proposalTitle", "shortDescription"] as const;
  for (const field of fields) {
    const candidate = value[field];
    if (typeof candidate === "string" && candidate.trim()) return candidate;
  }
  return "";
}
147
+
148
/**
 * Search one record kind by name and return up to `maxResults` hits.
 *
 * Sims are delegated to `searchSimsByName`; its result order is kept
 * as-is. Each sim hit is scored with `scoreNameAgainstQuery` so the score
 * is on the same scale as every other kind — previously the list index
 * was used as the score, which made the first sim look like an exact
 * match (0) whenever results from several kinds were pooled. A sim whose
 * name does not match under that scorer (presumably matched by the sim
 * search on some other criterion — not visible here) gets `100 + rank`.
 *
 * Every other kind fetches one page of 500 records from the indexer,
 * scores each record's display name locally, drops non-matches and
 * returns the best `maxResults`, sorted ascending by score.
 *
 * @throws Whatever `searchSimsByName` / `fetchRecordsFromIndexer` throw.
 */
async function searchKind(
  kind: Exclude<LookupKind, "auto">,
  query: string,
  maxResults: number,
): Promise<SearchHit[]> {
  if (kind === "sim") {
    const matches = await searchSimsByName(query, { maxResults });
    return matches.map((m, i): SearchHit => {
      const nameScore = scoreNameAgainstQuery(m.sim.name, query);
      return {
        kind: "sim",
        uri: m.uri,
        cid: m.cid,
        did: m.did,
        rkey: m.rkey,
        name: m.sim.name,
        shortDescription:
          ((m.sim as unknown) as { shortDescription?: string })
            .shortDescription ?? undefined,
        // Comparable across kinds; the rank fallback keeps non-name matches
        // below every name-based match while preserving their order.
        score: Number.isFinite(nameScore) ? nameScore : 100 + i,
        value: m.sim as unknown as Record<string, unknown>,
      };
    });
  }

  const pageSize = 500;
  const nodes = await fetchRecordsFromIndexer(COLLECTION_BY_KIND[kind], pageSize);

  const scored: SearchHit[] = [];
  for (const node of nodes) {
    const name = nameFromValue(node.value);
    const score = scoreNameAgainstQuery(name, query);
    if (!Number.isFinite(score)) continue;

    // Untyped record body: only forward the description when it is a string.
    const description = node.value.shortDescription;
    scored.push({
      kind,
      uri: node.uri,
      cid: node.cid,
      did: node.did,
      rkey: node.rkey,
      name,
      shortDescription: typeof description === "string" ? description : undefined,
      score,
      value: node.value,
    });
  }
  scored.sort((a, b) => a.score - b.score);
  return scored.slice(0, maxResults);
}
193
+
194
/**
 * Run `searchKind` for sims, proposals, gatherings and decisions
 * concurrently, pool the hits and return the `maxResults` best by score
 * (ascending). This is the `kind = "auto"` search path.
 *
 * A kind whose search fails contributes no hits; the failure is not
 * reported. Comments are deliberately not searched: that would require
 * scanning the text of every comment in the indexer, and a caller that
 * wants a comment is expected to already hold its AT-URI.
 */
export async function searchAllKinds(
  query: string,
  maxResults: number,
): Promise<SearchHit[]> {
  const searchableKinds: Exclude<LookupKind, "auto">[] = [
    "sim",
    "proposal",
    "gathering",
    "decision",
  ];

  const outcomes = await Promise.allSettled(
    searchableKinds.map((k) => searchKind(k, query, maxResults)),
  );

  // Keep hits in kind order so the stable sort breaks score ties by kind.
  const pooled: SearchHit[] = [];
  for (const outcome of outcomes) {
    if (outcome.status === "fulfilled") pooled.push(...outcome.value);
  }

  pooled.sort((a, b) => a.score - b.score);
  return pooled.slice(0, maxResults);
}
222
+
223
+ // ---------------------------------------------------------------------------
224
+ // Comment thread fetch + sim-attribution join
225
+ // ---------------------------------------------------------------------------
226
+
227
/** A comment after subtree traversal, author resolution and sim-attribution join. */
export interface ResolvedComment {
  uri: string;
  did: string;
  rkey: string;
  /** Comment text extracted from the record body. */
  text: string;
  /** The record's `createdAt` value, or `""` when it has none. */
  createdAt: string;
  /** AT-URI of the record this comment was posted under (its direct parent). */
  parentUri: string;
  /** Handle resolved for `did`, or `null` when unresolved / not requested. */
  authorHandle: string | null;
  /** Present only when an `org.simocracy.history` sidecar attributes the comment to a sim. */
  simUri?: string;
  /** Name of the attributed sim; set together with `simUri`. */
  simName?: string;
}

/** Subset of an indexer node needed while walking the comment tree. */
interface RawCommentNode {
  uri: string;
  did: string;
  rkey: string;
  value: Record<string, unknown>;
}

/** Subset of an indexer node needed when reading a history sidecar. */
interface RawHistoryNode {
  uri: string;
  did: string;
  value: Record<string, unknown>;
}
253
+
254
/**
 * Pull the comment text out of a record body. The fields `text`, `body`,
 * `content`, `message` and `comment` are tried in that order; the first
 * one holding a non-blank string is returned unmodified. Returns `""`
 * when no field qualifies.
 */
function extractCommentText(value: Record<string, unknown>): string {
  const fields = ["text", "body", "content", "message", "comment"];
  const winner = fields
    .map((field) => value[field])
    .find(
      (candidate): candidate is string =>
        typeof candidate === "string" && candidate.trim().length > 0,
    );
  return winner ?? "";
}
261
+
262
/**
 * Read the AT-URI a record points at through its `subject` field.
 *
 * `subject` may be the URI itself (a non-empty string) or an object with
 * a string `uri` property. Anything else — missing, empty, or a `uri`
 * that is not a string — yields `null`.
 */
function extractSubjectUri(value: Record<string, unknown>): string | null {
  const { subject } = value;
  if (typeof subject === "string") {
    // An empty string counts as "no subject".
    return subject || null;
  }
  if (!subject) return null;

  const nested = (subject as Record<string, unknown>).uri;
  if (typeof nested !== "string") return null;
  return nested;
}
269
+
270
/**
 * Collect every comment in the reply tree rooted at `subjectUri` and
 * attach sim attribution from `org.simocracy.history` sidecars.
 *
 * Steps:
 *  1. Fetch one page (`maxComments` records) from the comment collection
 *     and one from the history collection, in parallel. A failed fetch is
 *     treated as an empty page. The query has no filter argument, so all
 *     subject / subtree / sim filtering happens here, client-side.
 *  2. Group comments by the URI in their `subject` field and walk
 *     breadth-first from `subjectUri`, visiting each URI at most once.
 *     The root itself is pre-marked as visited, so cyclic subject links
 *     cannot bring the root back as its own descendant.
 *  3. Index history records with `type = "comment"` and
 *     `subjectCollection = org.impactindexer.review.comment` by their
 *     `subjectUri`. Only the first entry of `simUris` / `simNames` is
 *     used; when several sidecars reference one comment, the last one in
 *     indexer order wins.
 *  4. Optionally resolve each distinct author DID to a handle
 *     (best-effort; failures become `null`).
 *
 * Comments without extractable text are dropped. The result is sorted
 * ascending by `createdAt` using `localeCompare`; a comment without a
 * string `createdAt` is given `""`.
 *
 * NOTE(review): sidecars are matched on `subjectUri` only — the DID that
 * published the sidecar is not compared with the comment's author.
 * Confirm against the attribution design whether that check is needed.
 *
 * @param subjectUri AT-URI of the record (or comment) whose replies to collect.
 * @param opts.maxComments Page size for BOTH indexer queries (default 1000).
 *   It limits what is fetched, not what is returned: replies outside the
 *   fetched page are not seen.
 * @param opts.resolveAuthors Resolve author handles (default `true`).
 */
export async function fetchCommentSubtree(
  subjectUri: string,
  opts: { maxComments?: number; resolveAuthors?: boolean } = {},
): Promise<ResolvedComment[]> {
  const maxComments = opts.maxComments ?? 1000;
  const resolveAuthors = opts.resolveAuthors ?? true;

  const [allComments, allHistories] = await Promise.all([
    fetchRecordsFromIndexer(COLLECTION_COMMENT, maxComments).catch(
      (): GraphQLNode[] => [],
    ),
    fetchRecordsFromIndexer(COLLECTION_HISTORY, maxComments).catch(
      (): GraphQLNode[] => [],
    ),
  ]);

  // Index comments by parent URI for the traversal below.
  const byParent = new Map<string, RawCommentNode[]>();
  for (const record of allComments) {
    const parent = extractSubjectUri(record.value);
    if (!parent) continue;
    const siblings = byParent.get(parent);
    if (siblings) siblings.push(record);
    else byParent.set(parent, [record]);
  }

  // BFS over the reply tree. Iterating the array while appending to it
  // visits the appended items too, which avoids an O(n) `shift()` per step.
  const matched: { node: RawCommentNode; parentUri: string }[] = [];
  const seen = new Set<string>([subjectUri]);
  const queue: string[] = [subjectUri];
  for (const parent of queue) {
    for (const child of byParent.get(parent) ?? []) {
      if (seen.has(child.uri)) continue;
      seen.add(child.uri);
      matched.push({ node: child, parentUri: parent });
      queue.push(child.uri);
    }
  }

  // Comment URI → sim attribution, from the history sidecars.
  const simByCommentUri = new Map<string, { simUri: string; simName: string }>();
  for (const h of allHistories) {
    const v = h.value;
    if (v.type !== "comment") continue;
    if (v.subjectCollection !== COLLECTION_COMMENT) continue;
    const commentUri = v.subjectUri;
    if (typeof commentUri !== "string") continue;

    const simUri: unknown = Array.isArray(v.simUris) ? v.simUris[0] : undefined;
    if (typeof simUri !== "string" || !simUri) continue;
    const simName: unknown = Array.isArray(v.simNames) ? v.simNames[0] : undefined;
    simByCommentUri.set(commentUri, {
      simUri,
      simName: typeof simName === "string" && simName ? simName : "(unnamed sim)",
    });
  }

  // Resolve author handles in parallel, once per distinct DID.
  const handleByDid = new Map<string, string | null>();
  if (resolveAuthors) {
    const dids = Array.from(new Set(matched.map((m) => m.node.did)));
    await Promise.all(
      dids.map(async (did) => {
        const handle = await resolveHandle(did).catch(() => null);
        handleByDid.set(did, handle);
      }),
    );
  }

  return matched
    .map(({ node, parentUri }): ResolvedComment => {
      const createdAt = node.value.createdAt;
      const out: ResolvedComment = {
        uri: node.uri,
        did: node.did,
        rkey: node.rkey,
        text: extractCommentText(node.value),
        // Untyped record body: a non-string timestamp would break the sort.
        createdAt: typeof createdAt === "string" ? createdAt : "",
        parentUri,
        authorHandle: handleByDid.get(node.did) ?? null,
      };
      const sim = simByCommentUri.get(node.uri);
      if (sim) {
        out.simUri = sim.simUri;
        out.simName = sim.simName;
      }
      return out;
    })
    .filter((c) => c.text.length > 0)
    .sort((a, b) => a.createdAt.localeCompare(b.createdAt));
}
373
+
374
+ // ---------------------------------------------------------------------------
375
+ // Single-record lookup (AT-URI or name)
376
+ // ---------------------------------------------------------------------------
377
+
378
/** Everything a single-record lookup returns about the resolved record. */
export interface LookupResult {
  /** Kind derived from the record's collection; `"unknown"` for collections this module has no kind for. */
  kind: Exclude<LookupKind, "auto"> | "unknown";
  uri: string;
  did: string;
  rkey: string;
  collection: string;
  /** Best-effort display name extracted from `value`. */
  name: string;
  /** Raw record body as returned by the PDS / indexer. */
  value: Record<string, unknown>;
  /** Handle resolved for `did`, or `null` when resolution failed. */
  ownerHandle: string | null;
  /** Comment subtree under this record, when one was fetched. */
  comments?: ResolvedComment[];
  /** Only when the result is itself a comment: the record it was posted under (best-effort). */
  parent?: {
    uri: string;
    collection: string;
    name: string;
    /** Parent record body; absent when the parent could not be fetched. */
    value?: Record<string, unknown>;
  };
  /** Only when the result is a comment that has a sim-attribution sidecar. */
  attribution?: { simUri: string; simName: string };
}
402
+
403
/**
 * Best-effort title for a record body, depending on its collection.
 *
 * - Comment records: the comment text, shortened to 77 characters plus
 *   `…` when it is longer than 80 characters.
 * - Every other collection: whatever `nameFromValue` extracts.
 *
 * Length is counted in Unicode code points, not UTF-16 code units, so
 * truncation never cuts a surrogate pair (e.g. an emoji) in half and
 * leaves a lone surrogate in the title.
 *
 * Per the original note this also supplies the `proposalTitle` /
 * `subjectName` fields of sim-attribution sidecars; those callers are not
 * visible in this file.
 */
export function bestNameForRecord(
  collection: string,
  value: Record<string, unknown>,
): string {
  if (collection !== COLLECTION_COMMENT) return nameFromValue(value);

  const maxLength = 80;
  const text = extractCommentText(value);
  // Fast path: at most 80 UTF-16 units can never exceed 80 code points.
  if (text.length <= maxLength) return text;

  // Array.from iterates a string by code point.
  const codePoints = Array.from(text);
  if (codePoints.length <= maxLength) return text;
  return codePoints.slice(0, maxLength - 3).join("") + "…";
}
419
+
420
/**
 * Resolve one record by AT-URI, reading it straight from the owner's PDS
 * rather than from an indexer.
 *
 * - Non-comment records: when `opts.withComments` is set, the comment
 *   subtree is attached (a failed fetch becomes an empty list).
 * - Comment records: the parent record is fetched best-effort, and the
 *   first history sidecar that references this comment and names a sim
 *   supplies `attribution`. `opts.withComments` has no effect here.
 *
 * @throws When the URI does not parse or the record cannot be read from the PDS.
 */
async function lookupByUri(
  uri: string,
  opts: { withComments?: boolean },
): Promise<LookupResult> {
  const { did, collection, rkey } = parseAtUri(uri);
  const value = await getRecordFromPds<Record<string, unknown>>(
    did,
    collection,
    rkey,
  );
  const ownerHandle = await resolveHandle(did).catch(() => null);

  const result: LookupResult = {
    kind: KIND_BY_COLLECTION[collection] ?? "unknown",
    uri,
    did,
    rkey,
    collection,
    name: bestNameForRecord(collection, value),
    value,
    ownerHandle,
  };

  if (collection !== COLLECTION_COMMENT) {
    // Anything that is not a comment: optionally attach the replies.
    if (opts.withComments) {
      result.comments = await fetchCommentSubtree(uri, {
        maxComments: 1000,
      }).catch((): ResolvedComment[] => []);
    }
    return result;
  }

  // --- Comment record from here on ---------------------------------------

  // Parent: the record this comment was posted under.
  const parentUri = extractSubjectUri(value);
  if (parentUri) {
    try {
      const parentRef = parseAtUri(parentUri);
      const parentValue = await getRecordFromPds<Record<string, unknown>>(
        parentRef.did,
        parentRef.collection,
        parentRef.rkey,
      ).catch(() => undefined);
      const parentName = parentValue
        ? bestNameForRecord(parentRef.collection, parentValue)
        : "";
      result.parent = {
        uri: parentUri,
        collection: parentRef.collection,
        name: parentName,
        value: parentValue,
      };
    } catch {
      // Parent URI didn't parse — leave `parent` unset.
    }
  }

  // Attribution: scan one page of history records for the first sidecar
  // that points at this comment and names at least one sim.
  const histories = await fetchRecordsFromIndexer(
    COLLECTION_HISTORY,
    1000,
  ).catch((): GraphQLNode[] => []);
  const sidecar = histories.find(
    ({ value: v }) =>
      v.type === "comment" &&
      v.subjectCollection === COLLECTION_COMMENT &&
      v.subjectUri === uri &&
      Array.isArray(v.simUris) &&
      Boolean(v.simUris[0]),
  );
  if (sidecar) {
    const simUris = sidecar.value.simUris as string[];
    const simNames = Array.isArray(sidecar.value.simNames)
      ? (sidecar.value.simNames as string[])
      : [];
    result.attribution = {
      simUri: simUris[0],
      simName: simNames[0] || "(unnamed sim)",
    };
  }
  return result;
}
499
+
500
/**
 * Look up a record by AT-URI or by fuzzy name.
 *
 * - Blank query: `{ result: null, alternatives: [] }`.
 * - Query starting with `at://`: resolved directly; `opts.kind` is not
 *   consulted and there are no alternatives.
 * - Anything else: a name search limited to 8 hits — across sims,
 *   proposals, gatherings and decisions when `kind` is `"auto"` (the
 *   default), otherwise within the requested kind. The best hit is
 *   resolved in full; the remaining hits are returned as `alternatives`.
 *
 * `opts.withComments` (default `true`) is forwarded to the record
 * resolution step, which attaches the comment subtree with sim
 * attribution for non-comment records. See
 * `docs/SIM_AUTHORED_COMMENTS.md` for the design.
 *
 * @throws When resolving the chosen record fails.
 */
export async function lookupRecord(
  query: string,
  opts: { kind?: LookupKind; withComments?: boolean } = {},
): Promise<{ result: LookupResult | null; alternatives: SearchHit[] }> {
  const needle = query.trim();
  if (needle === "") return { result: null, alternatives: [] };

  const withComments = opts.withComments ?? true;

  if (needle.startsWith("at://")) {
    const direct = await lookupByUri(needle, { withComments });
    return { result: direct, alternatives: [] };
  }

  const requestedKind = opts.kind ?? "auto";
  const maxHits = 8;
  const [best, ...alternatives] =
    requestedKind === "auto"
      ? await searchAllKinds(needle, maxHits)
      : await searchKind(requestedKind, needle, maxHits);

  if (best === undefined) return { result: null, alternatives: [] };

  const result = await lookupByUri(best.uri, { withComments });
  return { result, alternatives };
}
534
+
535
+ // Re-export types other modules need.
536
+ export type { GraphQLNode };
537
+ export { fetchBlob };