@invana/graph-datasets 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,357 @@
1
+ /**
2
+ * Synthetic **LLM agent traces** — small DAGs that approximate the kind
3
+ * of execution graph LangSmith / Langfuse / Helicone draw for a single
4
+ * agent run. Each node is an `llm` call, a `tool` invocation, a
5
+ * `decision` branch, or a terminal `output`. Each edge is a `calls`
6
+ * (control-flow), `returns` (data-flow), or `branch` (decision branch).
7
+ *
8
+ * Three presets are exported so a single story can illustrate the
9
+ * happy path, an error+retry path, and a multi-tool branching path
10
+ * without re-deriving the data per render.
11
+ *
12
+ * Designed for layered DAG layouts (ELK `layered` `DOWN`); the dataset
13
+ * carries no positions.
14
+ */
15
+ /** What the node represents in the agent's execution graph: an LLM call, a tool invocation, a decision branch, or a terminal output. */
16
+ type AgentTraceNodeKind = 'llm' | 'tool' | 'decision' | 'output';
17
+ /** Per-node execution status — drives state-config styling in the story. */
18
+ type AgentTraceStatus = 'success' | 'error' | 'pending';
19
+ /** What the edge represents: `calls` is control-flow, `returns` is data-flow, `branch` is a decision branch. */
20
+ type AgentTraceEdgeKind = 'calls' | 'returns' | 'branch';
21
+ /** Free-form per-node data (the payload of `AgentTraceNode.data`). The store keeps this opaque. */
22
+ interface AgentTraceNodeData {
23
+ readonly kind: AgentTraceNodeKind;
24
+ readonly label: string;
25
+ readonly status: AgentTraceStatus;
26
+ readonly durationMs: number; // NOTE(review): presumably elapsed time in milliseconds — confirm
27
+ readonly tokens?: number; // optional; NOTE(review): presumably a token count — confirm which node kinds set it
28
+ }
29
+ /** Free-form per-edge data (the payload of `AgentTraceEdge.data`). */
30
+ interface AgentTraceEdgeData {
31
+ readonly kind: AgentTraceEdgeKind;
32
+ }
33
+ interface AgentTraceNode { // one node of a trace: an id plus its AgentTraceNodeData payload
34
+ readonly id: string;
35
+ readonly data: AgentTraceNodeData;
36
+ }
37
+ interface AgentTraceEdge { // one edge of a trace: an id, two endpoints, and its AgentTraceEdgeData payload
38
+ readonly id: string;
39
+ readonly source: string; // NOTE(review): presumably an AgentTraceNode id — confirm
40
+ readonly target: string; // NOTE(review): presumably an AgentTraceNode id — confirm
41
+ readonly data: AgentTraceEdgeData;
42
+ }
43
+ interface AgentTraceData { // one trace preset: id, name, and its node/edge lists
44
+ readonly id: string;
45
+ readonly name: string;
46
+ readonly nodes: readonly AgentTraceNode[];
47
+ readonly edges: readonly AgentTraceEdge[];
48
+ }
49
+ /** All three presets, in display order. */
50
+ declare const agentTrace: readonly AgentTraceData[];
51
+
52
+ /**
53
+ * Synthetic **RAG embedding explorer** dataset — ~400 2D points that
54
+ * imitate a UMAP / t-SNE projection of a vector index. Five thematic
55
+ * clusters (auth, billing, search, infra, ML) plus a sprinkling of
56
+ * uniform outliers, generated from a seeded RNG so the visualisation
57
+ * is stable across reloads.
58
+ *
59
+ * Each point carries a `cluster` id, a short `text` snippet that stands
60
+ * in for the chunk content, and a `source` filename. There are no edges
61
+ * — the story renders raw points overlaid by a
62
+ * `DensityContourFillLayer` to bring the cluster topology forward.
63
+ */
64
+ declare const CLUSTER_NAMES: readonly ["auth", "billing", "search", "infra", "ml"]; // the five cluster names; not named in the export statement at the end of this file
65
+ type RagEmbeddingsCluster = (typeof CLUSTER_NAMES)[number]; // union of the CLUSTER_NAMES entries
66
+ interface RagEmbeddingsNodeData { // per-point payload
67
+ readonly cluster: RagEmbeddingsCluster; // cluster id the point carries
68
+ readonly text: string; // short snippet standing in for the chunk content
69
+ readonly source: string; // source filename
70
+ }
71
+ interface RagEmbeddingsNode { // one projected point
72
+ readonly id: string;
73
+ readonly position: { // 2D coordinates of the point
74
+ readonly x: number;
75
+ readonly y: number;
76
+ };
77
+ readonly data: RagEmbeddingsNodeData;
78
+ }
79
+ interface RagEmbeddingsData { // nodes only — this dataset declares no edges
80
+ readonly nodes: readonly RagEmbeddingsNode[];
81
+ }
82
+ declare const ragEmbeddings: RagEmbeddingsData;
83
+
84
+ /**
85
+ * Synthetic **microservices topology** dataset — ~20 services across a
86
+ * SaaS stack with call edges carrying RPS and per-edge error rates.
87
+ * Designed for a Datadog / Istio / Linkerd-style service-map demo.
88
+ *
89
+ * The dataset embeds one degraded service (`order-api`), one degraded
90
+ * downstream (`payment-service`), and one downed service
91
+ * (`fraud-detector`) so the story has something to flag visually
92
+ * without the consumer rolling a fake "simulate degradation" loop just
93
+ * to see the styled states.
94
+ */
95
+ /** Coarse architectural tier — drives icon choice in the story. */
96
+ type MicroservicesTier = 'gateway' | 'api' | 'logic' | 'data' | 'external';
97
+ /** Health roll-up rendered as the node state-config name. */
98
+ type MicroservicesHealth = 'healthy' | 'degraded' | 'down';
99
+ interface MicroservicesNodeData { // per-service payload
100
+ readonly tier: MicroservicesTier;
101
+ readonly health: MicroservicesHealth;
102
+ /** Sustained requests-per-second at this service. */
103
+ readonly rps: number;
104
+ }
105
+ interface MicroservicesEdgeData { // per-call-edge payload
106
+ /** Sustained requests-per-second on this dependency edge. */
107
+ readonly rps: number;
108
+ /** Fraction in `[0, 1]`. Above ~0.05 the story flags the edge. */
109
+ readonly errorRate: number;
110
+ }
111
+ interface MicroservicesNode { // one service: an id plus its MicroservicesNodeData payload
112
+ readonly id: string;
113
+ readonly data: MicroservicesNodeData;
114
+ }
115
+ interface MicroservicesEdge { // one call edge between services
116
+ readonly id: string;
117
+ readonly source: string; // NOTE(review): presumably a MicroservicesNode id (the caller?) — confirm
118
+ readonly target: string; // NOTE(review): presumably a MicroservicesNode id (the callee?) — confirm
119
+ readonly data: MicroservicesEdgeData;
120
+ }
121
+ interface MicroservicesData { // the whole topology: services plus call edges
122
+ readonly nodes: readonly MicroservicesNode[];
123
+ readonly edges: readonly MicroservicesEdge[];
124
+ }
125
+ declare const microservices: MicroservicesData;
126
+
127
+ /**
128
+ * Synthetic **company knowledge-graph** — entities of five kinds linked
129
+ * by typed relations. Modelled after Palantir / Neo4j Bloom / Diffbot
130
+ * entity-ontology demos so the story has a recognisable picture: five
131
+ * companies, the people who founded / run them, the products they ship,
132
+ * the cities they're based in, and the industries they operate in.
133
+ *
134
+ * The dataset is deliberately under-connected at the periphery so the
135
+ * "double-click to expand" interaction in the story has something
136
+ * meaningful to do — start with the core companies + their CEOs and
137
+ * unfold the products, locations, and industries by clicking.
138
+ */
139
+ /** Entity kind — drives shape and palette in the story. */
140
+ type OntologyEntityKind = 'company' | 'person' | 'product' | 'location' | 'industry';
141
+ /** Predicate / FK label on each edge. */
142
+ type OntologyEdgeKind = 'founded' | 'ceo_of' | 'works_at' | 'builds' | 'headquartered_in' | 'operates_in' | 'competes_with';
143
+ interface OntologyNodeData { // per-entity payload
144
+ readonly kind: OntologyEntityKind;
145
+ /** Human-readable display name (id stays kebab-case for stability). */
146
+ readonly name: string;
147
+ }
148
+ interface OntologyEdgeData { // per-edge payload: only the relation kind
149
+ readonly kind: OntologyEdgeKind;
150
+ }
151
+ interface OntologyNode { // one entity: an id plus its OntologyNodeData payload
152
+ readonly id: string;
153
+ readonly data: OntologyNodeData;
154
+ }
155
+ interface OntologyEdge { // one typed relation between two entities
156
+ readonly id: string;
157
+ readonly source: string; // NOTE(review): presumably an OntologyNode id — confirm
158
+ readonly target: string; // NOTE(review): presumably an OntologyNode id — confirm
159
+ readonly data: OntologyEdgeData;
160
+ }
161
+ interface OntologyData { // the whole knowledge graph plus the ids of its initial view
162
+ readonly nodes: readonly OntologyNode[];
163
+ readonly edges: readonly OntologyEdge[];
164
+ /** Ids that the story shows in its initial collapsed view. */
165
+ readonly coreIds: readonly string[];
166
+ }
167
+ declare const ontology: OntologyData;
168
+
169
+ /**
170
+ * Synthetic **citation graph** — 150 papers across 5 research topics,
171
+ * connected by directed `paper-cites-paper` edges. Designed for a
172
+ * Connected-Papers / Litmaps / Elicit-style overview: density contours
173
+ * per topic bring the cluster topology forward, force layout pulls the
174
+ * dense intra-topic citation neighbourhoods together, and the inter-
175
+ * topic edges form the long bridges between clusters.
176
+ *
177
+ * Generation rules (seeded for snapshot stability):
178
+ *
179
+ * - 30 papers per topic across 5 topics, years span 2018–2025.
180
+ * - `citationsCount` drawn from a clipped log-normal so a handful of
181
+ * hub papers dominate the visualisation.
182
+ * - Each paper cites 2–4 prior papers, biased 70% intra-topic and
183
+ * 30% inter-topic. Within the bias bucket, targets are weighted
184
+ * toward older papers with higher citation counts — i.e. crude
185
+ * preferential attachment.
186
+ */
187
+ declare const TOPICS: readonly ["transformers", "diffusion-models", "reinforcement-learning", "graph-neural-networks", "vision-language"]; // the five research topics; not named in the export statement at the end of this file
188
+ type CitationsTopic = (typeof TOPICS)[number]; // union of the TOPICS entries
189
+ interface CitationsNodeData { // per-paper payload
190
+ readonly topic: CitationsTopic;
191
+ readonly title: string;
192
+ readonly year: number;
193
+ readonly citationsCount: number;
194
+ }
195
+ interface CitationsEdgeData {
196
+ /** Edge predicate. `'cites'` is the only value today; the field leaves room for future predicates (`extends`, `refutes`, ...). */
197
+ readonly kind: 'cites';
198
+ }
199
+ interface CitationsNode { // one paper: an id plus its CitationsNodeData payload
200
+ readonly id: string;
201
+ readonly data: CitationsNodeData;
202
+ }
203
+ interface CitationsEdge { // one directed paper-cites-paper edge
204
+ readonly id: string;
205
+ readonly source: string; // NOTE(review): presumably the id of the citing paper — confirm
206
+ readonly target: string; // NOTE(review): presumably the id of the cited paper — confirm
207
+ readonly data: CitationsEdgeData;
208
+ }
209
+ interface CitationsData { // the whole citation graph
210
+ readonly nodes: readonly CitationsNode[];
211
+ readonly edges: readonly CitationsEdge[];
212
+ }
213
+ declare const citations: CitationsData;
214
+
215
+ /**
216
+ * **Cora** citation network — the canonical machine-learning paper
217
+ * dataset (2,708 papers across 7 subject areas, 10,556 `CITES` edges).
218
+ * The raw CSVs live in `./cora-dataset/{nodes,edges}.csv`; a build-time
219
+ * pre-strip in `scripts/prepare-cora.mjs` projects them to a compact
220
+ * `cora.json` (drops the 1,433-dim bag-of-words feature matrix the
221
+ * viewer never needs). This module imports that JSON and shapes it for
222
+ * `GraphLayer.setData`.
223
+ *
224
+ * Re-run the prepare script when the CSVs change:
225
+ * `node scripts/prepare-cora.mjs`
226
+ *
227
+ * @example
228
+ * import { cora } from '@invana/graph-datasets/usecase-demos';
229
+ * graphLayer.setData({ nodes: cora.nodes, edges: cora.edges });
230
+ */
231
+ /** Subject category — the 7 ML topics the original Cora dataset partitions papers into. */
232
+ type CoraSubject = 'Neural_Networks' | 'Rule_Learning' | 'Reinforcement_Learning' | 'Probabilistic_Methods' | 'Theory' | 'Genetic_Algorithms' | 'Case_Based';
233
+ interface CoraNodeData { // per-paper payload: only the subject category
234
+ readonly subject: CoraSubject;
235
+ }
236
+ interface CoraNode { // one paper: an id plus its CoraNodeData payload
237
+ readonly id: string;
238
+ readonly data: CoraNodeData;
239
+ }
240
+ interface CoraEdge { // one edge; unlike CoraNode it declares no `data` member
241
+ readonly id: string;
242
+ readonly source: string; // NOTE(review): presumably a CoraNode id — confirm
243
+ readonly target: string; // NOTE(review): presumably a CoraNode id — confirm
244
+ }
245
+ interface CoraData { // the whole network: papers plus edges
246
+ readonly nodes: readonly CoraNode[];
247
+ readonly edges: readonly CoraEdge[];
248
+ }
249
+ declare const cora: CoraData;
250
+
251
+ /**
252
+ * **Invana Code Knowledge Graph** — a real code-intelligence graph of the
253
+ * [Invana](https://github.com/invana) platform monorepo, produced by the
254
+ * `understand-anything` static analyser. 602 source entities (files,
255
+ * functions, classes, configs, docs) linked by 1,329 typed relations
256
+ * (`imports`, `contains`, `exports`, `calls`, `inherits`, …), partitioned
257
+ * into 8 architectural clusters with a 13-step guided tour.
258
+ *
259
+ * `./invana-code-kg/knowledge-graph.json` is authored **directly** in the
260
+ * property-graph shape this package standardises on — vertices are
261
+ * `{ id, label, properties }`, edges are
262
+ * `{ id, label, source, target, properties }` — so this module is a thin
263
+ * typed view over it, not a translator. The interfaces below ARE the
264
+ * on-disk contract; the JSON is its serialisation.
265
+ *
266
+ * `label` / `properties` don't match `@invana/graph`'s `GraphNode`
267
+ * (`type` / `data`) one-to-one, so a consuming story maps
268
+ * `label → type` and `properties → data` at `setData` time.
269
+ *
270
+ * @example
271
+ * import { invanaCodeKg } from '@invana/graph-datasets/usecase-demos';
272
+ * graphLayer.setData({
273
+ * nodes: invanaCodeKg.nodes.map((n) => ({ id: n.id, type: n.label, data: n.properties })),
274
+ * edges: invanaCodeKg.edges.map((e) => ({ id: e.id, source: e.source, target: e.target, type: e.label, data: e.properties })),
275
+ * });
276
+ */
277
+ /** Vertex label — the source-entity kind. Drives shape + palette downstream. */
278
+ type InvanaCodeNodeLabel = 'file' | 'function' | 'class' | 'config' | 'document';
279
+ /** Analyser's coarse complexity bucket for a node. */
280
+ type InvanaCodeComplexity = 'simple' | 'moderate' | 'complex';
281
+ /** Edge label — the relation kind. */
282
+ type InvanaCodeEdgeLabel = 'imports' | 'contains' | 'exports' | 'calls' | 'inherits' | 'configures' | 'depends_on' | 'documents' | 'related';
283
+ /** Node property bag — everything that isn't `id` or `label`. */
284
+ interface InvanaCodeNodeProperties {
285
+ /** Short display name (`main.tsx`, `ErrorPage`, …). `id` stays the stable path-qualified key. */
286
+ readonly name: string;
287
+ /** Repo-relative source path the entity lives in. */
288
+ readonly filePath: string;
289
+ /** One-line natural-language description from the analyser. */
290
+ readonly summary: string;
291
+ /** Free-form classification tags (`entry-point`, `react`, `error-handling`, …). */
292
+ readonly tags: readonly string[];
293
+ /** Analyser complexity bucket. */
294
+ readonly complexity: InvanaCodeComplexity;
295
+ /** `[startLine, endLine]` for function/class nodes; absent for whole-file nodes. */
296
+ readonly lineRange?: readonly [number, number];
297
+ /** Language-specific caveat the analyser flagged, when present. */
298
+ readonly languageNotes?: string;
299
+ /** Owning cluster id (from {@link InvanaCodeCluster}), or `null` when ungrouped. */
300
+ readonly cluster: string | null;
301
+ }
302
+ /** Edge property bag — relation strength + directionality metadata. */
303
+ interface InvanaCodeEdgeProperties {
304
+ /** Analyser-assigned edge weight in `[0.5, 1]`. */
305
+ readonly weight: number;
306
+ /** Relation directionality. The source graph only emits `'forward'`. */
307
+ readonly direction: 'forward';
308
+ }
309
+ /** A code entity as a property-graph vertex: `{ id, label, properties }`. */
310
+ interface InvanaCodeNode {
311
+ readonly id: string;
312
+ readonly label: InvanaCodeNodeLabel;
313
+ readonly properties: InvanaCodeNodeProperties;
314
+ }
315
+ /** A typed relation as a property-graph edge: `{ id, label, source, target, properties }`. */
316
+ interface InvanaCodeEdge {
317
+ readonly id: string;
318
+ readonly label: InvanaCodeEdgeLabel;
319
+ readonly source: string; // NOTE(review): presumably an InvanaCodeNode id — confirm
320
+ readonly target: string; // NOTE(review): presumably an InvanaCodeNode id — confirm
321
+ readonly properties: InvanaCodeEdgeProperties;
322
+ }
323
+ /** An architectural cluster — a named group of node ids. */
324
+ interface InvanaCodeCluster {
325
+ readonly id: string;
326
+ readonly name: string;
327
+ readonly description: string;
328
+ readonly nodeIds: readonly string[]; // ids of the nodes grouped into this cluster
329
+ }
330
+ /** A guided-tour step spotlighting a set of nodes with a teaching note. */
331
+ interface InvanaCodeTourStep {
332
+ readonly order: number; // NOTE(review): presumably the step's position within the tour — confirm
333
+ readonly title: string;
334
+ readonly description: string;
335
+ readonly nodeIds: readonly string[]; // ids of the nodes this step spotlights
336
+ readonly languageLesson?: string; // optional; may be absent on a step
337
+ }
338
+ /** Project-level provenance for the analysed repository. */
339
+ interface InvanaCodeProject {
340
+ readonly name: string;
341
+ readonly languages: readonly string[];
342
+ readonly frameworks: readonly string[];
343
+ readonly description: string;
344
+ readonly analyzedAt?: string; // optional; NOTE(review): presumably a timestamp string — format not visible here
345
+ readonly gitCommitHash?: string; // optional; may be absent
346
+ }
347
+ /** The full dataset: graph + cluster metadata + tour + provenance. */
348
+ interface InvanaCodeKgData {
349
+ readonly nodes: readonly InvanaCodeNode[]; // graph vertices
350
+ readonly edges: readonly InvanaCodeEdge[]; // graph relations
351
+ readonly clusters: readonly InvanaCodeCluster[]; // cluster metadata
352
+ readonly tour: readonly InvanaCodeTourStep[]; // guided-tour steps
353
+ readonly project: InvanaCodeProject; // provenance
354
+ }
355
+ declare const invanaCodeKg: InvanaCodeKgData;
356
+
357
+ export { type AgentTraceData, type AgentTraceEdge, type AgentTraceEdgeData, type AgentTraceEdgeKind, type AgentTraceNode, type AgentTraceNodeData, type AgentTraceNodeKind, type AgentTraceStatus, type CitationsData, type CitationsEdge, type CitationsEdgeData, type CitationsNode, type CitationsNodeData, type CitationsTopic, type CoraData, type CoraEdge, type CoraNode, type CoraNodeData, type CoraSubject, type InvanaCodeCluster, type InvanaCodeComplexity, type InvanaCodeEdge, type InvanaCodeEdgeLabel, type InvanaCodeEdgeProperties, type InvanaCodeKgData, type InvanaCodeNode, type InvanaCodeNodeLabel, type InvanaCodeNodeProperties, type InvanaCodeProject, type InvanaCodeTourStep, type MicroservicesData, type MicroservicesEdge, type MicroservicesEdgeData, type MicroservicesHealth, type MicroservicesNode, type MicroservicesNodeData, type MicroservicesTier, type OntologyData, type OntologyEdge, type OntologyEdgeData, type OntologyEdgeKind, type OntologyEntityKind, type OntologyNode, type OntologyNodeData, type RagEmbeddingsCluster, type RagEmbeddingsData, type RagEmbeddingsNode, type RagEmbeddingsNodeData, agentTrace, citations, cora, invanaCodeKg, microservices, ontology, ragEmbeddings };