@invana/graph-datasets 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +577 -0
- package/dist/index.js +1460 -0
- package/dist/index.js.map +1 -0
- package/dist/usecase-demos/index.d.ts +357 -0
- package/dist/usecase-demos/index.js +25708 -0
- package/dist/usecase-demos/index.js.map +1 -0
- package/package.json +49 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Synthetic **LLM agent traces** — small DAGs that approximate the kind
|
|
3
|
+
* of execution graph LangSmith / Langfuse / Helicone draw for a single
|
|
4
|
+
* agent run. Each node is an `llm` call, a `tool` invocation, a
|
|
5
|
+
* `decision` branch, or a terminal `output`. Each edge is a `calls`
|
|
6
|
+
* (control-flow), `returns` (data-flow), or `branch` (decision branch).
|
|
7
|
+
*
|
|
8
|
+
* Three presets are exported so a single story can illustrate the
|
|
9
|
+
* happy path, an error+retry path, and a multi-tool branching path
|
|
10
|
+
* without re-deriving the data per render.
|
|
11
|
+
*
|
|
12
|
+
* Designed for layered DAG layouts (ELK `layered` `DOWN`); the dataset
|
|
13
|
+
* carries no positions.
|
|
14
|
+
*/
|
|
15
|
+
/** What the node represents in the agent's execution graph. */
|
|
16
|
+
type AgentTraceNodeKind = 'llm' | 'tool' | 'decision' | 'output';
|
|
17
|
+
/** Per-node execution status — drives state-config styling in the story. */
|
|
18
|
+
type AgentTraceStatus = 'success' | 'error' | 'pending';
|
|
19
|
+
/** What the edge represents. */
|
|
20
|
+
type AgentTraceEdgeKind = 'calls' | 'returns' | 'branch';
|
|
21
|
+
/** Per-node payload: node kind, display label, execution status, duration, and an optional token count. The store keeps this opaque. */
|
|
22
|
+
interface AgentTraceNodeData {
|
|
23
|
+
readonly kind: AgentTraceNodeKind;
|
|
24
|
+
readonly label: string;
|
|
25
|
+
readonly status: AgentTraceStatus;
|
|
26
|
+
readonly durationMs: number;
|
|
27
|
+
readonly tokens?: number;
|
|
28
|
+
}
|
|
29
|
+
/** Per-edge payload — currently just the edge kind. */
|
|
30
|
+
interface AgentTraceEdgeData {
|
|
31
|
+
readonly kind: AgentTraceEdgeKind;
|
|
32
|
+
}
|
|
33
|
+
interface AgentTraceNode {
|
|
34
|
+
readonly id: string;
|
|
35
|
+
readonly data: AgentTraceNodeData;
|
|
36
|
+
}
|
|
37
|
+
interface AgentTraceEdge {
|
|
38
|
+
readonly id: string;
|
|
39
|
+
readonly source: string;
|
|
40
|
+
readonly target: string;
|
|
41
|
+
readonly data: AgentTraceEdgeData;
|
|
42
|
+
}
|
|
43
|
+
interface AgentTraceData {
|
|
44
|
+
readonly id: string;
|
|
45
|
+
readonly name: string;
|
|
46
|
+
readonly nodes: readonly AgentTraceNode[];
|
|
47
|
+
readonly edges: readonly AgentTraceEdge[];
|
|
48
|
+
}
|
|
49
|
+
/** All three presets, in display order. */
|
|
50
|
+
declare const agentTrace: readonly AgentTraceData[];
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Synthetic **RAG embedding explorer** dataset — ~400 2D points that
|
|
54
|
+
* imitate a UMAP / t-SNE projection of a vector index. Five thematic
|
|
55
|
+
* clusters (auth, billing, search, infra, ML) plus a sprinkling of
|
|
56
|
+
* uniform outliers, generated from a seeded RNG so the visualisation
|
|
57
|
+
* is stable across reloads.
|
|
58
|
+
*
|
|
59
|
+
* Each point carries a `cluster` id, a short `text` snippet that stands
|
|
60
|
+
* in for the chunk content, and a `source` filename. There are no edges
|
|
61
|
+
* — the story renders raw points overlaid by a
|
|
62
|
+
* `DensityContourFillLayer` to bring the cluster topology forward.
|
|
63
|
+
*/
|
|
64
|
+
declare const CLUSTER_NAMES: readonly ["auth", "billing", "search", "infra", "ml"];
|
|
65
|
+
type RagEmbeddingsCluster = (typeof CLUSTER_NAMES)[number];
|
|
66
|
+
interface RagEmbeddingsNodeData {
|
|
67
|
+
readonly cluster: RagEmbeddingsCluster;
|
|
68
|
+
readonly text: string;
|
|
69
|
+
readonly source: string;
|
|
70
|
+
}
|
|
71
|
+
interface RagEmbeddingsNode {
|
|
72
|
+
readonly id: string;
|
|
73
|
+
readonly position: {
|
|
74
|
+
readonly x: number;
|
|
75
|
+
readonly y: number;
|
|
76
|
+
};
|
|
77
|
+
readonly data: RagEmbeddingsNodeData;
|
|
78
|
+
}
|
|
79
|
+
interface RagEmbeddingsData {
|
|
80
|
+
readonly nodes: readonly RagEmbeddingsNode[];
|
|
81
|
+
}
|
|
82
|
+
declare const ragEmbeddings: RagEmbeddingsData;
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Synthetic **microservices topology** dataset — ~20 services across a
|
|
86
|
+
* SaaS stack with call edges carrying RPS and per-edge error rates.
|
|
87
|
+
* Designed for a Datadog / Istio / Linkerd-style service-map demo.
|
|
88
|
+
*
|
|
89
|
+
* The dataset embeds one degraded service (`order-api`), one degraded
|
|
90
|
+
* downstream (`payment-service`), and one downed service
|
|
91
|
+
* (`fraud-detector`) so the story has something to flag visually
|
|
92
|
+
* without the consumer rolling a fake "simulate degradation" loop just
|
|
93
|
+
* to see the styled states.
|
|
94
|
+
*/
|
|
95
|
+
/** Coarse architectural tier — drives icon choice in the story. */
|
|
96
|
+
type MicroservicesTier = 'gateway' | 'api' | 'logic' | 'data' | 'external';
|
|
97
|
+
/** Health roll-up rendered as the node state-config name. */
|
|
98
|
+
type MicroservicesHealth = 'healthy' | 'degraded' | 'down';
|
|
99
|
+
interface MicroservicesNodeData {
|
|
100
|
+
readonly tier: MicroservicesTier;
|
|
101
|
+
readonly health: MicroservicesHealth;
|
|
102
|
+
/** Sustained requests-per-second at this service. */
|
|
103
|
+
readonly rps: number;
|
|
104
|
+
}
|
|
105
|
+
interface MicroservicesEdgeData {
|
|
106
|
+
/** Sustained requests-per-second on this dependency edge. */
|
|
107
|
+
readonly rps: number;
|
|
108
|
+
/** Fraction in `[0, 1]`. Above ~0.05 the story flags the edge. */
|
|
109
|
+
readonly errorRate: number;
|
|
110
|
+
}
|
|
111
|
+
interface MicroservicesNode {
|
|
112
|
+
readonly id: string;
|
|
113
|
+
readonly data: MicroservicesNodeData;
|
|
114
|
+
}
|
|
115
|
+
interface MicroservicesEdge {
|
|
116
|
+
readonly id: string;
|
|
117
|
+
readonly source: string;
|
|
118
|
+
readonly target: string;
|
|
119
|
+
readonly data: MicroservicesEdgeData;
|
|
120
|
+
}
|
|
121
|
+
interface MicroservicesData {
|
|
122
|
+
readonly nodes: readonly MicroservicesNode[];
|
|
123
|
+
readonly edges: readonly MicroservicesEdge[];
|
|
124
|
+
}
|
|
125
|
+
declare const microservices: MicroservicesData;
|
|
126
|
+
|
|
127
|
+
/**
|
|
128
|
+
* Synthetic **company knowledge-graph** — entities of five kinds linked
|
|
129
|
+
* by typed relations. Modelled after Palantir / Neo4j Bloom / Diffbot
|
|
130
|
+
* entity-ontology demos so the story has a recognisable picture: five
|
|
131
|
+
* companies, the people who founded / run them, the products they ship,
|
|
132
|
+
* the cities they're based in, and the industries they operate in.
|
|
133
|
+
*
|
|
134
|
+
* The dataset is deliberately under-connected at the periphery so the
|
|
135
|
+
* "double-click to expand" interaction in the story has something
|
|
136
|
+
* meaningful to do — start with the core companies + their CEOs and
|
|
137
|
+
* unfold the products, locations, and industries by clicking.
|
|
138
|
+
*/
|
|
139
|
+
/** Entity kind — drives shape and palette in the story. */
|
|
140
|
+
type OntologyEntityKind = 'company' | 'person' | 'product' | 'location' | 'industry';
|
|
141
|
+
/** Predicate / FK label on each edge. */
|
|
142
|
+
type OntologyEdgeKind = 'founded' | 'ceo_of' | 'works_at' | 'builds' | 'headquartered_in' | 'operates_in' | 'competes_with';
|
|
143
|
+
interface OntologyNodeData {
|
|
144
|
+
readonly kind: OntologyEntityKind;
|
|
145
|
+
/** Human-readable display name (id stays kebab-case for stability). */
|
|
146
|
+
readonly name: string;
|
|
147
|
+
}
|
|
148
|
+
interface OntologyEdgeData {
|
|
149
|
+
readonly kind: OntologyEdgeKind;
|
|
150
|
+
}
|
|
151
|
+
interface OntologyNode {
|
|
152
|
+
readonly id: string;
|
|
153
|
+
readonly data: OntologyNodeData;
|
|
154
|
+
}
|
|
155
|
+
interface OntologyEdge {
|
|
156
|
+
readonly id: string;
|
|
157
|
+
readonly source: string;
|
|
158
|
+
readonly target: string;
|
|
159
|
+
readonly data: OntologyEdgeData;
|
|
160
|
+
}
|
|
161
|
+
interface OntologyData {
|
|
162
|
+
readonly nodes: readonly OntologyNode[];
|
|
163
|
+
readonly edges: readonly OntologyEdge[];
|
|
164
|
+
/** Ids that the story shows in its initial collapsed view. */
|
|
165
|
+
readonly coreIds: readonly string[];
|
|
166
|
+
}
|
|
167
|
+
declare const ontology: OntologyData;
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Synthetic **citation graph** — 150 papers across 5 research topics,
|
|
171
|
+
* connected by directed `paper-cites-paper` edges. Designed for a
|
|
172
|
+
* Connected-Papers / Litmaps / Elicit-style overview: density contours
|
|
173
|
+
* per topic bring the cluster topology forward, force layout pulls the
|
|
174
|
+
* dense intra-topic citation neighbourhoods together, and the inter-
|
|
175
|
+
* topic edges form the long bridges between clusters.
|
|
176
|
+
*
|
|
177
|
+
* Generation rules (seeded for snapshot stability):
|
|
178
|
+
*
|
|
179
|
+
* - 30 papers per topic across 5 topics, years span 2018–2025.
|
|
180
|
+
* - `citationsCount` drawn from a clipped log-normal so a handful of
|
|
181
|
+
* hub papers dominate the visualisation.
|
|
182
|
+
* - Each paper cites 2–4 prior papers, biased 70% intra-topic and
|
|
183
|
+
* 30% inter-topic. Within the bias bucket, targets are weighted
|
|
184
|
+
* toward older papers with higher citation counts — i.e. crude
|
|
185
|
+
* preferential attachment.
|
|
186
|
+
*/
|
|
187
|
+
declare const TOPICS: readonly ["transformers", "diffusion-models", "reinforcement-learning", "graph-neural-networks", "vision-language"];
|
|
188
|
+
type CitationsTopic = (typeof TOPICS)[number];
|
|
189
|
+
interface CitationsNodeData {
|
|
190
|
+
readonly topic: CitationsTopic;
|
|
191
|
+
readonly title: string;
|
|
192
|
+
readonly year: number;
|
|
193
|
+
readonly citationsCount: number;
|
|
194
|
+
}
|
|
195
|
+
interface CitationsEdgeData {
|
|
196
|
+
/** Edge predicate. Only `cites` exists today; the field is kept so future predicates (`extends`, `refutes`, ...) can be added. */
|
|
197
|
+
readonly kind: 'cites';
|
|
198
|
+
}
|
|
199
|
+
interface CitationsNode {
|
|
200
|
+
readonly id: string;
|
|
201
|
+
readonly data: CitationsNodeData;
|
|
202
|
+
}
|
|
203
|
+
interface CitationsEdge {
|
|
204
|
+
readonly id: string;
|
|
205
|
+
readonly source: string;
|
|
206
|
+
readonly target: string;
|
|
207
|
+
readonly data: CitationsEdgeData;
|
|
208
|
+
}
|
|
209
|
+
interface CitationsData {
|
|
210
|
+
readonly nodes: readonly CitationsNode[];
|
|
211
|
+
readonly edges: readonly CitationsEdge[];
|
|
212
|
+
}
|
|
213
|
+
declare const citations: CitationsData;
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* **Cora** citation network — the canonical machine-learning paper
|
|
217
|
+
* dataset (2,708 papers across 7 subject areas, 10,556 `CITES` edges).
|
|
218
|
+
* The raw CSVs live in `./cora-dataset/{nodes,edges}.csv`; a build-time
|
|
219
|
+
* pre-strip in `scripts/prepare-cora.mjs` projects them to a compact
|
|
220
|
+
* `cora.json` (drops the 1,433-dim bag-of-words feature matrix the
|
|
221
|
+
* viewer never needs). This module imports that JSON and shapes it for
|
|
222
|
+
* `GraphLayer.setData`.
|
|
223
|
+
*
|
|
224
|
+
* Re-run the prepare script when the CSVs change:
|
|
225
|
+
* `node scripts/prepare-cora.mjs`
|
|
226
|
+
*
|
|
227
|
+
* @example
|
|
228
|
+
* import { cora } from '@invana/graph-datasets/usecase-demos';
|
|
229
|
+
* graphLayer.setData({ nodes: cora.nodes, edges: cora.edges });
|
|
230
|
+
*/
|
|
231
|
+
/** Subject category — the 7 ML topics the original Cora dataset partitions papers into. */
|
|
232
|
+
type CoraSubject = 'Neural_Networks' | 'Rule_Learning' | 'Reinforcement_Learning' | 'Probabilistic_Methods' | 'Theory' | 'Genetic_Algorithms' | 'Case_Based';
|
|
233
|
+
interface CoraNodeData {
|
|
234
|
+
readonly subject: CoraSubject;
|
|
235
|
+
}
|
|
236
|
+
interface CoraNode {
|
|
237
|
+
readonly id: string;
|
|
238
|
+
readonly data: CoraNodeData;
|
|
239
|
+
}
|
|
240
|
+
interface CoraEdge {
|
|
241
|
+
readonly id: string;
|
|
242
|
+
readonly source: string;
|
|
243
|
+
readonly target: string;
|
|
244
|
+
}
|
|
245
|
+
interface CoraData {
|
|
246
|
+
readonly nodes: readonly CoraNode[];
|
|
247
|
+
readonly edges: readonly CoraEdge[];
|
|
248
|
+
}
|
|
249
|
+
declare const cora: CoraData;
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* **Invana Code Knowledge Graph** — a real code-intelligence graph of the
|
|
253
|
+
* [Invana](https://github.com/invana) platform monorepo, produced by the
|
|
254
|
+
* `understand-anything` static analyser. 602 source entities (files,
|
|
255
|
+
* functions, classes, configs, docs) linked by 1,329 typed relations
|
|
256
|
+
* (`imports`, `contains`, `exports`, `calls`, `inherits`, …), partitioned
|
|
257
|
+
* into 8 architectural clusters with a 13-step guided tour.
|
|
258
|
+
*
|
|
259
|
+
* `./invana-code-kg/knowledge-graph.json` is authored **directly** in the
|
|
260
|
+
* property-graph shape this package standardises on — vertices are
|
|
261
|
+
* `{ id, label, properties }`, edges are
|
|
262
|
+
* `{ id, label, source, target, properties }` — so this module is a thin
|
|
263
|
+
* typed view over it, not a translator. The interfaces below ARE the
|
|
264
|
+
* on-disk contract; the JSON is its serialisation.
|
|
265
|
+
*
|
|
266
|
+
* `label` / `properties` don't match `@invana/graph`'s `GraphNode`
|
|
267
|
+
* (`type` / `data`) one-to-one, so a consuming story maps
|
|
268
|
+
* `label → type` and `properties → data` at `setData` time.
|
|
269
|
+
*
|
|
270
|
+
* @example
|
|
271
|
+
* import { invanaCodeKg } from '@invana/graph-datasets/usecase-demos';
|
|
272
|
+
* graphLayer.setData({
|
|
273
|
+
* nodes: invanaCodeKg.nodes.map((n) => ({ id: n.id, type: n.label, data: n.properties })),
|
|
274
|
+
* edges: invanaCodeKg.edges.map((e) => ({ id: e.id, source: e.source, target: e.target, type: e.label, data: e.properties })),
|
|
275
|
+
* });
|
|
276
|
+
*/
|
|
277
|
+
/** Vertex label — the source-entity kind. Drives shape + palette downstream. */
|
|
278
|
+
type InvanaCodeNodeLabel = 'file' | 'function' | 'class' | 'config' | 'document';
|
|
279
|
+
/** Analyser's coarse complexity bucket for a node. */
|
|
280
|
+
type InvanaCodeComplexity = 'simple' | 'moderate' | 'complex';
|
|
281
|
+
/** Edge label — the relation kind. */
|
|
282
|
+
type InvanaCodeEdgeLabel = 'imports' | 'contains' | 'exports' | 'calls' | 'inherits' | 'configures' | 'depends_on' | 'documents' | 'related';
|
|
283
|
+
/** Node property bag — everything that isn't `id` or `label`. */
|
|
284
|
+
interface InvanaCodeNodeProperties {
|
|
285
|
+
/** Short display name (`main.tsx`, `ErrorPage`, …). `id` stays the stable path-qualified key. */
|
|
286
|
+
readonly name: string;
|
|
287
|
+
/** Repo-relative source path the entity lives in. */
|
|
288
|
+
readonly filePath: string;
|
|
289
|
+
/** One-line natural-language description from the analyser. */
|
|
290
|
+
readonly summary: string;
|
|
291
|
+
/** Free-form classification tags (`entry-point`, `react`, `error-handling`, …). */
|
|
292
|
+
readonly tags: readonly string[];
|
|
293
|
+
/** Analyser complexity bucket. */
|
|
294
|
+
readonly complexity: InvanaCodeComplexity;
|
|
295
|
+
/** `[startLine, endLine]` for function/class nodes; absent for whole-file nodes. */
|
|
296
|
+
readonly lineRange?: readonly [number, number];
|
|
297
|
+
/** Language-specific caveat the analyser flagged, when present. */
|
|
298
|
+
readonly languageNotes?: string;
|
|
299
|
+
/** Owning cluster id (from {@link InvanaCodeCluster}), or `null` when ungrouped. */
|
|
300
|
+
readonly cluster: string | null;
|
|
301
|
+
}
|
|
302
|
+
/** Edge property bag — relation strength + directionality metadata. */
|
|
303
|
+
interface InvanaCodeEdgeProperties {
|
|
304
|
+
/** Analyser-assigned edge weight in `[0.5, 1]`. */
|
|
305
|
+
readonly weight: number;
|
|
306
|
+
/** Relation directionality. The source graph only emits `'forward'`. */
|
|
307
|
+
readonly direction: 'forward';
|
|
308
|
+
}
|
|
309
|
+
/** A code entity as a property-graph vertex: `{ id, label, properties }`. */
|
|
310
|
+
interface InvanaCodeNode {
|
|
311
|
+
readonly id: string;
|
|
312
|
+
readonly label: InvanaCodeNodeLabel;
|
|
313
|
+
readonly properties: InvanaCodeNodeProperties;
|
|
314
|
+
}
|
|
315
|
+
/** A typed relation as a property-graph edge: `{ id, label, source, target, properties }`. */
|
|
316
|
+
interface InvanaCodeEdge {
|
|
317
|
+
readonly id: string;
|
|
318
|
+
readonly label: InvanaCodeEdgeLabel;
|
|
319
|
+
readonly source: string;
|
|
320
|
+
readonly target: string;
|
|
321
|
+
readonly properties: InvanaCodeEdgeProperties;
|
|
322
|
+
}
|
|
323
|
+
/** An architectural cluster — a named group of node ids. */
|
|
324
|
+
interface InvanaCodeCluster {
|
|
325
|
+
readonly id: string;
|
|
326
|
+
readonly name: string;
|
|
327
|
+
readonly description: string;
|
|
328
|
+
readonly nodeIds: readonly string[];
|
|
329
|
+
}
|
|
330
|
+
/** A guided-tour step spotlighting a set of nodes with a teaching note. */
|
|
331
|
+
interface InvanaCodeTourStep {
|
|
332
|
+
readonly order: number;
|
|
333
|
+
readonly title: string;
|
|
334
|
+
readonly description: string;
|
|
335
|
+
readonly nodeIds: readonly string[];
|
|
336
|
+
readonly languageLesson?: string;
|
|
337
|
+
}
|
|
338
|
+
/** Project-level provenance for the analysed repository. */
|
|
339
|
+
interface InvanaCodeProject {
|
|
340
|
+
readonly name: string;
|
|
341
|
+
readonly languages: readonly string[];
|
|
342
|
+
readonly frameworks: readonly string[];
|
|
343
|
+
readonly description: string;
|
|
344
|
+
readonly analyzedAt?: string;
|
|
345
|
+
readonly gitCommitHash?: string;
|
|
346
|
+
}
|
|
347
|
+
/** The full dataset: graph + cluster metadata + tour + provenance. */
|
|
348
|
+
interface InvanaCodeKgData {
|
|
349
|
+
readonly nodes: readonly InvanaCodeNode[];
|
|
350
|
+
readonly edges: readonly InvanaCodeEdge[];
|
|
351
|
+
readonly clusters: readonly InvanaCodeCluster[];
|
|
352
|
+
readonly tour: readonly InvanaCodeTourStep[];
|
|
353
|
+
readonly project: InvanaCodeProject;
|
|
354
|
+
}
|
|
355
|
+
declare const invanaCodeKg: InvanaCodeKgData;
|
|
356
|
+
|
|
357
|
+
export { type AgentTraceData, type AgentTraceEdge, type AgentTraceEdgeData, type AgentTraceEdgeKind, type AgentTraceNode, type AgentTraceNodeData, type AgentTraceNodeKind, type AgentTraceStatus, type CitationsData, type CitationsEdge, type CitationsEdgeData, type CitationsNode, type CitationsNodeData, type CitationsTopic, type CoraData, type CoraEdge, type CoraNode, type CoraNodeData, type CoraSubject, type InvanaCodeCluster, type InvanaCodeComplexity, type InvanaCodeEdge, type InvanaCodeEdgeLabel, type InvanaCodeEdgeProperties, type InvanaCodeKgData, type InvanaCodeNode, type InvanaCodeNodeLabel, type InvanaCodeNodeProperties, type InvanaCodeProject, type InvanaCodeTourStep, type MicroservicesData, type MicroservicesEdge, type MicroservicesEdgeData, type MicroservicesHealth, type MicroservicesNode, type MicroservicesNodeData, type MicroservicesTier, type OntologyData, type OntologyEdge, type OntologyEdgeData, type OntologyEdgeKind, type OntologyEntityKind, type OntologyNode, type OntologyNodeData, type RagEmbeddingsCluster, type RagEmbeddingsData, type RagEmbeddingsNode, type RagEmbeddingsNodeData, agentTrace, citations, cora, invanaCodeKg, microservices, ontology, ragEmbeddings };
|