graphmemory 1.2.0__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {graphmemory-1.2.0 → graphmemory-1.2.1}/PKG-INFO +1 -1
  2. graphmemory-1.2.1/examples/test_ingest.py +152 -0
  3. {graphmemory-1.2.0 → graphmemory-1.2.1}/graphmemory/extraction.py +123 -1
  4. {graphmemory-1.2.0 → graphmemory-1.2.1}/pyproject.toml +1 -1
  5. graphmemory-1.2.0/examples/test_ingest.py +0 -147
  6. {graphmemory-1.2.0 → graphmemory-1.2.1}/.gitignore +0 -0
  7. {graphmemory-1.2.0 → graphmemory-1.2.1}/LICENSE +0 -0
  8. {graphmemory-1.2.0 → graphmemory-1.2.1}/README.md +0 -0
  9. {graphmemory-1.2.0 → graphmemory-1.2.1}/examples/dspy_example_typed_pred.py +0 -0
  10. {graphmemory-1.2.0 → graphmemory-1.2.1}/examples/lexical_graph.py +0 -0
  11. {graphmemory-1.2.0 → graphmemory-1.2.1}/examples/openai_example.py +0 -0
  12. {graphmemory-1.2.0 → graphmemory-1.2.1}/graphmemory/__init__.py +0 -0
  13. {graphmemory-1.2.0 → graphmemory-1.2.1}/graphmemory/algorithms.py +0 -0
  14. {graphmemory-1.2.0 → graphmemory-1.2.1}/graphmemory/database.py +0 -0
  15. {graphmemory-1.2.0 → graphmemory-1.2.1}/graphmemory/models.py +0 -0
  16. {graphmemory-1.2.0 → graphmemory-1.2.1}/input/Genetic Programming1.txt +0 -0
  17. {graphmemory-1.2.0 → graphmemory-1.2.1}/input/Genetic Programming2.txt +0 -0
  18. {graphmemory-1.2.0 → graphmemory-1.2.1}/input/Genetic Programming3.txt +0 -0
  19. {graphmemory-1.2.0 → graphmemory-1.2.1}/input/Genetic Programming4.txt +0 -0
  20. {graphmemory-1.2.0 → graphmemory-1.2.1}/input/aimav4.txt +0 -0
  21. {graphmemory-1.2.0 → graphmemory-1.2.1}/input/reading_in_plannings.txt +0 -0
  22. {graphmemory-1.2.0 → graphmemory-1.2.1}/requirements.txt +0 -0
  23. {graphmemory-1.2.0 → graphmemory-1.2.1}/tests/tests.py +0 -0
  24. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/package-lock.json +0 -0
  25. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/package.json +0 -0
  26. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/public/banner.png +0 -0
  27. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/GraphMemoryShowcase.tsx +0 -0
  28. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/Root.tsx +0 -0
  29. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/components/Background.tsx +0 -0
  30. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/components/CodeBlock.tsx +0 -0
  31. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/components/FeaturePill.tsx +0 -0
  32. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/components/GraphViz.tsx +0 -0
  33. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/components/SectionTitle.tsx +0 -0
  34. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/index.ts +0 -0
  35. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/AlgorithmsScene.tsx +0 -0
  36. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/ExportScene.tsx +0 -0
  37. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/ExtractionScene.tsx +0 -0
  38. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/IntroScene.tsx +0 -0
  39. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/MergeScene.tsx +0 -0
  40. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/NodeEdgeScene.tsx +0 -0
  41. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/OutroScene.tsx +0 -0
  42. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/QueryBuilderScene.tsx +0 -0
  43. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/RetrievalScene.tsx +0 -0
  44. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/scenes/VectorSearchScene.tsx +0 -0
  45. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/src/theme.ts +0 -0
  46. {graphmemory-1.2.0 → graphmemory-1.2.1}/video/tsconfig.json +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphmemory
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Summary: Graph-based memory system using DuckDB
5
5
  Project-URL: Homepage, https://github.com/bradAGI/GraphMemory
6
6
  Project-URL: Repository, https://github.com/bradAGI/GraphMemory
@@ -0,0 +1,152 @@
1
+ """End-to-end test: ingest aimav4.txt using parallel LLM extraction via DSPy."""
2
+
3
+ import sys
4
+ import os
5
+ import re
6
+ import time
7
+ import logging
8
+
9
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
10
+
11
+ import dspy
12
+ from graphmemory import GraphMemory, MergeStrategy
13
+ from graphmemory.extraction import extract_and_merge_parallel
14
+
15
+ logging.basicConfig(level=logging.WARNING, format="%(levelname)s %(name)s: %(message)s")
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # --- Configure DSPy with gpt-5-nano (10k RPM, 10M TPM) ---
19
+ lm = dspy.LM("openai/gpt-5-nano")
20
+ dspy.configure(lm=lm)
21
+
22
+ # With 10k RPM we can safely run 50+ concurrent requests
23
+ MAX_WORKERS = 50
24
+
25
+
26
+ def chunk_text(text: str, max_chars: int = 4000) -> list[str]:
27
+ """Split text into paragraph-aware chunks."""
28
+ paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
29
+ chunks = []
30
+ current = []
31
+ current_len = 0
32
+ for p in paragraphs:
33
+ if current_len + len(p) > max_chars and current:
34
+ chunks.append("\n\n".join(current))
35
+ current = []
36
+ current_len = 0
37
+ current.append(p)
38
+ current_len += len(p)
39
+ if current:
40
+ chunks.append("\n\n".join(current))
41
+ return chunks
42
+
43
+
44
+ def on_progress(phase, done, total):
45
+ bar_len = 30
46
+ filled = int(bar_len * done / total)
47
+ bar = "█" * filled + "░" * (bar_len - filled)
48
+ print(f"\r {phase:5s} [{bar}] {done}/{total}", end="", flush=True)
49
+ if done == total:
50
+ print()
51
+
52
+
53
+ def main():
54
+ input_path = os.path.join(os.path.dirname(__file__), "..", "input", "aimav4.txt")
55
+ with open(input_path) as f:
56
+ text = f.read(200_000)
57
+
58
+ text = re.sub(r"<!--.*?-->", "", text)
59
+ chunks = chunk_text(text, max_chars=4000)
60
+
61
+ print("=" * 60)
62
+ print("GraphMemory — Parallel LLM Extraction")
63
+ print("=" * 60)
64
+ print(f"Source: aimav4.txt ({len(text):,} chars)")
65
+ print(f"Chunks: {len(chunks)} x ~4k chars")
66
+ print(f"Workers: {MAX_WORKERS} concurrent LLM calls")
67
+ print(f"LLM: gpt-5-nano via DSPy")
68
+
69
+ db = GraphMemory(database=":memory:", vector_length=3)
70
+
71
+ print(f"\n--- Phase 1: Node extraction (parallel) ---")
72
+ print(f"--- Phase 2: Edge extraction (parallel) ---")
73
+ t0 = time.time()
74
+
75
+ node_results, edge_results = extract_and_merge_parallel(
76
+ db,
77
+ chunks,
78
+ match_keys=["name"],
79
+ match_type=True,
80
+ similarity_threshold=0.88,
81
+ max_workers=MAX_WORKERS,
82
+ on_progress=on_progress,
83
+ )
84
+
85
+ elapsed = time.time() - t0
86
+ created_n = sum(1 for r in node_results if r.created)
87
+ merged_n = sum(1 for r in node_results if not r.created)
88
+ created_e = sum(1 for r in edge_results if r.created)
89
+ merged_e = sum(1 for r in edge_results if not r.created)
90
+
91
+ print(f"\n Done in {elapsed:.1f}s ({len(chunks) * 2} LLM calls)")
92
+ print(f" Nodes: {created_n} new, {merged_n} fuzzy-merged")
93
+ print(f" Edges: {created_e} new, {merged_e} deduped")
94
+
95
+ # --- Post-extraction dedupe ---
96
+ print(f"\n--- Post-extraction duplicate resolution ---")
97
+ t1 = time.time()
98
+ clusters = db.resolve_duplicates(
99
+ match_keys=["name"],
100
+ match_type=True,
101
+ similarity_threshold=0.90,
102
+ )
103
+ print(f" {len(clusters)} clusters resolved in {time.time() - t1:.1f}s")
104
+ for c in clusters[:10]:
105
+ merged_names = [m.properties.get("name", "?") for m in c.merged]
106
+ print(f" '{c.survivor.properties.get('name')}' <- {merged_names}")
107
+ if len(clusters) > 10:
108
+ print(f" ... and {len(clusters) - 10} more")
109
+
110
+ # --- Results ---
111
+ all_nodes = db.nodes_to_json()
112
+ all_edges = db.edges_to_json()
113
+
114
+ type_counts = {}
115
+ for n in all_nodes:
116
+ t = n.get("type", "Unknown")
117
+ type_counts[t] = type_counts.get(t, 0) + 1
118
+
119
+ print(f"\n--- Final Graph ---")
120
+ print(f" Nodes: {len(all_nodes)}")
121
+ print(f" Edges: {len(all_edges)}")
122
+ print(f" Types: {dict(sorted(type_counts.items(), key=lambda x: -x[1]))}")
123
+
124
+ print(f"\n--- Sample Entities (first 30) ---")
125
+ sorted_nodes = sorted(all_nodes, key=lambda x: (x.get("type") or "", x.get("properties", {}).get("name") or ""))
126
+ for n in sorted_nodes[:30]:
127
+ props = n.get("properties", {})
128
+ print(f" [{n.get('type', '?'):15}] {props.get('name', props)}")
129
+ if len(sorted_nodes) > 30:
130
+ print(f" ... and {len(sorted_nodes) - 30} more")
131
+
132
+ print(f"\n--- Sample Relationships (first 20) ---")
133
+ node_id_map = {n["id"]: n for n in all_nodes}
134
+ for e in all_edges[:20]:
135
+ src = node_id_map.get(e["source_id"], {}).get("properties", {}).get("name", "?")
136
+ tgt = node_id_map.get(e["target_id"], {}).get("properties", {}).get("name", "?")
137
+ print(f" {src} --[{e['relation']}]--> {tgt}")
138
+ if len(all_edges) > 20:
139
+ print(f" ... and {len(all_edges) - 20} more")
140
+
141
+ print(f"\n--- Search: 'artificial intelligence' ---")
142
+ results = db.search_nodes("artificial intelligence", limit=5)
143
+ for sr in results:
144
+ print(f" [{sr.node.type}] {sr.node.properties.get('name', '?')} (score={sr.score:.3f})")
145
+
146
+ print(f"\n{'=' * 60}")
147
+ print(f"{len(all_nodes)} nodes, {len(all_edges)} edges from {len(text):,} chars in {elapsed:.1f}s")
148
+ print(f"{'=' * 60}")
149
+
150
+
151
+ if __name__ == "__main__":
152
+ main()
@@ -10,7 +10,8 @@ Requires the ``dspy`` optional dependency:
10
10
  from __future__ import annotations
11
11
 
12
12
  import logging
13
- from typing import TYPE_CHECKING, Any
13
+ from concurrent.futures import ThreadPoolExecutor, as_completed
14
+ from typing import TYPE_CHECKING, Any, Callable
14
15
 
15
16
  from pydantic import BaseModel, Field
16
17
 
@@ -296,3 +297,124 @@ def extract_and_merge(
296
297
  len(edge_results),
297
298
  )
298
299
  return node_results, edge_results
300
+
301
+
302
+ # ---------------------------------------------------------------------------
303
+ # Parallel extraction
304
+ # ---------------------------------------------------------------------------
305
+
306
+
307
+ def _extract_nodes_chunk(chunk: str) -> list[Node]:
308
+ """Extract nodes from a single chunk (thread-safe, no DB access)."""
309
+ return extract_nodes(chunk, sentences=[chunk])
310
+
311
+
312
+ def _extract_edges_chunk(chunk: str, nodes: list[Node]) -> list[Edge]:
313
+ """Extract edges from a single chunk given known nodes (thread-safe)."""
314
+ return extract_edges(chunk, nodes, sentences=[chunk])
315
+
316
+
317
+ def extract_and_merge_parallel(
318
+ graph: GraphMemory,
319
+ chunks: list[str],
320
+ match_keys: list[str] | None = None,
321
+ match_type: bool = True,
322
+ strategy: MergeStrategy = MergeStrategy.UPDATE,
323
+ similarity_threshold: float = 1.0,
324
+ vector_threshold: float | None = None,
325
+ max_workers: int = 8,
326
+ on_progress: Callable[[str, int, int], None] | None = None,
327
+ ) -> tuple[list[MergeResult], list[EdgeMergeResult]]:
328
+ """Extract from multiple text chunks in parallel, then merge sequentially.
329
+
330
+ Runs in two parallel phases to maximize LLM throughput:
331
+ 1. Node extraction — all chunks concurrently (saturate RPM)
332
+ 2. Edge extraction — all chunks concurrently (with all extracted nodes as context)
333
+ Then merges into DB sequentially.
334
+
335
+ Args:
336
+ graph: A :class:`~graphmemory.database.GraphMemory` instance.
337
+ chunks: List of text chunks to process.
338
+ match_keys: Property names to match nodes on (default ``["name"]``).
339
+ match_type: Also require ``node.type`` to match.
340
+ strategy: How to merge properties on match.
341
+ similarity_threshold: Jaro-Winkler threshold for fuzzy matching.
342
+ vector_threshold: Max cosine distance for vector similarity.
343
+ max_workers: Max concurrent LLM calls (match your RPM headroom).
344
+ on_progress: Optional callback ``(phase, completed, total)``.
345
+
346
+ Returns:
347
+ Aggregated ``(node_results, edge_results)`` across all chunks.
348
+ """
349
+ if match_keys is None:
350
+ match_keys = ["name"]
351
+
352
+ total = len(chunks)
353
+
354
+ # Phase 1: Extract nodes from ALL chunks in parallel
355
+ chunk_nodes: dict[int, list[Node]] = {}
356
+ with ThreadPoolExecutor(max_workers=max_workers) as pool:
357
+ future_to_idx = {
358
+ pool.submit(_extract_nodes_chunk, chunk): i
359
+ for i, chunk in enumerate(chunks)
360
+ }
361
+ done = 0
362
+ for future in as_completed(future_to_idx):
363
+ idx = future_to_idx[future]
364
+ try:
365
+ chunk_nodes[idx] = future.result()
366
+ except Exception as e:
367
+ logger.warning("Node extraction failed for chunk %d: %s", idx + 1, e)
368
+ chunk_nodes[idx] = []
369
+ done += 1
370
+ if on_progress:
371
+ on_progress("nodes", done, total)
372
+
373
+ # Merge all nodes into DB sequentially to build the full node set
374
+ all_node_results: list[MergeResult] = []
375
+ for idx in range(total):
376
+ nodes = chunk_nodes.get(idx, [])
377
+ if nodes:
378
+ results = graph.bulk_merge_nodes(
379
+ nodes, match_keys=match_keys, match_type=match_type,
380
+ strategy=strategy, similarity_threshold=similarity_threshold,
381
+ vector_threshold=vector_threshold,
382
+ )
383
+ all_node_results.extend(results)
384
+
385
+ # Build complete node list for edge extraction context
386
+ all_nodes = [r.node for r in all_node_results]
387
+ logger.info("Phase 1 complete: %d nodes extracted and merged.", len(all_nodes))
388
+
389
+ # Phase 2: Extract edges from ALL chunks in parallel (with full node context)
390
+ chunk_edges: dict[int, list[Edge]] = {}
391
+ with ThreadPoolExecutor(max_workers=max_workers) as pool:
392
+ future_to_idx = {
393
+ pool.submit(_extract_edges_chunk, chunk, all_nodes): i
394
+ for i, chunk in enumerate(chunks)
395
+ }
396
+ done = 0
397
+ for future in as_completed(future_to_idx):
398
+ idx = future_to_idx[future]
399
+ try:
400
+ chunk_edges[idx] = future.result()
401
+ except Exception as e:
402
+ logger.warning("Edge extraction failed for chunk %d: %s", idx + 1, e)
403
+ chunk_edges[idx] = []
404
+ done += 1
405
+ if on_progress:
406
+ on_progress("edges", done, total)
407
+
408
+ # Merge all edges into DB sequentially
409
+ all_edge_results: list[EdgeMergeResult] = []
410
+ for idx in range(total):
411
+ edges = chunk_edges.get(idx, [])
412
+ if edges:
413
+ results = graph.bulk_merge_edges(edges)
414
+ all_edge_results.extend(results)
415
+
416
+ logger.info(
417
+ "Parallel extraction complete: %d chunks, %d nodes, %d edges.",
418
+ total, len(all_node_results), len(all_edge_results),
419
+ )
420
+ return all_node_results, all_edge_results
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "graphmemory"
7
- version = "1.2.0"
7
+ version = "1.2.1"
8
8
  description = "Graph-based memory system using DuckDB"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1,147 +0,0 @@
1
- """End-to-end test: ingest aimav4.txt using real LLM extraction via DSPy."""
2
-
3
- import sys
4
- import os
5
- import re
6
- import logging
7
-
8
- sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
9
-
10
- import dspy
11
- from graphmemory import GraphMemory, MergeStrategy
12
- from graphmemory.extraction import extract_and_merge
13
-
14
- logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
15
- logger = logging.getLogger(__name__)
16
-
17
- # --- Configure DSPy with gpt-5-nano ---
18
- lm = dspy.LM("openai/gpt-5-nano")
19
- dspy.configure(lm=lm)
20
-
21
-
22
- def chunk_text(text: str, max_chars: int = 3000) -> list[str]:
23
- """Split text into paragraph-aware chunks."""
24
- paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
25
- chunks = []
26
- current = []
27
- current_len = 0
28
- for p in paragraphs:
29
- if current_len + len(p) > max_chars and current:
30
- chunks.append("\n\n".join(current))
31
- current = []
32
- current_len = 0
33
- current.append(p)
34
- current_len += len(p)
35
- if current:
36
- chunks.append("\n\n".join(current))
37
- return chunks
38
-
39
-
40
- def main():
41
- input_path = os.path.join(os.path.dirname(__file__), "..", "input", "aimav4.txt")
42
- with open(input_path) as f:
43
- text = f.read(100_000)
44
-
45
- text = re.sub(r"<!--.*?-->", "", text)
46
- chunks = chunk_text(text, max_chars=4000)
47
-
48
- print("=" * 60)
49
- print("GraphMemory — Real LLM Extraction Test")
50
- print("=" * 60)
51
- print(f"Source: aimav4.txt ({len(text)} chars)")
52
- print(f"Chunks: {len(chunks)}")
53
- print(f"LLM: gpt-5-nano via DSPy")
54
-
55
- db = GraphMemory(database=":memory:", vector_length=3)
56
-
57
- print(f"\n--- Extracting entities & relationships ---")
58
- total_nodes = 0
59
- total_edges = 0
60
- total_merged_nodes = 0
61
- total_merged_edges = 0
62
-
63
- for i, chunk in enumerate(chunks):
64
- print(f"\n Chunk {i + 1}/{len(chunks)} ({len(chunk)} chars)...")
65
- try:
66
- # Pass each chunk as a single "sentence" to avoid per-sentence LLM calls
67
- node_results, edge_results = extract_and_merge(
68
- db,
69
- chunk,
70
- match_keys=["name"],
71
- match_type=True,
72
- similarity_threshold=0.88,
73
- sentences=[chunk], # single LLM call per chunk
74
- )
75
- created_n = sum(1 for r in node_results if r.created)
76
- merged_n = sum(1 for r in node_results if not r.created)
77
- created_e = sum(1 for r in edge_results if r.created)
78
- merged_e = sum(1 for r in edge_results if not r.created)
79
-
80
- total_nodes += created_n
81
- total_merged_nodes += merged_n
82
- total_edges += created_e
83
- total_merged_edges += merged_e
84
-
85
- print(f" Nodes: {created_n} new, {merged_n} merged")
86
- print(f" Edges: {created_e} new, {merged_e} merged")
87
- except Exception as e:
88
- logger.warning(f" Chunk {i + 1} failed: {e}")
89
-
90
- # --- Post-extraction dedupe ---
91
- print(f"\n--- Post-extraction duplicate resolution ---")
92
- clusters = db.resolve_duplicates(
93
- match_keys=["name"],
94
- match_type=True,
95
- similarity_threshold=0.90,
96
- )
97
- if clusters:
98
- for c in clusters:
99
- merged_names = [m.properties.get("name", "?") for m in c.merged]
100
- print(f" Merged: '{c.survivor.properties.get('name')}' <- {merged_names}")
101
- else:
102
- print(" No additional duplicates found.")
103
-
104
- # --- Results ---
105
- all_nodes = db.nodes_to_json()
106
- all_edges = db.edges_to_json()
107
-
108
- print(f"\n--- Final Graph ---")
109
- print(f" Nodes: {len(all_nodes)}")
110
- print(f" Edges: {len(all_edges)}")
111
-
112
- type_counts = {}
113
- for n in all_nodes:
114
- t = n.get("type", "Unknown")
115
- type_counts[t] = type_counts.get(t, 0) + 1
116
- print(f" Types: {type_counts}")
117
-
118
- print(f"\n--- Extracted Entities ---")
119
- for n in sorted(all_nodes, key=lambda x: (x.get("type", ""), x.get("properties", {}).get("name", ""))):
120
- props = n.get("properties", {})
121
- print(f" [{n.get('type', '?'):15}] {props.get('name', props)}")
122
-
123
- print(f"\n--- Extracted Relationships ---")
124
- node_id_map = {n["id"]: n for n in all_nodes}
125
- for e in all_edges:
126
- src = node_id_map.get(e["source_id"], {}).get("properties", {}).get("name", e["source_id"])
127
- tgt = node_id_map.get(e["target_id"], {}).get("properties", {}).get("name", e["target_id"])
128
- print(f" {src} --[{e['relation']}]--> {tgt}")
129
-
130
- print(f"\n--- Full-text search: 'deep learning' ---")
131
- results = db.search_nodes("deep learning", limit=5)
132
- for sr in results:
133
- print(f" [{sr.node.type}] {sr.node.properties.get('name', '?')} (score={sr.score:.3f})")
134
-
135
- print(f"\n--- Summary ---")
136
- print(f" Extracted: {total_nodes} nodes, {total_edges} edges")
137
- print(f" Fuzzy-merged during ingest: {total_merged_nodes} nodes, {total_merged_edges} edges")
138
- print(f" Post-dedupe clusters: {len(clusters)}")
139
- print(f" Final graph: {len(all_nodes)} nodes, {len(all_edges)} edges")
140
-
141
- print("\n" + "=" * 60)
142
- print("Done!")
143
- print("=" * 60)
144
-
145
-
146
- if __name__ == "__main__":
147
- main()
File without changes
File without changes
File without changes
File without changes