graphrag-core 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. graphrag_core-0.2.0/.github/workflows/release.yml +28 -0
  2. graphrag_core-0.2.0/.github/workflows/test.yml +35 -0
  3. graphrag_core-0.2.0/.gitignore +191 -0
  4. graphrag_core-0.2.0/CHANGELOG.md +36 -0
  5. graphrag_core-0.2.0/CLAUDE.md +97 -0
  6. graphrag_core-0.2.0/LICENSE +21 -0
  7. graphrag_core-0.2.0/PKG-INFO +182 -0
  8. graphrag_core-0.2.0/README.md +152 -0
  9. graphrag_core-0.2.0/docs/graphrag_core_interface_spec.md +506 -0
  10. graphrag_core-0.2.0/docs/superpowers/plans/2026-04-11-bb1-document-ingestion.md +1259 -0
  11. graphrag_core-0.2.0/docs/superpowers/plans/2026-04-12-bb2-bb3-extraction-and-graph.md +2034 -0
  12. graphrag_core-0.2.0/docs/superpowers/plans/2026-04-12-bb4-hybrid-search.md +1174 -0
  13. graphrag_core-0.2.0/docs/superpowers/plans/2026-04-12-bb5-bb6-curation-and-registry.md +1532 -0
  14. graphrag_core-0.2.0/docs/superpowers/plans/2026-04-12-bb7-bb8-tools-and-agents.md +1187 -0
  15. graphrag_core-0.2.0/docs/superpowers/plans/2026-04-12-release-readiness.md +524 -0
  16. graphrag_core-0.2.0/docs/superpowers/specs/2026-04-11-bb1-document-ingestion-design.md +128 -0
  17. graphrag_core-0.2.0/docs/superpowers/specs/2026-04-12-bb2-bb3-extraction-and-graph-design.md +263 -0
  18. graphrag_core-0.2.0/docs/superpowers/specs/2026-04-12-bb4-hybrid-search-design.md +199 -0
  19. graphrag_core-0.2.0/docs/superpowers/specs/2026-04-12-bb5-bb6-curation-and-registry-design.md +245 -0
  20. graphrag_core-0.2.0/docs/superpowers/specs/2026-04-12-bb7-bb8-tools-and-agents-design.md +245 -0
  21. graphrag_core-0.2.0/docs/superpowers/specs/2026-04-12-release-readiness-design.md +112 -0
  22. graphrag_core-0.2.0/pyproject.toml +50 -0
  23. graphrag_core-0.2.0/src/graphrag_core/__init__.py +138 -0
  24. graphrag_core-0.2.0/src/graphrag_core/_cypher.py +15 -0
  25. graphrag_core-0.2.0/src/graphrag_core/agents/__init__.py +6 -0
  26. graphrag_core-0.2.0/src/graphrag_core/agents/context.py +16 -0
  27. graphrag_core-0.2.0/src/graphrag_core/agents/orchestrator.py +34 -0
  28. graphrag_core-0.2.0/src/graphrag_core/curation/__init__.py +6 -0
  29. graphrag_core-0.2.0/src/graphrag_core/curation/detection.py +158 -0
  30. graphrag_core-0.2.0/src/graphrag_core/curation/pipeline.py +39 -0
  31. graphrag_core-0.2.0/src/graphrag_core/extraction/__init__.py +5 -0
  32. graphrag_core-0.2.0/src/graphrag_core/extraction/engine.py +154 -0
  33. graphrag_core-0.2.0/src/graphrag_core/graph/__init__.py +11 -0
  34. graphrag_core-0.2.0/src/graphrag_core/graph/memory.py +118 -0
  35. graphrag_core-0.2.0/src/graphrag_core/graph/neo4j.py +196 -0
  36. graphrag_core-0.2.0/src/graphrag_core/ingestion/__init__.py +19 -0
  37. graphrag_core-0.2.0/src/graphrag_core/ingestion/chunker.py +45 -0
  38. graphrag_core-0.2.0/src/graphrag_core/ingestion/parsers.py +128 -0
  39. graphrag_core-0.2.0/src/graphrag_core/ingestion/pipeline.py +36 -0
  40. graphrag_core-0.2.0/src/graphrag_core/interfaces.py +229 -0
  41. graphrag_core-0.2.0/src/graphrag_core/llm/__init__.py +9 -0
  42. graphrag_core-0.2.0/src/graphrag_core/llm/anthropic.py +35 -0
  43. graphrag_core-0.2.0/src/graphrag_core/models.py +247 -0
  44. graphrag_core-0.2.0/src/graphrag_core/py.typed +0 -0
  45. graphrag_core-0.2.0/src/graphrag_core/registry/__init__.py +5 -0
  46. graphrag_core-0.2.0/src/graphrag_core/registry/matching.py +23 -0
  47. graphrag_core-0.2.0/src/graphrag_core/registry/memory.py +81 -0
  48. graphrag_core-0.2.0/src/graphrag_core/search/__init__.py +11 -0
  49. graphrag_core-0.2.0/src/graphrag_core/search/fusion.py +34 -0
  50. graphrag_core-0.2.0/src/graphrag_core/search/memory.py +104 -0
  51. graphrag_core-0.2.0/src/graphrag_core/search/neo4j.py +186 -0
  52. graphrag_core-0.2.0/src/graphrag_core/tools/__init__.py +6 -0
  53. graphrag_core-0.2.0/src/graphrag_core/tools/core_tools.py +88 -0
  54. graphrag_core-0.2.0/src/graphrag_core/tools/library.py +45 -0
  55. graphrag_core-0.2.0/tests/__init__.py +0 -0
  56. graphrag_core-0.2.0/tests/conftest.py +29 -0
  57. graphrag_core-0.2.0/tests/test_agents/__init__.py +0 -0
  58. graphrag_core-0.2.0/tests/test_agents/test_context.py +34 -0
  59. graphrag_core-0.2.0/tests/test_agents/test_orchestrator.py +114 -0
  60. graphrag_core-0.2.0/tests/test_curation/__init__.py +0 -0
  61. graphrag_core-0.2.0/tests/test_curation/test_detection.py +176 -0
  62. graphrag_core-0.2.0/tests/test_curation/test_pipeline.py +97 -0
  63. graphrag_core-0.2.0/tests/test_extraction/__init__.py +0 -0
  64. graphrag_core-0.2.0/tests/test_extraction/test_engine.py +271 -0
  65. graphrag_core-0.2.0/tests/test_graph/__init__.py +0 -0
  66. graphrag_core-0.2.0/tests/test_graph/test_memory.py +207 -0
  67. graphrag_core-0.2.0/tests/test_graph/test_neo4j.py +161 -0
  68. graphrag_core-0.2.0/tests/test_ingestion/__init__.py +0 -0
  69. graphrag_core-0.2.0/tests/test_ingestion/test_chunker.py +100 -0
  70. graphrag_core-0.2.0/tests/test_ingestion/test_parsers.py +198 -0
  71. graphrag_core-0.2.0/tests/test_ingestion/test_pipeline.py +142 -0
  72. graphrag_core-0.2.0/tests/test_integration/__init__.py +0 -0
  73. graphrag_core-0.2.0/tests/test_integration/test_ingest_to_graph.py +154 -0
  74. graphrag_core-0.2.0/tests/test_interfaces.py +277 -0
  75. graphrag_core-0.2.0/tests/test_models.py +228 -0
  76. graphrag_core-0.2.0/tests/test_registry/__init__.py +0 -0
  77. graphrag_core-0.2.0/tests/test_registry/test_matching.py +50 -0
  78. graphrag_core-0.2.0/tests/test_registry/test_memory_registry.py +153 -0
  79. graphrag_core-0.2.0/tests/test_search/__init__.py +0 -0
  80. graphrag_core-0.2.0/tests/test_search/test_fusion.py +75 -0
  81. graphrag_core-0.2.0/tests/test_search/test_memory.py +155 -0
  82. graphrag_core-0.2.0/tests/test_search/test_neo4j_search.py +171 -0
  83. graphrag_core-0.2.0/tests/test_tools/__init__.py +0 -0
  84. graphrag_core-0.2.0/tests/test_tools/test_core_tools.py +113 -0
  85. graphrag_core-0.2.0/tests/test_tools/test_library.py +101 -0
  86. graphrag_core-0.2.0/uv.lock +324 -0
@@ -0,0 +1,28 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ id-token: write
10
+
11
+ jobs:
12
+ publish:
13
+ runs-on: ubuntu-latest
14
+ environment: pypi
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v4
20
+
21
+ - name: Set up Python
22
+ run: uv python install 3.12
23
+
24
+ - name: Build package
25
+ run: uv build
26
+
27
+ - name: Publish to PyPI
28
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,35 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Install uv
16
+ uses: astral-sh/setup-uv@v4
17
+
18
+ - name: Set up Python
19
+ run: uv python install 3.12
20
+
21
+ - name: Install dependencies
22
+ run: uv sync --all-extras
23
+
24
+ - name: Run unit tests
25
+ run: uv run pytest tests/ -x -q
26
+
27
+ - name: Boundary check — no domain leakage
28
+ run: |
29
+ FORBIDDEN="MonitoringTopic|Perspective|SubjectArea|Interview|Dalux|CapturePoint|InvestorAlert|SollIst|EY|Parthenon|Prague"
30
+ if grep -rn -E "$FORBIDDEN" src/; then
31
+ echo "DOMAIN LEAKAGE DETECTED in graphrag-core"
32
+ exit 1
33
+ else
34
+ echo "graphrag-core is clean"
35
+ fi
@@ -0,0 +1,191 @@
1
+ # ============================================================================
2
+ # graphrag-core .gitignore (adapted from a cross-platform dotfiles template)
3
+ # ============================================================================
4
+
5
+ # -----------------------------------------------------------------------------
6
+ # macOS
7
+ # -----------------------------------------------------------------------------
8
+ .DS_Store
9
+ .AppleDouble
10
+ .LSOverride
11
+ .DocumentRevisions-V100
12
+ .fseventsd
13
+ .Spotlight-V100
14
+ .TemporaryItems
15
+ .Trashes
16
+ .VolumeIcon.icns
17
+ .com.apple.timemachine.donotpresent
18
+ .AppleDB
19
+ .AppleDesktop
20
+ Network Trash Folder
21
+ Temporary Items
22
+ .apdisk
23
+
24
+ # Icon must end with two \r
25
+ Icon
26
+
27
+ # Thumbnails
28
+ ._*
29
+
30
+ # -----------------------------------------------------------------------------
31
+ # Linux
32
+ # -----------------------------------------------------------------------------
33
+ *~
34
+ .directory
35
+ .Trash-*
36
+ .nfs*
37
+
38
+ # -----------------------------------------------------------------------------
39
+ # Windows
40
+ # -----------------------------------------------------------------------------
41
+ Thumbs.db
42
+ Thumbs.db:encryptable
43
+ ehthumbs.db
44
+ ehthumbs_vista.db
45
+ [Dd]esktop.ini
46
+ $RECYCLE.BIN/
47
+ *.lnk
48
+
49
+ # -----------------------------------------------------------------------------
50
+ # Shell & Terminal
51
+ # -----------------------------------------------------------------------------
52
+ # History files
53
+ .bash_history
54
+ .zsh_history
55
+ .python_history
56
+ .node_repl_history
57
+ .lesshst
58
+
59
+ # Zsh compiled files
60
+ *.zwc
61
+ *.zwc.old
62
+ .zcompdump*
63
+
64
+ # Shell local overrides
65
+ *.local
66
+
67
+ # -----------------------------------------------------------------------------
68
+ # Editors
69
+ # -----------------------------------------------------------------------------
70
+ # Vim
71
+ *.swp
72
+ *.swo
73
+ *.swn
74
+ .*.sw?
75
+ *~
76
+ .netrwhist
77
+
78
+ # VS Code (if you want to ignore workspace settings)
79
+ # .vscode/
80
+
81
+ # Emacs
82
+ *~
83
+ \#*\#
84
+ .\#*
85
+ .emacs.desktop
86
+ .emacs.desktop.lock
87
+
88
+ # -----------------------------------------------------------------------------
89
+ # Security & Credentials
90
+ # -----------------------------------------------------------------------------
91
+ # Environment files
92
+ .env
93
+ .env.local
94
+ .env.*.local
95
+
96
+ # SSH keys (safety net - should never be in dotfiles repo anyway)
97
+ id_rsa
98
+ id_dsa
99
+ id_ecdsa
100
+ id_ed25519
101
+ *.pem
102
+ *.key
103
+
104
+ # AWS credentials
105
+ .aws/credentials
106
+
107
+ # Other credentials
108
+ .netrc
109
+ .gnupg/
110
+
111
+ # -----------------------------------------------------------------------------
112
+ # Backups & Temporary Files
113
+ # -----------------------------------------------------------------------------
114
+ *.backup
115
+ *.bak
116
+ *.tmp
117
+ *.temp
118
+ .backup/
119
+ backup/
120
+ *_backup/
121
+ dotfiles_backup/
122
+
123
+ # -----------------------------------------------------------------------------
124
+ # Package Managers & Dependencies
125
+ # -----------------------------------------------------------------------------
126
+ node_modules/
127
+ .npm/
128
+ .yarn/
129
+
130
+ # -----------------------------------------------------------------------------
131
+ # Cache & Generated Files
132
+ # -----------------------------------------------------------------------------
133
+ .cache/
134
+ *.log
135
+ *.pid
136
+
137
+ # Oh My Zsh custom (if you add custom plugins locally)
138
+ # .oh-my-zsh/custom/
139
+
140
+ # -----------------------------------------------------------------------------
141
+ # Claude Code - Personal Data & State Files
142
+ # -----------------------------------------------------------------------------
143
+ # Exclude personal data, session history, and cache
144
+ .claude.json
145
+ .claude.json.backup
146
+ .claude/history.jsonl
147
+ .claude/file-history/
148
+ .claude/todos/
149
+ .claude/session-env/
150
+ .claude/shell-snapshots/
151
+ .claude/debug/
152
+ .claude/statsig/
153
+ .claude/.anthropic/
154
+ .claude/settings.local.json
155
+
156
+ # Keep these Claude Code files (should be committed):
157
+ # .claude/commands/ - Custom slash commands
158
+ # .claude/settings.json - Project settings
159
+ # .mcp.json - MCP server configuration
160
+ # CLAUDE.md - Project context
161
+
162
+ # -----------------------------------------------------------------------------
163
+ # Project Specific
164
+ # -----------------------------------------------------------------------------
165
+ # Test files you might create while testing configs
166
+ test/
167
+ scratch/
168
+
169
+ # -----------------------------------------------------------------------------
170
+ # Python
171
+ # -----------------------------------------------------------------------------
172
+ __pycache__/
173
+ *.py[cod]
174
+ *$py.class
175
+ *.so
176
+ .venv/
177
+ venv/
178
+ .eggs/
179
+ *.egg-info/
180
+ *.egg
181
+ dist/
182
+ build/
183
+ .mypy_cache/
184
+ .pytest_cache/
185
+ .ruff_cache/
186
+ htmlcov/
187
+ .coverage
188
+ .coverage.*
189
+
190
+ # graphify
191
+ graphify-out/
@@ -0,0 +1,36 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/).
6
+
7
+ ## [0.2.0] - 2026-04-12
8
+
9
+ ### Added
10
+
11
+ - **BB1: Document Ingestion** — PDF, DOCX, Text, Markdown parsers; TokenChunker; IngestionPipeline
12
+ - **BB2: Schema-Guided Extraction** — LLMClient Protocol, AnthropicLLMClient, LLMExtractionEngine with strict schema validation
13
+ - **BB3: Provenance-Native Graph** — InMemoryGraphStore, Neo4jGraphStore with full provenance tracking
14
+ - **BB4: Hybrid Search** — InMemorySearchEngine, Neo4jHybridSearch with Reciprocal Rank Fusion
15
+ - **BB5: Governed Curation** — DeterministicDetectionLayer (duplicates, orphans, schema violations), CurationPipeline
16
+ - **BB6: Entity Registry** — InMemoryEntityRegistry with exact/fuzzy matching (token normalization + SequenceMatcher)
17
+ - **BB7: Tool Library** — ToolLibrary with 4 core tools (get_entity, search_entities, get_audit_trail, get_related)
18
+ - **BB8: Multi-Agent Orchestration** — Agent/Orchestrator/ReportRenderer Protocols, SequentialOrchestrator, AgentContext
19
+ - Cypher injection protection via identifier validation
20
+ - Integration test framework with `--run-integration` flag
21
+ - Optional dependencies: `graphrag-core[neo4j]`, `graphrag-core[anthropic]`, `graphrag-core[all]`
22
+
23
+ ### Protocols (defined, no default implementation yet)
24
+
25
+ - `LLMCurationLayer`, `ApprovalGateway` (BB5 layers 2-3)
26
+ - `ReportRenderer` (BB8)
27
+ - `EmbeddingModel` (cross-cutting)
28
+
29
+ ## [0.1.0] - 2026-04-10
30
+
31
+ ### Added
32
+
33
+ - Initial commit establishing prior art
34
+ - BB1-BB4 Protocol interfaces (`DocumentParser`, `Chunker`, `ExtractionEngine`, `GraphStore`, `SearchEngine`)
35
+ - Pydantic data models for BB1-BB4
36
+ - Project scaffolding with hatchling build system
@@ -0,0 +1,97 @@
1
+ # graphrag-core
2
+
3
+ > Domain-agnostic Graph RAG framework. MIT License. Open Source.
4
+
5
+ ## What This Is
6
+ Layer 1 of a 3-layer architecture. This repo contains ONLY domain-agnostic platform code.
7
+ Domain-specific logic (construction monitoring, due diligence, compliance) lives in separate repos that import graphrag-core as a dependency.
8
+
9
+ ## The One Rule That Cannot Be Broken
10
+ **No domain logic in this repo.** If you're importing a construction-specific concept, a customer-specific schema, or any business-domain term — stop and refactor. This code must work equally for construction monitoring, transaction due diligence, forensic investigations, or any other document-heavy knowledge work.
11
+
12
+ Test: Could a team building a legal compliance graph use this code without modification? If no → it doesn't belong here.
13
+
14
+ ## Architecture
15
+ 8 building blocks, each with an abstract interface (Protocol) and default implementation:
16
+
17
+ | # | Block | Interface | Default Impl |
18
+ |---|---|---|---|
19
+ | 1 | Document Ingestion | `DocumentParser`, `Chunker`, `IngestionPipeline` | PDF/DOCX parsers, semantic chunker |
20
+ | 2 | Entity Extraction | `ExtractionEngine`, `OntologySchema` | LLM-based extraction |
21
+ | 3 | Knowledge Graph | `GraphStore` | `Neo4jGraphStore` |
22
+ | 4 | Hybrid Search | `SearchEngine` | `Neo4jHybridSearch` |
23
+ | 5 | Governed Curation | `DetectionLayer`, `LLMCurationLayer`, `ApprovalGateway` | GDS detection, CLI approval |
24
+ | 6 | Entity Registry | `EntityRegistry` | Neo4j-backed registry |
25
+ | 7 | Core Tool Library | `ToolLibrary`, `Tool` | 8 core tools |
26
+ | 8 | Orchestration | `Orchestrator`, `ReportRenderer` | LangGraph, DOCX renderer |
27
+
28
+ ## Tech Stack
29
+ - Python 3.12+
30
+ - Pydantic v2 for all data models
31
+ - Neo4j (default graph backend, swappable via GraphStore interface)
32
+ - pytest + pytest-asyncio for tests
33
+ - Type hints everywhere. No exceptions.
34
+
35
+ ## Code Rules
36
+ - All interfaces are `Protocol` classes in `interfaces.py`
37
+ - All data models are `BaseModel` classes in `models.py`
38
+ - Async by default for all I/O
39
+ - Functions < 30 lines. Extract early.
40
+ - Docstrings: Google style, English only.
41
+ - No hardcoded technology references in interface definitions
42
+ - Default implementations live alongside interfaces but are clearly separated
43
+
44
+ ## Project Structure
45
+ ```
46
+ src/graphrag_core/
47
+ ├── interfaces.py # ALL Protocol definitions
48
+ ├── models.py # ALL Pydantic models
49
+ ├── ingestion/ # BB1: Parse, chunk, embed, store
50
+ ├── extraction/ # BB2: Schema-guided entity extraction
51
+ ├── graph/ # BB3: GraphStore + Neo4j default
52
+ ├── search/ # BB4: Hybrid search
53
+ ├── curation/ # BB5: 3-layer governance
54
+ ├── registry/ # BB6: Known entity dedup
55
+ ├── tools/ # BB7: Core tool library (semantic layer)
56
+ ├── agents/ # BB8: Orchestration + report rendering
57
+ └── report/ # BB8: Report renderer
58
+ ```
59
+
60
+ ## Extension Pattern
61
+ Domain layers extend graphrag-core by:
62
+ 1. Defining an `OntologySchema` (node types, relationships)
63
+ 2. Registering domain tools via `ToolLibrary.register()`
64
+ 3. Implementing domain-specific `Agent` subclasses
65
+ 4. Optionally providing a custom `ReportRenderer`
66
+
67
+ ```python
68
+ # Example: construction monitoring domain
69
+ from graphrag_core import OntologySchema, ToolLibrary, Agent
70
+
71
+ schema = OntologySchema(node_types=[...], relationship_types=[...])
72
+ tool_library.register(my_domain_tool)
73
+
74
+ class PerspectiveAgent(Agent):
75
+ async def execute(self, context): ...
76
+ ```
77
+
78
+ ## Commands
79
+ ```bash
80
+ pytest tests/ -x -q # tests (fail fast)
81
+ pytest tests/ -x -q --cov # with coverage
82
+ docker compose up neo4j # start Neo4j for integration tests
83
+ python -m graphrag_core.graph.schema # apply schema
84
+ ```
85
+
86
+ ## What Does NOT Belong Here
87
+ - Employer-specific anything (deployment configs, client references, internal tooling)
88
+ - Domain-specific terms (MonitoringTopic, SubjectArea, Perspective, CapturePoint, SollIstAbgleich, InvestorAlert)
89
+ - Hardcoded LLM model names (use config/env vars)
90
+ - Any reference to specific organizations or engagements
91
+
92
+ ## Release Strategy
93
+ - Semantic versioning (MAJOR.MINOR.PATCH)
94
+ - Public GitHub repo
95
+ - Published to PyPI as `graphrag-core`
96
+ - CHANGELOG.md tracks all changes
97
+ - First public commit establishes prior art before any organizational use
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 graphrag-core contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: graphrag-core
3
+ Version: 0.2.0
4
+ Summary: Domain-agnostic Graph RAG framework for building governed, auditable Knowledge Graphs
5
+ Project-URL: Homepage, https://github.com/cdel1/graphrag-core
6
+ Project-URL: Repository, https://github.com/cdel1/graphrag-core
7
+ Project-URL: Issues, https://github.com/cdel1/graphrag-core/issues
8
+ Author: Dino Celi
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Classifier: Typing :: Typed
18
+ Requires-Python: >=3.12
19
+ Requires-Dist: pydantic>=2.0
20
+ Requires-Dist: pypdf>=4.0
21
+ Requires-Dist: python-docx>=1.0
22
+ Provides-Extra: all
23
+ Requires-Dist: anthropic>=0.40; extra == 'all'
24
+ Requires-Dist: neo4j>=5.0; extra == 'all'
25
+ Provides-Extra: anthropic
26
+ Requires-Dist: anthropic>=0.40; extra == 'anthropic'
27
+ Provides-Extra: neo4j
28
+ Requires-Dist: neo4j>=5.0; extra == 'neo4j'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # graphrag-core
32
+
33
+ A domain-agnostic framework for building governed, auditable Knowledge Graphs from documents using LLM-powered extraction, provenance-native storage, and multi-agent orchestration.
34
+
35
+ ## Architecture
36
+
37
+ ```
38
+ YOUR DOMAIN LAYER (Layer 2)
39
+ Ontology, domain tools, domain agents, templates
40
+ |
41
+ | imports
42
+ v
43
+ graphrag-core (Layer 1)
44
+
45
+ Ingestion Extraction Graph Store Search
46
+ Curation Registry Tool Library Orchestration
47
+ ```
48
+
49
+ ## Install
50
+
51
+ ```bash
52
+ pip install graphrag-core # core (in-memory backends)
53
+ pip install graphrag-core[neo4j] # + Neo4j graph store and search
54
+ pip install graphrag-core[anthropic] # + Claude LLM client
55
+ pip install graphrag-core[all] # everything
56
+ ```
57
+
58
+ ## Quick Start
59
+
60
+ ```python
61
+ import asyncio
62
+ from graphrag_core import (
63
+ TextParser, TokenChunker, IngestionPipeline,
64
+ InMemoryGraphStore, InMemorySearchEngine,
65
+ LLMExtractionEngine, OntologySchema, NodeTypeDefinition,
66
+ PropertyDefinition, RelationshipTypeDefinition,
67
+ ToolLibrary, register_core_tools,
68
+ )
69
+ from graphrag_core.models import ChunkConfig, DocumentChunk, GraphNode, ImportRun
70
+ from datetime import datetime
71
+
72
+ async def main():
73
+ # 1. Ingest a document
74
+ pipeline = IngestionPipeline(parser=TextParser(), chunker=TokenChunker())
75
+ chunks = await pipeline.ingest(b"Alice works at Acme Corp.", "text/plain")
76
+
77
+ # 2. Define your domain schema
78
+ schema = OntologySchema(
79
+ node_types=[
80
+ NodeTypeDefinition(
81
+ label="Person",
82
+ properties=[PropertyDefinition(name="name", type="string", required=True)],
83
+ required_properties=["name"],
84
+ ),
85
+ NodeTypeDefinition(
86
+ label="Company",
87
+ properties=[PropertyDefinition(name="name", type="string", required=True)],
88
+ required_properties=["name"],
89
+ ),
90
+ ],
91
+ relationship_types=[
92
+ RelationshipTypeDefinition(type="WORKS_AT", source_types=["Person"], target_types=["Company"]),
93
+ ],
94
+ )
95
+
96
+ # 3. Extract entities (requires an LLMClient implementation)
97
+ # engine = LLMExtractionEngine(llm_client=your_client)
98
+ # result = await engine.extract(chunks, schema, import_run)
99
+
100
+ # 4. Store in graph
101
+ store = InMemoryGraphStore()
102
+ await store.merge_node(GraphNode(id="p1", label="Person", properties={"name": "Alice"}), "run-1")
103
+ await store.merge_node(GraphNode(id="c1", label="Company", properties={"name": "Acme Corp"}), "run-1")
104
+
105
+ # 5. Search
106
+ search = InMemorySearchEngine(
107
+ nodes=[await store.get_node("p1"), await store.get_node("c1")],
108
+ )
109
+ results = await search.fulltext_search("Acme", top_k=5)
110
+ print(results)
111
+
112
+ # 6. Wire up tools for agents
113
+ library = ToolLibrary()
114
+ register_core_tools(library, store, search)
115
+ result = await library.execute("get_entity", entity_id="p1")
116
+ print(result)
117
+
118
+ asyncio.run(main())
119
+ ```
120
+
121
+ ## Building Blocks
122
+
123
+ | # | Block | Interface | Implementation | Status |
124
+ |---|---|---|---|---|
125
+ | 1 | Document Ingestion | `DocumentParser`, `Chunker` | PDF, DOCX, Text, Markdown parsers; TokenChunker | Done |
126
+ | 2 | Entity Extraction | `ExtractionEngine`, `LLMClient` | LLMExtractionEngine, AnthropicLLMClient | Done |
127
+ | 3 | Knowledge Graph | `GraphStore` | InMemoryGraphStore, Neo4jGraphStore | Done |
128
+ | 4 | Hybrid Search | `SearchEngine` | InMemorySearchEngine, Neo4jHybridSearch (RRF) | Done |
129
+ | 5 | Governed Curation | `DetectionLayer` | DeterministicDetectionLayer, CurationPipeline | Done (detection layer) |
130
+ | 6 | Entity Registry | `EntityRegistry` | InMemoryEntityRegistry (fuzzy matching) | Done |
131
+ | 7 | Tool Library | `ToolLibrary` | 4 core tools (get_entity, search_entities, get_audit_trail, get_related) | Done |
132
+ | 8 | Orchestration | `Agent`, `Orchestrator` | SequentialOrchestrator, AgentContext | Done |
133
+
134
+ The following Protocols are defined but have no default implementation yet:
135
+ - `LLMCurationLayer`, `ApprovalGateway` (BB5 layers 2-3)
136
+ - `ReportRenderer` (BB8)
137
+ - `EmbeddingModel` (cross-cutting)
138
+
139
+ ## Extension Pattern
140
+
141
+ ```python
142
+ from graphrag_core import OntologySchema, ToolLibrary, Tool
143
+
144
+ # 1. Define your domain ontology
145
+ schema = OntologySchema(node_types=[...], relationship_types=[...])
146
+
147
+ # 2. Register domain-specific tools
148
+ library = ToolLibrary()
149
+ library.register(Tool(name="my_tool", description="...", parameters={}, handler=my_handler))
150
+
151
+ # 3. Implement domain agents
152
+ class MyAgent:
153
+ name = "analyst"
154
+ async def execute(self, context):
155
+ result = await context.tool_library.execute("my_tool")
156
+ context.workflow_state["analysis"] = result.data
157
+ return AgentResult(agent_name=self.name, success=True)
158
+ ```
159
+
160
+ ## Development
161
+
162
+ ```bash
163
+ # Clone and install
164
+ git clone https://github.com/cdel1/graphrag-core.git
165
+ cd graphrag-core
166
+ uv sync --all-extras
167
+
168
+ # Run unit tests
169
+ uv run pytest tests/ -x -q
170
+
171
+ # Run integration tests (requires Neo4j)
172
+ docker run -d --name neo4j-test -p 7474:7474 -p 7687:7687 \
173
+ -e NEO4J_AUTH=neo4j/development neo4j:5-community
174
+ uv run pytest tests/ -x --run-integration
175
+
176
+ # Build
177
+ uv build
178
+ ```
179
+
180
+ ## License
181
+
182
+ MIT