grai-build 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- grai_build-0.3.0/LICENSE +21 -0
- grai_build-0.3.0/PKG-INFO +374 -0
- grai_build-0.3.0/README.md +346 -0
- grai_build-0.3.0/grai/__init__.py +11 -0
- grai_build-0.3.0/grai/cli/__init__.py +5 -0
- grai_build-0.3.0/grai/cli/main.py +2546 -0
- grai_build-0.3.0/grai/core/__init__.py +1 -0
- grai_build-0.3.0/grai/core/cache/__init__.py +33 -0
- grai_build-0.3.0/grai/core/cache/build_cache.py +352 -0
- grai_build-0.3.0/grai/core/compiler/__init__.py +23 -0
- grai_build-0.3.0/grai/core/compiler/cypher_compiler.py +426 -0
- grai_build-0.3.0/grai/core/exporter/__init__.py +13 -0
- grai_build-0.3.0/grai/core/exporter/ir_exporter.py +343 -0
- grai_build-0.3.0/grai/core/lineage/__init__.py +42 -0
- grai_build-0.3.0/grai/core/lineage/lineage_tracker.py +685 -0
- grai_build-0.3.0/grai/core/loader/__init__.py +21 -0
- grai_build-0.3.0/grai/core/loader/neo4j_loader.py +514 -0
- grai_build-0.3.0/grai/core/models.py +344 -0
- grai_build-0.3.0/grai/core/parser/__init__.py +25 -0
- grai_build-0.3.0/grai/core/parser/yaml_parser.py +375 -0
- grai_build-0.3.0/grai/core/validator/__init__.py +25 -0
- grai_build-0.3.0/grai/core/validator/validator.py +475 -0
- grai_build-0.3.0/grai/core/visualizer/__init__.py +650 -0
- grai_build-0.3.0/grai/core/visualizer/visualizer.py +15 -0
- grai_build-0.3.0/grai/templates/__init__.py +1 -0
- grai_build-0.3.0/grai_build.egg-info/PKG-INFO +374 -0
- grai_build-0.3.0/grai_build.egg-info/SOURCES.txt +41 -0
- grai_build-0.3.0/grai_build.egg-info/dependency_links.txt +1 -0
- grai_build-0.3.0/grai_build.egg-info/entry_points.txt +2 -0
- grai_build-0.3.0/grai_build.egg-info/requires.txt +13 -0
- grai_build-0.3.0/grai_build.egg-info/top_level.txt +1 -0
- grai_build-0.3.0/pyproject.toml +72 -0
- grai_build-0.3.0/setup.cfg +4 -0
- grai_build-0.3.0/tests/test_cache.py +602 -0
- grai_build-0.3.0/tests/test_cli.py +658 -0
- grai_build-0.3.0/tests/test_compiler.py +486 -0
- grai_build-0.3.0/tests/test_exporter.py +368 -0
- grai_build-0.3.0/tests/test_lineage.py +568 -0
- grai_build-0.3.0/tests/test_loader.py +506 -0
- grai_build-0.3.0/tests/test_models.py +203 -0
- grai_build-0.3.0/tests/test_parser.py +413 -0
- grai_build-0.3.0/tests/test_validator.py +529 -0
- grai_build-0.3.0/tests/test_visualizer.py +255 -0
grai_build-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Andrew Santora
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: grai-build
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Schema-as-code for graph databases - Documentation like dbt, migrations for Neo4j
|
|
5
|
+
Author-email: Andrew Santora <andrew@grai.build>
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Requires-Python: >=3.11
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: pydantic>=2.0
|
|
16
|
+
Requires-Dist: pyyaml>=6.0
|
|
17
|
+
Requires-Dist: typer>=0.9.0
|
|
18
|
+
Requires-Dist: rich>=13.0
|
|
19
|
+
Requires-Dist: neo4j>=5.0
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
23
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
24
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
25
|
+
Requires-Dist: mypy>=1.5; extra == "dev"
|
|
26
|
+
Requires-Dist: pre-commit>=3.0; extra == "dev"
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# grai.build
|
|
30
|
+
|
|
31
|
+
> **Schema-as-code for graph databases** - Documentation like dbt, migrations for Neo4j
|
|
32
|
+
|
|
33
|
+
[CI](https://github.com/asantora05/grai.build/actions/workflows/ci.yml)
|
|
34
|
+
[Coverage](https://codecov.io/gh/asantora05/grai.build)
|
|
35
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
36
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
37
|
+
[Code style: black](https://github.com/psf/black)
|
|
38
|
+
|
|
39
|
+
## What is grai.build?
|
|
40
|
+
|
|
41
|
+
**grai.build brings dbt's documentation experience to graph databases** - define your schema in YAML, generate beautiful docs, and manage migrations.
|
|
42
|
+
|
|
43
|
+
It manages your graph **schema**, not your data. You define entities and relations in YAML, and grai.build:
|
|
44
|
+
|
|
45
|
+
- ✅ **Validates** your schema for consistency
|
|
46
|
+
- ✅ **Generates** Cypher constraints and indexes
|
|
47
|
+
- ✅ **Documents** your graph structure automatically (like `dbt docs`)
|
|
48
|
+
- ✅ **Tracks lineage** with interactive visualizations
|
|
49
|
+
- ✅ **Integrates** with your CI/CD pipeline
|
|
50
|
+
|
|
51
|
+
**What it's NOT:**
|
|
52
|
+
|
|
53
|
+
- ❌ Not an ETL tool (use Airflow, Prefect, or dbt for data loading)
|
|
54
|
+
- ❌ Not a data transformation framework (dbt does this for SQL)
|
|
55
|
+
- ❌ Not a replacement for your existing data infrastructure
|
|
56
|
+
|
|
57
|
+
**Think of it as:**
|
|
58
|
+
|
|
59
|
+
- **Like dbt:** Declarative YAML definitions, beautiful documentation, lineage tracking
|
|
60
|
+
- **Like Alembic/Flyway:** Database migrations and schema management
|
|
61
|
+
- **For graphs:** Manages Neo4j schema while your pipelines handle data
|
|
62
|
+
|
|
63
|
+
## Quick Start
|
|
64
|
+
|
|
65
|
+
### Installation
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pip install grai-build
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Create Your First Project
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Initialize a new project
|
|
75
|
+
grai init my-graph-project
|
|
76
|
+
cd my-graph-project
|
|
77
|
+
|
|
78
|
+
# Validate and build
|
|
79
|
+
grai build
|
|
80
|
+
|
|
81
|
+
# Generate documentation (like dbt docs)
|
|
82
|
+
grai docs --serve
|
|
83
|
+
|
|
84
|
+
# Deploy schema to Neo4j
|
|
85
|
+
grai run --uri bolt://localhost:7687 --user neo4j --password secret
|
|
86
|
+
|
|
87
|
+
# Load sample data for local testing
|
|
88
|
+
grai run --load-csv --password secret
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Project Structure
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
my-graph-project/
|
|
95
|
+
├── grai.yml  # Project manifest
|
|
96
|
+
├── entities/
|
|
97
|
+
│   ├── customer.yml  # Entity definitions
|
|
98
|
+
│   └── product.yml
|
|
99
|
+
├── relations/
|
|
100
|
+
│   └── purchased.yml  # Relation definitions
|
|
101
|
+
└── target/  # Compiled output
|
|
102
|
+
    └── neo4j/
|
|
103
|
+
        └── compiled.cypher
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Example
|
|
107
|
+
|
|
108
|
+
### Entity: `entities/customer.yml`
|
|
109
|
+
|
|
110
|
+
```yaml
|
|
111
|
+
entity: customer
|
|
112
|
+
source: analytics.customers
|
|
113
|
+
keys: [customer_id]
|
|
114
|
+
properties:
|
|
115
|
+
- name: customer_id
|
|
116
|
+
type: string
|
|
117
|
+
- name: name
|
|
118
|
+
type: string
|
|
119
|
+
- name: region
|
|
120
|
+
type: string
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Relation: `relations/purchased.yml`
|
|
124
|
+
|
|
125
|
+
```yaml
|
|
126
|
+
relation: PURCHASED
|
|
127
|
+
from: customer
|
|
128
|
+
to: product
|
|
129
|
+
source: analytics.orders
|
|
130
|
+
mappings:
|
|
131
|
+
from_key: customer_id
|
|
132
|
+
to_key: product_id
|
|
133
|
+
properties:
|
|
134
|
+
- name: order_id
|
|
135
|
+
type: string
|
|
136
|
+
- name: order_date
|
|
137
|
+
type: datetime
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Compile to Cypher
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
grai build
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
**Output (`target/neo4j/compiled.cypher`):**
|
|
147
|
+
|
|
148
|
+
```cypher
|
|
149
|
+
// Create Customer nodes
|
|
150
|
+
MERGE (n:customer {customer_id: row.customer_id})
|
|
151
|
+
SET n.name = row.name,
|
|
152
|
+
n.region = row.region;
|
|
153
|
+
|
|
154
|
+
// Create Product nodes
|
|
155
|
+
MERGE (n:product {product_id: row.product_id})
|
|
156
|
+
SET n.name = row.name;
|
|
157
|
+
|
|
158
|
+
// Create PURCHASED relations
|
|
159
|
+
MATCH (from:customer {customer_id: row.customer_id})
|
|
160
|
+
MATCH (to:product {product_id: row.product_id})
|
|
161
|
+
MERGE (from)-[r:PURCHASED]->(to)
|
|
162
|
+
SET r.order_id = row.order_id,
|
|
163
|
+
r.order_date = row.order_date;
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## 🎯 Features
|
|
167
|
+
|
|
168
|
+
- **Declarative modeling** - Define your graph schema in YAML (like dbt models)
|
|
169
|
+
- **Schema validation** - Catch errors before deployment
|
|
170
|
+
- **Documentation generation** - Beautiful HTML docs with `grai docs` (like `dbt docs generate/serve`)
|
|
171
|
+
- **Lineage visualization** - Interactive graph and Mermaid diagrams showing data flow
|
|
172
|
+
- **Multi-backend support** - Start with Neo4j, expand to Gremlin later
|
|
173
|
+
- **CLI-first** - Integrates into your CI/CD pipeline
|
|
174
|
+
- **Type-safe** - Built with Pydantic for robust validation
|
|
175
|
+
- **Extensible** - Easy to add custom backends and transformations
|
|
176
|
+
|
|
177
|
+
## 🏗️ Real-World Usage
|
|
178
|
+
|
|
179
|
+
### Local Development
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
# 1. Define schema
|
|
183
|
+
vim entities/customer.yml
|
|
184
|
+
|
|
185
|
+
# 2. Validate
|
|
186
|
+
grai validate
|
|
187
|
+
|
|
188
|
+
# 3. Generate documentation
|
|
189
|
+
grai docs --serve # Opens browser with interactive docs
|
|
190
|
+
|
|
191
|
+
# 4. Deploy schema
|
|
192
|
+
grai run --schema-only
|
|
193
|
+
|
|
194
|
+
# 5. Test with sample data
|
|
195
|
+
grai run --load-csv
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Production Deployment
|
|
199
|
+
|
|
200
|
+
```yaml
|
|
201
|
+
# .github/workflows/deploy-schema.yml
|
|
202
|
+
name: Deploy Graph Schema
|
|
203
|
+
|
|
204
|
+
on:
|
|
205
|
+
push:
|
|
206
|
+
branches: [main]
|
|
207
|
+
|
|
208
|
+
jobs:
|
|
209
|
+
deploy:
|
|
210
|
+
runs-on: ubuntu-latest
|
|
211
|
+
steps:
|
|
212
|
+
- uses: actions/checkout@v3
|
|
213
|
+
|
|
214
|
+
- name: Validate Schema
|
|
215
|
+
run: grai validate
|
|
216
|
+
|
|
217
|
+
- name: Deploy to Production
|
|
218
|
+
run: |
|
|
219
|
+
grai run --schema-only \
|
|
220
|
+
--uri ${{ secrets.NEO4J_URI }} \
|
|
221
|
+
--user ${{ secrets.NEO4J_USER }} \
|
|
222
|
+
--password ${{ secrets.NEO4J_PASSWORD }}
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### With Your ETL Pipeline
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
# Your Airflow DAG
|
|
229
|
+
from airflow import DAG
|
|
230
|
+
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
|
|
231
|
+
from your_etl import load_customers_to_neo4j
|
|
232
|
+
|
|
233
|
+
dag = DAG('graph_pipeline')
|
|
234
|
+
|
|
235
|
+
# 1. grai.build ensures schema is up-to-date
|
|
236
|
+
deploy_schema = BashOperator(
|
|
237
|
+
task_id='deploy_schema',
|
|
238
|
+
bash_command='grai run --schema-only',
|
|
239
|
+
dag=dag
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
# 2. Your ETL loads the actual data
|
|
243
|
+
load_data = PythonOperator(
|
|
244
|
+
task_id='load_data',
|
|
245
|
+
python_callable=load_customers_to_neo4j,
|
|
246
|
+
dag=dag
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
deploy_schema >> load_data
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## 📦 Architecture
|
|
253
|
+
|
|
254
|
+
```
|
|
255
|
+
grai/
|
|
256
|
+
├── cli/  # Typer-based CLI commands
|
|
257
|
+
├── core/
|
|
258
|
+
│   ├── models.py  # Pydantic models (Entity, Relation, Property)
|
|
259
|
+
│   ├── parser/  # YAML → Python models
|
|
260
|
+
│   ├── validator/  # Schema validation
|
|
261
|
+
│   ├── compiler/  # Generate Cypher/Gremlin
|
|
262
|
+
│   ├── loader/  # Execute against databases
|
|
263
|
+
│   └── utils/  # Shared utilities
|
|
264
|
+
└── templates/  # Project templates
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
## 🧪 Development
|
|
268
|
+
|
|
269
|
+
### Setup
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
# Clone the repo
|
|
273
|
+
git clone https://github.com/asantora05/grai.build.git
|
|
274
|
+
cd grai.build
|
|
275
|
+
|
|
276
|
+
# Install with dev dependencies
|
|
277
|
+
pip install -e ".[dev]"
|
|
278
|
+
|
|
279
|
+
# Run tests
|
|
280
|
+
pytest
|
|
281
|
+
|
|
282
|
+
# Format code
|
|
283
|
+
black grai/
|
|
284
|
+
ruff check grai/
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
## Documentation
|
|
288
|
+
|
|
289
|
+
Generate beautiful, interactive documentation for your graph:
|
|
290
|
+
|
|
291
|
+
```bash
|
|
292
|
+
# Generate and serve documentation locally
|
|
293
|
+
grai docs --serve
|
|
294
|
+
|
|
295
|
+
# Generate to custom directory
|
|
296
|
+
grai docs --output ./my-docs
|
|
297
|
+
|
|
298
|
+
# Just generate (don't serve)
|
|
299
|
+
grai docs
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
The documentation includes:
|
|
303
|
+
|
|
304
|
+
- Project overview with stats
|
|
305
|
+
- 📦 Entity catalog with properties
|
|
306
|
+
- Relation catalog with mappings
|
|
307
|
+
- 🕸️ Interactive graph visualization (D3.js)
|
|
308
|
+
- Lineage diagrams (Mermaid.js)
|
|
309
|
+
|
|
310
|
+
For development guidance, check out the [instructions](.github/instructions/instructions.instructions.md).
|
|
311
|
+
|
|
312
|
+
## 🗺️ Roadmap
|
|
313
|
+
|
|
314
|
+
- [x] Core Pydantic models
|
|
315
|
+
- [x] YAML parser
|
|
316
|
+
- [x] Schema validator
|
|
317
|
+
- [x] Cypher compiler
|
|
318
|
+
- [x] Neo4j loader
|
|
319
|
+
- [x] CLI commands (`init`, `build`, `validate`, `run`, `docs`)
|
|
320
|
+
- [x] Graph IR export (JSON)
|
|
321
|
+
- [x] Documentation generation (dbt-style)
|
|
322
|
+
- [x] Lineage visualization (Mermaid + D3.js)
|
|
323
|
+
- [ ] Graph visualization improvements
|
|
324
|
+
- [ ] Gremlin backend support
|
|
325
|
+
- [ ] Incremental sync
|
|
326
|
+
- [ ] Schema versioning and migrations
|
|
327
|
+
|
|
328
|
+
## Current Status
|
|
329
|
+
|
|
330
|
+
**v0.3.0** - Feature-complete MVP with documentation
|
|
331
|
+
|
|
332
|
+
- ✅ **Core Models** - Pydantic models for Entity, Relation, Property
|
|
333
|
+
- ✅ **YAML Parser** - Parse and load entity/relation definitions
|
|
334
|
+
- ✅ **Schema Validator** - Validate references and mappings
|
|
335
|
+
- ✅ **Cypher Compiler** - Generate Neo4j constraints and indexes
|
|
336
|
+
- ✅ **Neo4j Loader** - Execute Cypher against Neo4j instances
|
|
337
|
+
- ✅ **Documentation Generator** - Interactive HTML docs (like dbt docs)
|
|
338
|
+
- ✅ **Lineage Tracking** - Visualize data flow and dependencies
|
|
339
|
+
- ✅ **Graph Visualizer** - D3.js and Cytoscape visualizations
|
|
340
|
+
- ✅ **Build Cache** - Incremental builds for faster iteration
|
|
341
|
+
- ✅ **CLI Commands** - Full command suite (`init`, `build`, `validate`, `run`, `docs`, etc.)
|
|
342
|
+
|
|
343
|
+
**257 tests passing | High coverage across all modules**
|
|
344
|
+
|
|
345
|
+
See it in action:
|
|
346
|
+
|
|
347
|
+
```bash
|
|
348
|
+
# Initialize example project
|
|
349
|
+
grai init my-project
|
|
350
|
+
cd my-project
|
|
351
|
+
|
|
352
|
+
# Generate and view documentation
|
|
353
|
+
grai docs --serve
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
## 🤝 Contributing
|
|
357
|
+
|
|
358
|
+
Contributions are welcome! This is an early-stage project, so there's plenty of room for improvement.
|
|
359
|
+
|
|
360
|
+
## License
|
|
361
|
+
|
|
362
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
363
|
+
|
|
364
|
+
## 💡 Inspiration
|
|
365
|
+
|
|
366
|
+
This project is inspired by:
|
|
367
|
+
|
|
368
|
+
- [dbt](https://www.getdbt.com/) - Analytics engineering workflow
|
|
369
|
+
- [SQLMesh](https://sqlmesh.com/) - Data transformation framework
|
|
370
|
+
- [Amundsen](https://www.amundsen.io/) - Data discovery and metadata
|
|
371
|
+
|
|
372
|
+
---
|
|
373
|
+
|
|
374
|
+
**Built with ❤️ for the graph database community**
|