grai-build 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. grai_build-0.3.0/LICENSE +21 -0
  2. grai_build-0.3.0/PKG-INFO +374 -0
  3. grai_build-0.3.0/README.md +346 -0
  4. grai_build-0.3.0/grai/__init__.py +11 -0
  5. grai_build-0.3.0/grai/cli/__init__.py +5 -0
  6. grai_build-0.3.0/grai/cli/main.py +2546 -0
  7. grai_build-0.3.0/grai/core/__init__.py +1 -0
  8. grai_build-0.3.0/grai/core/cache/__init__.py +33 -0
  9. grai_build-0.3.0/grai/core/cache/build_cache.py +352 -0
  10. grai_build-0.3.0/grai/core/compiler/__init__.py +23 -0
  11. grai_build-0.3.0/grai/core/compiler/cypher_compiler.py +426 -0
  12. grai_build-0.3.0/grai/core/exporter/__init__.py +13 -0
  13. grai_build-0.3.0/grai/core/exporter/ir_exporter.py +343 -0
  14. grai_build-0.3.0/grai/core/lineage/__init__.py +42 -0
  15. grai_build-0.3.0/grai/core/lineage/lineage_tracker.py +685 -0
  16. grai_build-0.3.0/grai/core/loader/__init__.py +21 -0
  17. grai_build-0.3.0/grai/core/loader/neo4j_loader.py +514 -0
  18. grai_build-0.3.0/grai/core/models.py +344 -0
  19. grai_build-0.3.0/grai/core/parser/__init__.py +25 -0
  20. grai_build-0.3.0/grai/core/parser/yaml_parser.py +375 -0
  21. grai_build-0.3.0/grai/core/validator/__init__.py +25 -0
  22. grai_build-0.3.0/grai/core/validator/validator.py +475 -0
  23. grai_build-0.3.0/grai/core/visualizer/__init__.py +650 -0
  24. grai_build-0.3.0/grai/core/visualizer/visualizer.py +15 -0
  25. grai_build-0.3.0/grai/templates/__init__.py +1 -0
  26. grai_build-0.3.0/grai_build.egg-info/PKG-INFO +374 -0
  27. grai_build-0.3.0/grai_build.egg-info/SOURCES.txt +41 -0
  28. grai_build-0.3.0/grai_build.egg-info/dependency_links.txt +1 -0
  29. grai_build-0.3.0/grai_build.egg-info/entry_points.txt +2 -0
  30. grai_build-0.3.0/grai_build.egg-info/requires.txt +13 -0
  31. grai_build-0.3.0/grai_build.egg-info/top_level.txt +1 -0
  32. grai_build-0.3.0/pyproject.toml +72 -0
  33. grai_build-0.3.0/setup.cfg +4 -0
  34. grai_build-0.3.0/tests/test_cache.py +602 -0
  35. grai_build-0.3.0/tests/test_cli.py +658 -0
  36. grai_build-0.3.0/tests/test_compiler.py +486 -0
  37. grai_build-0.3.0/tests/test_exporter.py +368 -0
  38. grai_build-0.3.0/tests/test_lineage.py +568 -0
  39. grai_build-0.3.0/tests/test_loader.py +506 -0
  40. grai_build-0.3.0/tests/test_models.py +203 -0
  41. grai_build-0.3.0/tests/test_parser.py +413 -0
  42. grai_build-0.3.0/tests/test_validator.py +529 -0
  43. grai_build-0.3.0/tests/test_visualizer.py +255 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Andrew Santora
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,374 @@
1
+ Metadata-Version: 2.4
2
+ Name: grai-build
3
+ Version: 0.3.0
4
+ Summary: Schema-as-code for graph databases - Documentation like dbt, migrations for Neo4j
5
+ Author-email: Andrew Santora <andrew@grai.build>
6
+ License: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Python: >=3.11
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: pydantic>=2.0
16
+ Requires-Dist: pyyaml>=6.0
17
+ Requires-Dist: typer>=0.9.0
18
+ Requires-Dist: rich>=13.0
19
+ Requires-Dist: neo4j>=5.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7.4; extra == "dev"
22
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
23
+ Requires-Dist: black>=23.0; extra == "dev"
24
+ Requires-Dist: ruff>=0.1; extra == "dev"
25
+ Requires-Dist: mypy>=1.5; extra == "dev"
26
+ Requires-Dist: pre-commit>=3.0; extra == "dev"
27
+ Dynamic: license-file
28
+
29
+ # grai.build
30
+
31
+ > **Schema-as-code for graph databases** - Documentation like dbt, migrations for Neo4j
32
+
33
+ [![CI](https://github.com/asantora05/grai.build/workflows/CI/badge.svg)](https://github.com/asantora05/grai.build/actions/workflows/ci.yml)
34
+ [![codecov](https://codecov.io/gh/asantora05/grai.build/graph/badge.svg?token=FIV3O0YYVR)](https://codecov.io/gh/asantora05/grai.build)
35
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
36
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
37
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
38
+
39
+ ## 📘 What is grai.build?
40
+
41
+ **grai.build brings dbt's documentation experience to graph databases** - define your schema in YAML, generate beautiful docs, and manage migrations.
42
+
43
+ It manages your graph **schema**, not your data. You define entities and relations in YAML, and grai.build:
44
+
45
+ - ✅ **Validates** your schema for consistency
46
+ - ✅ **Generates** Cypher constraints and indexes
47
+ - ✅ **Documents** your graph structure automatically (like `dbt docs`)
48
+ - ✅ **Tracks lineage** with interactive visualizations
49
+ - ✅ **Integrates** with your CI/CD pipeline
50
+
51
+ **What it's NOT:**
52
+
53
+ - ❌ Not an ETL tool (use Airflow, Prefect, or dbt for data loading)
54
+ - ❌ Not a data transformation framework (dbt does this for SQL)
55
+ - ❌ Not a replacement for your existing data infrastructure
56
+
57
+ **Think of it as:**
58
+
59
+ - **Like dbt:** Declarative YAML definitions, beautiful documentation, lineage tracking
60
+ - **Like Alembic/Flyway:** Database migrations and schema management
61
+ - **For graphs:** Manages Neo4j schema while your pipelines handle data
62
+
63
+ ## 🚀 Quick Start
64
+
65
+ ### Installation
66
+
67
+ ```bash
68
+ pip install grai-build
69
+ ```
70
+
71
+ ### Create Your First Project
72
+
73
+ ```bash
74
+ # Initialize a new project
75
+ grai init my-graph-project
76
+ cd my-graph-project
77
+
78
+ # Validate and build
79
+ grai build
80
+
81
+ # Generate documentation (like dbt docs)
82
+ grai docs --serve
83
+
84
+ # Deploy schema to Neo4j
85
+ grai run --uri bolt://localhost:7687 --user neo4j --password secret
86
+
87
+ # Load sample data for local testing
88
+ grai run --load-csv --password secret
89
+ ```
90
+
91
+ ## 📂 Project Structure
92
+
93
+ ```
94
+ my-graph-project/
95
+ ├── grai.yml # Project manifest
96
+ ├── entities/
97
+ │   ├── customer.yml # Entity definitions
98
+ │   └── product.yml
99
+ ├── relations/
100
+ │   └── purchased.yml # Relation definitions
101
+ └── target/ # Compiled output
102
+     └── neo4j/
103
+         └── compiled.cypher
104
+ ```
105
+
106
+ ## 📝 Example
107
+
108
+ ### Entity: `entities/customer.yml`
109
+
110
+ ```yaml
111
+ entity: customer
112
+ source: analytics.customers
113
+ keys: [customer_id]
114
+ properties:
115
+   - name: customer_id
116
+     type: string
117
+   - name: name
118
+     type: string
119
+   - name: region
120
+     type: string
121
+ ```
122
+
123
+ ### Relation: `relations/purchased.yml`
124
+
125
+ ```yaml
126
+ relation: PURCHASED
127
+ from: customer
128
+ to: product
129
+ source: analytics.orders
130
+ mappings:
131
+   from_key: customer_id
132
+   to_key: product_id
133
+ properties:
134
+   - name: order_id
135
+     type: string
136
+   - name: order_date
137
+     type: datetime
138
+ ```
139
+
140
+ ### Compile to Cypher
141
+
142
+ ```bash
143
+ grai build
144
+ ```
145
+
146
+ **Output (`target/neo4j/compiled.cypher`):**
147
+
148
+ ```cypher
149
+ // Create Customer nodes
150
+ MERGE (n:customer {customer_id: row.customer_id})
151
+ SET n.name = row.name,
152
+ n.region = row.region;
153
+
154
+ // Create Product nodes
155
+ MERGE (n:product {product_id: row.product_id})
156
+ SET n.name = row.name;
157
+
158
+ // Create PURCHASED relations
159
+ MATCH (from:customer {customer_id: row.customer_id})
160
+ MATCH (to:product {product_id: row.product_id})
161
+ MERGE (from)-[r:PURCHASED]->(to)
162
+ SET r.order_id = row.order_id,
163
+ r.order_date = row.order_date;
164
+ ```
165
+
166
+ ## 🎯 Features
167
+
168
+ - **Declarative modeling** - Define your graph schema in YAML (like dbt models)
169
+ - **Schema validation** - Catch errors before deployment
170
+ - **Documentation generation** - Beautiful HTML docs with `grai docs` (like `dbt docs generate/serve`)
171
+ - **Lineage visualization** - Interactive graph and Mermaid diagrams showing data flow
172
+ - **Multi-backend support** - Start with Neo4j, expand to Gremlin later
173
+ - **CLI-first** - Integrates into your CI/CD pipeline
174
+ - **Type-safe** - Built with Pydantic for robust validation
175
+ - **Extensible** - Easy to add custom backends and transformations
176
+
177
+ ## 🏗️ Real-World Usage
178
+
179
+ ### Local Development
180
+
181
+ ```bash
182
+ # 1. Define schema
183
+ vim entities/customer.yml
184
+
185
+ # 2. Validate
186
+ grai validate
187
+
188
+ # 3. Generate documentation
189
+ grai docs --serve # Opens browser with interactive docs
190
+
191
+ # 4. Deploy schema
192
+ grai run --schema-only
193
+
194
+ # 5. Test with sample data
195
+ grai run --load-csv
196
+ ```
197
+
198
+ ### Production Deployment
199
+
200
+ ```yaml
201
+ # .github/workflows/deploy-schema.yml
202
+ name: Deploy Graph Schema
203
+
204
+ on:
205
+   push:
206
+     branches: [main]
207
+
208
+ jobs:
209
+   deploy:
210
+     runs-on: ubuntu-latest
211
+     steps:
212
+       - uses: actions/checkout@v3
213
+
214
+       - name: Validate Schema
215
+         run: grai validate
216
+
217
+       - name: Deploy to Production
218
+         run: |
219
+           grai run --schema-only \
220
+             --uri ${{ secrets.NEO4J_URI }} \
221
+             --user ${{ secrets.NEO4J_USER }} \
222
+             --password ${{ secrets.NEO4J_PASSWORD }}
223
+ ```
224
+
225
+ ### With Your ETL Pipeline
226
+
227
+ ```python
228
+ # Your Airflow DAG
229
+ from airflow import DAG
230
+ from airflow.operators.bash import BashOperator
231
+ from your_etl import load_customers_to_neo4j
232
+
233
+ dag = DAG('graph_pipeline')
234
+
235
+ # 1. grai.build ensures schema is up-to-date
236
+ deploy_schema = BashOperator(
237
+ task_id='deploy_schema',
238
+ bash_command='grai run --schema-only',
239
+ dag=dag
240
+ )
241
+
242
+ # 2. Your ETL loads the actual data
243
+ load_data = PythonOperator(
244
+ task_id='load_data',
245
+ python_callable=load_customers_to_neo4j,
246
+ dag=dag
247
+ )
248
+
249
+ deploy_schema >> load_data
250
+ ```
251
+
252
+ ## 📦 Architecture
253
+
254
+ ```
255
+ grai/
256
+ ├── cli/ # Typer-based CLI commands
257
+ ├── core/
258
+ │   ├── models.py # Pydantic models (Entity, Relation, Property)
259
+ │   ├── parser/ # YAML → Python models
260
+ │   ├── validator/ # Schema validation
261
+ │   ├── compiler/ # Generate Cypher/Gremlin
262
+ │   ├── loader/ # Execute against databases
263
+ │   └── utils/ # Shared utilities
264
+ └── templates/ # Project templates
265
+ ```
266
+
267
+ ## 🧪 Development
268
+
269
+ ### Setup
270
+
271
+ ```bash
272
+ # Clone the repo
273
+ git clone https://github.com/asantora05/grai.build.git
274
+ cd grai.build
275
+
276
+ # Install with dev dependencies
277
+ pip install -e ".[dev]"
278
+
279
+ # Run tests
280
+ pytest
281
+
282
+ # Format code
283
+ black grai/
284
+ ruff check grai/
285
+ ```
286
+
287
+ ## 📖 Documentation
288
+
289
+ Generate beautiful, interactive documentation for your graph:
290
+
291
+ ```bash
292
+ # Generate and serve documentation locally
293
+ grai docs --serve
294
+
295
+ # Generate to custom directory
296
+ grai docs --output ./my-docs
297
+
298
+ # Just generate (don't serve)
299
+ grai docs
300
+ ```
301
+
302
+ The documentation includes:
303
+
304
+ - 📊 Project overview with stats
305
+ - 📦 Entity catalog with properties
306
+ - 🔗 Relation catalog with mappings
307
+ - 🕸️ Interactive graph visualization (D3.js)
308
+ - 🔄 Lineage diagrams (Mermaid.js)
309
+
310
+ For development guidance, check out the [instructions](.github/instructions/instructions.instructions.md).
311
+
312
+ ## 🗺️ Roadmap
313
+
314
+ - [x] Core Pydantic models
315
+ - [x] YAML parser
316
+ - [x] Schema validator
317
+ - [x] Cypher compiler
318
+ - [x] Neo4j loader
319
+ - [x] CLI commands (`init`, `build`, `validate`, `run`, `docs`)
320
+ - [x] Graph IR export (JSON)
321
+ - [x] Documentation generation (dbt-style)
322
+ - [x] Lineage visualization (Mermaid + D3.js)
323
+ - [ ] Graph visualization improvements
324
+ - [ ] Gremlin backend support
325
+ - [ ] Incremental sync
326
+ - [ ] Schema versioning and migrations
327
+
328
+ ## 📊 Current Status
329
+
330
+ **v0.3.0** - Feature-complete MVP with documentation
331
+
332
+ - ✅ **Core Models** - Pydantic models for Entity, Relation, Property
333
+ - ✅ **YAML Parser** - Parse and load entity/relation definitions
334
+ - ✅ **Schema Validator** - Validate references and mappings
335
+ - ✅ **Cypher Compiler** - Generate Neo4j constraints and indexes
336
+ - ✅ **Neo4j Loader** - Execute Cypher against Neo4j instances
337
+ - ✅ **Documentation Generator** - Interactive HTML docs (like dbt docs)
338
+ - ✅ **Lineage Tracking** - Visualize data flow and dependencies
339
+ - ✅ **Graph Visualizer** - D3.js and Cytoscape visualizations
340
+ - ✅ **Build Cache** - Incremental builds for faster iteration
341
+ - ✅ **CLI Commands** - Full command suite (`init`, `build`, `validate`, `run`, `docs`, etc.)
342
+
343
+ **257 tests passing | High coverage across all modules**
344
+
345
+ See it in action:
346
+
347
+ ```bash
348
+ # Initialize example project
349
+ grai init my-project
350
+ cd my-project
351
+
352
+ # Generate and view documentation
353
+ grai docs --serve
354
+ ```
355
+
356
+ ## 🤝 Contributing
357
+
358
+ Contributions are welcome! This is an early-stage project, so there's plenty of room for improvement.
359
+
360
+ ## 📄 License
361
+
362
+ MIT License - see [LICENSE](LICENSE) for details.
363
+
364
+ ## 💡 Inspiration
365
+
366
+ This project is inspired by:
367
+
368
+ - [dbt](https://www.getdbt.com/) - Analytics engineering workflow
369
+ - [SQLMesh](https://sqlmesh.com/) - Data transformation framework
370
+ - [Amundsen](https://www.amundsen.io/) - Data discovery and metadata
371
+
372
+ ---
373
+
374
+ **Built with ❤️ for the graph database community**