graflo 1.3.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. graflo-1.3.7/.gitignore +79 -0
  2. graflo-1.3.7/LICENSE +126 -0
  3. graflo-1.3.7/PKG-INFO +243 -0
  4. graflo-1.3.7/README.md +214 -0
  5. graflo-1.3.7/graflo/README.md +18 -0
  6. graflo-1.3.7/graflo/__init__.py +70 -0
  7. graflo-1.3.7/graflo/architecture/__init__.py +38 -0
  8. graflo-1.3.7/graflo/architecture/actor.py +1276 -0
  9. graflo-1.3.7/graflo/architecture/actor_util.py +450 -0
  10. graflo-1.3.7/graflo/architecture/edge.py +418 -0
  11. graflo-1.3.7/graflo/architecture/onto.py +376 -0
  12. graflo-1.3.7/graflo/architecture/onto_sql.py +54 -0
  13. graflo-1.3.7/graflo/architecture/resource.py +163 -0
  14. graflo-1.3.7/graflo/architecture/schema.py +135 -0
  15. graflo-1.3.7/graflo/architecture/transform.py +292 -0
  16. graflo-1.3.7/graflo/architecture/util.py +89 -0
  17. graflo-1.3.7/graflo/architecture/vertex.py +562 -0
  18. graflo-1.3.7/graflo/caster.py +736 -0
  19. graflo-1.3.7/graflo/cli/__init__.py +14 -0
  20. graflo-1.3.7/graflo/cli/ingest.py +203 -0
  21. graflo-1.3.7/graflo/cli/manage_dbs.py +197 -0
  22. graflo-1.3.7/graflo/cli/plot_schema.py +132 -0
  23. graflo-1.3.7/graflo/cli/xml2json.py +93 -0
  24. graflo-1.3.7/graflo/data_source/__init__.py +48 -0
  25. graflo-1.3.7/graflo/data_source/api.py +339 -0
  26. graflo-1.3.7/graflo/data_source/base.py +95 -0
  27. graflo-1.3.7/graflo/data_source/factory.py +304 -0
  28. graflo-1.3.7/graflo/data_source/file.py +148 -0
  29. graflo-1.3.7/graflo/data_source/memory.py +70 -0
  30. graflo-1.3.7/graflo/data_source/registry.py +82 -0
  31. graflo-1.3.7/graflo/data_source/sql.py +183 -0
  32. graflo-1.3.7/graflo/db/__init__.py +44 -0
  33. graflo-1.3.7/graflo/db/arango/__init__.py +22 -0
  34. graflo-1.3.7/graflo/db/arango/conn.py +1025 -0
  35. graflo-1.3.7/graflo/db/arango/query.py +180 -0
  36. graflo-1.3.7/graflo/db/arango/util.py +88 -0
  37. graflo-1.3.7/graflo/db/conn.py +377 -0
  38. graflo-1.3.7/graflo/db/connection/__init__.py +6 -0
  39. graflo-1.3.7/graflo/db/connection/config_mapping.py +18 -0
  40. graflo-1.3.7/graflo/db/connection/onto.py +717 -0
  41. graflo-1.3.7/graflo/db/connection/wsgi.py +29 -0
  42. graflo-1.3.7/graflo/db/manager.py +119 -0
  43. graflo-1.3.7/graflo/db/neo4j/__init__.py +16 -0
  44. graflo-1.3.7/graflo/db/neo4j/conn.py +639 -0
  45. graflo-1.3.7/graflo/db/postgres/__init__.py +37 -0
  46. graflo-1.3.7/graflo/db/postgres/conn.py +948 -0
  47. graflo-1.3.7/graflo/db/postgres/fuzzy_matcher.py +281 -0
  48. graflo-1.3.7/graflo/db/postgres/heuristics.py +133 -0
  49. graflo-1.3.7/graflo/db/postgres/inference_utils.py +428 -0
  50. graflo-1.3.7/graflo/db/postgres/resource_mapping.py +273 -0
  51. graflo-1.3.7/graflo/db/postgres/schema_inference.py +372 -0
  52. graflo-1.3.7/graflo/db/postgres/types.py +148 -0
  53. graflo-1.3.7/graflo/db/postgres/util.py +87 -0
  54. graflo-1.3.7/graflo/db/tigergraph/__init__.py +9 -0
  55. graflo-1.3.7/graflo/db/tigergraph/conn.py +2365 -0
  56. graflo-1.3.7/graflo/db/tigergraph/onto.py +26 -0
  57. graflo-1.3.7/graflo/db/util.py +49 -0
  58. graflo-1.3.7/graflo/filter/__init__.py +21 -0
  59. graflo-1.3.7/graflo/filter/onto.py +525 -0
  60. graflo-1.3.7/graflo/logging.conf +22 -0
  61. graflo-1.3.7/graflo/onto.py +312 -0
  62. graflo-1.3.7/graflo/plot/__init__.py +17 -0
  63. graflo-1.3.7/graflo/plot/plotter.py +616 -0
  64. graflo-1.3.7/graflo/util/__init__.py +23 -0
  65. graflo-1.3.7/graflo/util/chunker.py +807 -0
  66. graflo-1.3.7/graflo/util/merge.py +150 -0
  67. graflo-1.3.7/graflo/util/misc.py +37 -0
  68. graflo-1.3.7/graflo/util/onto.py +422 -0
  69. graflo-1.3.7/graflo/util/transform.py +454 -0
  70. graflo-1.3.7/pyproject.toml +67 -0
@@ -0,0 +1,79 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.pyc
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .idea
12
+ .Python
13
+ env/
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *,cover
48
+ .hypothesis/
49
+
50
+ # Translations
51
+ *.mo
52
+ *.pot
53
+
54
+ # Sphinx documentation
55
+ docs/_build/
56
+
57
+ # PyBuilder
58
+ target/
59
+
60
+ #Ipython Notebook
61
+ .ipynb_checkpoints
62
+
63
+ #Latex
64
+ *.aux
65
+ *.fdb_latexmk
66
+ *.fls
67
+ *.log
68
+ *.synctex.gz
69
+
70
+ # cache
71
+ .pytest_cache
72
+
73
+ .mypy_cache
74
+
75
+ *.log.*
76
+ #*/**/*png
77
+ */**/*pdf
78
+
79
+ site/
graflo-1.3.7/LICENSE ADDED
@@ -0,0 +1,126 @@
1
+ Business Source License 1.1
2
+
3
+ Parameters
4
+
5
+ Licensor: Growgraph SAS
6
+ Licensed Work: GraFlo
7
+ The Licensed Work is (c) 2025 GraFlo
8
+ Formerly GraphCast
9
+
10
+ Additional Use Grant:
11
+ The Licensor hereby grants you the right to make production use of the Licensed Work
12
+ if any of the following conditions are met:
13
+
14
+ - Your organization's annual revenue does not exceed USD 100,000
15
+ - You are not offering a commercially available product or service that
16
+ competes with the Licensor's products or services
17
+
18
+ Definitions:
19
+ "Production Use" means any use of the Licensed Work in a production environment,
20
+ including but not limited to:
21
+
22
+ - Deploying the Licensed Work to serve production traffic
23
+ - Using the Licensed Work to process real customer data
24
+ - Incorporating the Licensed Work into a commercial product or service
25
+ - Using the Licensed Work to generate revenue directly or indirectly
26
+
27
+ "Competing Product or Service" means any product or service that:
28
+
29
+ - Provides substantially similar functionality to the Licensed Work
30
+ - Offers the Licensed Work as a hosted or managed service
31
+ - Incorporates the Licensed Work into a broader product or service that competes
32
+ with the Licensor's current or reasonably anticipated products or services
33
+
34
+ "Non-Production Use" means:
35
+
36
+ - Development and testing in isolated environments
37
+ - Academic research and educational purposes
38
+ - Evaluation of the Licensed Work for potential purchase of a commercial license
39
+ - Personal, non-commercial projects where the Licensed Work is not used to generate revenue
40
+
41
+ Change Date: 2029-05-01
42
+
43
+ Change License: Apache License, Version 2.0
44
+
45
+ For information about alternative licensing arrangements for the Software,
46
+ please contact: <team@growgraph.dev>
47
+
48
+ Notice
49
+
50
+ The Business Source License (this document, or the “License”) is not an Open
51
+ Source license. However, the Licensed Work will eventually be made available
52
+ under an Open Source License, as stated in this License.
53
+
54
+ License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
55
+ “Business Source License” is a trademark of MariaDB Corporation Ab.
56
+
57
+ -----------------------------------------------------------------------------
58
+
59
+ Business Source License 1.1
60
+
61
+ Terms
62
+
63
+ The Licensor hereby grants you the right to copy, modify, create derivative
64
+ works, redistribute, and make non-production use of the Licensed Work. The
65
+ Licensor may make an Additional Use Grant, above, permitting limited
66
+ production use.
67
+
68
+ Effective on the Change Date, or the fourth anniversary of the first publicly
69
+ available distribution of a specific version of the Licensed Work under this
70
+ License, whichever comes first, the Licensor hereby grants you rights under
71
+ the terms of the Change License, and the rights granted in the paragraph
72
+ above terminate.
73
+
74
+ If your use of the Licensed Work does not comply with the requirements
75
+ currently in effect as described in this License, you must purchase a
76
+ commercial license from the Licensor, its affiliated entities, or authorized
77
+ resellers, or you must refrain from using the Licensed Work.
78
+
79
+ All copies of the original and modified Licensed Work, and derivative works
80
+ of the Licensed Work, are subject to this License. This License applies
81
+ separately for each version of the Licensed Work and the Change Date may vary
82
+ for each version of the Licensed Work released by Licensor.
83
+
84
+ You must conspicuously display this License on each original or modified copy
85
+ of the Licensed Work. If you receive the Licensed Work in original or
86
+ modified form from a third party, the terms and conditions set forth in this
87
+ License apply to your use of that work.
88
+
89
+ Any use of the Licensed Work in violation of this License will automatically
90
+ terminate your rights under this License for the current and all other
91
+ versions of the Licensed Work.
92
+
93
+ This License does not grant you any right in any trademark or logo of
94
+ Licensor or its affiliates (provided that you may use a trademark or logo of
95
+ Licensor as expressly required by this License).
96
+
97
+ TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
98
+ AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
99
+ EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
100
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
101
+ TITLE.
102
+
103
+ MariaDB hereby grants you permission to use this License’s text to license
104
+ your works, and to refer to it using the trademark “Business Source License”,
105
+ as long as you comply with the Covenants of Licensor below.
106
+
107
+ Covenants of Licensor
108
+
109
+ In consideration of the right to use this License’s text and the “Business
110
+ Source License” name and trademark, Licensor covenants to MariaDB, and to all
111
+ other recipients of the licensed work to be provided by Licensor:
112
+
113
+ 1. To specify as the Change License the GPL Version 2.0 or any later version,
114
+ or a license that is compatible with GPL Version 2.0 or a later version,
115
+ where “compatible” means that software provided under the Change License can
116
+ be included in a program with software provided under GPL Version 2.0 or a
117
+ later version. Licensor may specify additional Change Licenses without
118
+ limitation.
119
+
120
+ 2. To either: (a) specify an additional grant of rights to use that does not
121
+ impose any additional restriction on the right granted in this License, as
122
+ the Additional Use Grant; or (b) insert the text “None”.
123
+
124
+ 3. To specify a Change Date.
125
+
126
+ 4. Not to modify this License in any other way.
graflo-1.3.7/PKG-INFO ADDED
@@ -0,0 +1,243 @@
1
+ Metadata-Version: 2.4
2
+ Name: graflo
3
+ Version: 1.3.7
4
+ Summary: A framework for transforming tabular (CSV, SQL) and hierarchical data (JSON, XML) into property graphs and ingesting them into graph databases (ArangoDB, Neo4j, TigerGraph). Features automatic PostgreSQL schema inference.
5
+ Author-email: Alexander Belikov <alexander@growgraph.dev>
6
+ License-File: LICENSE
7
+ Requires-Python: ~=3.10.0
8
+ Requires-Dist: click<9,>=8.2.0
9
+ Requires-Dist: dataclass-wizard>=0.34.0
10
+ Requires-Dist: ijson<4,>=3.2.3
11
+ Requires-Dist: neo4j<6,>=5.22.0
12
+ Requires-Dist: networkx~=3.3
13
+ Requires-Dist: pandas-stubs==2.3.0.250703
14
+ Requires-Dist: pandas<3,>=2.0.3
15
+ Requires-Dist: psycopg2-binary>=2.9.11
16
+ Requires-Dist: pydantic-settings>=2.12.0
17
+ Requires-Dist: pydantic>=2.12.5
18
+ Requires-Dist: python-arango<9,>=8.1.2
19
+ Requires-Dist: pytigergraph>=1.9.0
20
+ Requires-Dist: requests>=2.31.0
21
+ Requires-Dist: sqlalchemy>=2.0.0
22
+ Requires-Dist: strenum>=0.4.15
23
+ Requires-Dist: suthing>=0.5.0
24
+ Requires-Dist: urllib3>=2.0.0
25
+ Requires-Dist: xmltodict<0.15,>=0.14.2
26
+ Provides-Extra: plot
27
+ Requires-Dist: pygraphviz>=1.14; extra == 'plot'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # GraFlo <img src="https://raw.githubusercontent.com/growgraph/graflo/main/docs/assets/favicon.ico" alt="graflo logo" style="height: 32px; width:32px;"/>
31
+
32
+ A framework for transforming **tabular** (CSV, SQL) and **hierarchical** data (JSON, XML) into property graphs and ingesting them into graph databases (ArangoDB, Neo4j, **TigerGraph**).
33
+
34
+ > **⚠️ Package Renamed**: This package was formerly known as `graphcast`.
35
+
36
+ ![Python](https://img.shields.io/badge/python-3.10-blue.svg)
37
+ [![PyPI version](https://badge.fury.io/py/graflo.svg)](https://badge.fury.io/py/graflo)
38
+ [![PyPI Downloads](https://static.pepy.tech/badge/graflo)](https://pepy.tech/projects/graflo)
39
+ [![License: BSL](https://img.shields.io/badge/license-BSL--1.1-green)](https://github.com/growgraph/graflo/blob/main/LICENSE)
40
+ [![pre-commit](https://github.com/growgraph/graflo/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/growgraph/graflo/actions/workflows/pre-commit.yml)
41
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15446131.svg)](https://doi.org/10.5281/zenodo.15446131)
42
+
43
+ ## Core Concepts
44
+
45
+ ### Property Graphs
46
+ graflo works with property graphs, which consist of:
47
+
48
+ - **Vertices**: Nodes with properties and optional unique identifiers
49
+ - **Edges**: Relationships between vertices with their own properties
50
+ - **Properties**: Both vertices and edges may have properties
51
+
52
+ ### Schema
53
+ The Schema defines how your data should be transformed into a graph and contains:
54
+
55
+ - **Vertex Definitions**: Specify vertex types, their properties, and unique identifiers
56
+ - Fields can be specified as strings (backward compatible) or typed `Field` objects with types (INT, FLOAT, STRING, DATETIME, BOOL)
57
+ - Type information enables better validation and database-specific optimizations
58
+ - **Edge Definitions**: Define relationships between vertices and their properties
59
+ - Weight fields support typed definitions for better type safety
60
+ - **Resource Mapping**: Describe how data sources map to vertices and edges
61
+ - **Transforms**: Modify data during the casting process
62
+ - **Automatic Schema Inference**: Generate schemas automatically from PostgreSQL 3NF databases
63
+
64
+ ### Resources
65
+ Resources are your data sources that can be:
66
+
67
+ - **Table-like**: CSV files, database tables
68
+ - **JSON-like**: JSON files, nested data structures
69
+
70
+ ## Features
71
+
72
+ - **Graph Transformation Meta-language**: A powerful declarative language to describe how your data becomes a property graph:
73
+ - Define vertex and edge structures with typed fields
74
+ - Set compound indexes for vertices and edges
75
+ - Use blank vertices for complex relationships
76
+ - Specify edge constraints and properties with typed weight fields
77
+ - Apply advanced filtering and transformations
78
+ - **Typed Schema Definitions**: Enhanced type support throughout the schema system
79
+ - Vertex fields support types (INT, FLOAT, STRING, DATETIME, BOOL) for better validation
80
+ - Edge weight fields can specify types for improved type safety
81
+ - Backward compatible: fields without types default to None (suitable for databases like ArangoDB)
82
+ - **🚀 PostgreSQL Schema Inference**: **Automatically generate schemas from PostgreSQL 3NF databases** - No manual schema definition needed!
83
+ - Introspect PostgreSQL schemas to identify vertex-like and edge-like tables
84
+ - Automatically map PostgreSQL data types to graflo Field types (INT, FLOAT, STRING, DATETIME, BOOL)
85
+ - Infer vertex configurations from table structures with proper indexes
86
+ - Infer edge configurations from foreign key relationships
87
+ - Create Resource mappings from PostgreSQL tables automatically
88
+ - Direct database access - ingest data without exporting to files first
89
+ - **Parallel processing**: Use as many cores as you have
90
+ - **Database support**: Ingest into ArangoDB, Neo4j, and **TigerGraph** using the same API (database agnostic). Source data from PostgreSQL and other SQL databases.
91
+ - **Server-side filtering**: Efficient querying with server-side filtering support (TigerGraph REST++ API)
92
+
93
+ ## Documentation
94
+ Full documentation is available at: [growgraph.github.io/graflo](https://growgraph.github.io/graflo)
95
+
96
+ ## Installation
97
+
98
+ ```bash
99
+ pip install graflo
100
+ ```
101
+
102
+ ## Usage Examples
103
+
104
+ ### Simple ingest
105
+
106
+ ```python
107
+ from suthing import FileHandle
108
+
109
+ from graflo import Schema, Caster, Patterns
110
+ from graflo.db.connection.onto import ArangoConfig
111
+
112
+ schema = Schema.from_dict(FileHandle.load("schema.yaml"))
113
+
114
+ # Option 1: Load config from docker/arango/.env (recommended)
115
+ conn_conf = ArangoConfig.from_docker_env()
116
+
117
+ # Option 2: Load from environment variables
118
+ # Set: ARANGO_URI, ARANGO_USERNAME, ARANGO_PASSWORD, ARANGO_DATABASE
119
+ conn_conf = ArangoConfig.from_env()
120
+
121
+ # Option 3: Load with custom prefix (for multiple configs)
122
+ # Set: USER_ARANGO_URI, USER_ARANGO_USERNAME, USER_ARANGO_PASSWORD, USER_ARANGO_DATABASE
123
+ user_conn_conf = ArangoConfig.from_env(prefix="USER")
124
+
125
+ # Option 4: Create config directly
126
+ # conn_conf = ArangoConfig(
127
+ # uri="http://localhost:8535",
128
+ # username="root",
129
+ # password="123",
130
+ # database="mygraph", # For ArangoDB, 'database' maps to schema/graph
131
+ # )
132
+ # Note: If 'database' (or 'schema_name' for TigerGraph) is not set,
133
+ # Caster will automatically use Schema.general.name as fallback
134
+
135
+ from graflo.util.onto import FilePattern
136
+ import pathlib
137
+
138
+ # Create Patterns with file patterns
139
+ patterns = Patterns()
140
+ patterns.add_file_pattern(
141
+ "work",
142
+ FilePattern(regex=r"\Sjson$", sub_path=pathlib.Path("./data"), resource_name="work")
143
+ )
144
+
145
+ # Or use resource_mapping for simpler initialization
146
+ # patterns = Patterns(
147
+ # _resource_mapping={
148
+ # "work": "./data/work.json",
149
+ # }
150
+ # )
151
+
152
+ schema.fetch_resource()
153
+
154
+ from graflo.caster import IngestionParams
155
+
156
+ caster = Caster(schema)
157
+
158
+ ingestion_params = IngestionParams(
159
+ clean_start=False, # Set to True to wipe existing database
160
+ # max_items=1000, # Optional: limit number of items to process
161
+ # batch_size=10000, # Optional: customize batch size
162
+ )
163
+
164
+ caster.ingest(
165
+ output_config=conn_conf, # Target database config
166
+ patterns=patterns, # Source data patterns
167
+ ingestion_params=ingestion_params,
168
+ )
169
+ ```
170
+
171
+ ### PostgreSQL Schema Inference
172
+
173
+ ```python
174
+ from graflo.db.postgres import PostgresConnection
175
+ from graflo.db.postgres.heuristics import infer_schema_from_postgres
176
+ from graflo.db.connection.onto import PostgresConfig
177
+ from graflo import Caster
178
+ from graflo.onto import DBFlavor
179
+
180
+ # Connect to PostgreSQL
181
+ postgres_config = PostgresConfig.from_docker_env() # or PostgresConfig.from_env()
182
+ postgres_conn = PostgresConnection(postgres_config)
183
+
184
+ # Infer schema from PostgreSQL 3NF database
185
+ schema = infer_schema_from_postgres(
186
+ postgres_conn,
187
+ schema_name="public", # PostgreSQL schema name
188
+ db_flavor=DBFlavor.ARANGO # Target graph database flavor
189
+ )
190
+
191
+ # Close PostgreSQL connection
192
+ postgres_conn.close()
193
+
194
+ # Use the inferred schema with Caster
195
+ caster = Caster(schema)
196
+ # ... continue with ingestion
197
+ ```
198
+
199
+ ## Development
200
+
201
+ To install requirements
202
+
203
+ ```shell
204
+ git clone git@github.com:growgraph/graflo.git && cd graflo
205
+ uv sync --dev
206
+ ```
207
+
208
+ ### Tests
209
+
210
+ #### Test databases
211
+ Spin up Arango from [arango docker folder](./docker/arango) by
212
+
213
+ ```shell
214
+ docker-compose --env-file .env up arango
215
+ ```
216
+
217
+ Neo4j from [neo4j docker folder](./docker/neo4j) by
218
+
219
+ ```shell
220
+ docker-compose --env-file .env up neo4j
221
+ ```
222
+
223
+ and TigerGraph from [tigergraph docker folder](./docker/tigergraph) by
224
+
225
+ ```shell
226
+ docker-compose --env-file .env up tigergraph
227
+ ```
228
+
229
+ To run unit tests
230
+
231
+ ```shell
232
+ pytest test
233
+ ```
234
+
235
+ ## Requirements
236
+
237
+ - Python 3.10 (the package metadata declares `Requires-Python: ~=3.10.0`, i.e. 3.10.x only)
238
+ - python-arango
239
+ - sqlalchemy>=2.0.0 (for PostgreSQL and SQL data sources)
240
+
241
+ ## Contributing
242
+
243
+ Contributions are welcome! Please feel free to submit a Pull Request.
graflo-1.3.7/README.md ADDED
@@ -0,0 +1,214 @@
1
+ # GraFlo <img src="https://raw.githubusercontent.com/growgraph/graflo/main/docs/assets/favicon.ico" alt="graflo logo" style="height: 32px; width:32px;"/>
2
+
3
+ A framework for transforming **tabular** (CSV, SQL) and **hierarchical** data (JSON, XML) into property graphs and ingesting them into graph databases (ArangoDB, Neo4j, **TigerGraph**).
4
+
5
+ > **⚠️ Package Renamed**: This package was formerly known as `graphcast`.
6
+
7
+ ![Python](https://img.shields.io/badge/python-3.10-blue.svg)
8
+ [![PyPI version](https://badge.fury.io/py/graflo.svg)](https://badge.fury.io/py/graflo)
9
+ [![PyPI Downloads](https://static.pepy.tech/badge/graflo)](https://pepy.tech/projects/graflo)
10
+ [![License: BSL](https://img.shields.io/badge/license-BSL--1.1-green)](https://github.com/growgraph/graflo/blob/main/LICENSE)
11
+ [![pre-commit](https://github.com/growgraph/graflo/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/growgraph/graflo/actions/workflows/pre-commit.yml)
12
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15446131.svg)](https://doi.org/10.5281/zenodo.15446131)
13
+
14
+ ## Core Concepts
15
+
16
+ ### Property Graphs
17
+ graflo works with property graphs, which consist of:
18
+
19
+ - **Vertices**: Nodes with properties and optional unique identifiers
20
+ - **Edges**: Relationships between vertices with their own properties
21
+ - **Properties**: Both vertices and edges may have properties
22
+
23
+ ### Schema
24
+ The Schema defines how your data should be transformed into a graph and contains:
25
+
26
+ - **Vertex Definitions**: Specify vertex types, their properties, and unique identifiers
27
+ - Fields can be specified as strings (backward compatible) or typed `Field` objects with types (INT, FLOAT, STRING, DATETIME, BOOL)
28
+ - Type information enables better validation and database-specific optimizations
29
+ - **Edge Definitions**: Define relationships between vertices and their properties
30
+ - Weight fields support typed definitions for better type safety
31
+ - **Resource Mapping**: Describe how data sources map to vertices and edges
32
+ - **Transforms**: Modify data during the casting process
33
+ - **Automatic Schema Inference**: Generate schemas automatically from PostgreSQL 3NF databases
34
+
35
+ ### Resources
36
+ Resources are your data sources that can be:
37
+
38
+ - **Table-like**: CSV files, database tables
39
+ - **JSON-like**: JSON files, nested data structures
40
+
41
+ ## Features
42
+
43
+ - **Graph Transformation Meta-language**: A powerful declarative language to describe how your data becomes a property graph:
44
+ - Define vertex and edge structures with typed fields
45
+ - Set compound indexes for vertices and edges
46
+ - Use blank vertices for complex relationships
47
+ - Specify edge constraints and properties with typed weight fields
48
+ - Apply advanced filtering and transformations
49
+ - **Typed Schema Definitions**: Enhanced type support throughout the schema system
50
+ - Vertex fields support types (INT, FLOAT, STRING, DATETIME, BOOL) for better validation
51
+ - Edge weight fields can specify types for improved type safety
52
+ - Backward compatible: fields without types default to None (suitable for databases like ArangoDB)
53
+ - **🚀 PostgreSQL Schema Inference**: **Automatically generate schemas from PostgreSQL 3NF databases** - No manual schema definition needed!
54
+ - Introspect PostgreSQL schemas to identify vertex-like and edge-like tables
55
+ - Automatically map PostgreSQL data types to graflo Field types (INT, FLOAT, STRING, DATETIME, BOOL)
56
+ - Infer vertex configurations from table structures with proper indexes
57
+ - Infer edge configurations from foreign key relationships
58
+ - Create Resource mappings from PostgreSQL tables automatically
59
+ - Direct database access - ingest data without exporting to files first
60
+ - **Parallel processing**: Use as many cores as you have
61
+ - **Database support**: Ingest into ArangoDB, Neo4j, and **TigerGraph** using the same API (database agnostic). Source data from PostgreSQL and other SQL databases.
62
+ - **Server-side filtering**: Efficient querying with server-side filtering support (TigerGraph REST++ API)
63
+
64
+ ## Documentation
65
+ Full documentation is available at: [growgraph.github.io/graflo](https://growgraph.github.io/graflo)
66
+
67
+ ## Installation
68
+
69
+ ```bash
70
+ pip install graflo
71
+ ```
72
+
73
+ ## Usage Examples
74
+
75
+ ### Simple ingest
76
+
77
+ ```python
78
+ from suthing import FileHandle
79
+
80
+ from graflo import Schema, Caster, Patterns
81
+ from graflo.db.connection.onto import ArangoConfig
82
+
83
+ schema = Schema.from_dict(FileHandle.load("schema.yaml"))
84
+
85
+ # Option 1: Load config from docker/arango/.env (recommended)
86
+ conn_conf = ArangoConfig.from_docker_env()
87
+
88
+ # Option 2: Load from environment variables
89
+ # Set: ARANGO_URI, ARANGO_USERNAME, ARANGO_PASSWORD, ARANGO_DATABASE
90
+ conn_conf = ArangoConfig.from_env()
91
+
92
+ # Option 3: Load with custom prefix (for multiple configs)
93
+ # Set: USER_ARANGO_URI, USER_ARANGO_USERNAME, USER_ARANGO_PASSWORD, USER_ARANGO_DATABASE
94
+ user_conn_conf = ArangoConfig.from_env(prefix="USER")
95
+
96
+ # Option 4: Create config directly
97
+ # conn_conf = ArangoConfig(
98
+ # uri="http://localhost:8535",
99
+ # username="root",
100
+ # password="123",
101
+ # database="mygraph", # For ArangoDB, 'database' maps to schema/graph
102
+ # )
103
+ # Note: If 'database' (or 'schema_name' for TigerGraph) is not set,
104
+ # Caster will automatically use Schema.general.name as fallback
105
+
106
+ from graflo.util.onto import FilePattern
107
+ import pathlib
108
+
109
+ # Create Patterns with file patterns
110
+ patterns = Patterns()
111
+ patterns.add_file_pattern(
112
+ "work",
113
+ FilePattern(regex=r"\Sjson$", sub_path=pathlib.Path("./data"), resource_name="work")
114
+ )
115
+
116
+ # Or use resource_mapping for simpler initialization
117
+ # patterns = Patterns(
118
+ # _resource_mapping={
119
+ # "work": "./data/work.json",
120
+ # }
121
+ # )
122
+
123
+ schema.fetch_resource()
124
+
125
+ from graflo.caster import IngestionParams
126
+
127
+ caster = Caster(schema)
128
+
129
+ ingestion_params = IngestionParams(
130
+ clean_start=False, # Set to True to wipe existing database
131
+ # max_items=1000, # Optional: limit number of items to process
132
+ # batch_size=10000, # Optional: customize batch size
133
+ )
134
+
135
+ caster.ingest(
136
+ output_config=conn_conf, # Target database config
137
+ patterns=patterns, # Source data patterns
138
+ ingestion_params=ingestion_params,
139
+ )
140
+ ```
141
+
142
+ ### PostgreSQL Schema Inference
143
+
144
+ ```python
145
+ from graflo.db.postgres import PostgresConnection
146
+ from graflo.db.postgres.heuristics import infer_schema_from_postgres
147
+ from graflo.db.connection.onto import PostgresConfig
148
+ from graflo import Caster
149
+ from graflo.onto import DBFlavor
150
+
151
+ # Connect to PostgreSQL
152
+ postgres_config = PostgresConfig.from_docker_env() # or PostgresConfig.from_env()
153
+ postgres_conn = PostgresConnection(postgres_config)
154
+
155
+ # Infer schema from PostgreSQL 3NF database
156
+ schema = infer_schema_from_postgres(
157
+ postgres_conn,
158
+ schema_name="public", # PostgreSQL schema name
159
+ db_flavor=DBFlavor.ARANGO # Target graph database flavor
160
+ )
161
+
162
+ # Close PostgreSQL connection
163
+ postgres_conn.close()
164
+
165
+ # Use the inferred schema with Caster
166
+ caster = Caster(schema)
167
+ # ... continue with ingestion
168
+ ```
169
+
170
+ ## Development
171
+
172
+ To install requirements
173
+
174
+ ```shell
175
+ git clone git@github.com:growgraph/graflo.git && cd graflo
176
+ uv sync --dev
177
+ ```
178
+
179
+ ### Tests
180
+
181
+ #### Test databases
182
+ Spin up Arango from [arango docker folder](./docker/arango) by
183
+
184
+ ```shell
185
+ docker-compose --env-file .env up arango
186
+ ```
187
+
188
+ Neo4j from [neo4j docker folder](./docker/neo4j) by
189
+
190
+ ```shell
191
+ docker-compose --env-file .env up neo4j
192
+ ```
193
+
194
+ and TigerGraph from [tigergraph docker folder](./docker/tigergraph) by
195
+
196
+ ```shell
197
+ docker-compose --env-file .env up tigergraph
198
+ ```
199
+
200
+ To run unit tests
201
+
202
+ ```shell
203
+ pytest test
204
+ ```
205
+
206
+ ## Requirements
207
+
208
+ - Python 3.10 (the package metadata declares `Requires-Python: ~=3.10.0`, i.e. 3.10.x only)
209
+ - python-arango
210
+ - sqlalchemy>=2.0.0 (for PostgreSQL and SQL data sources)
211
+
212
+ ## Contributing
213
+
214
+ Contributions are welcome! Please feel free to submit a Pull Request.
@@ -0,0 +1,18 @@
1
+ ### Table Config
2
+
3
+ The table part of the config specifies how input sources are transformed and mapped to vertex collections.
4
+
5
+ ```yaml
6
+ table:
7
+ - tabletype: ibes
8
+ encoding: ISO-8859-1
9
+ transforms:
10
+ - foo: parse_date_ibes
11
+ module: graflo.util.transform
12
+ input:
13
+ - ANNDATS
14
+ - ANNTIMS
15
+ output:
16
+ - datetime_announce
17
+ ```
18
+