graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of graflo might be problematic.

Files changed (70)
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1276 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +418 -0
  7. graflo/architecture/onto.py +376 -0
  8. graflo/architecture/onto_sql.py +54 -0
  9. graflo/architecture/resource.py +163 -0
  10. graflo/architecture/schema.py +135 -0
  11. graflo/architecture/transform.py +292 -0
  12. graflo/architecture/util.py +89 -0
  13. graflo/architecture/vertex.py +562 -0
  14. graflo/caster.py +736 -0
  15. graflo/cli/__init__.py +14 -0
  16. graflo/cli/ingest.py +203 -0
  17. graflo/cli/manage_dbs.py +197 -0
  18. graflo/cli/plot_schema.py +132 -0
  19. graflo/cli/xml2json.py +93 -0
  20. graflo/data_source/__init__.py +48 -0
  21. graflo/data_source/api.py +339 -0
  22. graflo/data_source/base.py +95 -0
  23. graflo/data_source/factory.py +304 -0
  24. graflo/data_source/file.py +148 -0
  25. graflo/data_source/memory.py +70 -0
  26. graflo/data_source/registry.py +82 -0
  27. graflo/data_source/sql.py +183 -0
  28. graflo/db/__init__.py +44 -0
  29. graflo/db/arango/__init__.py +22 -0
  30. graflo/db/arango/conn.py +1025 -0
  31. graflo/db/arango/query.py +180 -0
  32. graflo/db/arango/util.py +88 -0
  33. graflo/db/conn.py +377 -0
  34. graflo/db/connection/__init__.py +6 -0
  35. graflo/db/connection/config_mapping.py +18 -0
  36. graflo/db/connection/onto.py +717 -0
  37. graflo/db/connection/wsgi.py +29 -0
  38. graflo/db/manager.py +119 -0
  39. graflo/db/neo4j/__init__.py +16 -0
  40. graflo/db/neo4j/conn.py +639 -0
  41. graflo/db/postgres/__init__.py +37 -0
  42. graflo/db/postgres/conn.py +948 -0
  43. graflo/db/postgres/fuzzy_matcher.py +281 -0
  44. graflo/db/postgres/heuristics.py +133 -0
  45. graflo/db/postgres/inference_utils.py +428 -0
  46. graflo/db/postgres/resource_mapping.py +273 -0
  47. graflo/db/postgres/schema_inference.py +372 -0
  48. graflo/db/postgres/types.py +148 -0
  49. graflo/db/postgres/util.py +87 -0
  50. graflo/db/tigergraph/__init__.py +9 -0
  51. graflo/db/tigergraph/conn.py +2365 -0
  52. graflo/db/tigergraph/onto.py +26 -0
  53. graflo/db/util.py +49 -0
  54. graflo/filter/__init__.py +21 -0
  55. graflo/filter/onto.py +525 -0
  56. graflo/logging.conf +22 -0
  57. graflo/onto.py +312 -0
  58. graflo/plot/__init__.py +17 -0
  59. graflo/plot/plotter.py +616 -0
  60. graflo/util/__init__.py +23 -0
  61. graflo/util/chunker.py +807 -0
  62. graflo/util/merge.py +150 -0
  63. graflo/util/misc.py +37 -0
  64. graflo/util/onto.py +422 -0
  65. graflo/util/transform.py +454 -0
  66. graflo-1.3.7.dist-info/METADATA +243 -0
  67. graflo-1.3.7.dist-info/RECORD +70 -0
  68. graflo-1.3.7.dist-info/WHEEL +4 -0
  69. graflo-1.3.7.dist-info/entry_points.txt +5 -0
  70. graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
graflo/util/transform.py
@@ -0,0 +1,454 @@
+ """Data transformation utilities for graph operations.
+
+ This module provides utility functions for transforming and standardizing data
+ in various formats, particularly for graph database operations. It includes
+ functions for date parsing, string standardization, and data cleaning.
+
+ Key Functions:
+     - standardize: Standardize string keys and names
+     - parse_date_*: Various date parsing functions for different formats
+     - cast_ibes_analyst: Parse and standardize analyst names
+     - clear_first_level_nones: Clean dictionaries by removing None values
+     - parse_multi_item: Parse complex multi-item strings
+     - pick_unique_dict: Remove duplicate dictionaries
+
+ Example:
+     >>> name = standardize("John. Doe, Smith")
+     >>> date = parse_date_standard("2023-01-01")
+     >>> analyst = cast_ibes_analyst("ADKINS/NARRA")
+ """
+
+ import logging
+ import re
+ import time
+ from collections import defaultdict
+ from datetime import datetime
+
+ ORDINAL_SUFFIX = ["st", "nd", "rd", "th"]
+
+ logger = logging.getLogger(__name__)
+
+
+ def standardize(k):
+     """Standardizes a string key by removing periods and splitting.
+
+     Handles comma and space-separated strings, normalizing their format.
+
+     Args:
+         k (str): Input string to be standardized.
+
+     Returns:
+         str: Cleaned and standardized string.
+
+     Example:
+         >>> standardize("John. Doe, Smith")
+         'John Doe,Smith'
+         >>> standardize("John Doe Smith")
+         'John,Doe,Smith'
+     """
+     k = k.translate(str.maketrans({".": ""}))
+     # try to split by ", "
+     k = k.split(", ")
+     if len(k) < 2:
+         k = k[0].split(" ")
+     else:
+         k[1] = k[1].translate(str.maketrans({" ": ""}))
+     return ",".join(k)
+
+
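Reviewer's note: the two code paths above are easy to conflate, so here is a quick trace (outputs derived from the function body as published; the first docstring example was corrected accordingly):

```python
from graflo.util.transform import standardize

# ", " found: periods are stripped, spaces removed only from the second part
standardize("John. Doe, Smith")  # -> 'John Doe,Smith'

# no ", " found: falls back to splitting on single spaces
standardize("John Doe Smith")    # -> 'John,Doe,Smith'
```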
+ def parse_date_standard(input_str):
+     """Parse a date string in YYYY-MM-DD format.
+
+     Args:
+         input_str (str): Date string in YYYY-MM-DD format.
+
+     Returns:
+         tuple: (year, month, day) as integers.
+
+     Example:
+         >>> parse_date_standard("2023-01-01")
+         (2023, 1, 1)
+     """
+     dt = datetime.strptime(input_str, "%Y-%m-%d")
+     return dt.year, dt.month, dt.day
+
+
+ def parse_date_conf(input_str):
+     """Parse a date string in YYYYMMDD format.
+
+     Args:
+         input_str (str): Date string in YYYYMMDD format.
+
+     Returns:
+         tuple: (year, month, day) as integers.
+
+     Example:
+         >>> parse_date_conf("20230101")
+         (2023, 1, 1)
+     """
+     dt = datetime.strptime(input_str, "%Y%m%d")
+     return dt.year, dt.month, dt.day
+
+
+ def parse_date_ibes(date0, time0):
+     """Converts IBES date and time to an ISO 8601-style datetime string.
+
+     Note that time0 is interpolated as-is: an unpadded hour such as "9:35:52"
+     stays unpadded, so pad upstream if strict ISO 8601 is required.
+
+     Args:
+         date0 (str/int): Date in YYYYMMDD format.
+         time0 (str): Time in HH:MM:SS format.
+
+     Returns:
+         str: Datetime string (YYYY-MM-DDTHH:MM:SSZ).
+
+     Example:
+         >>> parse_date_ibes(20160126, "09:35:52")
+         '2016-01-26T09:35:52Z'
+     """
+     date0 = str(date0)
+     year, month, day = date0[:4], date0[4:6], date0[6:]
+     full_datetime = f"{year}-{month}-{day}T{time0}Z"
+
+     return full_datetime
+
+
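To make the padding caveat concrete, a small sketch (expected outputs read directly off the f-string above):

```python
from graflo.util.transform import parse_date_ibes

parse_date_ibes(20160126, "9:35:52")   # -> '2016-01-26T9:35:52Z' (hour not zero-padded)
parse_date_ibes(20160126, "09:35:52")  # -> '2016-01-26T09:35:52Z' (strict ISO 8601)
```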
+ def parse_date_yahoo(date0):
+     """Convert Yahoo Finance date to ISO 8601 format.
+
+     Args:
+         date0 (str): Date in YYYY-MM-DD format.
+
+     Returns:
+         str: Datetime in ISO 8601 format with noon time.
+
+     Example:
+         >>> parse_date_yahoo("2023-01-01")
+         '2023-01-01T12:00:00Z'
+     """
+     full_datetime = f"{date0}T12:00:00Z"
+     return full_datetime
+
+
+ def round_str(x, **kwargs):
+     """Round a string number to specified precision.
+
+     Args:
+         x (str): String representation of a number.
+         **kwargs: Additional arguments for round() function.
+
+     Returns:
+         float: Rounded number.
+
+     Example:
+         >>> round_str("3.14159", ndigits=2)
+         3.14
+     """
+     return round(float(x), **kwargs)
+
+
+ def parse_date_standard_to_epoch(input_str):
+     """Convert standard date string to Unix epoch timestamp.
+
+     Uses time.mktime, so the result depends on the local timezone.
+
+     Args:
+         input_str (str): Date string in YYYY-MM-DD format.
+
+     Returns:
+         float: Unix epoch timestamp.
+
+     Example:
+         >>> parse_date_standard_to_epoch("2023-01-01")  # with TZ=UTC
+         1672531200.0
+     """
+     dt = datetime.strptime(input_str, "%Y-%m-%d").timetuple()
+     timestamp = time.mktime(dt)
+     return timestamp
+
+
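Since `time.mktime` applies the local timezone, callers needing a timezone-stable epoch may prefer `calendar.timegm`; a standard-library sketch (not part of graflo):

```python
import calendar
import time
from datetime import datetime

tt = datetime.strptime("2023-01-01", "%Y-%m-%d").timetuple()
time.mktime(tt)      # varies with the local timezone
calendar.timegm(tt)  # 1672531200, interpreting the struct_time as UTC
```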
+ def cast_ibes_analyst(s):
+     """Splits and normalizes analyst name strings.
+
+     Handles various name formats like 'ADKINS/NARRA' or 'ARFSTROM J'.
+
+     Args:
+         s (str): Analyst name string.
+
+     Returns:
+         tuple: (last_name, first_initial)
+
+     Examples:
+         >>> cast_ibes_analyst('ADKINS/NARRA')
+         ('ADKINS', 'N')
+         >>> cast_ibes_analyst('ARFSTROM J')
+         ('ARFSTROM', 'J')
+     """
+     if " " in s or "\t" in s:
+         r = s.split()[:2]
+         if len(r) < 2:
+             return r[0], ""
+         else:
+             return r[0], r[1][:1]
+     else:
+         r = s.split("/")
+         if s.startswith("/"):
+             r = r[1:3]
+         else:
+             r = r[:2]
+         if len(r) < 2:
+             return r[0], ""
+         else:
+             return r[0], r[1][:1]
+
+
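The four outcomes of the branching above, traced on representative IBES-style inputs:

```python
from graflo.util.transform import cast_ibes_analyst

cast_ibes_analyst("ARFSTROM J")    # whitespace path -> ('ARFSTROM', 'J')
cast_ibes_analyst("ADKINS/NARRA")  # slash path      -> ('ADKINS', 'N')
cast_ibes_analyst("/LI/WANG")      # leading slash   -> ('LI', 'W')
cast_ibes_analyst("SMITH")         # single token    -> ('SMITH', '')
```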
+ def parse_date_reference(input_str):
+     """Extract year from a date reference string.
+
+     Args:
+         input_str (str): Date reference string.
+
+     Returns:
+         int: Year from the date reference.
+
+     Example:
+         >>> parse_date_reference("1923, May 10")
+         1923
+     """
+     return _parse_date_reference(input_str)["year"]
+
+
+ def _parse_date_reference(input_str):
+     """Parse complex, human-written date references.
+
+     Handles various date formats like:
+         - "1923, May 10"
+         - "1923, July"
+         - "1921, Sept"
+         - "1935-36"
+         - "1926, December 24th"
+
+     Args:
+         input_str (str): Date string in various formats.
+
+     Returns:
+         dict: Parsed date information with keys 'year', optional 'month', 'day'.
+             If parsing fails, 'year' holds the raw input string.
+
+     Example:
+         >>> _parse_date_reference("1923, May 10")
+         {'year': 1923, 'month': 5, 'day': 10}
+     """
+     if "," in input_str:
+         if len(input_str.split(" ")) == 3:
+             if input_str[-2:] in ORDINAL_SUFFIX:
+                 input_str = input_str[:-2]
+             try:
+                 dt = datetime.strptime(input_str, "%Y, %B %d")
+                 return {"year": dt.year, "month": dt.month, "day": dt.day}
+             except:
+                 try:
+                     aux = input_str.split(" ")
+                     input_str = " ".join([aux[0]] + [aux[1][:3]] + [aux[2]])
+                     dt = datetime.strptime(input_str, "%Y, %b %d")
+                     return {"year": dt.year, "month": dt.month, "day": dt.day}
+                 except:
+                     return {"year": input_str}
+         else:
+             try:
+                 dt = datetime.strptime(input_str, "%Y, %B")
+                 return {"year": dt.year, "month": dt.month}
+             except:
+                 try:
+                     aux = input_str.split(" ")
+                     input_str = " ".join([aux[0]] + [aux[1][:3]])
+                     dt = datetime.strptime(input_str, "%Y, %b")
+                     return {"year": dt.year, "month": dt.month}
+                 except:
+                     return {"year": input_str}
+     else:
+         try:
+             dt = datetime.strptime(input_str[:4], "%Y")
+             return {"year": dt.year}
+         except:
+             return {"year": input_str}
+
+
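Tracing the formats listed in the docstring through these branches (the fallback returns the raw string under 'year'):

```python
from graflo.util.transform import _parse_date_reference

_parse_date_reference("1923, May 10")         # -> {'year': 1923, 'month': 5, 'day': 10}
_parse_date_reference("1921, Sept")           # '%B' fails, retried as '%b' -> {'year': 1921, 'month': 9}
_parse_date_reference("1926, December 24th")  # ordinal suffix stripped -> {'year': 1926, 'month': 12, 'day': 24}
_parse_date_reference("1935-36")              # leading year only -> {'year': 1935}
_parse_date_reference("circa 1900s")          # unparseable -> {'year': 'circa 1900s'}
```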
+ def try_int(x):
+     """Attempt to convert a value to integer.
+
+     Args:
+         x: Value to convert.
+
+     Returns:
+         int or original value: Integer if conversion successful, original value otherwise.
+
+     Example:
+         >>> try_int("123")
+         123
+         >>> try_int("abc")
+         'abc'
+     """
+     try:
+         x = int(x)
+         return x
+     except:
+         return x
+
+
+ def clear_first_level_nones(docs, keys_keep_nones: list | None = None):
+     """Removes falsy values from dictionaries, with optional key exceptions.
+
+     Note: filtering is truthiness-based (0 and "" are dropped along with None)
+     and is only applied when keys_keep_nones is provided; otherwise docs are
+     returned unchanged.
+
+     Args:
+         docs (list): List of dictionaries to clean.
+         keys_keep_nones (list, optional): Keys to keep even if their value is falsy.
+
+     Returns:
+         list: Cleaned list of dictionaries.
+
+     Example:
+         >>> docs = [{"a": 1, "b": None}, {"a": None, "b": 2}]
+         >>> clear_first_level_nones(docs, keys_keep_nones=["a"])
+         [{'a': 1}, {'a': None, 'b': 2}]
+     """
+     if keys_keep_nones is not None:
+         docs = [
+             {k: v for k, v in tdict.items() if v or k in keys_keep_nones}
+             for tdict in docs
+         ]
+     return docs
+
+
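As the docstring now notes, the filter is truthiness-based and only runs when an exceptions list is passed; a quick trace:

```python
from graflo.util.transform import clear_first_level_nones

docs = [{"a": 1, "b": None, "c": 0}, {"a": None, "b": 2}]
clear_first_level_nones(docs, keys_keep_nones=["a"])
# -> [{'a': 1}, {'a': None, 'b': 2}]  (falsy "c": 0 is dropped as well)
clear_first_level_nones(docs)  # keys_keep_nones=None -> docs returned unchanged
```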
+ def parse_multi_item(s, mapper: dict, direct: list):
+     """Parses complex multi-item strings into structured data.
+
+     Supports parsing strings with quoted or bracketed items.
+
+     Args:
+         s (str): Input string to parse.
+         mapper (dict): Mapping of input keys to output keys.
+         direct (list): Direct keys to extract.
+
+     Returns:
+         defaultdict: Parsed items with lists as values.
+
+     Example:
+         >>> s = "'name: John, age: 30' 'name: Jane, age: 25'"
+         >>> parse_multi_item(s, {"name": "full_name"}, ["age"])
+         defaultdict(<class 'list'>, {'full_name': [' John', ' Jane'], 'age': [' 30', ' 25']})
+     """
+     if "'" in s:
+         items_str = re.findall(r"\"(.*?)\"", s) + re.findall(r"\'(.*?)\'", s)
+     else:
+         # remove brackets
+         items_str = re.findall(r"\[([^]]+)", s)[0].split()
+     r: defaultdict[str, list] = defaultdict(list)
+     for item in items_str:
+         doc0 = [ss.strip().split(":") for ss in item.split(",")]
+         if all([len(x) == 2 for x in doc0]):
+             doc0_dict = dict(doc0)
+             for n_init, n_final in mapper.items():
+                 try:
+                     r[n_final] += [doc0_dict[n_init]]
+                 except KeyError:
+                     r[n_final] += [None]
+
+             for n_final in direct:
+                 # Use field.name for dictionary keys (JSON serialization requires strings)
+                 # Handle both Field objects and strings for backward compatibility
+                 key = n_final.name if hasattr(n_final, "name") else str(n_final)
+                 try:
+                     r[key] += [doc0_dict[key]]
+                 except KeyError:
+                     r[key] += [None]
+         else:
+             for key, value in zip(direct, doc0):
+                 # Use field.name for dictionary keys (JSON serialization requires strings)
+                 # Handle both Field objects and strings for backward compatibility
+                 key_str = key.name if hasattr(key, "name") else str(key)
+                 r[key_str] += [value]
+
+     return r
+
+
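The quoted-items path traced end to end (this replaces the previous docstring example, which did not match the bracket branch's whitespace split); note that values keep the space after the colon, since only the outer comma split is stripped:

```python
from graflo.util.transform import parse_multi_item

s = "'name: John, age: 30' 'name: Jane, age: 25'"
parse_multi_item(s, mapper={"name": "full_name"}, direct=["age"])
# -> defaultdict(<class 'list'>, {'full_name': [' John', ' Jane'], 'age': [' 30', ' 25']})
```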
+ def pick_unique_dict(docs):
+     """Removes duplicate dictionaries from a list.
+
+     Uses a hash-based approach to identify unique dictionaries, which is more
+     efficient than JSON serialization and preserves original object types.
+
+     Args:
+         docs (list): List of dictionaries.
+
+     Returns:
+         list: List of unique dictionaries (preserving original objects).
+
+     Example:
+         >>> docs = [{"a": 1}, {"a": 1}, {"b": 2}]
+         >>> pick_unique_dict(docs)
+         [{'a': 1}, {'b': 2}]
+     """
+     from datetime import date, datetime, time
+     from decimal import Decimal
+
+     def make_hashable(obj):
+         """Convert an object to a hashable representation.
+
+         Handles nested structures, datetime objects, and Decimal types.
+
+         Args:
+             obj: Object to make hashable
+
+         Returns:
+             Hashable representation of the object
+         """
+         if isinstance(obj, dict):
+             # Sort items by key for consistent hashing
+             return tuple(sorted((k, make_hashable(v)) for k, v in obj.items()))
+         elif isinstance(obj, (list, tuple)):
+             return tuple(make_hashable(item) for item in obj)
+         elif isinstance(obj, (datetime, date, time)):
+             # Convert to ISO format string for hashing
+             return ("__datetime__", obj.isoformat())
+         elif isinstance(obj, Decimal):
+             # Convert to string representation to preserve precision
+             return ("__decimal__", str(obj))
+         elif isinstance(obj, set):
+             # Convert set to sorted tuple for consistent hashing
+             return tuple(sorted(make_hashable(item) for item in obj))
+         else:
+             # Primitive types (int, float, str, bool, None) are already hashable
+             return obj
+
+     # Use a dict to preserve insertion order and original objects
+     seen = {}
+     for doc in docs:
+         # Create hashable representation
+         hashable_repr = make_hashable(doc)
+         # Use hashable representation as key, original doc as value
+         if hashable_repr not in seen:
+             seen[hashable_repr] = doc
+
+     # Return list of unique documents (preserving original objects)
+     return list(seen.values())
+
+
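A quick check that the values make_hashable special-cases survive deduplication:

```python
from datetime import datetime
from decimal import Decimal
from graflo.util.transform import pick_unique_dict

docs = [
    {"ts": datetime(2023, 1, 1), "amount": Decimal("1.10"), "tags": {"x", "y"}},
    {"ts": datetime(2023, 1, 1), "amount": Decimal("1.10"), "tags": {"y", "x"}},
]
pick_unique_dict(docs)  # -> one document; sets hash order-insensitively, Decimals keep precision
```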
+ def split_keep_part(s: str, sep="/", keep=-1) -> str:
+     """Split a string and keep specified parts.
+
+     Args:
+         s (str): String to split.
+         sep (str): Separator to split on.
+         keep (int or list): Index or indices to keep.
+
+     Returns:
+         str: Joined string of kept parts.
+
+     Example:
+         >>> split_keep_part("a/b/c", keep=0)
+         'a'
+         >>> split_keep_part("a/b/c", keep=[0, 2])
+         'a/c'
+     """
+     if isinstance(keep, list):
+         items = s.split(sep)
+         return sep.join(items[k] for k in keep)
+     else:
+         return s.split(sep)[keep]
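Putting several of the utilities in this module together, a minimal cleaning pass of the kind they appear designed for (illustrative only; field names are invented):

```python
from graflo.util.transform import (
    clear_first_level_nones,
    parse_date_standard,
    pick_unique_dict,
    split_keep_part,
)

rows = [
    {"name": "John Doe Smith", "date": "2023-01-01", "path": "data/raw/file.json", "note": None},
    {"name": "John Doe Smith", "date": "2023-01-01", "path": "data/raw/file.json", "note": None},
]
rows = pick_unique_dict(rows)                             # drop the duplicate row
rows = clear_first_level_nones(rows, keys_keep_nones=[])  # drop falsy fields ('note')
year, month, day = parse_date_standard(rows[0]["date"])   # (2023, 1, 1)
filename = split_keep_part(rows[0]["path"], sep="/", keep=-1)  # 'file.json'
```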
graflo-1.3.7.dist-info/METADATA
@@ -0,0 +1,243 @@
+ Metadata-Version: 2.4
+ Name: graflo
+ Version: 1.3.7
+ Summary: A framework for transforming tabular (CSV, SQL) and hierarchical data (JSON, XML) into property graphs and ingesting them into graph databases (ArangoDB, Neo4j, TigerGraph). Features automatic PostgreSQL schema inference.
+ Author-email: Alexander Belikov <alexander@growgraph.dev>
+ License-File: LICENSE
+ Requires-Python: ~=3.10.0
+ Requires-Dist: click<9,>=8.2.0
+ Requires-Dist: dataclass-wizard>=0.34.0
+ Requires-Dist: ijson<4,>=3.2.3
+ Requires-Dist: neo4j<6,>=5.22.0
+ Requires-Dist: networkx~=3.3
+ Requires-Dist: pandas-stubs==2.3.0.250703
+ Requires-Dist: pandas<3,>=2.0.3
+ Requires-Dist: psycopg2-binary>=2.9.11
+ Requires-Dist: pydantic-settings>=2.12.0
+ Requires-Dist: pydantic>=2.12.5
+ Requires-Dist: python-arango<9,>=8.1.2
+ Requires-Dist: pytigergraph>=1.9.0
+ Requires-Dist: requests>=2.31.0
+ Requires-Dist: sqlalchemy>=2.0.0
+ Requires-Dist: strenum>=0.4.15
+ Requires-Dist: suthing>=0.5.0
+ Requires-Dist: urllib3>=2.0.0
+ Requires-Dist: xmltodict<0.15,>=0.14.2
+ Provides-Extra: plot
+ Requires-Dist: pygraphviz>=1.14; extra == 'plot'
+ Description-Content-Type: text/markdown
+
+ # GraFlo <img src="https://raw.githubusercontent.com/growgraph/graflo/main/docs/assets/favicon.ico" alt="graflo logo" style="height: 32px; width:32px;"/>
+
+ A framework for transforming **tabular** (CSV, SQL) and **hierarchical** data (JSON, XML) into property graphs and ingesting them into graph databases (ArangoDB, Neo4j, **TigerGraph**).
+
+ > **⚠️ Package Renamed**: This package was formerly known as `graphcast`.
+
+ ![Python](https://img.shields.io/badge/python-3.10-blue.svg)
+ [![PyPI version](https://badge.fury.io/py/graflo.svg)](https://badge.fury.io/py/graflo)
+ [![PyPI Downloads](https://static.pepy.tech/badge/graflo)](https://pepy.tech/projects/graflo)
+ [![License: BSL](https://img.shields.io/badge/license-BSL--1.1-green)](https://github.com/growgraph/graflo/blob/main/LICENSE)
+ [![pre-commit](https://github.com/growgraph/graflo/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/growgraph/graflo/actions/workflows/pre-commit.yml)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15446131.svg)](https://doi.org/10.5281/zenodo.15446131)
+
+ ## Core Concepts
+
+ ### Property Graphs
+ graflo works with property graphs, which consist of:
+
+ - **Vertices**: Nodes with properties and optional unique identifiers
+ - **Edges**: Relationships between vertices with their own properties
+ - **Properties**: Both vertices and edges may have properties
+
+ ### Schema
+ The Schema defines how your data should be transformed into a graph (see the sketch after this list) and contains:
+
+ - **Vertex Definitions**: Specify vertex types, their properties, and unique identifiers
+     - Fields can be specified as strings (backward compatible) or typed `Field` objects with types (INT, FLOAT, STRING, DATETIME, BOOL)
+     - Type information enables better validation and database-specific optimizations
+ - **Edge Definitions**: Define relationships between vertices and their properties
+     - Weight fields support typed definitions for better type safety
+ - **Resource Mapping**: Describe how data sources map to vertices and edges
+ - **Transforms**: Modify data during the casting process
+ - **Automatic Schema Inference**: Generate schemas automatically from PostgreSQL 3NF databases
+
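The ingest example below loads such a schema from YAML via `Schema.from_dict`. For intuition only, a schema dict might look roughly like this sketch; the key names are guesses for exposition, not graflo's actual schema grammar, so consult the documentation for the real spec:

```python
# Hypothetical shape, NOT the verified graflo schema grammar:
schema_dict = {
    "vertices": [
        {"name": "person", "fields": ["id", "full_name"]},  # invented keys
        {"name": "company", "fields": ["ticker"]},
    ],
    "edges": [
        {"source": "person", "target": "company"},          # invented keys
    ],
}
# The README's example would then do: schema = Schema.from_dict(schema_dict)
```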
+ ### Resources
+ Resources are your data sources that can be:
+
+ - **Table-like**: CSV files, database tables
+ - **JSON-like**: JSON files, nested data structures
+
+ ## Features
+
+ - **Graph Transformation Meta-language**: A powerful declarative language to describe how your data becomes a property graph:
+     - Define vertex and edge structures with typed fields
+     - Set compound indexes for vertices and edges
+     - Use blank vertices for complex relationships
+     - Specify edge constraints and properties with typed weight fields
+     - Apply advanced filtering and transformations
+ - **Typed Schema Definitions**: Enhanced type support throughout the schema system
+     - Vertex fields support types (INT, FLOAT, STRING, DATETIME, BOOL) for better validation
+     - Edge weight fields can specify types for improved type safety
+     - Backward compatible: fields without types default to None (suitable for databases like ArangoDB)
+ - **🚀 PostgreSQL Schema Inference**: **Automatically generate schemas from PostgreSQL 3NF databases** - No manual schema definition needed!
+     - Introspect PostgreSQL schemas to identify vertex-like and edge-like tables
+     - Automatically map PostgreSQL data types to graflo Field types (INT, FLOAT, STRING, DATETIME, BOOL)
+     - Infer vertex configurations from table structures with proper indexes
+     - Infer edge configurations from foreign key relationships
+     - Create Resource mappings from PostgreSQL tables automatically
+     - Direct database access - ingest data without exporting to files first
+ - **Parallel processing**: Use as many cores as you have
+ - **Database support**: Ingest into ArangoDB, Neo4j, and **TigerGraph** using the same API (database agnostic). Source data from PostgreSQL and other SQL databases.
+ - **Server-side filtering**: Efficient querying with server-side filtering support (TigerGraph REST++ API)
+
+ ## Documentation
+ Full documentation is available at: [growgraph.github.io/graflo](https://growgraph.github.io/graflo)
+
+ ## Installation
+
+ ```bash
+ pip install graflo
+ ```
+
+ ## Usage Examples
+
+ ### Simple ingest
+
+ ```python
+ from suthing import FileHandle
+
+ from graflo import Schema, Caster, Patterns
+ from graflo.db.connection.onto import ArangoConfig
+
+ schema = Schema.from_dict(FileHandle.load("schema.yaml"))
+
+ # Option 1: Load config from docker/arango/.env (recommended)
+ conn_conf = ArangoConfig.from_docker_env()
+
+ # Option 2: Load from environment variables
+ # Set: ARANGO_URI, ARANGO_USERNAME, ARANGO_PASSWORD, ARANGO_DATABASE
+ conn_conf = ArangoConfig.from_env()
+
+ # Option 3: Load with custom prefix (for multiple configs)
+ # Set: USER_ARANGO_URI, USER_ARANGO_USERNAME, USER_ARANGO_PASSWORD, USER_ARANGO_DATABASE
+ user_conn_conf = ArangoConfig.from_env(prefix="USER")
+
+ # Option 4: Create config directly
+ # conn_conf = ArangoConfig(
+ #     uri="http://localhost:8535",
+ #     username="root",
+ #     password="123",
+ #     database="mygraph",  # For ArangoDB, 'database' maps to schema/graph
+ # )
+ # Note: If 'database' (or 'schema_name' for TigerGraph) is not set,
+ # Caster will automatically use Schema.general.name as fallback
+
+ from graflo.util.onto import FilePattern
+ import pathlib
+
+ # Create Patterns with file patterns (use a raw string for the regex)
+ patterns = Patterns()
+ patterns.add_file_pattern(
+     "work",
+     FilePattern(regex=r"\Sjson$", sub_path=pathlib.Path("./data"), resource_name="work")
+ )
+
+ # Or use resource_mapping for simpler initialization
+ # patterns = Patterns(
+ #     _resource_mapping={
+ #         "work": "./data/work.json",
+ #     }
+ # )
+
+ schema.fetch_resource()
+
+ from graflo.caster import IngestionParams
+
+ caster = Caster(schema)
+
+ ingestion_params = IngestionParams(
+     clean_start=False,  # Set to True to wipe existing database
+     # max_items=1000,  # Optional: limit number of items to process
+     # batch_size=10000,  # Optional: customize batch size
+ )
+
+ caster.ingest(
+     output_config=conn_conf,  # Target database config
+     patterns=patterns,  # Source data patterns
+     ingestion_params=ingestion_params,
+ )
+ ```
+
+ ### PostgreSQL Schema Inference
+
+ ```python
+ from graflo.db.postgres import PostgresConnection
+ from graflo.db.postgres.heuristics import infer_schema_from_postgres
+ from graflo.db.connection.onto import PostgresConfig
+ from graflo import Caster
+ from graflo.onto import DBFlavor
+
+ # Connect to PostgreSQL
+ postgres_config = PostgresConfig.from_docker_env()  # or PostgresConfig.from_env()
+ postgres_conn = PostgresConnection(postgres_config)
+
+ # Infer schema from PostgreSQL 3NF database
+ schema = infer_schema_from_postgres(
+     postgres_conn,
+     schema_name="public",  # PostgreSQL schema name
+     db_flavor=DBFlavor.ARANGO  # Target graph database flavor
+ )
+
+ # Close PostgreSQL connection
+ postgres_conn.close()
+
+ # Use the inferred schema with Caster
+ caster = Caster(schema)
+ # ... continue with ingestion
+ ```
+
+ ## Development
+
+ To install requirements:
+
+ ```shell
+ git clone git@github.com:growgraph/graflo.git && cd graflo
+ uv sync --dev
+ ```
+
+ ### Tests
+
+ #### Test databases
+ Spin up Arango from the [arango docker folder](./docker/arango) by
+
+ ```shell
+ docker-compose --env-file .env up arango
+ ```
+
+ Neo4j from the [neo4j docker folder](./docker/neo4j) by
+
+ ```shell
+ docker-compose --env-file .env up neo4j
+ ```
+
+ and TigerGraph from the [tigergraph docker folder](./docker/tigergraph) by
+
+ ```shell
+ docker-compose --env-file .env up tigergraph
+ ```
+
+ To run unit tests:
+
+ ```shell
+ pytest test
+ ```
+
+ ## Requirements
+
+ - Python 3.10+
+ - python-arango
+ - sqlalchemy>=2.0.0 (for PostgreSQL and SQL data sources)
+
+ ## Contributing
+
+ Contributions are welcome! Please feel free to submit a Pull Request.