graflo 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1120 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +297 -0
  7. graflo/architecture/onto.py +374 -0
  8. graflo/architecture/resource.py +161 -0
  9. graflo/architecture/schema.py +136 -0
  10. graflo/architecture/transform.py +292 -0
  11. graflo/architecture/util.py +93 -0
  12. graflo/architecture/vertex.py +586 -0
  13. graflo/caster.py +655 -0
  14. graflo/cli/__init__.py +14 -0
  15. graflo/cli/ingest.py +194 -0
  16. graflo/cli/manage_dbs.py +197 -0
  17. graflo/cli/plot_schema.py +132 -0
  18. graflo/cli/xml2json.py +93 -0
  19. graflo/data_source/__init__.py +48 -0
  20. graflo/data_source/api.py +339 -0
  21. graflo/data_source/base.py +97 -0
  22. graflo/data_source/factory.py +298 -0
  23. graflo/data_source/file.py +133 -0
  24. graflo/data_source/memory.py +72 -0
  25. graflo/data_source/registry.py +82 -0
  26. graflo/data_source/sql.py +185 -0
  27. graflo/db/__init__.py +44 -0
  28. graflo/db/arango/__init__.py +22 -0
  29. graflo/db/arango/conn.py +1026 -0
  30. graflo/db/arango/query.py +180 -0
  31. graflo/db/arango/util.py +88 -0
  32. graflo/db/conn.py +377 -0
  33. graflo/db/connection/__init__.py +6 -0
  34. graflo/db/connection/config_mapping.py +18 -0
  35. graflo/db/connection/onto.py +688 -0
  36. graflo/db/connection/wsgi.py +29 -0
  37. graflo/db/manager.py +119 -0
  38. graflo/db/neo4j/__init__.py +16 -0
  39. graflo/db/neo4j/conn.py +639 -0
  40. graflo/db/postgres/__init__.py +156 -0
  41. graflo/db/postgres/conn.py +425 -0
  42. graflo/db/postgres/resource_mapping.py +139 -0
  43. graflo/db/postgres/schema_inference.py +245 -0
  44. graflo/db/postgres/types.py +148 -0
  45. graflo/db/tigergraph/__init__.py +9 -0
  46. graflo/db/tigergraph/conn.py +2212 -0
  47. graflo/db/util.py +49 -0
  48. graflo/filter/__init__.py +21 -0
  49. graflo/filter/onto.py +525 -0
  50. graflo/logging.conf +22 -0
  51. graflo/onto.py +190 -0
  52. graflo/plot/__init__.py +17 -0
  53. graflo/plot/plotter.py +556 -0
  54. graflo/util/__init__.py +23 -0
  55. graflo/util/chunker.py +751 -0
  56. graflo/util/merge.py +150 -0
  57. graflo/util/misc.py +37 -0
  58. graflo/util/onto.py +332 -0
  59. graflo/util/transform.py +448 -0
  60. graflo-1.3.3.dist-info/METADATA +190 -0
  61. graflo-1.3.3.dist-info/RECORD +64 -0
  62. graflo-1.3.3.dist-info/WHEEL +4 -0
  63. graflo-1.3.3.dist-info/entry_points.txt +5 -0
  64. graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,448 @@
+ """Data transformation utilities for graph operations.
+
+ This module provides utility functions for transforming and standardizing data
+ in various formats, particularly for graph database operations. It includes
+ functions for date parsing, string standardization, and data cleaning.
+
+ Key Functions:
+     - standardize: Standardize string keys and names
+     - parse_date_*: Various date parsing functions for different formats
+     - cast_ibes_analyst: Parse and standardize analyst names
+     - clear_first_level_nones: Clean dictionaries by removing None values
+     - parse_multi_item: Parse complex multi-item strings
+     - pick_unique_dict: Remove duplicate dictionaries
+
+ Example:
+     >>> name = standardize("John. Doe, Smith")
+     >>> date = parse_date_standard("2023-01-01")
+     >>> analyst = cast_ibes_analyst("ADKINS/NARRA")
+ """
+
+ import logging
+ import re
+ import time
+ from collections import defaultdict
+ from datetime import datetime
+
+ ORDINAL_SUFFIX = ["st", "nd", "rd", "th"]
+
+ logger = logging.getLogger(__name__)
+
+
+ def standardize(k):
+     """Standardizes a string key by removing periods and splitting.
+
+     Handles comma and space-separated strings, normalizing their format.
+
+     Args:
+         k (str): Input string to be standardized.
+
+     Returns:
+         str: Cleaned and standardized string.
+
+     Example:
+         >>> standardize("John. Doe, Smith")
+         'John,Doe,Smith'
+         >>> standardize("John Doe Smith")
+         'John,Doe,Smith'
+     """
+     k = k.translate(str.maketrans({".": ""}))
+     # try to split by ", "
+     k = k.split(", ")
+     if len(k) < 2:
+         k = k[0].split(" ")
+     else:
+         k[1] = k[1].translate(str.maketrans({" ": ""}))
+     return ",".join(k)
+
+
+ def parse_date_standard(input_str):
+     """Parse a date string in YYYY-MM-DD format.
+
+     Args:
+         input_str (str): Date string in YYYY-MM-DD format.
+
+     Returns:
+         tuple: (year, month, day) as integers.
+
+     Example:
+         >>> parse_date_standard("2023-01-01")
+         (2023, 1, 1)
+     """
+     dt = datetime.strptime(input_str, "%Y-%m-%d")
+     return dt.year, dt.month, dt.day
+
+
+ def parse_date_conf(input_str):
+     """Parse a date string in YYYYMMDD format.
+
+     Args:
+         input_str (str): Date string in YYYYMMDD format.
+
+     Returns:
+         tuple: (year, month, day) as integers.
+
+     Example:
+         >>> parse_date_conf("20230101")
+         (2023, 1, 1)
+     """
+     dt = datetime.strptime(input_str, "%Y%m%d")
+     return dt.year, dt.month, dt.day
+
+
+ def parse_date_ibes(date0, time0):
+     """Converts IBES date and time to ISO 8601 format datetime.
+
+     Args:
+         date0 (str/int): Date in YYYYMMDD format.
+         time0 (str): Time in HH:MM:SS format.
+
+     Returns:
+         str: Datetime in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ).
+
+     Example:
+         >>> parse_date_ibes(20160126, "9:35:52")
+         '2016-01-26T09:35:52Z'
+     """
+     date0 = str(date0)
+     year, month, day = date0[:4], date0[4:6], date0[6:]
+     full_datetime = f"{year}-{month}-{day}T{time0}Z"
+
+     return full_datetime
+
+
+ def parse_date_yahoo(date0):
+     """Convert Yahoo Finance date to ISO 8601 format.
+
+     Args:
+         date0 (str): Date in YYYY-MM-DD format.
+
+     Returns:
+         str: Datetime in ISO 8601 format with noon time.
+
+     Example:
+         >>> parse_date_yahoo("2023-01-01")
+         '2023-01-01T12:00:00Z'
+     """
+     full_datetime = f"{date0}T12:00:00Z"
+     return full_datetime
+
+
+ def round_str(x, **kwargs):
+     """Round a string number to specified precision.
+
+     Args:
+         x (str): String representation of a number.
+         **kwargs: Additional arguments for round() function.
+
+     Returns:
+         float: Rounded number.
+
+     Example:
+         >>> round_str("3.14159", ndigits=2)
+         3.14
+     """
+     return round(float(x), **kwargs)
+
+
+ def parse_date_standard_to_epoch(input_str):
+     """Convert standard date string to Unix epoch timestamp.
+
+     Args:
+         input_str (str): Date string in YYYY-MM-DD format.
+
+     Returns:
+         float: Unix epoch timestamp.
+
+     Example:
+         >>> parse_date_standard_to_epoch("2023-01-01")
+         1672531200.0
+     """
+     dt = datetime.strptime(input_str, "%Y-%m-%d").timetuple()
+     timestamp = time.mktime(dt)
+     return timestamp
+
+
+ def cast_ibes_analyst(s):
+     """Splits and normalizes analyst name strings.
+
+     Handles various name formats like 'ADKINS/NARRA' or 'ARFSTROM J'.
+
+     Args:
+         s (str): Analyst name string.
+
+     Returns:
+         tuple: (last_name, first_initial)
+
+     Examples:
+         >>> cast_ibes_analyst('ADKINS/NARRA')
+         ('ADKINS', 'N')
+         >>> cast_ibes_analyst('ARFSTROM J')
+         ('ARFSTROM', 'J')
+     """
+     if " " in s or "\t" in s:
+         r = s.split()[:2]
+         if len(r) < 2:
+             return r[0], ""
+         else:
+             return r[0], r[1][:1]
+     else:
+         r = s.split("/")
+         if s.startswith("/"):
+             r = r[1:3]
+         else:
+             r = r[:2]
+         if len(r) < 2:
+             return r[0], ""
+         else:
+             return r[0], r[1][:1]
+
+
+ def parse_date_reference(input_str):
+     """Extract year from a date reference string.
+
+     Args:
+         input_str (str): Date reference string.
+
+     Returns:
+         int: Year from the date reference.
+
+     Example:
+         >>> parse_date_reference("1923, May 10")
+         1923
+     """
+     return _parse_date_reference(input_str)["year"]
+
+
+ def _parse_date_reference(input_str):
+     """Parse complex, human-written date references.
+
+     Handles various date formats like:
+     - "1923, May 10"
+     - "1923, July"
+     - "1921, Sept"
+     - "1935-36"
+     - "1926, December 24th"
+
+     Args:
+         input_str (str): Date string in various formats.
+
+     Returns:
+         dict: Parsed date information with keys 'year', optional 'month', 'day'.
+
+     Example:
+         >>> _parse_date_reference("1923, May 10")
+         {'year': 1923, 'month': 5, 'day': 10}
+     """
+     if "," in input_str:
+         if len(input_str.split(" ")) == 3:
+             if input_str[-2:] in ORDINAL_SUFFIX:
+                 input_str = input_str[:-2]
+             try:
+                 dt = datetime.strptime(input_str, "%Y, %B %d")
+                 return {"year": dt.year, "month": dt.month, "day": dt.day}
+             except:
+                 try:
+                     aux = input_str.split(" ")
+                     input_str = " ".join([aux[0]] + [aux[1][:3]] + [aux[2]])
+                     dt = datetime.strptime(input_str, "%Y, %b %d")
+                     return {"year": dt.year, "month": dt.month, "day": dt.day}
+                 except:
+                     return {"year": input_str}
+         else:
+             try:
+                 dt = datetime.strptime(input_str, "%Y, %B")
+                 return {"year": dt.year, "month": dt.month}
+             except:
+                 try:
+                     aux = input_str.split(" ")
+                     input_str = " ".join([aux[0]] + [aux[1][:3]])
+                     dt = datetime.strptime(input_str, "%Y, %b")
+                     return {"year": dt.year, "month": dt.month}
+                 except:
+                     return {"year": input_str}
+     else:
+         try:
+             dt = datetime.strptime(input_str[:4], "%Y")
+             return {"year": dt.year}
+         except:
+             return {"year": input_str}
+
+
+ def try_int(x):
+     """Attempt to convert a value to integer.
+
+     Args:
+         x: Value to convert.
+
+     Returns:
+         int or original value: Integer if conversion successful, original value otherwise.
+
+     Example:
+         >>> try_int("123")
+         123
+         >>> try_int("abc")
+         'abc'
+     """
+     try:
+         x = int(x)
+         return x
+     except:
+         return x
+
+
+ def clear_first_level_nones(docs, keys_keep_nones: list | None = None):
+     """Removes None values from dictionaries, with optional key exceptions.
+
+     Args:
+         docs (list): List of dictionaries to clean.
+         keys_keep_nones (list, optional): Keys to keep even if their value is None.
+
+     Returns:
+         list: Cleaned list of dictionaries.
+
+     Example:
+         >>> docs = [{"a": 1, "b": None}, {"a": None, "b": 2}]
+         >>> clear_first_level_nones(docs, keys_keep_nones=["a"])
+         [{"a": 1}, {"a": None, "b": 2}]
+     """
+     if keys_keep_nones is not None:
+         docs = [
+             {k: v for k, v in tdict.items() if v or k in keys_keep_nones}
+             for tdict in docs
+         ]
+     return docs
+
+
+ def parse_multi_item(s, mapper: dict, direct: list):
+     """Parses complex multi-item strings into structured data.
+
+     Supports parsing strings with quoted or bracketed items.
+
+     Args:
+         s (str): Input string to parse.
+         mapper (dict): Mapping of input keys to output keys.
+         direct (list): Direct keys to extract.
+
+     Returns:
+         defaultdict: Parsed items with lists as values.
+
+     Example:
+         >>> s = '[name: John, age: 30] [name: Jane, age: 25]'
+         >>> mapper = {"name": "full_name"}
+         >>> direct = ["age"]
+         >>> parse_multi_item(s, mapper, direct)
+         defaultdict(list, {'full_name': ['John', 'Jane'], 'age': ['30', '25']})
+     """
+     if "'" in s:
+         items_str = re.findall(r"\"(.*?)\"", s) + re.findall(r"\'(.*?)\'", s)
+     else:
+         # remove brackets
+         items_str = re.findall(r"\[([^]]+)", s)[0].split()
+     r: defaultdict[str, list] = defaultdict(list)
+     for item in items_str:
+         doc0 = [ss.strip().split(":") for ss in item.split(",")]
+         if all([len(x) == 2 for x in doc0]):
+             doc0_dict = dict(doc0)
+             for n_init, n_final in mapper.items():
+                 try:
+                     r[n_final] += [doc0_dict[n_init]]
+                 except KeyError:
+                     r[n_final] += [None]
+
+             for n_final in direct:
+                 try:
+                     r[n_final] += [doc0_dict[n_final]]
+                 except KeyError:
+                     r[n_final] += [None]
+         else:
+             for key, value in zip(direct, doc0):
+                 r[key] += [value]
+
+     return r
+
+
+ def pick_unique_dict(docs):
+     """Removes duplicate dictionaries from a list.
+
+     Uses a hash-based approach to identify unique dictionaries, which is more
+     efficient than JSON serialization and preserves original object types.
+
+     Args:
+         docs (list): List of dictionaries.
+
+     Returns:
+         list: List of unique dictionaries (preserving original objects).
+
+     Example:
+         >>> docs = [{"a": 1}, {"a": 1}, {"b": 2}]
+         >>> pick_unique_dict(docs)
+         [{"a": 1}, {"b": 2}]
+     """
+     from datetime import date, datetime, time
+     from decimal import Decimal
+
+     def make_hashable(obj):
+         """Convert an object to a hashable representation.
+
+         Handles nested structures, datetime objects, and Decimal types.
+
+         Args:
+             obj: Object to make hashable
+
+         Returns:
+             Hashable representation of the object
+         """
+         if isinstance(obj, dict):
+             # Sort items by key for consistent hashing
+             return tuple(sorted((k, make_hashable(v)) for k, v in obj.items()))
+         elif isinstance(obj, (list, tuple)):
+             return tuple(make_hashable(item) for item in obj)
+         elif isinstance(obj, (datetime, date, time)):
+             # Convert to ISO format string for hashing
+             return ("__datetime__", obj.isoformat())
+         elif isinstance(obj, Decimal):
+             # Convert to string representation to preserve precision
+             return ("__decimal__", str(obj))
+         elif isinstance(obj, set):
+             # Convert set to sorted tuple for consistent hashing
+             return tuple(sorted(make_hashable(item) for item in obj))
+         else:
+             # Primitive types (int, float, str, bool, None) are already hashable
+             return obj
+
+     # Use a dict to preserve insertion order and original objects
+     seen = {}
+     for doc in docs:
+         # Create hashable representation
+         hashable_repr = make_hashable(doc)
+         # Use hashable representation as key, original doc as value
+         if hashable_repr not in seen:
+             seen[hashable_repr] = doc
+
+     # Return list of unique documents (preserving original objects)
+     return list(seen.values())
+
+
+ def split_keep_part(s: str, sep="/", keep=-1) -> str:
+     """Split a string and keep specified parts.
+
+     Args:
+         s (str): String to split.
+         sep (str): Separator to split on.
+         keep (int or list): Index or indices to keep.
+
+     Returns:
+         str: Joined string of kept parts.
+
+     Example:
+         >>> split_keep_part("a/b/c", keep=0)
+         'a'
+         >>> split_keep_part("a/b/c", keep=[0, 2])
+         'a/c'
+     """
+     if isinstance(keep, list):
+         items = s.split(sep)
+         return sep.join(items[k] for k in keep)
+     else:
+         return s.split(sep)[keep]
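
A minimal usage sketch for the helpers in the hunk above, assuming only that the wheel is installed so the module is importable as `graflo.util.transform` (the path listed in RECORD); the sample records are hypothetical and used purely for illustration.

```python
# Hedged sketch: combines utilities from graflo/util/transform.py shown above.
# The input records are invented; only the imported helpers come from the package.
from graflo.util.transform import (
    cast_ibes_analyst,
    clear_first_level_nones,
    parse_date_standard,
    pick_unique_dict,
)

raw_records = [
    {"analyst": "ARFSTROM J", "date": "2023-01-01", "note": None},
    {"analyst": "ARFSTROM J", "date": "2023-01-01", "note": None},  # exact duplicate
    {"analyst": "ADKINS/NARRA", "date": "2023-02-15", "note": "follow-up"},
]

# Drop exact duplicates while keeping the original dict objects.
records = pick_unique_dict(raw_records)

# Drop falsy first-level values (including None), but always keep "analyst".
records = clear_first_level_nones(records, keys_keep_nones=["analyst"])

for rec in records:
    year, month, day = parse_date_standard(rec["date"])   # e.g. (2023, 1, 1)
    last, initial = cast_ibes_analyst(rec["analyst"])      # e.g. ('ARFSTROM', 'J')
    print(last, initial, year, month, day)
```

In this sketch `pick_unique_dict` runs before `clear_first_level_nones` so duplicates are detected on the raw documents, before any keys are removed.
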
@@ -0,0 +1,190 @@
+ Metadata-Version: 2.4
+ Name: graflo
+ Version: 1.3.3
+ Summary: A framework for transforming tabular (CSV, SQL) and hierarchical data (JSON, XML) into property graphs and ingesting them into graph databases (ArangoDB, Neo4j)
+ Author-email: Alexander Belikov <alexander@growgraph.dev>
+ License-File: LICENSE
+ Requires-Python: ~=3.10.0
+ Requires-Dist: click<9,>=8.2.0
+ Requires-Dist: dataclass-wizard>=0.34.0
+ Requires-Dist: ijson<4,>=3.2.3
+ Requires-Dist: neo4j<6,>=5.22.0
+ Requires-Dist: networkx~=3.3
+ Requires-Dist: pandas-stubs==2.3.0.250703
+ Requires-Dist: pandas<3,>=2.0.3
+ Requires-Dist: psycopg2-binary>=2.9.11
+ Requires-Dist: pydantic-settings>=2.12.0
+ Requires-Dist: pydantic>=2.12.5
+ Requires-Dist: python-arango<9,>=8.1.2
+ Requires-Dist: pytigergraph>=1.9.0
+ Requires-Dist: requests>=2.31.0
+ Requires-Dist: sqlalchemy>=2.0.0
+ Requires-Dist: strenum>=0.4.15
+ Requires-Dist: suthing>=0.5.0
+ Requires-Dist: urllib3>=2.0.0
+ Requires-Dist: xmltodict<0.15,>=0.14.2
+ Provides-Extra: plot
+ Requires-Dist: pygraphviz>=1.14; extra == 'plot'
+ Description-Content-Type: text/markdown
+
+ # GraFlo <img src="https://raw.githubusercontent.com/growgraph/graflo/main/docs/assets/favicon.ico" alt="graflo logo" style="height: 32px; width:32px;"/>
+
+ A framework for transforming **tabular** (CSV, SQL) and **hierarchical** data (JSON, XML) into property graphs and ingesting them into graph databases (ArangoDB, Neo4j, **TigerGraph**).
+
+ > **⚠️ Package Renamed**: This package was formerly known as `graphcast`.
+
+ ![Python](https://img.shields.io/badge/python-3.11-blue.svg)
+ [![PyPI version](https://badge.fury.io/py/graflo.svg)](https://badge.fury.io/py/graflo)
+ [![PyPI Downloads](https://static.pepy.tech/badge/graflo)](https://pepy.tech/projects/graflo)
+ [![License: BSL](https://img.shields.io/badge/license-BSL--1.1-green)](https://github.com/growgraph/graflo/blob/main/LICENSE)
+ [![pre-commit](https://github.com/growgraph/graflo/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/growgraph/graflo/actions/workflows/pre-commit.yml)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15446131.svg)](https://doi.org/10.5281/zenodo.15446131)
+
+ ## Core Concepts
+
+ ### Property Graphs
+ graflo works with property graphs, which consist of:
+
+ - **Vertices**: Nodes with properties and optional unique identifiers
+ - **Edges**: Relationships between vertices with their own properties
+ - **Properties**: Both vertices and edges may have properties
+
+ ### Schema
+ The Schema defines how your data should be transformed into a graph and contains:
+
+ - **Vertex Definitions**: Specify vertex types, their properties, and unique identifiers
+ - **Edge Definitions**: Define relationships between vertices and their properties
+ - **Resource Mapping**: describe how data sources map to vertices and edges
+ - **Transforms**: Modify data during the casting process
+
+ ### Resources
+ Resources are your data sources that can be:
+
+ - **Table-like**: CSV files, database tables
+ - **JSON-like**: JSON files, nested data structures
+
+ ## Features
+
+ - **Graph Transformation Meta-language**: A powerful declarative language to describe how your data becomes a property graph:
+     - Define vertex and edge structures
+     - Set compound indexes for vertices and edges
+     - Use blank vertices for complex relationships
+     - Specify edge constraints and properties
+     - Apply advanced filtering and transformations
+ - **Parallel processing**: Use as many cores as you have
+ - **Database support**: Ingest into ArangoDB, Neo4j, and **TigerGraph** using the same API (database agnostic). Source data from PostgreSQL and other SQL databases. Automatically infer graph schemas from PostgreSQL 3NF databases.
+ - **Server-side filtering**: Efficient querying with server-side filtering support (TigerGraph REST++ API)
+
+ ## Documentation
+ Full documentation is available at: [growgraph.github.io/graflo](https://growgraph.github.io/graflo)
+
+ ## Installation
+
+ ```bash
+ pip install graflo
+ ```
+
+ ## Usage Examples
+
+ ### Simple ingest
+
+ ```python
+ from suthing import FileHandle
+
+ from graflo import Schema, Caster, Patterns
+ from graflo.db.connection.onto import ArangoConfig
+
+ schema = Schema.from_dict(FileHandle.load("schema.yaml"))
+
+ # Option 1: Load config from docker/arango/.env (recommended)
+ conn_conf = ArangoConfig.from_docker_env()
+
+ # Option 2: Load from environment variables
+ # Set: ARANGO_URI, ARANGO_USERNAME, ARANGO_PASSWORD, ARANGO_DATABASE
+ conn_conf = ArangoConfig.from_env()
+
+ # Option 3: Load with custom prefix (for multiple configs)
+ # Set: USER_ARANGO_URI, USER_ARANGO_USERNAME, USER_ARANGO_PASSWORD, USER_ARANGO_DATABASE
+ user_conn_conf = ArangoConfig.from_env(prefix="USER")
+
+ # Option 4: Create config directly
+ # conn_conf = ArangoConfig(
+ #     uri="http://localhost:8535",
+ #     username="root",
+ #     password="123",
+ #     database="mygraph",  # For ArangoDB, 'database' maps to schema/graph
+ # )
+ # Note: If 'database' (or 'schema_name' for TigerGraph) is not set,
+ # Caster will automatically use Schema.general.name as fallback
+
+ from graflo.util.onto import FilePattern
+ import pathlib
+
+ # Create Patterns with file patterns
+ patterns = Patterns()
+ patterns.add_file_pattern(
+     "work",
+     FilePattern(regex="\Sjson$", sub_path=pathlib.Path("./data"), resource_name="work")
+ )
+
+ # Or use resource_mapping for simpler initialization
+ # patterns = Patterns(
+ #     _resource_mapping={
+ #         "work": "./data/work.json",
+ #     }
+ # )
+
+ schema.fetch_resource()
+
+ caster = Caster(schema)
+
+ caster.ingest(
+     output_config=conn_conf,  # Target database config
+     patterns=patterns,  # Source data patterns
+ )
+ ```
+
+ ## Development
+
+ To install requirements
+
+ ```shell
+ git clone git@github.com:growgraph/graflo.git && cd graflo
+ uv sync --dev
+ ```
+
+ ### Tests
+
+ #### Test databases
+ Spin up Arango from [arango docker folder](./docker/arango) by
+
+ ```shell
+ docker-compose --env-file .env up arango
+ ```
+
+ Neo4j from [neo4j docker folder](./docker/neo4j) by
+
+ ```shell
+ docker-compose --env-file .env up neo4j
+ ```
+
+ and TigerGraph from [tigergraph docker folder](./docker/tigergraph) by
+
+ ```shell
+ docker-compose --env-file .env up tigergraph
+ ```
+
+ To run unit tests
+
+ ```shell
+ pytest test
+ ```
+
+ ## Requirements
+
+ - Python 3.11+
+ - python-arango
+
+ ## Contributing
+
+ Contributions are welcome! Please feel free to submit a Pull Request.
@@ -0,0 +1,64 @@
+ graflo/README.md,sha256=epqV1Cpmogy6RnTNeu-K0JhO_gVY82RzeLFN6kvG7Is,377
+ graflo/__init__.py,sha256=Tr4mksr6gp3fuHEx51dbudTYJTaZQH0AHVWQCCMNhs4,1857
+ graflo/caster.py,sha256=xQ8f_Z0_EMYCLieDGhwudAc_C0sFA-9j3tcAt977Nfk,26384
+ graflo/logging.conf,sha256=coIMi-VlXrBEfoczsY986ax20NZ2e11yi4hFWSwpwDM,372
+ graflo/onto.py,sha256=cygPqXGFHXJAFMJU6_OZ1ZxsnuH6w50mvSEGqkn-rc0,5911
+ graflo/architecture/__init__.py,sha256=BptkdI26979ljHMRh59owuuPF9Z2NERCXT_iRNX4kRs,1093
+ graflo/architecture/actor.py,sha256=wK0TUVrdSXbAEVi_BI_9zblUArepx96-rxDPRAX2sfE,35657
+ graflo/architecture/actor_util.py,sha256=Vu61PL_YDPbGGTjCVlhh6DKXVSHs4F4Xfccc7DlgQqI,16967
+ graflo/architecture/edge.py,sha256=pdEcvnDuoDhHqmWNBDce7Rbppdtrlq2aMuKYyYa-Drg,9419
+ graflo/architecture/onto.py,sha256=CRqiuyf7hmPxvNElOkfQZ7Mmr_SAecDG0FFfawgWA4s,11378
+ graflo/architecture/resource.py,sha256=IqSoLlFfygWUf_1Z6Sh66CW6N8BRU49oJWJXtzcQ4a4,5048
+ graflo/architecture/schema.py,sha256=HpxHmZwv4lhTEsqOouvByjJGbW4RuMOdPqVFVaIGtiI,4503
+ graflo/architecture/transform.py,sha256=TCl3U8mQIFI0IDt72BrzFLAgFB5C0hXeL_66UCaQnpg,9539
+ graflo/architecture/util.py,sha256=UYu_WP5aKxL92rzBWHJHMQ0B-i3eFfdQqDKJZaAXaIc,2971
+ graflo/architecture/vertex.py,sha256=28xFAEmv2RXoErGAZz1I7JLf2-V9SiD3IsnXSHll2Ws,20560
+ graflo/cli/__init__.py,sha256=u5YFigokhv46foL9P5YQDOIICQ2v6CMErlByucaarSs,449
+ graflo/cli/ingest.py,sha256=cGSJWCHJ3CPop48QSlzyI9lH4idCeaSsIbREE3j-7AQ,6271
+ graflo/cli/manage_dbs.py,sha256=-6Iv4OovRu44lbMjB6Kd4aIlE9kW3dJgDVCeXEtu-og,6423
+ graflo/cli/plot_schema.py,sha256=rRmg8VfOl23fhebD2NahembfiWywvBU_4yXU_VVkCYQ,4300
+ graflo/cli/xml2json.py,sha256=pi4KDtOsVVVuxmu0DtZUOJCP3VXjDGOfHQOR6HaVQk4,2668
+ graflo/data_source/__init__.py,sha256=6hX1f6k5rRFUmIrHuYhWuqLJX7cLAUj-_1_nzlrB-DY,1506
+ graflo/data_source/api.py,sha256=WRYao-4hH7Qsl_FJwoiK6bZNYDbOKamp58MqVs87AmE,12175
+ graflo/data_source/base.py,sha256=1YuxC1cjRpEuGlnve15J5zYbghhIcVyEJyfDEykWR8Y,2932
+ graflo/data_source/factory.py,sha256=XD6OL5yQUrllY7L7rZhzJKEK5vUj91PkxMRjyqcLOt8,11467
+ graflo/data_source/file.py,sha256=C2xP2QgPz2cvDZhH_auy1SnqC8jFcmbQflT4WmOsl7c,3953
+ graflo/data_source/memory.py,sha256=V3BQdWz-nAzJY1YEanSNk1vHGLvh-MEbYCKvMqyvYdE,2451
+ graflo/data_source/registry.py,sha256=FEVuRTw21YuDyRaf53nzYRigg9WHreuddNrxSSyNrL0,2606
+ graflo/data_source/sql.py,sha256=2F0pVhZl1W_3pXB1znTQKMRn_XWoKA2h94Lj7ae06w0,6241
+ graflo/db/__init__.py,sha256=I_hBGfVqZYvg68Cioa_f2mnLcU78_na6_vwD5nUbnro,1446
+ graflo/db/conn.py,sha256=EPsn3wervFRzSpyV2naNhyXOpedySpE_0ls3cQbwIaQ,11884
+ graflo/db/manager.py,sha256=lTdM20PtTS8PU119dpZH5WtD25O9zjbCOwi1icAbieI,4072
+ graflo/db/util.py,sha256=IgS32BaWKoSpsq1yqIfq9iVmXkJJFdRBDX4NOi66dgA,1461
+ graflo/db/arango/__init__.py,sha256=_au3IvlYmjiSym2gPCLngyTjpZLRSHE877SiHUd7o-E,642
+ graflo/db/arango/conn.py,sha256=l0et3NoTekn9RtT27eTyz5z5v3q-2Uti8yLQk_Kkwto,36945
+ graflo/db/arango/query.py,sha256=t_RKltpWh0L3V0FJst38vMf7QsmjzTndgBR3ooAahmM,5655
+ graflo/db/arango/util.py,sha256=AUH70v_8NuQ4Eicrh1IHApnn_-E5wPQfzgKcYHdwHT0,2971
+ graflo/db/connection/__init__.py,sha256=RBZxV2EkqWkQuFH-bEose33ENGsLKB2C8T8HuUjOaBQ,80
+ graflo/db/connection/config_mapping.py,sha256=mFFMEt51nh8yN5ar9fGaidogkIqwbAbrOoAUj8rz7S4,418
+ graflo/db/connection/onto.py,sha256=8lNsi4PCJVdny0W4Lgtju-wPoNUmzZ0ChJO97cRFsF0,25614
+ graflo/db/connection/wsgi.py,sha256=smqkQwvoSb0MEFlcxaJYjZFF1LeNMfqidcTqYzgP9BE,989
+ graflo/db/neo4j/__init__.py,sha256=KB4zd06CNCmTs5KotfQ_4petaCLOJnUZ1oD4CaRJTnQ,547
+ graflo/db/neo4j/conn.py,sha256=luQfmJvKKW0Q__0eXR7HhwSkci47jlPI8nXeev1lHoA,22325
+ graflo/db/postgres/__init__.py,sha256=2hci6gzA51Dol2D6C4flgGEImk7wTRJIYN7D_24sz-g,5023
+ graflo/db/postgres/conn.py,sha256=sM6tai-H3_ptBzb6kF5sMUefe5v-bQ_nWxkQ1Zp79Go,15732
+ graflo/db/postgres/resource_mapping.py,sha256=DGbdYwRrYuDTHnINj5w84LKhmYqBCVGuXQ4zsQKLLgE,4742
+ graflo/db/postgres/schema_inference.py,sha256=rUyH_CQnnR7ZD5raXTeN9nCyAVlGqCReTeaAmWF4ksg,8587
+ graflo/db/postgres/types.py,sha256=tEkfEOTvYz2s9U53QG47LbPdu9txtzyHFNuiKUTtGTs,4559
+ graflo/db/tigergraph/__init__.py,sha256=qmC6xVS9-s4LlWC9G8lH9DzSHjqjoSg4DqFzb42eIa0,254
+ graflo/db/tigergraph/conn.py,sha256=PW4CsmR17JpI47iHAFDs_cwJ-4lT1n6nEc3n6hO2u1Q,91335
+ graflo/filter/__init__.py,sha256=OyV5pavlOrWQeS4fwHA-tcKcNccXxGr2d--TKPYlXyE,814
+ graflo/filter/onto.py,sha256=iLFPXqiBZ1FTQbaHBxHD8gatFk024yGXYHCzyxkbaMw,16791
+ graflo/plot/__init__.py,sha256=ITlvw8_FRNXPqtjiou946Hv923iixGvR6c4Uqrg-L2o,485
+ graflo/plot/plotter.py,sha256=yv-tyKJViLU3CKg8lfBWJ8_WCxdcnGgY1G_VzhHq0lE,17710
+ graflo/util/__init__.py,sha256=aZsSxHD1vzADUvzHVuYrx2TL-EUye7tcw2IeX81EB38,706
+ graflo/util/chunker.py,sha256=d379ILBggFl7KxX8-lc6ID1iVnbfj5DM5IUPfa2wLuI,22244
+ graflo/util/merge.py,sha256=mFn6GU6nL99Ug2PxFjuBJBwaPQPotP5MSB822YS34HI,5619
+ graflo/util/misc.py,sha256=Fwl8HhbBm-ZXbqDotnZ39gDPoTfUKeonZAi4pPWY99M,1171
+ graflo/util/onto.py,sha256=RlVDrQ_uX_576qeqxkQs7_2przbq8F6uR8pp4oOCFfo,11767
+ graflo/util/transform.py,sha256=zufL-COvnsHFK4vTMW4CPZLt1j0a2o4jEWjPs9Nw1lI,12739
+ graflo-1.3.3.dist-info/METADATA,sha256=YANCi3rU4XdTXm0FDArfoz33U82zl52kyY72lCsIehE,6233
+ graflo-1.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ graflo-1.3.3.dist-info/entry_points.txt,sha256=kjDXqHgIrppqxQe6RO6XjLrq7VL50AaVHhLg7c95Oxc,190
+ graflo-1.3.3.dist-info/licenses/LICENSE,sha256=ILn9MXR5AfuRRtOF8abQWhm0wO8kckf4IBdc6mKaRG8,5593
+ graflo-1.3.3.dist-info/RECORD,,