graflo 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graflo/README.md +18 -0
- graflo/__init__.py +70 -0
- graflo/architecture/__init__.py +38 -0
- graflo/architecture/actor.py +1120 -0
- graflo/architecture/actor_util.py +450 -0
- graflo/architecture/edge.py +297 -0
- graflo/architecture/onto.py +374 -0
- graflo/architecture/resource.py +161 -0
- graflo/architecture/schema.py +136 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +93 -0
- graflo/architecture/vertex.py +586 -0
- graflo/caster.py +655 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +194 -0
- graflo/cli/manage_dbs.py +197 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/data_source/__init__.py +48 -0
- graflo/data_source/api.py +339 -0
- graflo/data_source/base.py +97 -0
- graflo/data_source/factory.py +298 -0
- graflo/data_source/file.py +133 -0
- graflo/data_source/memory.py +72 -0
- graflo/data_source/registry.py +82 -0
- graflo/data_source/sql.py +185 -0
- graflo/db/__init__.py +44 -0
- graflo/db/arango/__init__.py +22 -0
- graflo/db/arango/conn.py +1026 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/conn.py +377 -0
- graflo/db/connection/__init__.py +6 -0
- graflo/db/connection/config_mapping.py +18 -0
- graflo/db/connection/onto.py +688 -0
- graflo/db/connection/wsgi.py +29 -0
- graflo/db/manager.py +119 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +639 -0
- graflo/db/postgres/__init__.py +156 -0
- graflo/db/postgres/conn.py +425 -0
- graflo/db/postgres/resource_mapping.py +139 -0
- graflo/db/postgres/schema_inference.py +245 -0
- graflo/db/postgres/types.py +148 -0
- graflo/db/tigergraph/__init__.py +9 -0
- graflo/db/tigergraph/conn.py +2212 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +525 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +190 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +556 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +751 -0
- graflo/util/merge.py +150 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +332 -0
- graflo/util/transform.py +448 -0
- graflo-1.3.3.dist-info/METADATA +190 -0
- graflo-1.3.3.dist-info/RECORD +64 -0
- graflo-1.3.3.dist-info/WHEEL +4 -0
- graflo-1.3.3.dist-info/entry_points.txt +5 -0
- graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
graflo/util/transform.py
ADDED
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
"""Data transformation utilities for graph operations.
|
|
2
|
+
|
|
3
|
+
This module provides utility functions for transforming and standardizing data
|
|
4
|
+
in various formats, particularly for graph database operations. It includes
|
|
5
|
+
functions for date parsing, string standardization, and data cleaning.
|
|
6
|
+
|
|
7
|
+
Key Functions:
|
|
8
|
+
- standardize: Standardize string keys and names
|
|
9
|
+
- parse_date_*: Various date parsing functions for different formats
|
|
10
|
+
- cast_ibes_analyst: Parse and standardize analyst names
|
|
11
|
+
- clear_first_level_nones: Clean dictionaries by removing None values
|
|
12
|
+
- parse_multi_item: Parse complex multi-item strings
|
|
13
|
+
- pick_unique_dict: Remove duplicate dictionaries
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
>>> name = standardize("John. Doe, Smith")
|
|
17
|
+
>>> date = parse_date_standard("2023-01-01")
|
|
18
|
+
>>> analyst = cast_ibes_analyst("ADKINS/NARRA")
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import logging
|
|
22
|
+
import re
|
|
23
|
+
import time
|
|
24
|
+
from collections import defaultdict
|
|
25
|
+
from datetime import datetime
|
|
26
|
+
|
|
27
|
+
# English ordinal suffixes; _parse_date_reference strips one of these from the
# end of a three-token date string (e.g. "1926, December 24th") before parsing.
ORDINAL_SUFFIX = ["st", "nd", "rd", "th"]
|
|
28
|
+
|
|
29
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def standardize(k):
    """Normalize a name-like string into a comma-joined key.

    Periods are removed first. The result is then split on ``", "``:

    * If there is no ``", "`` separator, the string is split on single
      spaces and the pieces are joined with commas.
    * Otherwise the pieces are kept, and only the *second* piece has its
      spaces removed (any further pieces are left untouched) before the
      pieces are joined with commas.

    Args:
        k (str): Input string to be standardized.

    Returns:
        str: Cleaned and standardized string.

    Example:
        >>> standardize("John. Doe, Smith")
        'John Doe,Smith'
        >>> standardize("Doe, John A.")
        'Doe,JohnA'
        >>> standardize("John Doe Smith")
        'John,Doe,Smith'
    """
    parts = k.replace(".", "").split(", ")
    if len(parts) < 2:
        # No ", " separator: treat every space as a separator instead.
        parts = parts[0].split(" ")
    else:
        # Only the second piece is compacted; this mirrors the historical
        # behaviour for "Last, First M" style inputs.
        parts[1] = parts[1].replace(" ", "")
    return ",".join(parts)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def parse_date_standard(input_str):
    """Split a ``YYYY-MM-DD`` date string into its numeric components.

    Args:
        input_str (str): Date string in YYYY-MM-DD format.

    Returns:
        tuple: (year, month, day) as integers.

    Raises:
        ValueError: If ``input_str`` does not match the expected format.

    Example:
        >>> parse_date_standard("2023-01-01")
        (2023, 1, 1)
    """
    year, month, day = datetime.strptime(input_str, "%Y-%m-%d").timetuple()[:3]
    return year, month, day
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def parse_date_conf(input_str):
    """Split a compact ``YYYYMMDD`` date string into its numeric components.

    Args:
        input_str (str): Date string in YYYYMMDD format.

    Returns:
        tuple: (year, month, day) as integers.

    Raises:
        ValueError: If ``input_str`` does not match the expected format.

    Example:
        >>> parse_date_conf("20230101")
        (2023, 1, 1)
    """
    year, month, day = datetime.strptime(input_str, "%Y%m%d").timetuple()[:3]
    return year, month, day
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def parse_date_ibes(date0, time0):
    """Combine an IBES date and time into an ISO 8601 style timestamp.

    The date is sliced positionally (4 + 2 + rest) and is not validated.
    Each colon-separated time component is left-padded with zeros to two
    characters, so ``"9:35:52"`` becomes ``"09:35:52"``.

    Args:
        date0 (str/int): Date in YYYYMMDD format.
        time0 (str): Time in H:MM:SS or HH:MM:SS format.

    Returns:
        str: Datetime string of the form YYYY-MM-DDTHH:MM:SSZ.

    Example:
        >>> parse_date_ibes(20160126, "9:35:52")
        '2016-01-26T09:35:52Z'
    """
    date0 = str(date0)
    year, month, day = date0[:4], date0[4:6], date0[6:]
    # IBES times may have a single-digit hour; pad so the output is valid
    # ISO 8601.
    clock = ":".join(part.zfill(2) for part in str(time0).split(":"))
    return f"{year}-{month}-{day}T{clock}Z"
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def parse_date_yahoo(date0):
    """Turn a Yahoo Finance date into a noon timestamp string.

    The input is not parsed or validated; ``T12:00:00Z`` is simply appended.

    Args:
        date0 (str): Date in YYYY-MM-DD format.

    Returns:
        str: Datetime in ISO 8601 format with noon time.

    Example:
        >>> parse_date_yahoo("2023-01-01")
        '2023-01-01T12:00:00Z'
    """
    return f"{date0}T12:00:00Z"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def round_str(x, **kwargs):
    """Convert a numeric string to float and round it.

    Args:
        x (str): String representation of a number.
        **kwargs: Keyword arguments forwarded to the built-in ``round()``
            (e.g. ``ndigits``).

    Returns:
        float: Rounded number (``round()`` returns an int when ``ndigits``
        is omitted).

    Example:
        >>> round_str("3.14159", ndigits=2)
        3.14
    """
    as_float = float(x)
    return round(as_float, **kwargs)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def parse_date_standard_to_epoch(input_str):
    """Convert a ``YYYY-MM-DD`` date string to a Unix epoch timestamp.

    The date is interpreted as midnight in the *local* timezone of the
    running process (``time.mktime`` semantics), so the result depends on
    the host's timezone configuration.

    Args:
        input_str (str): Date string in YYYY-MM-DD format.

    Returns:
        float: Seconds since the epoch for local midnight of that date.

    Raises:
        ValueError: If ``input_str`` does not match the expected format.

    Example:
        On a host whose local timezone is UTC:

        >>> parse_date_standard_to_epoch("2023-01-01")
        1672531200.0
    """
    local_midnight = datetime.strptime(input_str, "%Y-%m-%d")
    return time.mktime(local_midnight.timetuple())
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def cast_ibes_analyst(s):
    """Split an analyst name string into a name and a one-letter initial.

    Two layouts are handled:

    * Whitespace separated (contains a space or tab), e.g. ``'ARFSTROM J'``:
      the first two whitespace-delimited tokens are used.
    * Slash separated, e.g. ``'ADKINS/NARRA'``: the first two
      ``/``-delimited fields are used; a single leading ``/`` is skipped.

    Args:
        s (str): Analyst name string.

    Returns:
        tuple: (first_field, first_character_of_second_field). The second
        element is ``""`` when there is no second field. Blank or
        whitespace-only input yields ``("", "")``.

    Examples:
        >>> cast_ibes_analyst('ADKINS/NARRA')
        ('ADKINS', 'N')
        >>> cast_ibes_analyst('ARFSTROM J')
        ('ARFSTROM', 'J')
    """
    if " " in s or "\t" in s:
        fields = s.split()
    else:
        fields = s.split("/")
        if s.startswith("/"):
            # Drop the empty field produced by the leading slash.
            fields = fields[1:]

    # Whitespace-only input produces no fields at all; fall back to "".
    name = fields[0] if fields else ""
    initial = fields[1][:1] if len(fields) > 1 else ""
    return name, initial
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def parse_date_reference(input_str):
    """Return the year component of a human-written date reference.

    Delegates to ``_parse_date_reference`` and returns the value stored
    under its ``"year"`` key.

    Args:
        input_str (str): Date reference string.

    Returns:
        int or str: The year as an integer when the helper could parse the
        input; otherwise the string fallback the helper stores under
        ``"year"``.

    Example:
        >>> parse_date_reference("1923, May 10")
        1923
    """
    parsed = _parse_date_reference(input_str)
    return parsed["year"]
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _parse_date_reference(input_str):
|
|
218
|
+
"""Parse complex, human-written date references.
|
|
219
|
+
|
|
220
|
+
Handles various date formats like:
|
|
221
|
+
- "1923, May 10"
|
|
222
|
+
- "1923, July"
|
|
223
|
+
- "1921, Sept"
|
|
224
|
+
- "1935-36"
|
|
225
|
+
- "1926, December 24th"
|
|
226
|
+
|
|
227
|
+
Args:
|
|
228
|
+
input_str (str): Date string in various formats.
|
|
229
|
+
|
|
230
|
+
Returns:
|
|
231
|
+
dict: Parsed date information with keys 'year', optional 'month', 'day'.
|
|
232
|
+
|
|
233
|
+
Example:
|
|
234
|
+
>>> _parse_date_reference("1923, May 10")
|
|
235
|
+
{'year': 1923, 'month': 5, 'day': 10}
|
|
236
|
+
"""
|
|
237
|
+
if "," in input_str:
|
|
238
|
+
if len(input_str.split(" ")) == 3:
|
|
239
|
+
if input_str[-2:] in ORDINAL_SUFFIX:
|
|
240
|
+
input_str = input_str[:-2]
|
|
241
|
+
try:
|
|
242
|
+
dt = datetime.strptime(input_str, "%Y, %B %d")
|
|
243
|
+
return {"year": dt.year, "month": dt.month, "day": dt.day}
|
|
244
|
+
except:
|
|
245
|
+
try:
|
|
246
|
+
aux = input_str.split(" ")
|
|
247
|
+
input_str = " ".join([aux[0]] + [aux[1][:3]] + [aux[2]])
|
|
248
|
+
dt = datetime.strptime(input_str, "%Y, %b %d")
|
|
249
|
+
return {"year": dt.year, "month": dt.month, "day": dt.day}
|
|
250
|
+
except:
|
|
251
|
+
return {"year": input_str}
|
|
252
|
+
else:
|
|
253
|
+
try:
|
|
254
|
+
dt = datetime.strptime(input_str, "%Y, %B")
|
|
255
|
+
return {"year": dt.year, "month": dt.month}
|
|
256
|
+
except:
|
|
257
|
+
try:
|
|
258
|
+
aux = input_str.split(" ")
|
|
259
|
+
input_str = " ".join([aux[0]] + [aux[1][:3]])
|
|
260
|
+
dt = datetime.strptime(input_str, "%Y, %b")
|
|
261
|
+
return {"year": dt.year, "month": dt.month}
|
|
262
|
+
except:
|
|
263
|
+
return {"year": input_str}
|
|
264
|
+
else:
|
|
265
|
+
try:
|
|
266
|
+
dt = datetime.strptime(input_str[:4], "%Y")
|
|
267
|
+
return {"year": dt.year}
|
|
268
|
+
except:
|
|
269
|
+
return {"year": input_str}
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def try_int(x):
    """Attempt to convert a value to integer.

    Args:
        x: Value to convert.

    Returns:
        int or original value: Integer if conversion successful, original
        value otherwise.

    Example:
        >>> try_int("123")
        123
        >>> try_int("abc")
        'abc'
    """
    try:
        return int(x)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN; TypeError: None and other
        # unsupported types; OverflowError: infinite floats.
        return x
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def clear_first_level_nones(docs, keys_keep_nones: list | None = None):
|
|
295
|
+
"""Removes None values from dictionaries, with optional key exceptions.
|
|
296
|
+
|
|
297
|
+
Args:
|
|
298
|
+
docs (list): List of dictionaries to clean.
|
|
299
|
+
keys_keep_nones (list, optional): Keys to keep even if their value is None.
|
|
300
|
+
|
|
301
|
+
Returns:
|
|
302
|
+
list: Cleaned list of dictionaries.
|
|
303
|
+
|
|
304
|
+
Example:
|
|
305
|
+
>>> docs = [{"a": 1, "b": None}, {"a": None, "b": 2}]
|
|
306
|
+
>>> clear_first_level_nones(docs, keys_keep_nones=["a"])
|
|
307
|
+
[{"a": 1}, {"a": None, "b": 2}]
|
|
308
|
+
"""
|
|
309
|
+
if keys_keep_nones is not None:
|
|
310
|
+
docs = [
|
|
311
|
+
{k: v for k, v in tdict.items() if v or k in keys_keep_nones}
|
|
312
|
+
for tdict in docs
|
|
313
|
+
]
|
|
314
|
+
return docs
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def parse_multi_item(s, mapper: dict, direct: list):
    """Parse a multi-item string into lists of values keyed by field name.

    Item extraction depends on the input:

    * If ``s`` contains a single quote, every double-quoted and every
      single-quoted substring is an item (double-quoted ones first).
    * Otherwise the content of the first ``[...]`` group is split on
      whitespace and each token is an item.

    Each item is split on ``","`` and each piece on ``":"``. When every
    piece is a ``key:value`` pair, one value (or ``None`` if the key is
    absent) is appended for each entry of ``mapper`` and ``direct``.
    Otherwise the split pieces (lists of strings) are assigned positionally
    to the keys in ``direct``.

    Args:
        s (str): Input string to parse.
        mapper (dict): Mapping of input keys to output keys.
        direct (list): Keys copied to the output under the same name.

    Returns:
        defaultdict: Parsed items with lists as values.

    Raises:
        IndexError: If ``s`` contains no single quote and no ``[...]`` group.

    Example:
        >>> s = "['name:John, age:30', 'name:Jane, age:25']"
        >>> mapper = {"name": "full_name"}
        >>> direct = ["age"]
        >>> dict(parse_multi_item(s, mapper, direct))
        {'full_name': ['John', 'Jane'], 'age': ['30', '25']}
    """
    if "'" in s:
        items_str = re.findall(r"\"(.*?)\"", s) + re.findall(r"\'(.*?)\'", s)
    else:
        # remove brackets
        items_str = re.findall(r"\[([^]]+)", s)[0].split()

    r: defaultdict[str, list] = defaultdict(list)
    for item in items_str:
        doc0 = [ss.strip().split(":") for ss in item.split(",")]
        if all(len(x) == 2 for x in doc0):
            doc0_dict = dict(doc0)
            # Missing keys contribute None so all output lists stay aligned.
            for n_init, n_final in mapper.items():
                r[n_final].append(doc0_dict.get(n_init))
            for n_final in direct:
                r[n_final].append(doc0_dict.get(n_final))
        else:
            # Not key:value shaped: hand the raw pieces to ``direct`` keys
            # by position.
            for key, value in zip(direct, doc0):
                r[key].append(value)

    return r
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def pick_unique_dict(docs):
    """Removes duplicate dictionaries from a list.

    Each document is converted to a hashable fingerprint, and the first
    document seen for each fingerprint is kept. Input order and the original
    objects are preserved.

    Fingerprints are order-independent for dicts and sets and do not require
    keys or members to be mutually comparable, so mixed-type keys such as
    ``{1: "a", "b": 2}`` are supported. Lists and tuples with equal contents
    are treated as equal.

    Args:
        docs (list): List of dictionaries.

    Returns:
        list: List of unique dictionaries (preserving original objects).

    Raises:
        TypeError: If a document contains an unhashable value of a type not
            handled here (anything other than dict, list, tuple, set).

    Example:
        >>> docs = [{"a": 1}, {"a": 1}, {"b": 2}]
        >>> pick_unique_dict(docs)
        [{'a': 1}, {'b': 2}]
    """
    from datetime import date, datetime, time
    from decimal import Decimal

    def make_hashable(obj):
        """Convert an object to a hashable representation.

        Handles nested structures, datetime objects, and Decimal types.
        Container kinds are tagged so that, for example, a dict and a list
        of its item pairs do not produce the same fingerprint.

        Args:
            obj: Object to make hashable

        Returns:
            Hashable representation of the object
        """
        if isinstance(obj, dict):
            # frozenset: order-independent and needs no sorting, so keys of
            # different types cannot trigger a comparison TypeError.
            pairs = frozenset((k, make_hashable(v)) for k, v in obj.items())
            return ("__dict__", pairs)
        elif isinstance(obj, (list, tuple)):
            return tuple(make_hashable(item) for item in obj)
        elif isinstance(obj, (datetime, date, time)):
            # Convert to ISO format string for hashing
            return ("__datetime__", obj.isoformat())
        elif isinstance(obj, Decimal):
            # Convert to string representation to preserve precision
            return ("__decimal__", str(obj))
        elif isinstance(obj, set):
            return ("__set__", frozenset(make_hashable(item) for item in obj))
        else:
            # Primitive types (int, float, str, bool, None) are already hashable
            return obj

    # A dict preserves insertion order; setdefault keeps the first document
    # seen for each fingerprint.
    seen = {}
    for doc in docs:
        seen.setdefault(make_hashable(doc), doc)

    return list(seen.values())
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def split_keep_part(s: str, sep="/", keep=-1) -> str:
    """Split ``s`` on ``sep`` and return the selected piece(s).

    Args:
        s (str): String to split.
        sep (str): Separator to split on.
        keep (int or list): Index of the piece to return, or a list of
            indices whose pieces are re-joined with ``sep``.

    Returns:
        str: The selected piece, or the selected pieces joined by ``sep``.

    Raises:
        IndexError: If a requested index is out of range.

    Example:
        >>> split_keep_part("a/b/c", keep=0)
        'a'
        >>> split_keep_part("a/b/c", keep=[0, 2])
        'a/c'
    """
    pieces = s.split(sep)
    if not isinstance(keep, list):
        return pieces[keep]
    return sep.join(pieces[idx] for idx in keep)
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: graflo
|
|
3
|
+
Version: 1.3.3
|
|
4
|
+
Summary: A framework for transforming tabular (CSV, SQL) and hierarchical data (JSON, XML) into property graphs and ingesting them into graph databases (ArangoDB, Neo4j, TigerGraph)
|
|
5
|
+
Author-email: Alexander Belikov <alexander@growgraph.dev>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: ~=3.10.0
|
|
8
|
+
Requires-Dist: click<9,>=8.2.0
|
|
9
|
+
Requires-Dist: dataclass-wizard>=0.34.0
|
|
10
|
+
Requires-Dist: ijson<4,>=3.2.3
|
|
11
|
+
Requires-Dist: neo4j<6,>=5.22.0
|
|
12
|
+
Requires-Dist: networkx~=3.3
|
|
13
|
+
Requires-Dist: pandas-stubs==2.3.0.250703
|
|
14
|
+
Requires-Dist: pandas<3,>=2.0.3
|
|
15
|
+
Requires-Dist: psycopg2-binary>=2.9.11
|
|
16
|
+
Requires-Dist: pydantic-settings>=2.12.0
|
|
17
|
+
Requires-Dist: pydantic>=2.12.5
|
|
18
|
+
Requires-Dist: python-arango<9,>=8.1.2
|
|
19
|
+
Requires-Dist: pytigergraph>=1.9.0
|
|
20
|
+
Requires-Dist: requests>=2.31.0
|
|
21
|
+
Requires-Dist: sqlalchemy>=2.0.0
|
|
22
|
+
Requires-Dist: strenum>=0.4.15
|
|
23
|
+
Requires-Dist: suthing>=0.5.0
|
|
24
|
+
Requires-Dist: urllib3>=2.0.0
|
|
25
|
+
Requires-Dist: xmltodict<0.15,>=0.14.2
|
|
26
|
+
Provides-Extra: plot
|
|
27
|
+
Requires-Dist: pygraphviz>=1.14; extra == 'plot'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# GraFlo <img src="https://raw.githubusercontent.com/growgraph/graflo/main/docs/assets/favicon.ico" alt="graflo logo" style="height: 32px; width:32px;"/>
|
|
31
|
+
|
|
32
|
+
A framework for transforming **tabular** (CSV, SQL) and **hierarchical** data (JSON, XML) into property graphs and ingesting them into graph databases (ArangoDB, Neo4j, **TigerGraph**).
|
|
33
|
+
|
|
34
|
+
> **⚠️ Package Renamed**: This package was formerly known as `graphcast`.
|
|
35
|
+
|
|
36
|
+

|
|
37
|
+
[](https://badge.fury.io/py/graflo)
|
|
38
|
+
[](https://pepy.tech/projects/graflo)
|
|
39
|
+
[](https://github.com/growgraph/graflo/blob/main/LICENSE)
|
|
40
|
+
[](https://github.com/growgraph/graflo/actions/workflows/pre-commit.yml)
|
|
41
|
+
[]( https://doi.org/10.5281/zenodo.15446131)
|
|
42
|
+
|
|
43
|
+
## Core Concepts
|
|
44
|
+
|
|
45
|
+
### Property Graphs
|
|
46
|
+
graflo works with property graphs, which consist of:
|
|
47
|
+
|
|
48
|
+
- **Vertices**: Nodes with properties and optional unique identifiers
|
|
49
|
+
- **Edges**: Relationships between vertices with their own properties
|
|
50
|
+
- **Properties**: Both vertices and edges may have properties
|
|
51
|
+
|
|
52
|
+
### Schema
|
|
53
|
+
The Schema defines how your data should be transformed into a graph and contains:
|
|
54
|
+
|
|
55
|
+
- **Vertex Definitions**: Specify vertex types, their properties, and unique identifiers
|
|
56
|
+
- **Edge Definitions**: Define relationships between vertices and their properties
|
|
57
|
+
- **Resource Mapping**: Describe how data sources map to vertices and edges
|
|
58
|
+
- **Transforms**: Modify data during the casting process
|
|
59
|
+
|
|
60
|
+
### Resources
|
|
61
|
+
Resources are your data sources that can be:
|
|
62
|
+
|
|
63
|
+
- **Table-like**: CSV files, database tables
|
|
64
|
+
- **JSON-like**: JSON files, nested data structures
|
|
65
|
+
|
|
66
|
+
## Features
|
|
67
|
+
|
|
68
|
+
- **Graph Transformation Meta-language**: A powerful declarative language to describe how your data becomes a property graph:
|
|
69
|
+
- Define vertex and edge structures
|
|
70
|
+
- Set compound indexes for vertices and edges
|
|
71
|
+
- Use blank vertices for complex relationships
|
|
72
|
+
- Specify edge constraints and properties
|
|
73
|
+
- Apply advanced filtering and transformations
|
|
74
|
+
- **Parallel processing**: Use as many cores as you have
|
|
75
|
+
- **Database support**: Ingest into ArangoDB, Neo4j, and **TigerGraph** using the same API (database agnostic). Source data from PostgreSQL and other SQL databases. Automatically infer graph schemas from PostgreSQL 3NF databases.
|
|
76
|
+
- **Server-side filtering**: Efficient querying with server-side filtering support (TigerGraph REST++ API)
|
|
77
|
+
|
|
78
|
+
## Documentation
|
|
79
|
+
Full documentation is available at: [growgraph.github.io/graflo](https://growgraph.github.io/graflo)
|
|
80
|
+
|
|
81
|
+
## Installation
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
pip install graflo
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Usage Examples
|
|
88
|
+
|
|
89
|
+
### Simple ingest
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from suthing import FileHandle
|
|
93
|
+
|
|
94
|
+
from graflo import Schema, Caster, Patterns
|
|
95
|
+
from graflo.db.connection.onto import ArangoConfig
|
|
96
|
+
|
|
97
|
+
schema = Schema.from_dict(FileHandle.load("schema.yaml"))
|
|
98
|
+
|
|
99
|
+
# Option 1: Load config from docker/arango/.env (recommended)
|
|
100
|
+
conn_conf = ArangoConfig.from_docker_env()
|
|
101
|
+
|
|
102
|
+
# Option 2: Load from environment variables
|
|
103
|
+
# Set: ARANGO_URI, ARANGO_USERNAME, ARANGO_PASSWORD, ARANGO_DATABASE
|
|
104
|
+
conn_conf = ArangoConfig.from_env()
|
|
105
|
+
|
|
106
|
+
# Option 3: Load with custom prefix (for multiple configs)
|
|
107
|
+
# Set: USER_ARANGO_URI, USER_ARANGO_USERNAME, USER_ARANGO_PASSWORD, USER_ARANGO_DATABASE
|
|
108
|
+
user_conn_conf = ArangoConfig.from_env(prefix="USER")
|
|
109
|
+
|
|
110
|
+
# Option 4: Create config directly
|
|
111
|
+
# conn_conf = ArangoConfig(
|
|
112
|
+
# uri="http://localhost:8535",
|
|
113
|
+
# username="root",
|
|
114
|
+
# password="123",
|
|
115
|
+
# database="mygraph", # For ArangoDB, 'database' maps to schema/graph
|
|
116
|
+
# )
|
|
117
|
+
# Note: If 'database' (or 'schema_name' for TigerGraph) is not set,
|
|
118
|
+
# Caster will automatically use Schema.general.name as fallback
|
|
119
|
+
|
|
120
|
+
from graflo.util.onto import FilePattern
|
|
121
|
+
import pathlib
|
|
122
|
+
|
|
123
|
+
# Create Patterns with file patterns
|
|
124
|
+
patterns = Patterns()
|
|
125
|
+
patterns.add_file_pattern(
|
|
126
|
+
"work",
|
|
127
|
+
FilePattern(regex=r"\Sjson$", sub_path=pathlib.Path("./data"), resource_name="work")
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# Or use resource_mapping for simpler initialization
|
|
131
|
+
# patterns = Patterns(
|
|
132
|
+
# _resource_mapping={
|
|
133
|
+
# "work": "./data/work.json",
|
|
134
|
+
# }
|
|
135
|
+
# )
|
|
136
|
+
|
|
137
|
+
schema.fetch_resource()
|
|
138
|
+
|
|
139
|
+
caster = Caster(schema)
|
|
140
|
+
|
|
141
|
+
caster.ingest(
|
|
142
|
+
output_config=conn_conf, # Target database config
|
|
143
|
+
patterns=patterns, # Source data patterns
|
|
144
|
+
)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Development
|
|
148
|
+
|
|
149
|
+
To install requirements
|
|
150
|
+
|
|
151
|
+
```shell
|
|
152
|
+
git clone git@github.com:growgraph/graflo.git && cd graflo
|
|
153
|
+
uv sync --dev
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Tests
|
|
157
|
+
|
|
158
|
+
#### Test databases
|
|
159
|
+
Spin up Arango from [arango docker folder](./docker/arango) by
|
|
160
|
+
|
|
161
|
+
```shell
|
|
162
|
+
docker-compose --env-file .env up arango
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Neo4j from [neo4j docker folder](./docker/neo4j) by
|
|
166
|
+
|
|
167
|
+
```shell
|
|
168
|
+
docker-compose --env-file .env up neo4j
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
and TigerGraph from [tigergraph docker folder](./docker/tigergraph) by
|
|
172
|
+
|
|
173
|
+
```shell
|
|
174
|
+
docker-compose --env-file .env up tigergraph
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
To run unit tests
|
|
178
|
+
|
|
179
|
+
```shell
|
|
180
|
+
pytest test
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Requirements
|
|
184
|
+
|
|
185
|
+
- Python 3.11+
|
|
186
|
+
- python-arango
|
|
187
|
+
|
|
188
|
+
## Contributing
|
|
189
|
+
|
|
190
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
graflo/README.md,sha256=epqV1Cpmogy6RnTNeu-K0JhO_gVY82RzeLFN6kvG7Is,377
|
|
2
|
+
graflo/__init__.py,sha256=Tr4mksr6gp3fuHEx51dbudTYJTaZQH0AHVWQCCMNhs4,1857
|
|
3
|
+
graflo/caster.py,sha256=xQ8f_Z0_EMYCLieDGhwudAc_C0sFA-9j3tcAt977Nfk,26384
|
|
4
|
+
graflo/logging.conf,sha256=coIMi-VlXrBEfoczsY986ax20NZ2e11yi4hFWSwpwDM,372
|
|
5
|
+
graflo/onto.py,sha256=cygPqXGFHXJAFMJU6_OZ1ZxsnuH6w50mvSEGqkn-rc0,5911
|
|
6
|
+
graflo/architecture/__init__.py,sha256=BptkdI26979ljHMRh59owuuPF9Z2NERCXT_iRNX4kRs,1093
|
|
7
|
+
graflo/architecture/actor.py,sha256=wK0TUVrdSXbAEVi_BI_9zblUArepx96-rxDPRAX2sfE,35657
|
|
8
|
+
graflo/architecture/actor_util.py,sha256=Vu61PL_YDPbGGTjCVlhh6DKXVSHs4F4Xfccc7DlgQqI,16967
|
|
9
|
+
graflo/architecture/edge.py,sha256=pdEcvnDuoDhHqmWNBDce7Rbppdtrlq2aMuKYyYa-Drg,9419
|
|
10
|
+
graflo/architecture/onto.py,sha256=CRqiuyf7hmPxvNElOkfQZ7Mmr_SAecDG0FFfawgWA4s,11378
|
|
11
|
+
graflo/architecture/resource.py,sha256=IqSoLlFfygWUf_1Z6Sh66CW6N8BRU49oJWJXtzcQ4a4,5048
|
|
12
|
+
graflo/architecture/schema.py,sha256=HpxHmZwv4lhTEsqOouvByjJGbW4RuMOdPqVFVaIGtiI,4503
|
|
13
|
+
graflo/architecture/transform.py,sha256=TCl3U8mQIFI0IDt72BrzFLAgFB5C0hXeL_66UCaQnpg,9539
|
|
14
|
+
graflo/architecture/util.py,sha256=UYu_WP5aKxL92rzBWHJHMQ0B-i3eFfdQqDKJZaAXaIc,2971
|
|
15
|
+
graflo/architecture/vertex.py,sha256=28xFAEmv2RXoErGAZz1I7JLf2-V9SiD3IsnXSHll2Ws,20560
|
|
16
|
+
graflo/cli/__init__.py,sha256=u5YFigokhv46foL9P5YQDOIICQ2v6CMErlByucaarSs,449
|
|
17
|
+
graflo/cli/ingest.py,sha256=cGSJWCHJ3CPop48QSlzyI9lH4idCeaSsIbREE3j-7AQ,6271
|
|
18
|
+
graflo/cli/manage_dbs.py,sha256=-6Iv4OovRu44lbMjB6Kd4aIlE9kW3dJgDVCeXEtu-og,6423
|
|
19
|
+
graflo/cli/plot_schema.py,sha256=rRmg8VfOl23fhebD2NahembfiWywvBU_4yXU_VVkCYQ,4300
|
|
20
|
+
graflo/cli/xml2json.py,sha256=pi4KDtOsVVVuxmu0DtZUOJCP3VXjDGOfHQOR6HaVQk4,2668
|
|
21
|
+
graflo/data_source/__init__.py,sha256=6hX1f6k5rRFUmIrHuYhWuqLJX7cLAUj-_1_nzlrB-DY,1506
|
|
22
|
+
graflo/data_source/api.py,sha256=WRYao-4hH7Qsl_FJwoiK6bZNYDbOKamp58MqVs87AmE,12175
|
|
23
|
+
graflo/data_source/base.py,sha256=1YuxC1cjRpEuGlnve15J5zYbghhIcVyEJyfDEykWR8Y,2932
|
|
24
|
+
graflo/data_source/factory.py,sha256=XD6OL5yQUrllY7L7rZhzJKEK5vUj91PkxMRjyqcLOt8,11467
|
|
25
|
+
graflo/data_source/file.py,sha256=C2xP2QgPz2cvDZhH_auy1SnqC8jFcmbQflT4WmOsl7c,3953
|
|
26
|
+
graflo/data_source/memory.py,sha256=V3BQdWz-nAzJY1YEanSNk1vHGLvh-MEbYCKvMqyvYdE,2451
|
|
27
|
+
graflo/data_source/registry.py,sha256=FEVuRTw21YuDyRaf53nzYRigg9WHreuddNrxSSyNrL0,2606
|
|
28
|
+
graflo/data_source/sql.py,sha256=2F0pVhZl1W_3pXB1znTQKMRn_XWoKA2h94Lj7ae06w0,6241
|
|
29
|
+
graflo/db/__init__.py,sha256=I_hBGfVqZYvg68Cioa_f2mnLcU78_na6_vwD5nUbnro,1446
|
|
30
|
+
graflo/db/conn.py,sha256=EPsn3wervFRzSpyV2naNhyXOpedySpE_0ls3cQbwIaQ,11884
|
|
31
|
+
graflo/db/manager.py,sha256=lTdM20PtTS8PU119dpZH5WtD25O9zjbCOwi1icAbieI,4072
|
|
32
|
+
graflo/db/util.py,sha256=IgS32BaWKoSpsq1yqIfq9iVmXkJJFdRBDX4NOi66dgA,1461
|
|
33
|
+
graflo/db/arango/__init__.py,sha256=_au3IvlYmjiSym2gPCLngyTjpZLRSHE877SiHUd7o-E,642
|
|
34
|
+
graflo/db/arango/conn.py,sha256=l0et3NoTekn9RtT27eTyz5z5v3q-2Uti8yLQk_Kkwto,36945
|
|
35
|
+
graflo/db/arango/query.py,sha256=t_RKltpWh0L3V0FJst38vMf7QsmjzTndgBR3ooAahmM,5655
|
|
36
|
+
graflo/db/arango/util.py,sha256=AUH70v_8NuQ4Eicrh1IHApnn_-E5wPQfzgKcYHdwHT0,2971
|
|
37
|
+
graflo/db/connection/__init__.py,sha256=RBZxV2EkqWkQuFH-bEose33ENGsLKB2C8T8HuUjOaBQ,80
|
|
38
|
+
graflo/db/connection/config_mapping.py,sha256=mFFMEt51nh8yN5ar9fGaidogkIqwbAbrOoAUj8rz7S4,418
|
|
39
|
+
graflo/db/connection/onto.py,sha256=8lNsi4PCJVdny0W4Lgtju-wPoNUmzZ0ChJO97cRFsF0,25614
|
|
40
|
+
graflo/db/connection/wsgi.py,sha256=smqkQwvoSb0MEFlcxaJYjZFF1LeNMfqidcTqYzgP9BE,989
|
|
41
|
+
graflo/db/neo4j/__init__.py,sha256=KB4zd06CNCmTs5KotfQ_4petaCLOJnUZ1oD4CaRJTnQ,547
|
|
42
|
+
graflo/db/neo4j/conn.py,sha256=luQfmJvKKW0Q__0eXR7HhwSkci47jlPI8nXeev1lHoA,22325
|
|
43
|
+
graflo/db/postgres/__init__.py,sha256=2hci6gzA51Dol2D6C4flgGEImk7wTRJIYN7D_24sz-g,5023
|
|
44
|
+
graflo/db/postgres/conn.py,sha256=sM6tai-H3_ptBzb6kF5sMUefe5v-bQ_nWxkQ1Zp79Go,15732
|
|
45
|
+
graflo/db/postgres/resource_mapping.py,sha256=DGbdYwRrYuDTHnINj5w84LKhmYqBCVGuXQ4zsQKLLgE,4742
|
|
46
|
+
graflo/db/postgres/schema_inference.py,sha256=rUyH_CQnnR7ZD5raXTeN9nCyAVlGqCReTeaAmWF4ksg,8587
|
|
47
|
+
graflo/db/postgres/types.py,sha256=tEkfEOTvYz2s9U53QG47LbPdu9txtzyHFNuiKUTtGTs,4559
|
|
48
|
+
graflo/db/tigergraph/__init__.py,sha256=qmC6xVS9-s4LlWC9G8lH9DzSHjqjoSg4DqFzb42eIa0,254
|
|
49
|
+
graflo/db/tigergraph/conn.py,sha256=PW4CsmR17JpI47iHAFDs_cwJ-4lT1n6nEc3n6hO2u1Q,91335
|
|
50
|
+
graflo/filter/__init__.py,sha256=OyV5pavlOrWQeS4fwHA-tcKcNccXxGr2d--TKPYlXyE,814
|
|
51
|
+
graflo/filter/onto.py,sha256=iLFPXqiBZ1FTQbaHBxHD8gatFk024yGXYHCzyxkbaMw,16791
|
|
52
|
+
graflo/plot/__init__.py,sha256=ITlvw8_FRNXPqtjiou946Hv923iixGvR6c4Uqrg-L2o,485
|
|
53
|
+
graflo/plot/plotter.py,sha256=yv-tyKJViLU3CKg8lfBWJ8_WCxdcnGgY1G_VzhHq0lE,17710
|
|
54
|
+
graflo/util/__init__.py,sha256=aZsSxHD1vzADUvzHVuYrx2TL-EUye7tcw2IeX81EB38,706
|
|
55
|
+
graflo/util/chunker.py,sha256=d379ILBggFl7KxX8-lc6ID1iVnbfj5DM5IUPfa2wLuI,22244
|
|
56
|
+
graflo/util/merge.py,sha256=mFn6GU6nL99Ug2PxFjuBJBwaPQPotP5MSB822YS34HI,5619
|
|
57
|
+
graflo/util/misc.py,sha256=Fwl8HhbBm-ZXbqDotnZ39gDPoTfUKeonZAi4pPWY99M,1171
|
|
58
|
+
graflo/util/onto.py,sha256=RlVDrQ_uX_576qeqxkQs7_2przbq8F6uR8pp4oOCFfo,11767
|
|
59
|
+
graflo/util/transform.py,sha256=zufL-COvnsHFK4vTMW4CPZLt1j0a2o4jEWjPs9Nw1lI,12739
|
|
60
|
+
graflo-1.3.3.dist-info/METADATA,sha256=YANCi3rU4XdTXm0FDArfoz33U82zl52kyY72lCsIehE,6233
|
|
61
|
+
graflo-1.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
62
|
+
graflo-1.3.3.dist-info/entry_points.txt,sha256=kjDXqHgIrppqxQe6RO6XjLrq7VL50AaVHhLg7c95Oxc,190
|
|
63
|
+
graflo-1.3.3.dist-info/licenses/LICENSE,sha256=ILn9MXR5AfuRRtOF8abQWhm0wO8kckf4IBdc6mKaRG8,5593
|
|
64
|
+
graflo-1.3.3.dist-info/RECORD,,
|