graflo-1.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1120 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +297 -0
  7. graflo/architecture/onto.py +374 -0
  8. graflo/architecture/resource.py +161 -0
  9. graflo/architecture/schema.py +136 -0
  10. graflo/architecture/transform.py +292 -0
  11. graflo/architecture/util.py +93 -0
  12. graflo/architecture/vertex.py +586 -0
  13. graflo/caster.py +655 -0
  14. graflo/cli/__init__.py +14 -0
  15. graflo/cli/ingest.py +194 -0
  16. graflo/cli/manage_dbs.py +197 -0
  17. graflo/cli/plot_schema.py +132 -0
  18. graflo/cli/xml2json.py +93 -0
  19. graflo/data_source/__init__.py +48 -0
  20. graflo/data_source/api.py +339 -0
  21. graflo/data_source/base.py +97 -0
  22. graflo/data_source/factory.py +298 -0
  23. graflo/data_source/file.py +133 -0
  24. graflo/data_source/memory.py +72 -0
  25. graflo/data_source/registry.py +82 -0
  26. graflo/data_source/sql.py +185 -0
  27. graflo/db/__init__.py +44 -0
  28. graflo/db/arango/__init__.py +22 -0
  29. graflo/db/arango/conn.py +1026 -0
  30. graflo/db/arango/query.py +180 -0
  31. graflo/db/arango/util.py +88 -0
  32. graflo/db/conn.py +377 -0
  33. graflo/db/connection/__init__.py +6 -0
  34. graflo/db/connection/config_mapping.py +18 -0
  35. graflo/db/connection/onto.py +688 -0
  36. graflo/db/connection/wsgi.py +29 -0
  37. graflo/db/manager.py +119 -0
  38. graflo/db/neo4j/__init__.py +16 -0
  39. graflo/db/neo4j/conn.py +639 -0
  40. graflo/db/postgres/__init__.py +156 -0
  41. graflo/db/postgres/conn.py +425 -0
  42. graflo/db/postgres/resource_mapping.py +139 -0
  43. graflo/db/postgres/schema_inference.py +245 -0
  44. graflo/db/postgres/types.py +148 -0
  45. graflo/db/tigergraph/__init__.py +9 -0
  46. graflo/db/tigergraph/conn.py +2212 -0
  47. graflo/db/util.py +49 -0
  48. graflo/filter/__init__.py +21 -0
  49. graflo/filter/onto.py +525 -0
  50. graflo/logging.conf +22 -0
  51. graflo/onto.py +190 -0
  52. graflo/plot/__init__.py +17 -0
  53. graflo/plot/plotter.py +556 -0
  54. graflo/util/__init__.py +23 -0
  55. graflo/util/chunker.py +751 -0
  56. graflo/util/merge.py +150 -0
  57. graflo/util/misc.py +37 -0
  58. graflo/util/onto.py +332 -0
  59. graflo/util/transform.py +448 -0
  60. graflo-1.3.3.dist-info/METADATA +190 -0
  61. graflo-1.3.3.dist-info/RECORD +64 -0
  62. graflo-1.3.3.dist-info/WHEEL +4 -0
  63. graflo-1.3.3.dist-info/entry_points.txt +5 -0
  64. graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,139 @@
+"""Resource mapping from PostgreSQL tables to graflo Resources.
+
+This module provides functionality to map PostgreSQL tables (both vertex and edge tables)
+to graflo Resource objects that can be used for data ingestion.
+"""
+
+import logging
+from typing import Any
+
+from graflo.architecture.edge import EdgeConfig
+from graflo.architecture.resource import Resource
+from graflo.architecture.vertex import VertexConfig
+
+logger = logging.getLogger(__name__)
+
+
+class PostgresResourceMapper:
+    """Maps PostgreSQL tables to graflo Resources.
+
+    This class creates Resource objects that map PostgreSQL tables to graph vertices
+    and edges, enabling ingestion of relational data into graph databases.
+    """
+
+    def create_vertex_resource(self, table_name: str, vertex_name: str) -> Resource:
+        """Create a Resource for a vertex table.
+
+        Args:
+            table_name: Name of the PostgreSQL table
+            vertex_name: Name of the vertex type (typically same as table_name)
+
+        Returns:
+            Resource: Resource configured to ingest vertex data
+        """
+        # Create apply list with VertexActor
+        # The actor wrapper will interpret {"vertex": vertex_name} as VertexActor
+        apply = [{"vertex": vertex_name}]
+
+        resource = Resource(
+            resource_name=table_name,
+            apply=apply,
+        )
+
+        logger.debug(
+            f"Created vertex resource '{table_name}' for vertex '{vertex_name}'"
+        )
+
+        return resource
+
+    def create_edge_resource(
+        self,
+        edge_table_info: dict[str, Any],
+        vertex_config: VertexConfig,
+    ) -> Resource:
+        """Create a Resource for an edge table.
+
+        Args:
+            edge_table_info: Edge table information from introspection
+            vertex_config: Vertex configuration for source/target validation
+
+        Returns:
+            Resource: Resource configured to ingest edge data
+        """
+        table_name = edge_table_info["name"]
+        source_table = edge_table_info["source_table"]
+        target_table = edge_table_info["target_table"]
+
+        # Verify source and target vertices exist
+        if source_table not in vertex_config.vertex_set:
+            raise ValueError(
+                f"Source vertex '{source_table}' for edge table '{table_name}' "
+                f"not found in vertex config"
+            )
+
+        if target_table not in vertex_config.vertex_set:
+            raise ValueError(
+                f"Target vertex '{target_table}' for edge table '{table_name}' "
+                f"not found in vertex config"
+            )
+
+        # Create apply list with EdgeActor
+        # The actor wrapper will interpret {"source": source, "target": target} as EdgeActor
+        apply = [{"source": source_table, "target": target_table}]
+
+        resource = Resource(
+            resource_name=table_name,
+            apply=apply,
+        )
+
+        logger.debug(
+            f"Created edge resource '{table_name}' from {source_table} to {target_table}"
+        )
+
+        return resource
+
+    def map_tables_to_resources(
+        self,
+        introspection_result: dict[str, Any],
+        vertex_config: VertexConfig,
+        edge_config: EdgeConfig,
+    ) -> list[Resource]:
+        """Map all PostgreSQL tables to Resources.
+
+        Creates Resources for both vertex and edge tables, enabling ingestion
+        of the entire database schema.
+
+        Args:
+            introspection_result: Result from PostgresConnection.introspect_schema()
+            vertex_config: Inferred vertex configuration
+            edge_config: Inferred edge configuration
+
+        Returns:
+            list[Resource]: List of Resources for all tables
+        """
+        resources = []
+
+        # Map vertex tables to resources
+        vertex_tables = introspection_result.get("vertex_tables", [])
+        for table_info in vertex_tables:
+            table_name = table_info["name"]
+            vertex_name = table_name  # Use table name as vertex name
+            resource = self.create_vertex_resource(table_name, vertex_name)
+            resources.append(resource)
+
+        # Map edge tables to resources
+        edge_tables = introspection_result.get("edge_tables", [])
+        for edge_table_info in edge_tables:
+            try:
+                resource = self.create_edge_resource(edge_table_info, vertex_config)
+                resources.append(resource)
+            except ValueError as e:
+                logger.warning(f"Skipping edge resource creation: {e}")
+                continue
+
+        logger.info(
+            f"Mapped {len(vertex_tables)} vertex tables and {len(edge_tables)} edge tables "
+            f"to {len(resources)} resources"
+        )
+
+        return resources
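A minimal usage sketch for PostgresResourceMapper, assuming the introspection_result shape described in the docstrings above; the table and column names (users, groups, memberships, joined_at) are hypothetical, and the vertex/edge configs are built with PostgresSchemaInferencer from schema_inference.py (shown below).

from graflo.db.postgres.resource_mapping import PostgresResourceMapper
from graflo.db.postgres.schema_inference import PostgresSchemaInferencer

# Hypothetical introspection result; the real shape comes from
# PostgresConnection.introspect_schema() (graflo/db/postgres/conn.py, not shown here).
introspection_result = {
    "schema_name": "public",
    "vertex_tables": [
        {"name": "users", "columns": [{"name": "id", "type": "int4"}], "primary_key": ["id"]},
        {"name": "groups", "columns": [{"name": "id", "type": "int4"}], "primary_key": ["id"]},
    ],
    "edge_tables": [
        {
            "name": "memberships",
            "source_table": "users",
            "target_table": "groups",
            "columns": [
                {"name": "user_id", "type": "int4"},
                {"name": "group_id", "type": "int4"},
                {"name": "joined_at", "type": "timestamptz"},
            ],
            "primary_key": ["user_id", "group_id"],
            "foreign_keys": [{"column": "user_id"}, {"column": "group_id"}],
        }
    ],
}

# Build vertex/edge configs with the inferencer, then map each table to a Resource.
inferencer = PostgresSchemaInferencer()
vertex_config = inferencer.infer_vertex_config(introspection_result)
edge_config = inferencer.infer_edge_config(introspection_result, vertex_config)

mapper = PostgresResourceMapper()
resources = mapper.map_tables_to_resources(introspection_result, vertex_config, edge_config)
# One Resource per table is expected: users, groups, memberships.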
@@ -0,0 +1,245 @@
+"""Schema inference from PostgreSQL database introspection.
+
+This module provides functionality to infer graflo Schema objects from PostgreSQL
+3NF database schemas by analyzing table structures, relationships, and column types.
+"""
+
+import logging
+from typing import Any, Optional
+
+from graflo.architecture.edge import Edge, EdgeConfig, WeightConfig
+from graflo.architecture.onto import Index, IndexType
+from graflo.architecture.schema import Schema, SchemaMetadata
+from graflo.architecture.vertex import Field, Vertex, VertexConfig
+from graflo.onto import DBFlavor
+
+from .types import PostgresTypeMapper
+
+logger = logging.getLogger(__name__)
+
+
+class PostgresSchemaInferencer:
+    """Infers graflo Schema from PostgreSQL schema introspection results.
+
+    This class takes the output from PostgresConnection.introspect_schema() and
+    generates a complete graflo Schema with vertices, edges, and weights.
+    """
+
+    def __init__(self, db_flavor: DBFlavor = DBFlavor.ARANGO):
+        """Initialize the schema inferencer.
+
+        Args:
+            db_flavor: Target database flavor for the inferred schema
+        """
+        self.db_flavor = db_flavor
+        self.type_mapper = PostgresTypeMapper()
+
+    def infer_vertex_config(self, introspection_result: dict[str, Any]) -> VertexConfig:
+        """Infer VertexConfig from vertex tables.
+
+        Args:
+            introspection_result: Result from PostgresConnection.introspect_schema()
+
+        Returns:
+            VertexConfig: Inferred vertex configuration
+        """
+        vertex_tables = introspection_result.get("vertex_tables", [])
+        vertices = []
+
+        for table_info in vertex_tables:
+            table_name = table_info["name"]
+            columns = table_info["columns"]
+            pk_columns = table_info["primary_key"]
+
+            # Create fields from columns
+            fields = []
+            for col in columns:
+                field_name = col["name"]
+                field_type = self.type_mapper.map_type(col["type"])
+                fields.append(Field(name=field_name, type=field_type))
+
+            # Create indexes from primary key
+            indexes = []
+            if pk_columns:
+                indexes.append(
+                    Index(fields=pk_columns, type=IndexType.PERSISTENT, unique=True)
+                )
+
+            # Create vertex
+            vertex = Vertex(
+                name=table_name,
+                dbname=table_name,
+                fields=fields,
+                indexes=indexes,
+            )
+
+            vertices.append(vertex)
+            logger.debug(
+                f"Inferred vertex '{table_name}' with {len(fields)} fields and "
+                f"{len(indexes)} indexes"
+            )
+
+        return VertexConfig(vertices=vertices, db_flavor=self.db_flavor)
+
+    def infer_edge_weights(
+        self, edge_table_info: dict[str, Any]
+    ) -> Optional[WeightConfig]:
+        """Infer edge weights from edge table columns.
+
+        Args:
+            edge_table_info: Edge table information from introspection
+
+        Returns:
+            WeightConfig if there are weight columns, None otherwise
+        """
+        columns = edge_table_info["columns"]
+        pk_columns = set(edge_table_info["primary_key"])
+        fk_columns = {fk["column"] for fk in edge_table_info["foreign_keys"]}
+
+        # Find non-PK, non-FK columns (these become weights)
+        weight_columns = [
+            col
+            for col in columns
+            if col["name"] not in pk_columns and col["name"] not in fk_columns
+        ]
+
+        if not weight_columns:
+            return None
+
+        # Extract column names as direct weights
+        direct_weights = [col["name"] for col in weight_columns]
+
+        logger.debug(
+            f"Inferred {len(direct_weights)} weights for edge table "
+            f"'{edge_table_info['name']}': {direct_weights}"
+        )
+
+        return WeightConfig(direct=direct_weights)
+
+    def infer_edge_config(
+        self,
+        introspection_result: dict[str, Any],
+        vertex_config: VertexConfig,
+    ) -> EdgeConfig:
+        """Infer EdgeConfig from edge tables.
+
+        Args:
+            introspection_result: Result from PostgresConnection.introspect_schema()
+            vertex_config: Inferred vertex configuration
+
+        Returns:
+            EdgeConfig: Inferred edge configuration
+        """
+        edge_tables = introspection_result.get("edge_tables", [])
+        edges = []
+
+        vertex_names = vertex_config.vertex_set
+
+        for edge_table_info in edge_tables:
+            table_name = edge_table_info["name"]
+            source_table = edge_table_info["source_table"]
+            target_table = edge_table_info["target_table"]
+            fk_columns = edge_table_info["foreign_keys"]
+            pk_columns = edge_table_info["primary_key"]
+
+            # Verify source and target vertices exist
+            if source_table not in vertex_names:
+                logger.warning(
+                    f"Source vertex '{source_table}' for edge table '{table_name}' "
+                    f"not found in vertex config, skipping"
+                )
+                continue
+
+            if target_table not in vertex_names:
+                logger.warning(
+                    f"Target vertex '{target_table}' for edge table '{table_name}' "
+                    f"not found in vertex config, skipping"
+                )
+                continue
+
+            # Infer weights
+            weights = self.infer_edge_weights(edge_table_info)
+
+            # Create indexes from primary key and foreign keys
+            indexes = []
+            if pk_columns:
+                indexes.append(
+                    Index(fields=pk_columns, type=IndexType.PERSISTENT, unique=True)
+                )
+
+            # Add indexes for foreign keys (for efficient lookups)
+            # Note: Only add index if not already covered by primary key
+            pk_set = set(pk_columns)
+            for fk in fk_columns:
+                fk_column_name = fk["column"]
+                # Skip if FK column is part of primary key (already indexed)
+                if fk_column_name not in pk_set:
+                    indexes.append(
+                        Index(
+                            fields=[fk_column_name],
+                            type=IndexType.PERSISTENT,
+                            unique=False,
+                        )
+                    )
+
+            # Create edge
+            edge = Edge(
+                source=source_table,
+                target=target_table,
+                indexes=indexes,
+                weights=weights,
+                collection_name=table_name,
+            )
+
+            edges.append(edge)
+            logger.debug(
+                f"Inferred edge '{table_name}' from {source_table} to {target_table} "
+                f"with {len(indexes)} indexes"
+            )
+
+        return EdgeConfig(edges=edges)
+
+    def infer_schema(
+        self, introspection_result: dict[str, Any], schema_name: str | None = None
+    ) -> Schema:
+        """Infer complete Schema from PostgreSQL introspection.
+
+        Args:
+            introspection_result: Result from PostgresConnection.introspect_schema()
+            schema_name: Optional schema name (defaults to schema_name from introspection)
+
+        Returns:
+            Schema: Complete inferred schema with vertices, edges, and metadata
+        """
+        if schema_name is None:
+            schema_name = introspection_result.get("schema_name", "public")
+
+        logger.info(f"Inferring schema from PostgreSQL schema '{schema_name}'")
+
+        # Infer vertex configuration
+        vertex_config = self.infer_vertex_config(introspection_result)
+        logger.info(f"Inferred {len(vertex_config.vertices)} vertices")
+
+        # Infer edge configuration
+        edge_config = self.infer_edge_config(introspection_result, vertex_config)
+        edges_count = len(list(edge_config.edges_list()))
+        logger.info(f"Inferred {edges_count} edges")
+
+        # Create schema metadata
+        metadata = SchemaMetadata(name=schema_name)
+
+        # Create schema (resources will be added separately)
+        schema = Schema(
+            general=metadata,
+            vertex_config=vertex_config,
+            edge_config=edge_config,
+            resources=[],  # Resources will be created separately
+        )
+
+        logger.info(
+            f"Successfully inferred schema '{schema_name}' with "
+            f"{len(vertex_config.vertices)} vertices and "
+            f"{len(list(edge_config.edges_list()))} edges"
+        )
+
+        return schema
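Continuing the same hypothetical introspection_result from the sketch above, a short sketch of how the inferencer might be driven end to end; attribute access on WeightConfig is assumed, not verified.

from graflo.onto import DBFlavor
from graflo.db.postgres.schema_inference import PostgresSchemaInferencer

inferencer = PostgresSchemaInferencer(db_flavor=DBFlavor.ARANGO)

# "joined_at" is neither a primary-key nor a foreign-key column on "memberships",
# so it is inferred as an edge weight.
weights = inferencer.infer_edge_weights(introspection_result["edge_tables"][0])
# expected: weights.direct == ["joined_at"]  (assuming WeightConfig exposes `direct`)

# Full inference: vertices, edges (with indexes and weights), and schema metadata;
# resources are attached separately via PostgresResourceMapper (see above).
schema = inferencer.infer_schema(introspection_result)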
@@ -0,0 +1,148 @@
+"""Type mapping utilities for PostgreSQL to graflo type conversion.
+
+This module provides utilities for mapping PostgreSQL data types to graflo Field types,
+enabling automatic schema inference from PostgreSQL database schemas.
+"""
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class PostgresTypeMapper:
+    """Maps PostgreSQL data types to graflo Field types.
+
+    This class provides static methods for converting PostgreSQL type names
+    (from information_schema or pg_catalog) to graflo Field type strings.
+    """
+
+    # Mapping of PostgreSQL types to graflo Field types
+    TYPE_MAPPING = {
+        # Integer types
+        "integer": "INT",
+        "int": "INT",
+        "int4": "INT",
+        "smallint": "INT",
+        "int2": "INT",
+        "bigint": "INT",
+        "int8": "INT",
+        "serial": "INT",
+        "bigserial": "INT",
+        "smallserial": "INT",
+        # Floating point types
+        "real": "FLOAT",
+        "float4": "FLOAT",
+        "double precision": "FLOAT",
+        "float8": "FLOAT",
+        "numeric": "FLOAT",
+        "decimal": "FLOAT",
+        # Boolean
+        "boolean": "BOOL",
+        "bool": "BOOL",
+        # String types
+        "character varying": "STRING",
+        "varchar": "STRING",
+        "character": "STRING",
+        "char": "STRING",
+        "text": "STRING",
+        # Date/time types (mapped to DATETIME)
+        "timestamp": "DATETIME",
+        "timestamp without time zone": "DATETIME",
+        "timestamp with time zone": "DATETIME",
+        "timestamptz": "DATETIME",
+        "date": "DATETIME",
+        "time": "DATETIME",
+        "time without time zone": "DATETIME",
+        "time with time zone": "DATETIME",
+        "timetz": "DATETIME",
+        "interval": "STRING",  # Interval is a duration, keep as STRING
+        # JSON types
+        "json": "STRING",
+        "jsonb": "STRING",
+        # Binary types
+        "bytea": "STRING",
+        # UUID
+        "uuid": "STRING",
+    }
+
+    @classmethod
+    def map_type(cls, postgres_type: str) -> str:
+        """Map PostgreSQL type to graflo Field type.
+
+        Args:
+            postgres_type: PostgreSQL type name (e.g., 'int4', 'varchar', 'timestamp')
+
+        Returns:
+            str: graflo Field type (INT, FLOAT, BOOL, DATETIME, or STRING)
+        """
+        # Normalize type name: lowercase and remove length specifications
+        normalized = postgres_type.lower().strip()
+
+        # Remove length specifications like (255) or (10,2)
+        if "(" in normalized:
+            normalized = normalized.split("(")[0].strip()
+
+        # Check direct mapping
+        if normalized in cls.TYPE_MAPPING:
+            return cls.TYPE_MAPPING[normalized]
+
+        # Check for partial matches (e.g., "character varying" might be stored as "varying")
+        for pg_type, graflo_type in cls.TYPE_MAPPING.items():
+            if pg_type in normalized or normalized in pg_type:
+                logger.debug(
+                    f"Mapped PostgreSQL type '{postgres_type}' to graflo type '{graflo_type}' "
+                    f"(partial match with '{pg_type}')"
+                )
+                return graflo_type
+
+        # Default to STRING for unknown types
+        logger.warning(
+            f"Unknown PostgreSQL type '{postgres_type}', defaulting to STRING"
+        )
+        return "STRING"
+
+    @classmethod
+    def is_datetime_type(cls, postgres_type: str) -> bool:
+        """Check if a PostgreSQL type is a datetime type.
+
+        Args:
+            postgres_type: PostgreSQL type name
+
+        Returns:
+            bool: True if the type is a datetime-related type
+        """
+        normalized = postgres_type.lower().strip()
+        datetime_types = [
+            "timestamp",
+            "date",
+            "time",
+            "interval",
+            "timestamptz",
+            "timetz",
+        ]
+        return any(dt_type in normalized for dt_type in datetime_types)
+
+    @classmethod
+    def is_numeric_type(cls, postgres_type: str) -> bool:
+        """Check if a PostgreSQL type is a numeric type.
+
+        Args:
+            postgres_type: PostgreSQL type name
+
+        Returns:
+            bool: True if the type is numeric
+        """
+        normalized = postgres_type.lower().strip()
+        numeric_types = [
+            "integer",
+            "int",
+            "bigint",
+            "smallint",
+            "serial",
+            "real",
+            "double precision",
+            "numeric",
+            "decimal",
+            "float",
+        ]
+        return any(nt_type in normalized for nt_type in numeric_types)
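A few illustrative calls against PostgresTypeMapper; the expected results follow from the mapping table and normalization logic above (the input type names are arbitrary examples).

from graflo.db.postgres.types import PostgresTypeMapper

PostgresTypeMapper.map_type("varchar(255)")              # "STRING"   (length spec stripped)
PostgresTypeMapper.map_type("timestamp with time zone")  # "DATETIME"
PostgresTypeMapper.map_type("numeric(10,2)")             # "FLOAT"
PostgresTypeMapper.map_type("cidr")                      # "STRING"   (unknown type, warning logged)
PostgresTypeMapper.is_numeric_type("float8")             # True       ("float" substring match)
PostgresTypeMapper.is_datetime_type("date")              # True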
@@ -0,0 +1,9 @@
+"""TigerGraph database connection implementation.
+
+This package provides TigerGraph-specific database connection implementations
+and utilities for graph database operations.
+"""
+
+from .conn import TigerGraphConnection
+
+__all__ = ["TigerGraphConnection"]