graflo 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graflo/README.md +18 -0
- graflo/__init__.py +70 -0
- graflo/architecture/__init__.py +38 -0
- graflo/architecture/actor.py +1120 -0
- graflo/architecture/actor_util.py +450 -0
- graflo/architecture/edge.py +297 -0
- graflo/architecture/onto.py +374 -0
- graflo/architecture/resource.py +161 -0
- graflo/architecture/schema.py +136 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +93 -0
- graflo/architecture/vertex.py +586 -0
- graflo/caster.py +655 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +194 -0
- graflo/cli/manage_dbs.py +197 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/data_source/__init__.py +48 -0
- graflo/data_source/api.py +339 -0
- graflo/data_source/base.py +97 -0
- graflo/data_source/factory.py +298 -0
- graflo/data_source/file.py +133 -0
- graflo/data_source/memory.py +72 -0
- graflo/data_source/registry.py +82 -0
- graflo/data_source/sql.py +185 -0
- graflo/db/__init__.py +44 -0
- graflo/db/arango/__init__.py +22 -0
- graflo/db/arango/conn.py +1026 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/conn.py +377 -0
- graflo/db/connection/__init__.py +6 -0
- graflo/db/connection/config_mapping.py +18 -0
- graflo/db/connection/onto.py +688 -0
- graflo/db/connection/wsgi.py +29 -0
- graflo/db/manager.py +119 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +639 -0
- graflo/db/postgres/__init__.py +156 -0
- graflo/db/postgres/conn.py +425 -0
- graflo/db/postgres/resource_mapping.py +139 -0
- graflo/db/postgres/schema_inference.py +245 -0
- graflo/db/postgres/types.py +148 -0
- graflo/db/tigergraph/__init__.py +9 -0
- graflo/db/tigergraph/conn.py +2212 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +525 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +190 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +556 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +751 -0
- graflo/util/merge.py +150 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +332 -0
- graflo/util/transform.py +448 -0
- graflo-1.3.3.dist-info/METADATA +190 -0
- graflo-1.3.3.dist-info/RECORD +64 -0
- graflo-1.3.3.dist-info/WHEEL +4 -0
- graflo-1.3.3.dist-info/entry_points.txt +5 -0
- graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""PostgreSQL database implementation.
|
|
2
|
+
|
|
3
|
+
This package provides PostgreSQL-specific implementations for schema introspection
|
|
4
|
+
and connection management. It focuses on reading and analyzing 3NF schemas to identify
|
|
5
|
+
vertex-like and edge-like tables, and inferring graflo Schema objects.
|
|
6
|
+
|
|
7
|
+
Key Components:
|
|
8
|
+
- PostgresConnection: PostgreSQL connection and schema introspection implementation
|
|
9
|
+
- PostgresSchemaInferencer: Infers graflo Schema from PostgreSQL schemas
|
|
10
|
+
- PostgresResourceMapper: Maps PostgreSQL tables to graflo Resources
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
>>> from graflo.db.postgres import PostgresConnection, infer_schema_from_postgres
|
|
14
|
+
>>> from graflo.db.connection.onto import PostgresConfig
|
|
15
|
+
>>> config = PostgresConfig.from_docker_env()
|
|
16
|
+
>>> conn = PostgresConnection(config)
|
|
17
|
+
>>> schema = infer_schema_from_postgres(conn, schema_name="public")
|
|
18
|
+
>>> conn.close()
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from .conn import PostgresConnection
|
|
22
|
+
from .resource_mapping import PostgresResourceMapper
|
|
23
|
+
from .schema_inference import PostgresSchemaInferencer
|
|
24
|
+
|
|
25
|
+
# Public API of this package: the three classes imported above plus the
# convenience functions defined further down in this module.
__all__ = [
    "PostgresConnection",
    "PostgresSchemaInferencer",
    "PostgresResourceMapper",
    "infer_schema_from_postgres",
    "create_resources_from_postgres",
    "create_patterns_from_postgres",
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def infer_schema_from_postgres(
    conn: PostgresConnection, schema_name: str | None = None, db_flavor=None
):
    """Introspect a PostgreSQL schema and build a graflo Schema from it.

    The function chains three steps: schema introspection on ``conn``, schema
    inference via ``PostgresSchemaInferencer``, and resource creation via
    ``PostgresResourceMapper``. The resources are attached to the schema, which
    is then re-initialized before being returned.

    Args:
        conn: PostgresConnection instance
        schema_name: Schema name to introspect; passed unchanged to both the
            introspection and the inference step
        db_flavor: Target database flavor (``DBFlavor.ARANGO`` when None)

    Returns:
        Schema: Inferred schema with vertices, edges, and resources
    """
    # Imported here rather than at module level, as in the original code.
    from graflo.onto import DBFlavor

    target_flavor = DBFlavor.ARANGO if db_flavor is None else db_flavor

    introspected = conn.introspect_schema(schema_name=schema_name)

    inferred = PostgresSchemaInferencer(db_flavor=target_flavor).infer_schema(
        introspected, schema_name=schema_name
    )

    # Derive one resource per introspected table and attach them to the schema.
    inferred.resources = PostgresResourceMapper().map_tables_to_resources(
        introspected, inferred.vertex_config, inferred.edge_config
    )

    # Re-run initialization so the schema picks up the newly assigned resources.
    inferred.__post_init__()
    return inferred
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def create_resources_from_postgres(
    conn: PostgresConnection, schema, schema_name: str | None = None
):
    """Map the tables of a PostgreSQL schema to Resources for a given Schema.

    Args:
        conn: PostgresConnection instance used for introspection
        schema: Existing Schema object; its ``vertex_config`` and
            ``edge_config`` are handed to the resource mapper
        schema_name: Schema name to introspect

    Returns:
        list[Resource]: List of Resources for PostgreSQL tables
    """
    introspected = conn.introspect_schema(schema_name=schema_name)
    return PostgresResourceMapper().map_tables_to_resources(
        introspected, schema.vertex_config, schema.edge_config
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def create_patterns_from_postgres(
    conn: PostgresConnection, schema_name: str | None = None
):
    """Build a Patterns object with one TablePattern per introspected table.

    The connection config is stored under the key ``("default", <schema>)``,
    and every vertex and edge table gets a TablePattern whose resource name
    equals the table name.

    Args:
        conn: PostgresConnection instance
        schema_name: Schema name to introspect

    Returns:
        Patterns: Patterns object with TablePattern instances for all tables
    """
    from graflo.util.onto import Patterns, TablePattern

    introspected = conn.introspect_schema(schema_name=schema_name)
    result = Patterns()

    # An explicit argument wins; otherwise use the schema reported by the
    # introspection result, falling back to "public".
    effective_schema = schema_name or introspected.get("schema_name", "public")

    config_key = "default"
    result.postgres_configs[(config_key, effective_schema)] = conn.config

    # Vertex tables are registered first, then edge tables; both get the same
    # treatment, so a single loop handles the two groups.
    for group in ("vertex_tables", "edge_tables"):
        for entry in introspected.get(group, []):
            name = entry["name"]
            result.patterns[name] = TablePattern(
                table_name=name,
                schema_name=effective_schema,
                resource_name=name,
            )
            result.postgres_table_configs[name] = (
                config_key,
                effective_schema,
                name,
            )

    return result
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
"""PostgreSQL connection implementation for schema introspection.
|
|
2
|
+
|
|
3
|
+
This module implements PostgreSQL connection and schema introspection functionality,
|
|
4
|
+
specifically designed to analyze 3NF schemas and identify vertex-like and edge-like tables.
|
|
5
|
+
|
|
6
|
+
Key Features:
|
|
7
|
+
- Connection management using psycopg2
|
|
8
|
+
- Schema introspection (tables, columns, constraints)
|
|
9
|
+
- Vertex/edge table detection heuristics
|
|
10
|
+
- Structured schema information extraction
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
>>> from graflo.db.postgres import PostgresConnection
|
|
14
|
+
>>> from graflo.db.connection.onto import PostgresConfig
|
|
15
|
+
>>> config = PostgresConfig.from_docker_env()
|
|
16
|
+
>>> conn = PostgresConnection(config)
|
|
17
|
+
>>> schema_info = conn.introspect_schema()
|
|
18
|
+
>>> print(schema_info["vertex_tables"])
|
|
19
|
+
>>> conn.close()
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
from typing import Any
|
|
24
|
+
from urllib.parse import urlparse
|
|
25
|
+
|
|
26
|
+
import psycopg2
|
|
27
|
+
from psycopg2.extras import RealDictCursor
|
|
28
|
+
|
|
29
|
+
from graflo.db.connection.onto import PostgresConfig
|
|
30
|
+
|
|
31
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class PostgresConnection:
    """PostgreSQL connection for schema introspection.

    Wraps a psycopg2 connection and offers helpers that list tables, columns,
    primary keys and foreign keys of a schema, and that classify the tables of
    a 3NF schema as vertex-like or edge-like.

    Instances can be used as context managers; the connection is closed when
    the ``with`` block exits.

    Attributes:
        config: PostgreSQL connection configuration
        conn: psycopg2 connection instance
    """

    def __init__(self, config: PostgresConfig):
        """Open a psycopg2 connection described by ``config``.

        Explicit ``config`` fields (database, username, password) take
        precedence over the matching URI components. Missing values fall back
        to host "localhost", port 5432, database "postgres", user "postgres".

        Args:
            config: PostgreSQL connection configuration containing URI and credentials

        Raises:
            ValueError: If ``config.uri`` is None.
            Exception: Any error raised by ``psycopg2.connect`` is logged and
                re-raised unchanged.
        """
        # Local import keeps this block self-contained; only __init__ needs it.
        from urllib.parse import unquote

        self.config = config

        if config.uri is None:
            raise ValueError("PostgreSQL connection requires a URI to be configured")

        parsed = urlparse(config.uri)

        # urlparse() leaves percent-encoding untouched, so URI-derived values
        # such as "p%40ss" must be decoded before being handed to psycopg2 as
        # plain keyword arguments.
        uri_database = unquote(parsed.path.lstrip("/"))
        uri_user = unquote(parsed.username) if parsed.username else None
        uri_password = unquote(parsed.password) if parsed.password else None

        database = config.database or uri_database or "postgres"
        conn_params: dict[str, Any] = {
            "host": parsed.hostname or "localhost",
            "port": parsed.port or 5432,
            "database": database,
            "user": config.username or uri_user or "postgres",
        }

        # Only pass a password when one is actually available.
        password = config.password or uri_password
        if password:
            conn_params["password"] = password

        try:
            self.conn = psycopg2.connect(**conn_params)
            logger.info(f"Successfully connected to PostgreSQL database '{database}'")
        except Exception as e:
            logger.error(f"Failed to connect to PostgreSQL: {e}", exc_info=True)
            raise

    def __enter__(self):
        """Return the connection itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection on leaving a ``with`` block; never suppresses errors."""
        self.close()

    def _resolve_schema(self, schema_name: str | None) -> str:
        """Return ``schema_name``, or the configured schema / 'public' when it is None."""
        if schema_name is None:
            return self.config.schema_name or "public"
        return schema_name

    def _fetch_all(
        self, query: str, params: tuple | None = None, *, as_dicts: bool = True
    ) -> list[Any]:
        """Run ``query`` and return all rows.

        Args:
            query: SQL text to execute
            params: Optional parameters; only passed to ``execute`` when truthy
            as_dicts: When True rows are fetched through ``RealDictCursor`` and
                returned as plain dicts, otherwise the default cursor's rows
                are returned as-is

        Returns:
            List of rows (dicts or default cursor rows)

        Raises:
            psycopg2.Error: Re-raised after the transaction has been rolled back.
        """
        cursor_kwargs = {"cursor_factory": RealDictCursor} if as_dicts else {}
        try:
            with self.conn.cursor(**cursor_kwargs) as cursor:
                if params:
                    cursor.execute(query, params)
                else:
                    cursor.execute(query)
                rows = cursor.fetchall()
        except psycopg2.Error:
            # A failed statement leaves the transaction aborted; without a
            # rollback every later query on this connection would fail too.
            try:
                self.conn.rollback()
            except psycopg2.Error:
                logger.warning(
                    "Rollback after failed PostgreSQL query also failed",
                    exc_info=True,
                )
            raise

        if as_dicts:
            return [dict(row) for row in rows]
        return rows

    @staticmethod
    def _mark_primary_keys(columns: list[dict[str, Any]], pk_columns: list[str]) -> None:
        """Set ``col["is_pk"]`` on every column dict, in place."""
        pk_set = set(pk_columns)
        for col in columns:
            col["is_pk"] = col["name"] in pk_set

    def read(self, query: str, params: tuple | None = None) -> list[dict[str, Any]]:
        """Execute a SELECT query and return results as a list of dictionaries.

        Args:
            query: SQL SELECT query to execute
            params: Optional tuple of parameters for parameterized queries

        Returns:
            List of dictionaries, where each dictionary represents a row with column names as keys
        """
        return self._fetch_all(query, params)

    def close(self):
        """Close the PostgreSQL connection.

        Errors raised while closing are logged as warnings and not propagated.
        """
        # hasattr guards against __init__ having failed before conn was set.
        if hasattr(self, "conn") and self.conn:
            try:
                self.conn.close()
                logger.debug("PostgreSQL connection closed")
            except Exception as e:
                logger.warning(
                    f"Error closing PostgreSQL connection: {e}", exc_info=True
                )

    def get_tables(self, schema_name: str | None = None) -> list[dict[str, Any]]:
        """Get all base tables in the specified schema.

        Args:
            schema_name: Schema name to query. If None, uses 'public' or config schema_name.

        Returns:
            List of table information dictionaries with keys: table_name, table_schema
        """
        schema_name = self._resolve_schema(schema_name)

        query = """
            SELECT table_name, table_schema
            FROM information_schema.tables
            WHERE table_schema = %s
            AND table_type = 'BASE TABLE'
            ORDER BY table_name;
        """

        return self._fetch_all(query, (schema_name,))

    def get_table_columns(
        self, table_name: str, schema_name: str | None = None
    ) -> list[dict[str, Any]]:
        """Get columns for a specific table with types and descriptions.

        Args:
            table_name: Name of the table
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            List of column information dictionaries with keys:
            name, type, description, is_nullable, column_default
        """
        schema_name = self._resolve_schema(schema_name)

        query = """
            SELECT
            c.column_name as name,
            c.data_type as type,
            c.udt_name as udt_name,
            c.character_maximum_length,
            c.is_nullable,
            c.column_default,
            COALESCE(d.description, '') as description
            FROM information_schema.columns c
            LEFT JOIN pg_catalog.pg_statio_all_tables st
            ON st.schemaname = c.table_schema
            AND st.relname = c.table_name
            LEFT JOIN pg_catalog.pg_description d
            ON d.objoid = st.relid
            AND d.objsubid = c.ordinal_position
            WHERE c.table_schema = %s
            AND c.table_name = %s
            ORDER BY c.ordinal_position;
        """

        columns = []
        for col in self._fetch_all(query, (schema_name, table_name)):
            # Helper fields are removed from the returned dict.
            max_length = col.pop("character_maximum_length", None)
            udt_name = col.pop("udt_name", None)

            # Format type with length if applicable.
            if max_length:
                col["type"] = f"{col['type']}({max_length})"
            # udt_name (e.g. varchar, int4) wins whenever it differs from the
            # possibly length-qualified type computed above.
            if udt_name and udt_name != col["type"]:
                col["type"] = udt_name

            columns.append(col)
        return columns

    def get_primary_keys(
        self, table_name: str, schema_name: str | None = None
    ) -> list[str]:
        """Get primary key columns for a table.

        Args:
            table_name: Name of the table
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            List of primary key column names, ordered by position in the key
        """
        schema_name = self._resolve_schema(schema_name)

        query = """
            SELECT kcu.column_name
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
            ON tc.constraint_name = kcu.constraint_name
            AND tc.table_schema = kcu.table_schema
            WHERE tc.constraint_type = 'PRIMARY KEY'
            AND tc.table_schema = %s
            AND tc.table_name = %s
            ORDER BY kcu.ordinal_position;
        """

        rows = self._fetch_all(query, (schema_name, table_name), as_dicts=False)
        return [row[0] for row in rows]

    def get_foreign_keys(
        self, table_name: str, schema_name: str | None = None
    ) -> list[dict[str, Any]]:
        """Get foreign key relationships for a table.

        Args:
            table_name: Name of the table
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            List of foreign key dictionaries with keys:
            column, references_table, references_column, constraint_name
        """
        schema_name = self._resolve_schema(schema_name)

        # NOTE(review): the joins match on constraint name + schema only.
        # PostgreSQL does not enforce schema-unique constraint names, and a
        # multi-column foreign key pairs every local column with every
        # referenced column, so the row count may be inflated - confirm
        # before relying on len(result) for such schemas.
        query = """
            SELECT
            kcu.column_name as column,
            ccu.table_name as references_table,
            ccu.column_name as references_column,
            tc.constraint_name
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
            ON tc.constraint_name = kcu.constraint_name
            AND tc.table_schema = kcu.table_schema
            JOIN information_schema.constraint_column_usage ccu
            ON ccu.constraint_name = tc.constraint_name
            AND ccu.table_schema = tc.table_schema
            WHERE tc.constraint_type = 'FOREIGN KEY'
            AND tc.table_schema = %s
            AND tc.table_name = %s
            ORDER BY kcu.ordinal_position;
        """

        return self._fetch_all(query, (schema_name, table_name))

    def detect_vertex_tables(
        self, schema_name: str | None = None
    ) -> list[dict[str, Any]]:
        """Detect vertex-like tables in the schema.

        Heuristic: Tables with a primary key and descriptive columns
        (not just foreign keys). These represent entities.

        Note: Tables with exactly 2 foreign keys are considered edge tables
        and are excluded from vertex tables.

        Args:
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            List of vertex table information dictionaries with keys:
            name, schema, columns, primary_key, foreign_keys
        """
        schema_name = self._resolve_schema(schema_name)

        vertex_tables = []
        for table_info in self.get_tables(schema_name):
            table_name = table_info["table_name"]

            # Cheapest checks first so discarded tables cost fewer queries.
            pk_columns = self.get_primary_keys(table_name, schema_name)
            if not pk_columns:
                continue  # Skip tables without primary keys

            fk_columns = self.get_foreign_keys(table_name, schema_name)
            if len(fk_columns) == 2:
                continue  # Exactly 2 FKs: treated as an edge table

            all_columns = self.get_table_columns(table_name, schema_name)

            # A vertex needs at least one column that is neither PK nor FK.
            key_column_names = {fk["column"] for fk in fk_columns} | set(pk_columns)
            if not any(col["name"] not in key_column_names for col in all_columns):
                continue

            self._mark_primary_keys(all_columns, pk_columns)
            vertex_tables.append(
                {
                    "name": table_name,
                    "schema": schema_name,
                    "columns": all_columns,
                    "primary_key": pk_columns,
                    "foreign_keys": fk_columns,
                }
            )

        return vertex_tables

    def detect_edge_tables(
        self, schema_name: str | None = None
    ) -> list[dict[str, Any]]:
        """Detect edge-like tables in the schema.

        Heuristic: Junction tables with exactly 2 foreign keys pointing to other tables.
        These represent relationships between entities.

        Args:
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            List of edge table information dictionaries with source_table and target_table
        """
        schema_name = self._resolve_schema(schema_name)

        edge_tables = []
        for table_info in self.get_tables(schema_name):
            table_name = table_info["table_name"]
            fk_columns = self.get_foreign_keys(table_name, schema_name)

            # Edge-like tables have exactly 2 foreign keys
            if len(fk_columns) != 2:
                continue

            all_columns = self.get_table_columns(table_name, schema_name)
            pk_columns = self.get_primary_keys(table_name, schema_name)
            self._mark_primary_keys(all_columns, pk_columns)

            # Source/target follow the row order returned by get_foreign_keys.
            source_fk, target_fk = fk_columns

            edge_tables.append(
                {
                    "name": table_name,
                    "schema": schema_name,
                    "columns": all_columns,
                    "primary_key": pk_columns,
                    "foreign_keys": fk_columns,
                    "source_table": source_fk["references_table"],
                    "target_table": target_fk["references_table"],
                    "source_column": source_fk["column"],
                    "target_column": target_fk["column"],
                }
            )

        return edge_tables

    def introspect_schema(self, schema_name: str | None = None) -> dict[str, Any]:
        """Introspect the database schema and return structured information.

        This is the main method that analyzes the schema and returns information
        about vertex-like and edge-like tables.

        Args:
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            Dictionary with keys:
            - vertex_tables: List of vertex table information
            - edge_tables: List of edge table information
            - schema_name: The schema name that was analyzed
        """
        schema_name = self._resolve_schema(schema_name)

        logger.info(f"Introspecting PostgreSQL schema '{schema_name}'")

        vertex_tables = self.detect_vertex_tables(schema_name)
        edge_tables = self.detect_edge_tables(schema_name)

        # Idempotent re-marking: every column carries "is_pk" in the result,
        # whatever the detect_* methods did.
        for table_info in vertex_tables + edge_tables:
            self._mark_primary_keys(table_info["columns"], table_info["primary_key"])

        result = {
            "vertex_tables": vertex_tables,
            "edge_tables": edge_tables,
            "schema_name": schema_name,
        }

        logger.info(
            f"Found {len(vertex_tables)} vertex-like tables and {len(edge_tables)} edge-like tables"
        )

        return result
|