graflo 1.3.7__py3-none-any.whl
- graflo/README.md +18 -0
- graflo/__init__.py +70 -0
- graflo/architecture/__init__.py +38 -0
- graflo/architecture/actor.py +1276 -0
- graflo/architecture/actor_util.py +450 -0
- graflo/architecture/edge.py +418 -0
- graflo/architecture/onto.py +376 -0
- graflo/architecture/onto_sql.py +54 -0
- graflo/architecture/resource.py +163 -0
- graflo/architecture/schema.py +135 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +89 -0
- graflo/architecture/vertex.py +562 -0
- graflo/caster.py +736 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +203 -0
- graflo/cli/manage_dbs.py +197 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/data_source/__init__.py +48 -0
- graflo/data_source/api.py +339 -0
- graflo/data_source/base.py +95 -0
- graflo/data_source/factory.py +304 -0
- graflo/data_source/file.py +148 -0
- graflo/data_source/memory.py +70 -0
- graflo/data_source/registry.py +82 -0
- graflo/data_source/sql.py +183 -0
- graflo/db/__init__.py +44 -0
- graflo/db/arango/__init__.py +22 -0
- graflo/db/arango/conn.py +1025 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/conn.py +377 -0
- graflo/db/connection/__init__.py +6 -0
- graflo/db/connection/config_mapping.py +18 -0
- graflo/db/connection/onto.py +717 -0
- graflo/db/connection/wsgi.py +29 -0
- graflo/db/manager.py +119 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +639 -0
- graflo/db/postgres/__init__.py +37 -0
- graflo/db/postgres/conn.py +948 -0
- graflo/db/postgres/fuzzy_matcher.py +281 -0
- graflo/db/postgres/heuristics.py +133 -0
- graflo/db/postgres/inference_utils.py +428 -0
- graflo/db/postgres/resource_mapping.py +273 -0
- graflo/db/postgres/schema_inference.py +372 -0
- graflo/db/postgres/types.py +148 -0
- graflo/db/postgres/util.py +87 -0
- graflo/db/tigergraph/__init__.py +9 -0
- graflo/db/tigergraph/conn.py +2365 -0
- graflo/db/tigergraph/onto.py +26 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +525 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +312 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +616 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +807 -0
- graflo/util/merge.py +150 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +422 -0
- graflo/util/transform.py +454 -0
- graflo-1.3.7.dist-info/METADATA +243 -0
- graflo-1.3.7.dist-info/RECORD +70 -0
- graflo-1.3.7.dist-info/WHEEL +4 -0
- graflo-1.3.7.dist-info/entry_points.txt +5 -0
- graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
graflo/db/postgres/conn.py
@@ -0,0 +1,948 @@
"""PostgreSQL connection implementation for schema introspection.

This module implements PostgreSQL connection and schema introspection functionality,
specifically designed to analyze 3NF schemas and identify vertex-like and edge-like tables.

Key Features:
- Connection management using psycopg2
- Schema introspection (tables, columns, constraints)
- Vertex/edge table detection heuristics
- Structured schema information extraction

Example:
    >>> from graflo.db.postgres import PostgresConnection
    >>> from graflo.db.connection.onto import PostgresConfig
    >>> config = PostgresConfig.from_docker_env()
    >>> conn = PostgresConnection(config)
    >>> schema_info = conn.introspect_schema()
    >>> print(schema_info.vertex_tables)
    >>> conn.close()
"""

import logging
from typing import Any

import psycopg2
from psycopg2.extras import RealDictCursor

from graflo.architecture.onto_sql import (
    ColumnInfo,
    ForeignKeyInfo,
    VertexTableInfo,
    EdgeTableInfo,
    SchemaIntrospectionResult,
)
from graflo.db.connection.onto import PostgresConfig

from .inference_utils import (
    FuzzyMatchCache,
    infer_edge_vertices_from_table_name,
    infer_vertex_from_column_name,
)

logger = logging.getLogger(__name__)


class PostgresConnection:
    """PostgreSQL connection for schema introspection.

    This class provides PostgreSQL-specific functionality for connecting to databases
    and introspecting 3NF schemas to identify vertex-like and edge-like tables.

    Attributes:
        config: PostgreSQL connection configuration
        conn: psycopg2 connection instance
    """

    def __init__(self, config: PostgresConfig):
        """Initialize PostgreSQL connection.

        Args:
            config: PostgreSQL connection configuration containing URI and credentials
        """
        self.config = config

        # Validate required config values
        if config.uri is None:
            raise ValueError("PostgreSQL connection requires a URI to be configured")
        if config.database is None:
            raise ValueError(
                "PostgreSQL connection requires a database name to be configured"
            )

        # Use config properties directly - all fallbacks are handled in PostgresConfig
        host = config.hostname or "localhost"
        port = int(config.port) if config.port else 5432
        database = config.database
        user = config.username or "postgres"
        password = config.password

        # Build connection parameters dict
        conn_params = {
            "host": host,
            "port": port,
            "database": database,
            "user": user,
        }

        if password:
            conn_params["password"] = password

        try:
            self.conn = psycopg2.connect(**conn_params)
            logger.info(f"Successfully connected to PostgreSQL database '{database}'")
        except Exception as e:
            logger.error(f"Failed to connect to PostgreSQL: {e}", exc_info=True)
            raise

    def read(self, query: str, params: tuple | None = None) -> list[dict[str, Any]]:
        """Execute a SELECT query and return results as a list of dictionaries.

        Args:
            query: SQL SELECT query to execute
            params: Optional tuple of parameters for parameterized queries

        Returns:
            List of dictionaries, where each dictionary represents a row with column names as keys.
            Decimal values are converted to float for compatibility with graph databases.
        """
        from decimal import Decimal

        with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
            if params:
                cursor.execute(query, params)
            else:
                cursor.execute(query)

            # Convert rows to dictionaries and convert Decimal to float
            results = []
            for row in cursor.fetchall():
                row_dict = dict(row)
                # Convert Decimal to float for JSON/graph database compatibility
                for key, value in row_dict.items():
                    if isinstance(value, Decimal):
                        row_dict[key] = float(value)
                results.append(row_dict)

            return results

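    # A minimal usage sketch for read() (hypothetical table and values; assumes
    # an open connection named `conn`):
    #
    #     rows = conn.read("SELECT name, score FROM players WHERE score > %s", (42,))
    #     # -> [{"name": "alice", "score": 99.5}, ...]  (Decimal already cast to float)
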
    def __enter__(self):
        """Enter the context manager.

        Returns:
            PostgresConnection: Self for use in 'with' statements
        """
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Exit the context manager.

        Ensures the connection is properly closed when exiting the context.

        Args:
            exc_type: Exception type if an exception occurred
            exc_value: Exception value if an exception occurred
            exc_traceback: Exception traceback if an exception occurred
        """
        self.close()
        return False  # Don't suppress exceptions

    def close(self):
        """Close the PostgreSQL connection."""
        if hasattr(self, "conn") and self.conn:
            try:
                self.conn.close()
                logger.debug("PostgreSQL connection closed")
            except Exception as e:
                logger.warning(
                    f"Error closing PostgreSQL connection: {e}", exc_info=True
                )

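    # Context-manager usage sketch (mirrors the module docstring example; the
    # config values are assumptions):
    #
    #     with PostgresConnection(PostgresConfig.from_docker_env()) as pg:
    #         schema_info = pg.introspect_schema()
    #     # close() has run here, even if introspection raised
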
    def _check_information_schema_reliable(self, schema_name: str) -> bool:
        """Check if information_schema is reliable for the given schema.

        Args:
            schema_name: Schema name to check

        Returns:
            True if information_schema appears reliable, False otherwise
        """
        try:
            # Try to query information_schema.tables
            query = """
                SELECT COUNT(*) as count
                FROM information_schema.tables
                WHERE table_schema = %s
                AND table_type = 'BASE TABLE'
            """
            with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.execute(query, (schema_name,))
                result = cursor.fetchone()
                # If query succeeds, check if we can also query constraints
                pk_query = """
                    SELECT COUNT(*) as count
                    FROM information_schema.table_constraints tc
                    JOIN information_schema.key_column_usage kcu
                        ON tc.constraint_name = kcu.constraint_name
                        AND tc.table_schema = kcu.table_schema
                    WHERE tc.constraint_type = 'PRIMARY KEY'
                    AND tc.table_schema = %s
                """
                cursor.execute(pk_query, (schema_name,))
                pk_result = cursor.fetchone()
                # If both queries work, information_schema seems reliable
                return result is not None and pk_result is not None
        except Exception as e:
            logger.debug(f"information_schema check failed: {e}")
            return False

    def _get_tables_pg_catalog(self, schema_name: str) -> list[dict[str, Any]]:
        """Get all tables using pg_catalog (fallback method).

        Args:
            schema_name: Schema name to query

        Returns:
            List of table information dictionaries with keys: table_name, table_schema
        """
        query = """
            SELECT
                c.relname as table_name,
                n.nspname as table_schema
            FROM pg_catalog.pg_class c
            JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
            WHERE n.nspname = %s
            AND c.relkind = 'r'
            AND NOT c.relispartition
            ORDER BY c.relname;
        """

        with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query, (schema_name,))
            return [dict(row) for row in cursor.fetchall()]

    def get_tables(self, schema_name: str | None = None) -> list[dict[str, Any]]:
        """Get all tables in the specified schema.

        Tries information_schema first, falls back to pg_catalog if needed.

        Args:
            schema_name: Schema name to query. If None, uses 'public' or config schema_name.

        Returns:
            List of table information dictionaries with keys: table_name, table_schema
        """
        if schema_name is None:
            schema_name = self.config.schema_name or "public"

        # Try information_schema first
        try:
            query = """
                SELECT table_name, table_schema
                FROM information_schema.tables
                WHERE table_schema = %s
                AND table_type = 'BASE TABLE'
                ORDER BY table_name;
            """

            with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.execute(query, (schema_name,))
                results = [dict(row) for row in cursor.fetchall()]
                # If we got results, check if information_schema is reliable
                if results and self._check_information_schema_reliable(schema_name):
                    return results
                # If no results or unreliable, fall back to pg_catalog
                logger.debug(
                    f"information_schema returned no results or is unreliable, "
                    f"falling back to pg_catalog for schema '{schema_name}'"
                )
        except Exception as e:
            logger.debug(
                f"information_schema query failed: {e}, falling back to pg_catalog"
            )

        # Fallback to pg_catalog
        return self._get_tables_pg_catalog(schema_name)

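    # Result-shape sketch for get_tables() (hypothetical tables; both the
    # information_schema path and the pg_catalog fallback return this shape):
    #
    #     pg.get_tables("public")
    #     # -> [{"table_name": "groups", "table_schema": "public"},
    #     #     {"table_name": "rel_user_group", "table_schema": "public"},
    #     #     {"table_name": "users", "table_schema": "public"}]
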
    def _get_table_columns_pg_catalog(
        self, table_name: str, schema_name: str
    ) -> list[dict[str, Any]]:
        """Get columns using pg_catalog (fallback method).

        Args:
            table_name: Name of the table
            schema_name: Schema name

        Returns:
            List of column information dictionaries with keys:
            name, type, description, is_nullable, column_default
        """
        query = """
            SELECT
                a.attname as name,
                pg_catalog.format_type(a.atttypid, a.atttypmod) as type,
                CASE WHEN a.attnotnull THEN 'NO' ELSE 'YES' END as is_nullable,
                pg_catalog.pg_get_expr(d.adbin, d.adrelid) as column_default,
                COALESCE(dsc.description, '') as description
            FROM pg_catalog.pg_attribute a
            JOIN pg_catalog.pg_class c ON c.oid = a.attrelid
            JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
            LEFT JOIN pg_catalog.pg_attrdef d ON d.adrelid = a.attrelid AND d.adnum = a.attnum
            LEFT JOIN pg_catalog.pg_description dsc ON dsc.objoid = a.attrelid AND dsc.objsubid = a.attnum
            WHERE n.nspname = %s
            AND c.relname = %s
            AND a.attnum > 0
            AND NOT a.attisdropped
            ORDER BY a.attnum;
        """

        # Map verbose PostgreSQL type names to their short aliases
        type_mapping = {
            "character varying": "varchar",
            "character": "char",
            "double precision": "float8",
            "real": "float4",
            "integer": "int4",
            "bigint": "int8",
            "smallint": "int2",
        }

        with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query, (schema_name, table_name))
            columns = []
            for row in cursor.fetchall():
                col_dict = dict(row)
                if col_dict["type"]:
                    # Normalize the type: drop any length qualifier
                    # (e.g., "character varying(255)" -> "varchar"), then map
                    # verbose names to short aliases
                    type_str = col_dict["type"]
                    base_type = type_str.split("(")[0] if "(" in type_str else type_str
                    col_dict["type"] = type_mapping.get(
                        base_type.lower(), base_type.lower()
                    )
                columns.append(col_dict)
            return columns

    def get_table_columns(
        self, table_name: str, schema_name: str | None = None
    ) -> list[dict[str, Any]]:
        """Get columns for a specific table with types and descriptions.

        Tries information_schema first, falls back to pg_catalog if needed.

        Args:
            table_name: Name of the table
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            List of column information dictionaries with keys:
            name, type, description, is_nullable, column_default
        """
        if schema_name is None:
            schema_name = self.config.schema_name or "public"

        # Try information_schema first
        try:
            query = """
                SELECT
                    c.column_name as name,
                    c.data_type as type,
                    c.udt_name as udt_name,
                    c.character_maximum_length,
                    c.is_nullable,
                    c.column_default,
                    COALESCE(d.description, '') as description
                FROM information_schema.columns c
                LEFT JOIN pg_catalog.pg_statio_all_tables st
                    ON st.schemaname = c.table_schema
                    AND st.relname = c.table_name
                LEFT JOIN pg_catalog.pg_description d
                    ON d.objoid = st.relid
                    AND d.objsubid = c.ordinal_position
                WHERE c.table_schema = %s
                AND c.table_name = %s
                ORDER BY c.ordinal_position;
            """

            with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.execute(query, (schema_name, table_name))
                columns = []
                for row in cursor.fetchall():
                    col_dict = dict(row)
                    # Format type with length if applicable
                    if col_dict["character_maximum_length"]:
                        col_dict["type"] = (
                            f"{col_dict['type']}({col_dict['character_maximum_length']})"
                        )
                    # Use udt_name if it's more specific (e.g., varchar, int4)
                    if (
                        col_dict["udt_name"]
                        and col_dict["udt_name"] != col_dict["type"]
                    ):
                        col_dict["type"] = col_dict["udt_name"]
                    # Remove helper fields
                    col_dict.pop("character_maximum_length", None)
                    col_dict.pop("udt_name", None)
                    columns.append(col_dict)

            # If we got results and information_schema is reliable, return them
            if columns and self._check_information_schema_reliable(schema_name):
                return columns
            # Otherwise fall back to pg_catalog
            logger.debug(
                f"information_schema returned no results or is unreliable, "
                f"falling back to pg_catalog for table '{schema_name}.{table_name}'"
            )
        except Exception as e:
            logger.debug(
                f"information_schema query failed: {e}, falling back to pg_catalog"
            )

        # Fallback to pg_catalog
        return self._get_table_columns_pg_catalog(table_name, schema_name)

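    # Column-dict shape sketch (hypothetical column; both lookup paths
    # normalize to these keys):
    #
    #     pg.get_table_columns("users")
    #     # -> [{"name": "email", "type": "varchar", "description": "",
    #     #      "is_nullable": "YES", "column_default": None}, ...]
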
    def _get_primary_keys_pg_catalog(
        self, table_name: str, schema_name: str
    ) -> list[str]:
        """Get primary key columns using pg_catalog (fallback method).

        Args:
            table_name: Name of the table
            schema_name: Schema name

        Returns:
            List of primary key column names
        """
        query = """
            SELECT a.attname
            FROM pg_catalog.pg_constraint con
            JOIN pg_catalog.pg_class c ON c.oid = con.conrelid
            JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
            JOIN pg_catalog.pg_attribute a ON a.attrelid = con.conrelid AND a.attnum = ANY(con.conkey)
            WHERE n.nspname = %s
            AND c.relname = %s
            AND con.contype = 'p'
            ORDER BY array_position(con.conkey, a.attnum);
        """

        with self.conn.cursor() as cursor:
            cursor.execute(query, (schema_name, table_name))
            return [row[0] for row in cursor.fetchall()]

    def get_primary_keys(
        self, table_name: str, schema_name: str | None = None
    ) -> list[str]:
        """Get primary key columns for a table.

        Tries information_schema first, falls back to pg_catalog if needed.

        Args:
            table_name: Name of the table
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            List of primary key column names
        """
        if schema_name is None:
            schema_name = self.config.schema_name or "public"

        # Try information_schema first
        try:
            query = """
                SELECT kcu.column_name
                FROM information_schema.table_constraints tc
                JOIN information_schema.key_column_usage kcu
                    ON tc.constraint_name = kcu.constraint_name
                    AND tc.table_schema = kcu.table_schema
                WHERE tc.constraint_type = 'PRIMARY KEY'
                AND tc.table_schema = %s
                AND tc.table_name = %s
                ORDER BY kcu.ordinal_position;
            """

            with self.conn.cursor() as cursor:
                cursor.execute(query, (schema_name, table_name))
                results = [row[0] for row in cursor.fetchall()]
                # If we got results and information_schema is reliable, return them
                if results and self._check_information_schema_reliable(schema_name):
                    return results
                # Otherwise fall back to pg_catalog
                logger.debug(
                    f"information_schema returned no results or is unreliable, "
                    f"falling back to pg_catalog for primary keys of '{schema_name}.{table_name}'"
                )
        except Exception as e:
            logger.debug(
                f"information_schema query failed: {e}, falling back to pg_catalog"
            )

        # Fallback to pg_catalog
        return self._get_primary_keys_pg_catalog(table_name, schema_name)

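    # Primary-key sketch (hypothetical tables): an entity table yields its
    # surrogate key, a join table its composite key:
    #
    #     pg.get_primary_keys("users")           # -> ["id"]
    #     pg.get_primary_keys("rel_user_group")  # -> ["user_id", "group_id"]
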
    def _get_foreign_keys_pg_catalog(
        self, table_name: str, schema_name: str
    ) -> list[dict[str, Any]]:
        """Get foreign key relationships using pg_catalog (fallback method).

        Handles both single-column and multi-column foreign keys.
        For multi-column foreign keys, returns one row per column.

        Args:
            table_name: Name of the table
            schema_name: Schema name

        Returns:
            List of foreign key dictionaries with keys:
            column, references_table, references_column, constraint_name
        """
        # Use generate_subscripts for better compatibility with older PostgreSQL versions
        query = """
            SELECT
                a.attname as column,
                ref_c.relname as references_table,
                ref_a.attname as references_column,
                con.conname as constraint_name
            FROM pg_catalog.pg_constraint con
            JOIN pg_catalog.pg_class c ON c.oid = con.conrelid
            JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
            JOIN pg_catalog.pg_class ref_c ON ref_c.oid = con.confrelid
            JOIN generate_subscripts(con.conkey, 1) AS i ON true
            JOIN pg_catalog.pg_attribute a ON a.attrelid = con.conrelid AND a.attnum = con.conkey[i]
            JOIN pg_catalog.pg_attribute ref_a ON ref_a.attrelid = con.confrelid AND ref_a.attnum = con.confkey[i]
            WHERE n.nspname = %s
            AND c.relname = %s
            AND con.contype = 'f'
            ORDER BY con.conname, i;
        """

        with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query, (schema_name, table_name))
            return [dict(row) for row in cursor.fetchall()]

    def get_foreign_keys(
        self, table_name: str, schema_name: str | None = None
    ) -> list[dict[str, Any]]:
        """Get foreign key relationships for a table.

        Tries information_schema first, falls back to pg_catalog if needed.

        Args:
            table_name: Name of the table
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            List of foreign key dictionaries with keys:
            column, references_table, references_column, constraint_name
        """
        if schema_name is None:
            schema_name = self.config.schema_name or "public"

        # Try information_schema first
        try:
            query = """
                SELECT
                    kcu.column_name as column,
                    ccu.table_name as references_table,
                    ccu.column_name as references_column,
                    tc.constraint_name
                FROM information_schema.table_constraints tc
                JOIN information_schema.key_column_usage kcu
                    ON tc.constraint_name = kcu.constraint_name
                    AND tc.table_schema = kcu.table_schema
                JOIN information_schema.constraint_column_usage ccu
                    ON ccu.constraint_name = tc.constraint_name
                    AND ccu.table_schema = tc.table_schema
                WHERE tc.constraint_type = 'FOREIGN KEY'
                AND tc.table_schema = %s
                AND tc.table_name = %s
                ORDER BY kcu.ordinal_position;
            """

            with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.execute(query, (schema_name, table_name))
                results = [dict(row) for row in cursor.fetchall()]
                # If information_schema is reliable, return the results
                # (an empty list is a legitimate answer for tables without FKs)
                if self._check_information_schema_reliable(schema_name):
                    return results
                # Otherwise fall back to pg_catalog
                logger.debug(
                    f"information_schema is unreliable, "
                    f"falling back to pg_catalog for foreign keys of '{schema_name}.{table_name}'"
                )
        except Exception as e:
            logger.debug(
                f"information_schema query failed: {e}, falling back to pg_catalog"
            )

        # Fallback to pg_catalog
        return self._get_foreign_keys_pg_catalog(table_name, schema_name)

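    # Foreign-key row shape sketch (hypothetical join table; one dict per FK
    # column, constraint names assume PostgreSQL's default naming):
    #
    #     pg.get_foreign_keys("rel_user_group")
    #     # -> [{"column": "user_id", "references_table": "users",
    #     #      "references_column": "id", "constraint_name": "rel_user_group_user_id_fkey"},
    #     #     {"column": "group_id", "references_table": "groups",
    #     #      "references_column": "id", "constraint_name": "rel_user_group_group_id_fkey"}]
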
    def _is_edge_like_table(
        self, table_name: str, pk_columns: list[str], fk_columns: list[dict[str, Any]]
    ) -> bool:
        """Determine if a table is edge-like based on heuristics.

        Heuristics:
        1. Tables whose primary key spans 2 or more columns are likely edge tables
        2. Tables with exactly 2 foreign keys are likely edge tables
        3. Tables with names starting with 'rel_' are likely edge tables
        4. Tables where primary key columns match foreign key columns are likely edge tables

        Args:
            table_name: Name of the table
            pk_columns: List of primary key column names
            fk_columns: List of foreign key dictionaries

        Returns:
            True if table appears to be edge-like, False otherwise
        """
        # Heuristic 1: A composite primary key (2+ columns) suggests an edge table
        if len(pk_columns) >= 2:
            return True

        # Heuristic 2: Tables with exactly 2 foreign keys are likely edge tables
        if len(fk_columns) == 2:
            return True

        # Heuristic 3: Tables with names starting with 'rel_' are likely edge tables
        if table_name.startswith("rel_"):
            return True

        # Heuristic 4: If primary key columns match foreign key columns, it's likely an edge table
        fk_column_names = {fk["column"] for fk in fk_columns}
        pk_set = set(pk_columns)
        # If all PK columns are FK columns and we have at least 2 FKs, it's likely an edge table
        if pk_set.issubset(fk_column_names) and len(fk_columns) >= 2:
            return True

        return False

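    # Worked example of the edge heuristics (hypothetical tables):
    #
    #     users(id PK, name)                 -> no heuristic fires: vertex-like
    #     rel_user_group(user_id, group_id,  -> composite PK (1), two FKs (2),
    #         PK(user_id, group_id),            'rel_' prefix (3), and PK set ==
    #         FKs to users and groups)          FK set (4): edge-like
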
    def detect_vertex_tables(
        self, schema_name: str | None = None
    ) -> list[VertexTableInfo]:
        """Detect vertex-like tables in the schema.

        Heuristic: Tables with a primary key and descriptive columns
        (not just foreign keys). These represent entities.

        Note: Tables identified as edge-like are excluded from vertex tables.

        Args:
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            List of VertexTableInfo objects
        """
        if schema_name is None:
            schema_name = self.config.schema_name or "public"

        tables = self.get_tables(schema_name)
        vertex_tables = []

        for table_info in tables:
            table_name = table_info["table_name"]
            pk_columns = self.get_primary_keys(table_name, schema_name)
            fk_columns = self.get_foreign_keys(table_name, schema_name)
            all_columns = self.get_table_columns(table_name, schema_name)

            # Vertex-like tables have:
            # 1. A primary key
            # 2. Not been identified as edge-like
            # 3. Descriptive columns beyond just foreign keys

            if not pk_columns:
                continue  # Skip tables without primary keys

            # Skip edge-like tables
            if self._is_edge_like_table(table_name, pk_columns, fk_columns):
                continue

            # Collect non-FK, non-PK columns (descriptive columns)
            fk_column_names = {fk["column"] for fk in fk_columns}
            pk_column_names = set(pk_columns)
            descriptive_columns = [
                col
                for col in all_columns
                if col["name"] not in fk_column_names
                and col["name"] not in pk_column_names
            ]

            # If the table has descriptive columns, consider it vertex-like
            if descriptive_columns:
                # Mark primary key columns and convert to ColumnInfo
                pk_set = set(pk_columns)
                column_infos = []
                for col in all_columns:
                    column_infos.append(
                        ColumnInfo(
                            name=col["name"],
                            type=col["type"],
                            description=col.get("description", ""),
                            is_nullable=col.get("is_nullable", "YES"),
                            column_default=col.get("column_default"),
                            is_pk=col["name"] in pk_set,
                        )
                    )

                # Convert foreign keys to ForeignKeyInfo
                fk_infos = []
                for fk in fk_columns:
                    fk_infos.append(
                        ForeignKeyInfo(
                            column=fk["column"],
                            references_table=fk["references_table"],
                            references_column=fk.get("references_column"),
                            constraint_name=fk.get("constraint_name"),
                        )
                    )

                vertex_tables.append(
                    VertexTableInfo(
                        name=table_name,
                        schema_name=schema_name,
                        columns=column_infos,
                        primary_key=pk_columns,
                        foreign_keys=fk_infos,
                    )
                )

        return vertex_tables

    def detect_edge_tables(
        self,
        schema_name: str | None = None,
        vertex_table_names: list[str] | None = None,
    ) -> list[EdgeTableInfo]:
        """Detect edge-like tables in the schema.

        Heuristic: Tables with a composite primary key, exactly 2 foreign keys,
        or names starting with 'rel_'. These represent relationships between entities.

        Args:
            schema_name: Schema name. If None, uses 'public' or config schema_name.
            vertex_table_names: Optional list of vertex table names for fuzzy matching.
                If None, will be inferred from detect_vertex_tables().

        Returns:
            List of EdgeTableInfo objects with source_table and target_table resolved
        """
        if schema_name is None:
            schema_name = self.config.schema_name or "public"

        # Get vertex table names if not provided
        if vertex_table_names is None:
            vertex_tables = self.detect_vertex_tables(schema_name)
            vertex_table_names = [vt.name for vt in vertex_tables]

        # Create the fuzzy match cache once for all tables (significant performance improvement)
        match_cache = FuzzyMatchCache(vertex_table_names)

        tables = self.get_tables(schema_name)
        edge_tables = []

        for table_info in tables:
            table_name = table_info["table_name"]
            pk_columns = self.get_primary_keys(table_name, schema_name)
            fk_columns = self.get_foreign_keys(table_name, schema_name)

            # Skip tables without primary keys
            if not pk_columns:
                continue

            # Check if table is edge-like
            if not self._is_edge_like_table(table_name, pk_columns, fk_columns):
                continue

            all_columns = self.get_table_columns(table_name, schema_name)

            # Mark primary key columns and convert to ColumnInfo
            pk_set = set(pk_columns)
            column_infos = []
            for col in all_columns:
                column_infos.append(
                    ColumnInfo(
                        name=col["name"],
                        type=col["type"],
                        description=col.get("description", ""),
                        is_nullable=col.get("is_nullable", "YES"),
                        column_default=col.get("column_default"),
                        is_pk=col["name"] in pk_set,
                    )
                )

            # Convert foreign keys to ForeignKeyInfo
            fk_infos = []
            for fk in fk_columns:
                fk_infos.append(
                    ForeignKeyInfo(
                        column=fk["column"],
                        references_table=fk["references_table"],
                        references_column=fk.get("references_column"),
                        constraint_name=fk.get("constraint_name"),
                    )
                )

            # Determine source and target tables
            source_table = None
            target_table = None
            source_column = None
            target_column = None
            relation_name = None

            # If we have exactly 2 foreign keys, use them directly
            if len(fk_infos) == 2:
                source_fk = fk_infos[0]
                target_fk = fk_infos[1]
                source_table = source_fk.references_table
                target_table = target_fk.references_table
                source_column = source_fk.column
                target_column = target_fk.column
                # Still try to infer the relation name from the table name
                fk_dicts = [
                    {
                        "column": fk.column,
                        "references_table": fk.references_table,
                    }
                    for fk in fk_infos
                ]
                _, _, relation_name = infer_edge_vertices_from_table_name(
                    table_name, pk_columns, fk_dicts, vertex_table_names, match_cache
                )
            # If the primary key spans 2 or more columns, try to infer from table name and structure
            elif len(pk_columns) >= 2:
                # Convert fk_infos to dicts for infer_edge_vertices_from_table_name
                fk_dicts = [
                    {
                        "column": fk.column,
                        "references_table": fk.references_table,
                    }
                    for fk in fk_infos
                ]

                # Try to infer from the table name pattern
                inferred_source, inferred_target, relation_name = (
                    infer_edge_vertices_from_table_name(
                        table_name,
                        pk_columns,
                        fk_dicts,
                        vertex_table_names,
                        match_cache,
                    )
                )

                if inferred_source and inferred_target:
                    source_table = inferred_source
                    target_table = inferred_target
                    # Try to match PK columns to FK columns for source/target columns
                    if fk_infos:
                        # Use the first FK for source, the second for target if available
                        if len(fk_infos) >= 2:
                            source_column = fk_infos[0].column
                            target_column = fk_infos[1].column
                        elif len(fk_infos) == 1:
                            # Self-reference case
                            source_column = fk_infos[0].column
                            target_column = fk_infos[0].column
                    else:
                        # Use PK columns as source/target columns
                        source_column = pk_columns[0]
                        target_column = (
                            pk_columns[1] if len(pk_columns) > 1 else pk_columns[0]
                        )
                elif fk_infos:
                    # Fallback: use FK references if available
                    if len(fk_infos) >= 2:
                        source_table = fk_infos[0].references_table
                        target_table = fk_infos[1].references_table
                        source_column = fk_infos[0].column
                        target_column = fk_infos[1].column
                    elif len(fk_infos) == 1:
                        source_table = fk_infos[0].references_table
                        target_table = fk_infos[0].references_table
                        source_column = fk_infos[0].column
                        target_column = fk_infos[0].column
                else:
                    # Last resort: use PK columns and infer table names from column names
                    source_column = pk_columns[0]
                    target_column = (
                        pk_columns[1] if len(pk_columns) > 1 else pk_columns[0]
                    )
                    # Use robust inference logic to extract vertex names from column names
                    source_table = infer_vertex_from_column_name(
                        source_column, vertex_table_names, match_cache
                    )
                    target_table = infer_vertex_from_column_name(
                        target_column, vertex_table_names, match_cache
                    )

            # Only add if we have both source and target information
            if source_table and target_table:
                edge_tables.append(
                    EdgeTableInfo(
                        name=table_name,
                        schema_name=schema_name,
                        columns=column_infos,
                        primary_key=pk_columns,
                        foreign_keys=fk_infos,
                        source_table=source_table,
                        target_table=target_table,
                        source_column=source_column or pk_columns[0],
                        target_column=target_column
                        or (pk_columns[1] if len(pk_columns) > 1 else pk_columns[0]),
                        relation=relation_name,
                    )
                )
            else:
                logger.warning(
                    f"Could not determine source/target tables for edge-like table '{table_name}'. "
                    f"Skipping."
                )

        return edge_tables

    def introspect_schema(
        self, schema_name: str | None = None
    ) -> SchemaIntrospectionResult:
        """Introspect the database schema and return structured information.

        This is the main method that analyzes the schema and returns information
        about vertex-like and edge-like tables.

        Args:
            schema_name: Schema name. If None, uses 'public' or config schema_name.

        Returns:
            SchemaIntrospectionResult with vertex_tables, edge_tables, and schema_name
        """
        if schema_name is None:
            schema_name = self.config.schema_name or "public"

        logger.info(f"Introspecting PostgreSQL schema '{schema_name}'")

        vertex_tables = self.detect_vertex_tables(schema_name)
        edge_tables = self.detect_edge_tables(schema_name)

        result = SchemaIntrospectionResult(
            vertex_tables=vertex_tables,
            edge_tables=edge_tables,
            schema_name=schema_name,
        )

        logger.info(
            f"Found {len(vertex_tables)} vertex-like tables and {len(edge_tables)} edge-like tables"
        )

        return result
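
# End-to-end usage sketch (assumed config values; attribute names follow the
# objects constructed above):
#
#     config = PostgresConfig.from_docker_env()
#     with PostgresConnection(config) as pg:
#         result = pg.introspect_schema("public")
#         for vt in result.vertex_tables:
#             print(vt.name, [c.name for c in vt.columns])
#         for et in result.edge_tables:
#             print(et.name, et.source_table, "->", et.target_table)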