graflo-1.3.7-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of graflo might be problematic.
- graflo/README.md +18 -0
- graflo/__init__.py +70 -0
- graflo/architecture/__init__.py +38 -0
- graflo/architecture/actor.py +1276 -0
- graflo/architecture/actor_util.py +450 -0
- graflo/architecture/edge.py +418 -0
- graflo/architecture/onto.py +376 -0
- graflo/architecture/onto_sql.py +54 -0
- graflo/architecture/resource.py +163 -0
- graflo/architecture/schema.py +135 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +89 -0
- graflo/architecture/vertex.py +562 -0
- graflo/caster.py +736 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +203 -0
- graflo/cli/manage_dbs.py +197 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/data_source/__init__.py +48 -0
- graflo/data_source/api.py +339 -0
- graflo/data_source/base.py +95 -0
- graflo/data_source/factory.py +304 -0
- graflo/data_source/file.py +148 -0
- graflo/data_source/memory.py +70 -0
- graflo/data_source/registry.py +82 -0
- graflo/data_source/sql.py +183 -0
- graflo/db/__init__.py +44 -0
- graflo/db/arango/__init__.py +22 -0
- graflo/db/arango/conn.py +1025 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/conn.py +377 -0
- graflo/db/connection/__init__.py +6 -0
- graflo/db/connection/config_mapping.py +18 -0
- graflo/db/connection/onto.py +717 -0
- graflo/db/connection/wsgi.py +29 -0
- graflo/db/manager.py +119 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +639 -0
- graflo/db/postgres/__init__.py +37 -0
- graflo/db/postgres/conn.py +948 -0
- graflo/db/postgres/fuzzy_matcher.py +281 -0
- graflo/db/postgres/heuristics.py +133 -0
- graflo/db/postgres/inference_utils.py +428 -0
- graflo/db/postgres/resource_mapping.py +273 -0
- graflo/db/postgres/schema_inference.py +372 -0
- graflo/db/postgres/types.py +148 -0
- graflo/db/postgres/util.py +87 -0
- graflo/db/tigergraph/__init__.py +9 -0
- graflo/db/tigergraph/conn.py +2365 -0
- graflo/db/tigergraph/onto.py +26 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +525 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +312 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +616 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +807 -0
- graflo/util/merge.py +150 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +422 -0
- graflo/util/transform.py +454 -0
- graflo-1.3.7.dist-info/METADATA +243 -0
- graflo-1.3.7.dist-info/RECORD +70 -0
- graflo-1.3.7.dist-info/WHEEL +4 -0
- graflo-1.3.7.dist-info/entry_points.txt +5 -0
- graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
graflo/db/postgres/schema_inference.py
@@ -0,0 +1,372 @@
"""Schema inference from PostgreSQL database introspection.

This module provides functionality to infer graflo Schema objects from PostgreSQL
3NF database schemas by analyzing table structures, relationships, and column types.
"""

from __future__ import annotations

import logging

from graflo.architecture.edge import Edge, EdgeConfig, WeightConfig
from graflo.architecture.onto import Index, IndexType
from graflo.architecture.schema import Schema, SchemaMetadata
from graflo.architecture.vertex import Field, FieldType, Vertex, VertexConfig
from graflo.onto import DBFlavor

from ...architecture.onto_sql import EdgeTableInfo, SchemaIntrospectionResult
from .conn import PostgresConnection
from .types import PostgresTypeMapper

logger = logging.getLogger(__name__)


class PostgresSchemaInferencer:
    """Infers graflo Schema from PostgreSQL schema introspection results.

    This class takes the output from PostgresConnection.introspect_schema() and
    generates a complete graflo Schema with vertices, edges, and weights.
    """

    def __init__(
        self,
        db_flavor: DBFlavor = DBFlavor.ARANGO,
        conn: PostgresConnection | None = None,
    ):
        """Initialize the schema inferencer.

        Args:
            db_flavor: Target database flavor for the inferred schema
            conn: PostgreSQL connection for sampling data to infer types (optional)
        """
        self.db_flavor = db_flavor
        self.type_mapper = PostgresTypeMapper()
        self.conn = conn

    def infer_vertex_config(
        self, introspection_result: SchemaIntrospectionResult
    ) -> VertexConfig:
        """Infer VertexConfig from vertex tables.

        Args:
            introspection_result: Result from PostgresConnection.introspect_schema()

        Returns:
            VertexConfig: Inferred vertex configuration
        """
        vertex_tables = introspection_result.vertex_tables
        vertices = []

        for table_info in vertex_tables:
            table_name = table_info.name
            columns = table_info.columns
            pk_columns = table_info.primary_key

            # Create fields from columns
            fields = []
            for col in columns:
                field_name = col.name
                field_type = self.type_mapper.map_type(col.type)
                fields.append(Field(name=field_name, type=field_type))

            # Create indexes from primary key
            indexes = []
            if pk_columns:
                indexes.append(
                    Index(fields=pk_columns, type=IndexType.PERSISTENT, unique=True)
                )

            # Create vertex
            vertex = Vertex(
                name=table_name,
                dbname=table_name,
                fields=fields,
                indexes=indexes,
            )

            vertices.append(vertex)
            logger.debug(
                f"Inferred vertex '{table_name}' with {len(fields)} fields and "
                f"{len(indexes)} indexes"
            )

        return VertexConfig(vertices=vertices, db_flavor=self.db_flavor)

    def _infer_type_from_samples(
        self, table_name: str, schema_name: str, column_name: str, pg_type: str
    ) -> str:
        """Infer field type by sampling 5 rows from the table.

        Uses heuristics to determine if a column contains integers, floats, datetimes, etc.
        Falls back to PostgreSQL type mapping if sampling fails or is unavailable.

        Args:
            table_name: Name of the table
            schema_name: Schema name
            column_name: Name of the column to sample
            pg_type: PostgreSQL type from schema introspection

        Returns:
            str: FieldType value (INT, FLOAT, DATETIME, STRING, etc.)
        """
        # First try PostgreSQL type mapping
        mapped_type = self.type_mapper.map_type(pg_type)

        # If we have a connection, sample data to refine the type
        if self.conn is None:
            logger.debug(
                f"No connection available for sampling, using mapped type '{mapped_type}' "
                f"for column '{column_name}' in table '{table_name}'"
            )
            return mapped_type

        try:
            # Sample 5 rows from the table
            query = (
                f'SELECT "{column_name}" FROM "{schema_name}"."{table_name}" LIMIT 5'
            )
            samples = self.conn.read(query)

            if not samples:
                logger.debug(
                    f"No samples found for column '{column_name}' in table '{table_name}', "
                    f"using mapped type '{mapped_type}'"
                )
                return mapped_type

            # Extract non-None values
            values = [
                row[column_name] for row in samples if row[column_name] is not None
            ]

            if not values:
                logger.debug(
                    f"All samples are NULL for column '{column_name}' in table '{table_name}', "
                    f"using mapped type '{mapped_type}'"
                )
                return mapped_type

            # Heuristics to infer type from values
            # Check for integers (all values are integers)
            if all(isinstance(v, int) for v in values):
                logger.debug(
                    f"Inferred INT type for column '{column_name}' in table '{table_name}' "
                    f"from samples"
                )
                return FieldType.INT.value

            # Check for floats (all values are floats or ints that could be floats)
            if all(isinstance(v, (int, float)) for v in values):
                # If any value has decimal part, it's a float
                if any(isinstance(v, float) and v != float(int(v)) for v in values):
                    logger.debug(
                        f"Inferred FLOAT type for column '{column_name}' in table '{table_name}' "
                        f"from samples"
                    )
                    return FieldType.FLOAT.value
                # All integers, but might be stored as float - check PostgreSQL type
                if mapped_type == FieldType.FLOAT.value:
                    return FieldType.FLOAT.value
                return FieldType.INT.value

            # Check for datetime/date objects
            from datetime import date, datetime, time

            if all(isinstance(v, (datetime, date, time)) for v in values):
                logger.debug(
                    f"Inferred DATETIME type for column '{column_name}' in table '{table_name}' "
                    f"from samples"
                )
                return FieldType.DATETIME.value

            # Check for ISO format datetime strings
            if all(isinstance(v, str) for v in values):
                # Try to parse as ISO datetime
                iso_datetime_count = 0
                for v in values:
                    try:
                        # Try ISO format (with or without timezone)
                        datetime.fromisoformat(v.replace("Z", "+00:00"))
                        iso_datetime_count += 1
                    except (ValueError, AttributeError):
                        # Try other common formats
                        try:
                            datetime.strptime(v, "%Y-%m-%d %H:%M:%S")
                            iso_datetime_count += 1
                        except ValueError:
                            try:
                                datetime.strptime(v, "%Y-%m-%d")
                                iso_datetime_count += 1
                            except ValueError:
                                pass

                # If most values look like datetimes, infer DATETIME
                if iso_datetime_count >= len(values) * 0.8:  # 80% threshold
                    logger.debug(
                        f"Inferred DATETIME type for column '{column_name}' in table '{table_name}' "
                        f"from ISO format strings"
                    )
                    return FieldType.DATETIME.value

            # Default to mapped type
            logger.debug(
                f"Using mapped type '{mapped_type}' for column '{column_name}' in table '{table_name}' "
                f"(could not infer from samples)"
            )
            return mapped_type

        except Exception as e:
            logger.warning(
                f"Error sampling data for column '{column_name}' in table '{table_name}': {e}. "
                f"Using mapped type '{mapped_type}'"
            )
            return mapped_type

    def infer_edge_weights(self, edge_table_info: EdgeTableInfo) -> WeightConfig | None:
        """Infer edge weights from edge table columns with types.

        Uses PostgreSQL column types and optionally samples data to infer accurate types.

        Args:
            edge_table_info: Edge table information from introspection

        Returns:
            WeightConfig if there are weight columns, None otherwise
        """
        columns = edge_table_info.columns
        pk_columns = set(edge_table_info.primary_key)
        fk_columns = {fk.column for fk in edge_table_info.foreign_keys}

        # Find non-PK, non-FK columns (these become weights)
        weight_columns = [
            col
            for col in columns
            if col.name not in pk_columns and col.name not in fk_columns
        ]

        if not weight_columns:
            return None

        # Create Field objects with types for each weight column
        direct_weights = []
        for col in weight_columns:
            # Infer type: use PostgreSQL type first, then sample if needed
            field_type = self._infer_type_from_samples(
                edge_table_info.name,
                edge_table_info.schema_name,
                col.name,
                col.type,
            )
            direct_weights.append(Field(name=col.name, type=field_type))

        logger.debug(
            f"Inferred {len(direct_weights)} weights for edge table "
            f"'{edge_table_info.name}': {[f.name for f in direct_weights]}"
        )

        return WeightConfig(direct=direct_weights)

    def infer_edge_config(
        self,
        introspection_result: SchemaIntrospectionResult,
        vertex_config: VertexConfig,
    ) -> EdgeConfig:
        """Infer EdgeConfig from edge tables.

        Args:
            introspection_result: Result from PostgresConnection.introspect_schema()
            vertex_config: Inferred vertex configuration

        Returns:
            EdgeConfig: Inferred edge configuration
        """
        edge_tables = introspection_result.edge_tables
        edges = []

        vertex_names = vertex_config.vertex_set

        for edge_table_info in edge_tables:
            table_name = edge_table_info.name
            source_table = edge_table_info.source_table
            target_table = edge_table_info.target_table

            # Verify source and target vertices exist
            if source_table not in vertex_names:
                logger.warning(
                    f"Source vertex '{source_table}' for edge table '{table_name}' "
                    f"not found in vertex config, skipping"
                )
                continue

            if target_table not in vertex_names:
                logger.warning(
                    f"Target vertex '{target_table}' for edge table '{table_name}' "
                    f"not found in vertex config, skipping"
                )
                continue

            # Infer weights
            weights = self.infer_edge_weights(edge_table_info)
            indexes = []

            # Create edge
            edge = Edge(
                source=source_table,
                target=target_table,
                indexes=indexes,
                weights=weights,
                relation=edge_table_info.relation,
            )

            edges.append(edge)
            logger.debug(
                f"Inferred edge '{table_name}' from {source_table} to {target_table}"
            )

        return EdgeConfig(edges=edges)

    def infer_schema(
        self,
        introspection_result: SchemaIntrospectionResult,
        schema_name: str | None = None,
    ) -> Schema:
        """Infer complete Schema from PostgreSQL introspection.

        Args:
            introspection_result: Result from PostgresConnection.introspect_schema()
            schema_name: Schema name (defaults to schema_name from introspection if None)

        Returns:
            Schema: Complete inferred schema with vertices, edges, and metadata
        """
        if schema_name is None:
            schema_name = introspection_result.schema_name

        logger.info(f"Inferring schema from PostgreSQL schema '{schema_name}'")

        # Infer vertex configuration
        vertex_config = self.infer_vertex_config(introspection_result)
        logger.info(f"Inferred {len(vertex_config.vertices)} vertices")

        # Infer edge configuration
        edge_config = self.infer_edge_config(introspection_result, vertex_config)
        edges_count = len(list(edge_config.edges_list()))
        logger.info(f"Inferred {edges_count} edges")

        # Create schema metadata
        metadata = SchemaMetadata(name=schema_name)

        # Create schema (resources will be added separately)
        schema = Schema(
            general=metadata,
            vertex_config=vertex_config,
            edge_config=edge_config,
            resources=[],  # Resources will be created separately
        )

        logger.info(
            f"Successfully inferred schema '{schema_name}' with "
            f"{len(vertex_config.vertices)} vertices and "
            f"{len(list(edge_config.edges_list()))} edges"
        )

        return schema
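A plausible end-to-end sketch assembled from the signatures above. The docstrings name PostgresConnection.introspect_schema() as the source of SchemaIntrospectionResult, but its exact signature and the fields of PostgresConfig are not visible in this diff, so both are assumptions here:

from graflo.db import PostgresConnection
from graflo.db.connection.onto import PostgresConfig
from graflo.db.postgres.schema_inference import PostgresSchemaInferencer
from graflo.onto import DBFlavor

config = PostgresConfig(...)  # real connection parameters elided

with PostgresConnection(config) as conn:
    # Introspect the 3NF layout, then build the graph schema; passing conn
    # enables the 5-row sampling in _infer_type_from_samples.
    result = conn.introspect_schema()  # assumed zero-argument call
    inferencer = PostgresSchemaInferencer(db_flavor=DBFlavor.ARANGO, conn=conn)
    schema = inferencer.infer_schema(result)

Passing conn=None instead skips sampling and falls back to the static type mapping, which is the cheaper option when the PostgreSQL column types are already precise.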
graflo/db/postgres/types.py
@@ -0,0 +1,148 @@
"""Type mapping utilities for PostgreSQL to graflo type conversion.

This module provides utilities for mapping PostgreSQL data types to graflo Field types,
enabling automatic schema inference from PostgreSQL database schemas.
"""

import logging

logger = logging.getLogger(__name__)


class PostgresTypeMapper:
    """Maps PostgreSQL data types to graflo Field types.

    This class provides class methods for converting PostgreSQL type names
    (from information_schema or pg_catalog) to graflo Field type strings.
    """

    # Mapping of PostgreSQL types to graflo Field types
    TYPE_MAPPING = {
        # Integer types
        "integer": "INT",
        "int": "INT",
        "int4": "INT",
        "smallint": "INT",
        "int2": "INT",
        "bigint": "INT",
        "int8": "INT",
        "serial": "INT",
        "bigserial": "INT",
        "smallserial": "INT",
        # Floating point types
        "real": "FLOAT",
        "float4": "FLOAT",
        "double precision": "FLOAT",
        "float8": "FLOAT",
        "numeric": "FLOAT",
        "decimal": "FLOAT",
        # Boolean
        "boolean": "BOOL",
        "bool": "BOOL",
        # String types
        "character varying": "STRING",
        "varchar": "STRING",
        "character": "STRING",
        "char": "STRING",
        "text": "STRING",
        # Date/time types (mapped to DATETIME)
        "timestamp": "DATETIME",
        "timestamp without time zone": "DATETIME",
        "timestamp with time zone": "DATETIME",
        "timestamptz": "DATETIME",
        "date": "DATETIME",
        "time": "DATETIME",
        "time without time zone": "DATETIME",
        "time with time zone": "DATETIME",
        "timetz": "DATETIME",
        "interval": "STRING",  # Interval is a duration; keep as STRING
        # JSON types
        "json": "STRING",
        "jsonb": "STRING",
        # Binary types
        "bytea": "STRING",
        # UUID
        "uuid": "STRING",
    }

    @classmethod
    def map_type(cls, postgres_type: str) -> str:
        """Map PostgreSQL type to graflo Field type.

        Args:
            postgres_type: PostgreSQL type name (e.g., 'int4', 'varchar', 'timestamp')

        Returns:
            str: graflo Field type (INT, FLOAT, BOOL, DATETIME, or STRING)
        """
        # Normalize type name: lowercase and remove length specifications
        normalized = postgres_type.lower().strip()

        # Remove length specifications like (255) or (10,2)
        if "(" in normalized:
            normalized = normalized.split("(")[0].strip()

        # Check direct mapping
        if normalized in cls.TYPE_MAPPING:
            return cls.TYPE_MAPPING[normalized]

        # Check for partial matches (e.g., "character varying" might be stored as "varying")
        for pg_type, graflo_type in cls.TYPE_MAPPING.items():
            if pg_type in normalized or normalized in pg_type:
                logger.debug(
                    f"Mapped PostgreSQL type '{postgres_type}' to graflo type '{graflo_type}' "
                    f"(partial match with '{pg_type}')"
                )
                return graflo_type

        # Default to STRING for unknown types
        logger.warning(
            f"Unknown PostgreSQL type '{postgres_type}', defaulting to STRING"
        )
        return "STRING"

    @classmethod
    def is_datetime_type(cls, postgres_type: str) -> bool:
        """Check if a PostgreSQL type is a datetime type.

        Args:
            postgres_type: PostgreSQL type name

        Returns:
            bool: True if the type is a datetime-related type
        """
        normalized = postgres_type.lower().strip()
        datetime_types = [
            "timestamp",
            "date",
            "time",
            "interval",
            "timestamptz",
            "timetz",
        ]
        return any(dt_type in normalized for dt_type in datetime_types)

    @classmethod
    def is_numeric_type(cls, postgres_type: str) -> bool:
        """Check if a PostgreSQL type is a numeric type.

        Args:
            postgres_type: PostgreSQL type name

        Returns:
            bool: True if the type is numeric
        """
        normalized = postgres_type.lower().strip()
        numeric_types = [
            "integer",
            "int",
            "bigint",
            "smallint",
            "serial",
            "real",
            "double precision",
            "numeric",
            "decimal",
            "float",
        ]
        return any(nt_type in normalized for nt_type in numeric_types)
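The mapping can be exercised directly; each expected value below follows from TYPE_MAPPING and the normalization in map_type. One caveat worth noting: the partial-match fallback is substring-based, so an unknown type whose name happens to contain a known type name (for example "point", which contains "int") maps to that type rather than to STRING.

from graflo.db.postgres.types import PostgresTypeMapper

assert PostgresTypeMapper.map_type("varchar(255)") == "STRING"   # length spec stripped
assert PostgresTypeMapper.map_type("NUMERIC(10,2)") == "FLOAT"   # case-insensitive
assert PostgresTypeMapper.map_type("timestamptz") == "DATETIME"
assert PostgresTypeMapper.map_type("macaddr") == "STRING"        # no match at all: warns, returns STRING
assert PostgresTypeMapper.is_numeric_type("bigint")
assert PostgresTypeMapper.is_datetime_type("time with time zone")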
graflo/db/postgres/util.py
@@ -0,0 +1,87 @@
from pathlib import Path

from graflo.db import PostgresConnection
from graflo.db.connection.onto import PostgresConfig
from graflo.db.postgres.heuristics import logger


def load_schema_from_sql_file(
    config: PostgresConfig,
    schema_file: str | Path,
    continue_on_error: bool = True,
) -> None:
    """Load SQL schema file into PostgreSQL database.

    Parses a SQL file and executes all statements sequentially. Useful for
    initializing a database with tables, constraints, and initial data.

    Args:
        config: PostgreSQL connection configuration
        schema_file: Path to SQL file to execute
        continue_on_error: If True, continue executing remaining statements
            even if one fails. If False, raise exception on first error.

    Raises:
        FileNotFoundError: If schema_file does not exist
        Exception: If continue_on_error is False and a statement fails
    """
    schema_path = Path(schema_file)

    if not schema_path.exists():
        raise FileNotFoundError(f"Schema file not found: {schema_path}")

    logger.info(f"Loading schema from {schema_path}")

    # Read SQL file
    with open(schema_path, "r") as f:
        sql_content = f.read()

    # Parse SQL content into individual statements
    statements = []
    current_statement = []
    for line in sql_content.split("\n"):
        line = line.strip()
        # Skip empty lines and comments
        if not line or line.startswith("--"):
            continue
        current_statement.append(line)
        # Check if line ends with semicolon (end of statement)
        if line.endswith(";"):
            statement = " ".join(current_statement).rstrip(";").strip()
            if statement:
                statements.append(statement)
            current_statement = []

    # Execute remaining statement if any
    if current_statement:
        statement = " ".join(current_statement).strip()
        if statement:
            statements.append(statement)

    if not statements:
        logger.warning(f"No SQL statements found in {schema_path}")
        return

    # Execute statements using a connection context manager
    with PostgresConnection(config) as conn:
        with conn.conn.cursor() as cursor:
            for statement in statements:
                if statement:
                    try:
                        cursor.execute(statement)
                    except Exception as exec_error:
                        if continue_on_error:
                            # Some statements might fail (like DROP TABLE IF EXISTS when
                            # tables don't exist) or duplicate constraints - log but continue
                            logger.debug(f"Statement execution note: {exec_error}")
                        else:
                            logger.error(
                                f"Failed to execute statement: {statement[:100]}... Error: {exec_error}"
                            )
                            raise

        conn.conn.commit()

    logger.info(
        f"Successfully loaded schema from {schema_path} ({len(statements)} statements)"
    )
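A minimal invocation sketch; PostgresConfig's fields are not shown in this diff, so the constructor call is elided. Note that the statement splitter above recognizes boundaries only at line-ending semicolons, so scripts containing dollar-quoted function bodies or semicolons inside multi-line string literals would be split incorrectly.

from pathlib import Path

from graflo.db.connection.onto import PostgresConfig
from graflo.db.postgres.util import load_schema_from_sql_file

config = PostgresConfig(...)  # real connection parameters elided

# With the default continue_on_error=True, individual statement failures
# (e.g. DROP TABLE on a table that does not exist) are logged at DEBUG
# level and skipped; pass continue_on_error=False to fail fast instead.
load_schema_from_sql_file(config, Path("schema.sql"))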
graflo/db/tigergraph/__init__.py
@@ -0,0 +1,9 @@
"""TigerGraph database connection implementation.

This package provides TigerGraph-specific database connection implementations
and utilities for graph database operations.
"""

from .conn import TigerGraphConnection

__all__ = ["TigerGraphConnection"]
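The re-export means callers can import the connection class from the subpackage root rather than from the conn module:

from graflo.db.tigergraph import TigerGraphConnection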