graflo 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graflo/README.md +18 -0
- graflo/__init__.py +70 -0
- graflo/architecture/__init__.py +38 -0
- graflo/architecture/actor.py +1120 -0
- graflo/architecture/actor_util.py +450 -0
- graflo/architecture/edge.py +297 -0
- graflo/architecture/onto.py +374 -0
- graflo/architecture/resource.py +161 -0
- graflo/architecture/schema.py +136 -0
- graflo/architecture/transform.py +292 -0
- graflo/architecture/util.py +93 -0
- graflo/architecture/vertex.py +586 -0
- graflo/caster.py +655 -0
- graflo/cli/__init__.py +14 -0
- graflo/cli/ingest.py +194 -0
- graflo/cli/manage_dbs.py +197 -0
- graflo/cli/plot_schema.py +132 -0
- graflo/cli/xml2json.py +93 -0
- graflo/data_source/__init__.py +48 -0
- graflo/data_source/api.py +339 -0
- graflo/data_source/base.py +97 -0
- graflo/data_source/factory.py +298 -0
- graflo/data_source/file.py +133 -0
- graflo/data_source/memory.py +72 -0
- graflo/data_source/registry.py +82 -0
- graflo/data_source/sql.py +185 -0
- graflo/db/__init__.py +44 -0
- graflo/db/arango/__init__.py +22 -0
- graflo/db/arango/conn.py +1026 -0
- graflo/db/arango/query.py +180 -0
- graflo/db/arango/util.py +88 -0
- graflo/db/conn.py +377 -0
- graflo/db/connection/__init__.py +6 -0
- graflo/db/connection/config_mapping.py +18 -0
- graflo/db/connection/onto.py +688 -0
- graflo/db/connection/wsgi.py +29 -0
- graflo/db/manager.py +119 -0
- graflo/db/neo4j/__init__.py +16 -0
- graflo/db/neo4j/conn.py +639 -0
- graflo/db/postgres/__init__.py +156 -0
- graflo/db/postgres/conn.py +425 -0
- graflo/db/postgres/resource_mapping.py +139 -0
- graflo/db/postgres/schema_inference.py +245 -0
- graflo/db/postgres/types.py +148 -0
- graflo/db/tigergraph/__init__.py +9 -0
- graflo/db/tigergraph/conn.py +2212 -0
- graflo/db/util.py +49 -0
- graflo/filter/__init__.py +21 -0
- graflo/filter/onto.py +525 -0
- graflo/logging.conf +22 -0
- graflo/onto.py +190 -0
- graflo/plot/__init__.py +17 -0
- graflo/plot/plotter.py +556 -0
- graflo/util/__init__.py +23 -0
- graflo/util/chunker.py +751 -0
- graflo/util/merge.py +150 -0
- graflo/util/misc.py +37 -0
- graflo/util/onto.py +332 -0
- graflo/util/transform.py +448 -0
- graflo-1.3.3.dist-info/METADATA +190 -0
- graflo-1.3.3.dist-info/RECORD +64 -0
- graflo-1.3.3.dist-info/WHEEL +4 -0
- graflo-1.3.3.dist-info/entry_points.txt +5 -0
- graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""SQL database data source implementation.
|
|
2
|
+
|
|
3
|
+
This module provides a data source for SQL databases using SQLAlchemy-style
|
|
4
|
+
configuration. It supports parameterized queries and pagination.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import dataclasses
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any, Iterator
|
|
12
|
+
|
|
13
|
+
from sqlalchemy import create_engine, text
|
|
14
|
+
from sqlalchemy.engine import Engine
|
|
15
|
+
|
|
16
|
+
from graflo.data_source.base import AbstractDataSource, DataSourceType
|
|
17
|
+
from graflo.onto import BaseDataclass
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclasses.dataclass
class SQLConfig(BaseDataclass):
    """Settings describing a SQL query and the database it runs against.

    The target database is identified by a SQLAlchemy connection string.

    Attributes:
        connection_string: SQLAlchemy connection string, for example
            'postgresql://user:pass@localhost/dbname'
        query: SQL query text; may contain bound-parameter placeholders
        params: Mapping of bound-parameter names to values passed along
            with the query (empty by default)
        pagination: When True, results are fetched page by page
            (default: True)
        page_size: Number of rows requested per page (default: 1000)
    """

    # Required settings.
    connection_string: str
    query: str

    # Optional settings; a fresh dict is created for every instance.
    params: dict[str, Any] = dataclasses.field(default_factory=dict)
    pagination: bool = True
    page_size: int = 1000
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclasses.dataclass
class SQLDataSource(AbstractDataSource):
    """Data source for SQL databases.

    Runs the configured query through SQLAlchemy and returns rows as
    dictionaries keyed by column name. Supports bound parameters and
    offset-based pagination.

    Attributes:
        config: SQL configuration
        engine: SQLAlchemy engine (created on first use)
    """

    config: SQLConfig
    engine: Engine | None = dataclasses.field(default=None, init=False)

    def __post_init__(self):
        """Initialize the SQL data source and tag it with the SQL source type."""
        super().__post_init__()
        self.source_type = DataSourceType.SQL

    def _get_engine(self) -> Engine:
        """Get or create the SQLAlchemy engine.

        Returns:
            SQLAlchemy engine instance, built from
            ``config.connection_string`` on the first call and reused after.
        """
        if self.engine is None:
            self.engine = create_engine(self.config.connection_string)
        return self.engine

    @staticmethod
    def _has_pagination(query: str) -> bool:
        """Tell whether a query already carries its own row-limiting clause.

        Args:
            query: SQL query text

        Returns:
            True when ``LIMIT``, ``OFFSET`` or ``FETCH FIRST/NEXT`` appears
            as a whole keyword (case-insensitive) anywhere in the query.
        """
        # Local import keeps this block self-contained.
        import re

        # Word boundaries stop identifiers such as ``rate_limit`` from being
        # mistaken for a LIMIT clause.
        pattern = r"\b(?:LIMIT|OFFSET)\b|\bFETCH\s+(?:FIRST|NEXT)\b"
        return re.search(pattern, query, flags=re.IGNORECASE) is not None

    def _add_pagination(self, query: str, offset: int, limit: int) -> str:
        """Add pagination to SQL query.

        Args:
            query: Original SQL query
            offset: Offset value
            limit: Limit value

        Returns:
            Query with pagination added, or the query unchanged when it
            already contains a row-limiting clause.
        """
        if self._has_pagination(query):
            # Query already has pagination, return as-is
            return query

        # A trailing ';' would end the statement before the appended clause.
        base_query = query.rstrip().rstrip(";").rstrip()

        # Only the scheme (the part before '://') names the dialect; looking
        # at the whole string would also match host names, users or passwords.
        scheme = self.config.connection_string.split("://", 1)[0].lower()

        if any(name in scheme for name in ("sqlserver", "mssql", "oracle")):
            # SQL Server and Oracle use the OFFSET ... FETCH form.
            # NOTE: SQL Server only accepts it after an ORDER BY clause, so
            # the configured query has to provide one.
            return f"{base_query} OFFSET {offset} ROWS FETCH NEXT {limit} ROWS ONLY"

        # Standard SQL (PostgreSQL, MySQL, SQLite, etc.)
        return f"{base_query} LIMIT {limit} OFFSET {offset}"

    def iter_batches(
        self, batch_size: int = 1000, limit: int | None = None
    ) -> Iterator[list[dict]]:
        """Iterate over SQL query results in batches.

        Args:
            batch_size: Number of items per batch (values below 1 are
                treated as 1)
            limit: Maximum number of items to retrieve; a falsy value
                means no limit

        Yields:
            list[dict]: Batches of rows as dictionaries. ``Decimal`` values
            are converted to ``float`` for JSON compatibility.

        Note:
            A failing query is logged and ends the iteration; the error is
            not re-raised.
        """
        from decimal import Decimal

        engine = self._get_engine()

        # A non-positive size would request zero rows per page and never
        # advance, so clamp both sizes to at least one row.
        batch_size = max(1, batch_size)
        # Use configured page size or batch size, whichever is smaller
        page_size = max(1, min(self.config.page_size, batch_size))

        # If the query carries its own LIMIT/OFFSET it cannot be paged from
        # here: run it exactly once instead of re-running it with a growing
        # offset that is never applied.
        paginate = self.config.pagination and not self._has_pagination(
            self.config.query
        )

        total_items = 0
        offset = 0

        while True:
            if paginate:
                query_str = self._add_pagination(
                    self.config.query, offset=offset, limit=page_size
                )
            else:
                query_str = self.config.query

            try:
                # Rows are fully fetched before the connection is released,
                # so no connection stays checked out while the consumer
                # works on a yielded batch.
                with engine.connect() as conn:
                    rows = conn.execute(
                        text(query_str), self.config.params
                    ).fetchall()

                fetched = len(rows)
                if limit:
                    # Keep only as many rows as the overall limit still allows.
                    rows = rows[: max(limit - total_items, 0)]

                # Convert rows to dictionaries, turning Decimal into float
                # for JSON compatibility.
                records = [
                    {
                        key: float(value) if isinstance(value, Decimal) else value
                        for key, value in row._mapping.items()
                    }
                    for row in rows
                ]
            except Exception as e:
                logger.error(f"SQL query execution failed: {e}")
                break

            for start in range(0, len(records), batch_size):
                yield records[start : start + batch_size]
            total_items += len(records)

            # Stop once the requested number of items has been delivered.
            if limit and total_items >= limit:
                break

            # Stop after a single query, or when a short page shows that the
            # result set is exhausted.
            if not paginate or fetched < page_size:
                break

            offset += page_size
|
graflo/db/__init__.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Database connection and management components.
|
|
2
|
+
|
|
3
|
+
This package provides database connection implementations and management utilities
|
|
4
|
+
for different databases (ArangoDB, Neo4j, TigerGraph, PostgreSQL). It includes connection interfaces,
|
|
5
|
+
query execution, and database operations.
|
|
6
|
+
|
|
7
|
+
Key Components:
|
|
8
|
+
- Connection: Abstract database connection interface
|
|
9
|
+
- ConnectionManager: Database connection management
|
|
10
|
+
- ArangoDB: ArangoDB-specific implementation
|
|
11
|
+
- Neo4j: Neo4j-specific implementation
|
|
12
|
+
- TigerGraph: TigerGraph-specific implementation
|
|
13
|
+
- Query: Query generation and execution utilities
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
>>> from graflo.db import ConnectionManager
|
|
17
|
+
>>> from graflo.db.arango import ArangoConnection
|
|
18
|
+
>>> manager = ConnectionManager(
|
|
19
|
+
... connection_config={"url": "http://localhost:8529"},
|
|
20
|
+
... conn_class=ArangoConnection
|
|
21
|
+
... )
|
|
22
|
+
>>> with manager as conn:
|
|
23
|
+
... conn.init_db(schema)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from .arango.conn import ArangoConnection
|
|
27
|
+
from .conn import Connection, ConnectionType
|
|
28
|
+
from .connection import DBConfig, DBType
|
|
29
|
+
from .manager import ConnectionManager
|
|
30
|
+
from .neo4j.conn import Neo4jConnection
|
|
31
|
+
from .postgres.conn import PostgresConnection
|
|
32
|
+
from .tigergraph.conn import TigerGraphConnection
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
"Connection",
|
|
36
|
+
"ConnectionType",
|
|
37
|
+
"DBType",
|
|
38
|
+
"DBConfig",
|
|
39
|
+
"ConnectionManager",
|
|
40
|
+
"ArangoConnection",
|
|
41
|
+
"Neo4jConnection",
|
|
42
|
+
"PostgresConnection",
|
|
43
|
+
"TigerGraphConnection",
|
|
44
|
+
]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""ArangoDB database implementation.
|
|
2
|
+
|
|
3
|
+
This package provides ArangoDB-specific implementations of the database interface,
|
|
4
|
+
including connection management, query execution, and utility functions.
|
|
5
|
+
|
|
6
|
+
Key Components:
|
|
7
|
+
- ArangoConnection: ArangoDB connection implementation
|
|
8
|
+
- Query: AQL query execution and profiling
|
|
9
|
+
- Util: ArangoDB-specific utility functions
|
|
10
|
+
|
|
11
|
+
Example:
|
|
12
|
+
>>> from graflo.db.arango import ArangoConnection
|
|
13
|
+
>>> conn = ArangoConnection(config)
|
|
14
|
+
>>> cursor = conn.execute("FOR doc IN users RETURN doc")
|
|
15
|
+
>>> results = cursor.batch()
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from .conn import ArangoConnection
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"ArangoConnection",
|
|
22
|
+
]
|