graflo-1.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1120 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +297 -0
  7. graflo/architecture/onto.py +374 -0
  8. graflo/architecture/resource.py +161 -0
  9. graflo/architecture/schema.py +136 -0
  10. graflo/architecture/transform.py +292 -0
  11. graflo/architecture/util.py +93 -0
  12. graflo/architecture/vertex.py +586 -0
  13. graflo/caster.py +655 -0
  14. graflo/cli/__init__.py +14 -0
  15. graflo/cli/ingest.py +194 -0
  16. graflo/cli/manage_dbs.py +197 -0
  17. graflo/cli/plot_schema.py +132 -0
  18. graflo/cli/xml2json.py +93 -0
  19. graflo/data_source/__init__.py +48 -0
  20. graflo/data_source/api.py +339 -0
  21. graflo/data_source/base.py +97 -0
  22. graflo/data_source/factory.py +298 -0
  23. graflo/data_source/file.py +133 -0
  24. graflo/data_source/memory.py +72 -0
  25. graflo/data_source/registry.py +82 -0
  26. graflo/data_source/sql.py +185 -0
  27. graflo/db/__init__.py +44 -0
  28. graflo/db/arango/__init__.py +22 -0
  29. graflo/db/arango/conn.py +1026 -0
  30. graflo/db/arango/query.py +180 -0
  31. graflo/db/arango/util.py +88 -0
  32. graflo/db/conn.py +377 -0
  33. graflo/db/connection/__init__.py +6 -0
  34. graflo/db/connection/config_mapping.py +18 -0
  35. graflo/db/connection/onto.py +688 -0
  36. graflo/db/connection/wsgi.py +29 -0
  37. graflo/db/manager.py +119 -0
  38. graflo/db/neo4j/__init__.py +16 -0
  39. graflo/db/neo4j/conn.py +639 -0
  40. graflo/db/postgres/__init__.py +156 -0
  41. graflo/db/postgres/conn.py +425 -0
  42. graflo/db/postgres/resource_mapping.py +139 -0
  43. graflo/db/postgres/schema_inference.py +245 -0
  44. graflo/db/postgres/types.py +148 -0
  45. graflo/db/tigergraph/__init__.py +9 -0
  46. graflo/db/tigergraph/conn.py +2212 -0
  47. graflo/db/util.py +49 -0
  48. graflo/filter/__init__.py +21 -0
  49. graflo/filter/onto.py +525 -0
  50. graflo/logging.conf +22 -0
  51. graflo/onto.py +190 -0
  52. graflo/plot/__init__.py +17 -0
  53. graflo/plot/plotter.py +556 -0
  54. graflo/util/__init__.py +23 -0
  55. graflo/util/chunker.py +751 -0
  56. graflo/util/merge.py +150 -0
  57. graflo/util/misc.py +37 -0
  58. graflo/util/onto.py +332 -0
  59. graflo/util/transform.py +448 -0
  60. graflo-1.3.3.dist-info/METADATA +190 -0
  61. graflo-1.3.3.dist-info/RECORD +64 -0
  62. graflo-1.3.3.dist-info/WHEEL +4 -0
  63. graflo-1.3.3.dist-info/entry_points.txt +5 -0
  64. graflo-1.3.3.dist-info/licenses/LICENSE +126 -0
@@ -0,0 +1,185 @@
1
+ """SQL database data source implementation.
2
+
3
+ This module provides a data source for SQL databases using SQLAlchemy-style
4
+ configuration. It supports parameterized queries and pagination.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import dataclasses
10
+ import logging
11
+ from typing import Any, Iterator
12
+
13
+ from sqlalchemy import create_engine, text
14
+ from sqlalchemy.engine import Engine
15
+
16
+ from graflo.data_source.base import AbstractDataSource, DataSourceType
17
+ from graflo.onto import BaseDataclass
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dataclasses.dataclass
class SQLConfig(BaseDataclass):
    """Settings describing a single SQL query used as a data source.

    The database is addressed with a SQLAlchemy connection string.

    Attributes:
        connection_string: SQLAlchemy connection string, for example
            ``'postgresql://user:pass@localhost/dbname'``.
        query: SQL text to execute; may contain bound-parameter placeholders.
        params: Values for the bound parameters in ``query``, keyed by
            parameter name. Defaults to an empty dict.
        pagination: When true, the query is fetched page by page instead of
            in a single round trip. Defaults to ``True``.
        page_size: Upper bound on the number of rows requested per page.
            Defaults to ``1000``.
    """

    # Where to connect and what to run.
    connection_string: str
    query: str
    # A fresh dict per instance, so configs never share parameter state.
    params: dict[str, Any] = dataclasses.field(default_factory=dict)
    # Paging behaviour.
    pagination: bool = True
    page_size: int = 1000
42
+
43
+
44
@dataclasses.dataclass
class SQLDataSource(AbstractDataSource):
    """Data source that streams the rows of a SQL query via SQLAlchemy.

    Rows are returned as dictionaries keyed by column name. The query may
    use bound parameters (``config.params``) and is optionally fetched page
    by page using LIMIT/OFFSET-style clauses appended to the query text.

    Note:
        Offset-based paging is only stable when the configured query has a
        deterministic ``ORDER BY``; without one the database is free to
        return rows in a different order for each page. SQL Server
        additionally rejects ``OFFSET ... FETCH`` without an ``ORDER BY``.

    Attributes:
        config: SQL configuration (connection string, query, paging options).
        engine: SQLAlchemy engine, created lazily on first use.
    """

    config: SQLConfig
    engine: Engine | None = dataclasses.field(default=None, init=False)

    def __post_init__(self) -> None:
        """Run the base-class initialisation and tag the source as SQL."""
        super().__post_init__()
        self.source_type = DataSourceType.SQL

    def _get_engine(self) -> Engine:
        """Return the cached SQLAlchemy engine, creating it on first call.

        Returns:
            SQLAlchemy engine bound to ``config.connection_string``.
        """
        if self.engine is None:
            self.engine = create_engine(self.config.connection_string)
        return self.engine

    @staticmethod
    def _has_pagination(query: str) -> bool:
        """Tell whether ``query`` already contains a LIMIT or OFFSET keyword.

        The keywords are matched as whole words, case-insensitively, so
        identifiers such as ``rate_limit`` or ``offset_id`` do not count.
        The check is purely textual: a keyword inside a subquery or a string
        literal is also treated as existing pagination.
        """
        # Local import (the file already imports Decimal this way) so the
        # module-level import block does not need to change.
        import re

        return re.search(r"\b(?:LIMIT|OFFSET)\b", query, flags=re.IGNORECASE) is not None

    def _add_pagination(self, query: str, offset: int, limit: int) -> str:
        """Append a pagination clause to ``query``.

        Args:
            query: Original SQL query.
            offset: Number of rows to skip.
            limit: Maximum number of rows to return.

        Returns:
            ``query`` unchanged when it already carries LIMIT/OFFSET,
            otherwise the query with a dialect-appropriate clause appended.
        """
        if self._has_pagination(query):
            # Query already has pagination, return as-is
            return query

        # A trailing statement terminator would end up in front of the
        # appended clause and make the statement invalid, so drop it.
        base_query = query.rstrip().rstrip(";").rstrip()

        # The dialect is guessed from the connection string.
        connection_string_lower = self.config.connection_string.lower()
        uses_fetch_syntax = any(
            marker in connection_string_lower
            for marker in ("sqlserver", "mssql", "oracle")
        )
        if uses_fetch_syntax:
            # SQL Server and Oracle: OFFSET ... FETCH NEXT ... ROWS ONLY
            return f"{base_query} OFFSET {offset} ROWS FETCH NEXT {limit} ROWS ONLY"
        # Standard SQL (PostgreSQL, MySQL, SQLite, etc.)
        return f"{base_query} LIMIT {limit} OFFSET {offset}"

    def iter_batches(
        self, batch_size: int = 1000, limit: int | None = None
    ) -> Iterator[list[dict]]:
        """Iterate over SQL query results in batches.

        When pagination is enabled, each page holds at most
        ``min(config.page_size, batch_size)`` rows and any partially filled
        batch is flushed at the end of its page, so a batch never spans two
        pages. If the configured query already contains LIMIT/OFFSET it is
        executed exactly once, because no further paging can be applied to it.

        A failing query is logged and ends the iteration; rows yielded before
        the failure are kept.

        Args:
            batch_size: Maximum number of items per batch.
            limit: Maximum number of items to retrieve. ``None`` (or ``0``)
                means no limit.

        Yields:
            list[dict]: Batches of rows as dictionaries; ``Decimal`` values
            are converted to ``float`` for JSON compatibility.

        Raises:
            ValueError: If paging is active and the effective page size is
                not positive (the paging loop could never terminate).
        """
        from decimal import Decimal

        engine = self._get_engine()
        base_query = self.config.query

        # Use configured page size or batch size, whichever is smaller
        page_size = min(self.config.page_size, batch_size)

        # Paging is only possible when we are the ones adding the clause.
        # Re-running an already-limited query with a growing offset would
        # return the same rows again and again.
        paginate = self.config.pagination and not self._has_pagination(base_query)
        if paginate and page_size < 1:
            raise ValueError(
                "page_size and batch_size must be positive when pagination is "
                f"enabled (effective page size: {page_size})"
            )

        total_items = 0
        offset = 0

        while True:
            if paginate:
                query_str = self._add_pagination(
                    base_query, offset=offset, limit=page_size
                )
            else:
                query_str = base_query

            # Fetch the whole page first so the connection is released
            # before control is handed to the consumer via ``yield``.
            try:
                with engine.connect() as conn:
                    rows = conn.execute(text(query_str), self.config.params).fetchall()
            except Exception as e:
                # Best effort: report the failure and stop iterating.
                logger.error(f"SQL query execution failed: {e}")
                break

            batch: list[dict] = []
            for row in rows:
                if limit and total_items >= limit:
                    break

                # Convert row to dictionary, mapping Decimal to float for
                # JSON compatibility.
                row_dict = {
                    key: float(value) if isinstance(value, Decimal) else value
                    for key, value in row._mapping.items()
                }
                batch.append(row_dict)
                total_items += 1

                # Yield when batch is full
                if len(batch) >= batch_size:
                    yield batch
                    batch = []

            # Yield remaining items of this page
            if batch:
                yield batch

            if limit and total_items >= limit:
                break

            # A single query, or a short page, means there is nothing left.
            if not paginate or len(rows) < page_size:
                break

            offset += page_size
graflo/db/__init__.py ADDED
@@ -0,0 +1,44 @@
1
+ """Database connection and management components.
2
+
3
+ This package provides database connection implementations and management utilities
4
+ for graph databases (ArangoDB, Neo4j, TigerGraph) and PostgreSQL. It includes connection interfaces,
5
+ query execution, and database operations.
6
+
7
+ Key Components:
8
+ - Connection: Abstract database connection interface
9
+ - ConnectionManager: Database connection management
10
+ - ArangoDB: ArangoDB-specific implementation
11
+ - Neo4j: Neo4j-specific implementation
12
+ - TigerGraph: TigerGraph-specific implementation
13
+ - Query: Query generation and execution utilities
14
+
15
+ Example:
16
+ >>> from graflo.db import ConnectionManager
17
+ >>> from graflo.db.arango import ArangoConnection
18
+ >>> manager = ConnectionManager(
19
+ ... connection_config={"url": "http://localhost:8529"},
20
+ ... conn_class=ArangoConnection
21
+ ... )
22
+ >>> with manager as conn:
23
+ ... conn.init_db(schema)
24
+ """
25
+
26
+ from .arango.conn import ArangoConnection
27
+ from .conn import Connection, ConnectionType
28
+ from .connection import DBConfig, DBType
29
+ from .manager import ConnectionManager
30
+ from .neo4j.conn import Neo4jConnection
31
+ from .postgres.conn import PostgresConnection
32
+ from .tigergraph.conn import TigerGraphConnection
33
+
34
# Names re-exported as the public API of this package.
__all__ = [
    # Connection interface, configuration types and manager
    "Connection",
    "ConnectionType",
    "DBType",
    "DBConfig",
    "ConnectionManager",
    # Backend-specific connection classes
    "ArangoConnection",
    "Neo4jConnection",
    "PostgresConnection",
    "TigerGraphConnection",
]
@@ -0,0 +1,22 @@
1
+ """ArangoDB database implementation.
2
+
3
+ This package provides ArangoDB-specific implementations of the database interface,
4
+ including connection management, query execution, and utility functions.
5
+
6
+ Key Components:
7
+ - ArangoConnection: ArangoDB connection implementation
8
+ - Query: AQL query execution and profiling
9
+ - Util: ArangoDB-specific utility functions
10
+
11
+ Example:
12
+ >>> from graflo.db.arango import ArangoConnection
13
+ >>> conn = ArangoConnection(config)
14
+ >>> cursor = conn.execute("FOR doc IN users RETURN doc")
15
+ >>> results = cursor.batch()
16
+ """
17
+
18
+ from .conn import ArangoConnection
19
+
20
# The connection class is the only name this package re-exports.
__all__ = ["ArangoConnection"]