graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic. Click here for more details.

Files changed (70) hide show
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1276 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +418 -0
  7. graflo/architecture/onto.py +376 -0
  8. graflo/architecture/onto_sql.py +54 -0
  9. graflo/architecture/resource.py +163 -0
  10. graflo/architecture/schema.py +135 -0
  11. graflo/architecture/transform.py +292 -0
  12. graflo/architecture/util.py +89 -0
  13. graflo/architecture/vertex.py +562 -0
  14. graflo/caster.py +736 -0
  15. graflo/cli/__init__.py +14 -0
  16. graflo/cli/ingest.py +203 -0
  17. graflo/cli/manage_dbs.py +197 -0
  18. graflo/cli/plot_schema.py +132 -0
  19. graflo/cli/xml2json.py +93 -0
  20. graflo/data_source/__init__.py +48 -0
  21. graflo/data_source/api.py +339 -0
  22. graflo/data_source/base.py +95 -0
  23. graflo/data_source/factory.py +304 -0
  24. graflo/data_source/file.py +148 -0
  25. graflo/data_source/memory.py +70 -0
  26. graflo/data_source/registry.py +82 -0
  27. graflo/data_source/sql.py +183 -0
  28. graflo/db/__init__.py +44 -0
  29. graflo/db/arango/__init__.py +22 -0
  30. graflo/db/arango/conn.py +1025 -0
  31. graflo/db/arango/query.py +180 -0
  32. graflo/db/arango/util.py +88 -0
  33. graflo/db/conn.py +377 -0
  34. graflo/db/connection/__init__.py +6 -0
  35. graflo/db/connection/config_mapping.py +18 -0
  36. graflo/db/connection/onto.py +717 -0
  37. graflo/db/connection/wsgi.py +29 -0
  38. graflo/db/manager.py +119 -0
  39. graflo/db/neo4j/__init__.py +16 -0
  40. graflo/db/neo4j/conn.py +639 -0
  41. graflo/db/postgres/__init__.py +37 -0
  42. graflo/db/postgres/conn.py +948 -0
  43. graflo/db/postgres/fuzzy_matcher.py +281 -0
  44. graflo/db/postgres/heuristics.py +133 -0
  45. graflo/db/postgres/inference_utils.py +428 -0
  46. graflo/db/postgres/resource_mapping.py +273 -0
  47. graflo/db/postgres/schema_inference.py +372 -0
  48. graflo/db/postgres/types.py +148 -0
  49. graflo/db/postgres/util.py +87 -0
  50. graflo/db/tigergraph/__init__.py +9 -0
  51. graflo/db/tigergraph/conn.py +2365 -0
  52. graflo/db/tigergraph/onto.py +26 -0
  53. graflo/db/util.py +49 -0
  54. graflo/filter/__init__.py +21 -0
  55. graflo/filter/onto.py +525 -0
  56. graflo/logging.conf +22 -0
  57. graflo/onto.py +312 -0
  58. graflo/plot/__init__.py +17 -0
  59. graflo/plot/plotter.py +616 -0
  60. graflo/util/__init__.py +23 -0
  61. graflo/util/chunker.py +807 -0
  62. graflo/util/merge.py +150 -0
  63. graflo/util/misc.py +37 -0
  64. graflo/util/onto.py +422 -0
  65. graflo/util/transform.py +454 -0
  66. graflo-1.3.7.dist-info/METADATA +243 -0
  67. graflo-1.3.7.dist-info/RECORD +70 -0
  68. graflo-1.3.7.dist-info/WHEEL +4 -0
  69. graflo-1.3.7.dist-info/entry_points.txt +5 -0
  70. graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
graflo/data_source/sql.py ADDED
@@ -0,0 +1,183 @@
1
+ """SQL database data source implementation.
2
+
3
+ This module provides a data source for SQL databases using SQLAlchemy-style
4
+ configuration. It supports parameterized queries and pagination.
5
+ """
6
+
7
+ import dataclasses
8
+ import logging
9
+ from typing import Any, Iterator
10
+
11
+ from sqlalchemy import create_engine, text
12
+ from sqlalchemy.engine import Engine
13
+
14
+ from graflo.data_source.base import AbstractDataSource, DataSourceType
15
+ from graflo.onto import BaseDataclass
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
@dataclasses.dataclass
class SQLConfig(BaseDataclass):
    """Settings describing which SQL query to run and how to page through it.

    The connection target is expressed as a SQLAlchemy connection string.

    Attributes:
        connection_string: SQLAlchemy connection string, for example
            'postgresql://user:pass@localhost/dbname'.
        query: SQL query text; may contain bind parameters.
        params: Values for the query's bind parameters, keyed by parameter
            name. Defaults to an empty dict.
        pagination: Whether results are fetched page by page (default: True).
        page_size: Number of rows requested per page (default: 1000).
    """

    connection_string: str
    query: str
    params: dict[str, Any] = dataclasses.field(default_factory=dict)
    pagination: bool = True
    page_size: int = 1000
40
+
41
+
@dataclasses.dataclass
class SQLDataSource(AbstractDataSource):
    """Data source for SQL databases.

    Runs the configured query through SQLAlchemy and yields the result rows
    as dictionaries keyed by column name. Bind parameters from the
    configuration are passed to every execution, and the query can be
    paginated automatically.

    Attributes:
        config: SQL configuration
        engine: SQLAlchemy engine (created on first use)
    """

    config: SQLConfig
    engine: Engine | None = dataclasses.field(default=None, init=False)

    def __post_init__(self):
        """Initialize the SQL data source."""
        super().__post_init__()
        self.source_type = DataSourceType.SQL

    def _get_engine(self) -> Engine:
        """Get or create SQLAlchemy engine.

        Returns:
            SQLAlchemy engine instance
        """
        if self.engine is None:
            self.engine = create_engine(self.config.connection_string)
        return self.engine

    @staticmethod
    def _has_pagination(query: str) -> bool:
        """Tell whether a query already contains a LIMIT or OFFSET keyword.

        The keywords are matched case-insensitively as whole words, so
        identifiers that merely contain them (e.g. ``rate_limits``) do not
        count. This is still a purely textual heuristic: a keyword inside a
        subquery, string literal or comment is detected as well.

        Args:
            query: SQL query text

        Returns:
            True if LIMIT or OFFSET appears as a standalone word
        """
        import re

        return re.search(r"\b(?:LIMIT|OFFSET)\b", query, re.IGNORECASE) is not None

    def _add_pagination(self, query: str, offset: int, limit: int) -> str:
        """Add pagination to SQL query.

        Args:
            query: Original SQL query
            offset: Offset value
            limit: Limit value

        Returns:
            Query with pagination added, or the query unchanged when it
            already contains LIMIT/OFFSET
        """
        if self._has_pagination(query):
            # Query already has pagination, return as-is
            return query

        # A trailing statement terminator would end up in front of the
        # appended clause and make the statement invalid, so drop it.
        base_query = query.rstrip("; \t\r\n")

        # The dialect is guessed from the connection string.
        connection_string_lower = self.config.connection_string.lower()
        uses_fetch_syntax = any(
            marker in connection_string_lower
            for marker in ("sqlserver", "mssql", "oracle")
        )
        if uses_fetch_syntax:
            # SQL Server and Oracle: OFFSET ... FETCH NEXT syntax.
            # NOTE(review): SQL Server only accepts OFFSET/FETCH after an
            # ORDER BY clause - confirm configured queries provide one.
            return f"{base_query} OFFSET {offset} ROWS FETCH NEXT {limit} ROWS ONLY"

        # Standard SQL (PostgreSQL, MySQL, SQLite, etc.)
        return f"{base_query} LIMIT {limit} OFFSET {offset}"

    def iter_batches(
        self, batch_size: int = 1000, limit: int | None = None
    ) -> Iterator[list[dict]]:
        """Iterate over SQL query results in batches.

        With pagination enabled the query is executed once per page, using
        the smaller of the configured page size and ``batch_size`` as the
        page size. A query that already contains LIMIT/OFFSET, or a
        configuration with pagination disabled, is executed exactly once.

        A failing query is logged and ends the iteration; batches yielded
        before the failure are not affected.

        Args:
            batch_size: Number of items per batch
            limit: Maximum number of items to retrieve; a falsy value
                (None or 0) means no limit

        Yields:
            list[dict]: Batches of rows as dictionaries, with Decimal values
                converted to float

        Raises:
            ValueError: If pagination is active and the effective page size
                is not positive
        """
        from decimal import Decimal

        engine = self._get_engine()
        base_query = self.config.query
        params = self.config.params

        # A query that brings its own LIMIT/OFFSET cannot be advanced page by
        # page: re-running it would return the same rows again and again.
        paginate = self.config.pagination and not self._has_pagination(base_query)

        # Use configured page size or batch size, whichever is smaller
        page_size = min(self.config.page_size, batch_size)
        if paginate and page_size < 1:
            # A non-positive page size never advances through the result set.
            raise ValueError(
                "page size must be a positive integer when pagination is "
                f"enabled, got {page_size}"
            )

        total_items = 0
        offset = 0

        while True:
            if paginate:
                query_str = self._add_pagination(
                    base_query, offset=offset, limit=page_size
                )
            else:
                query_str = base_query

            # Only the database round trip is guarded; the connection is
            # released before any batch is handed to the consumer.
            try:
                with engine.connect() as conn:
                    rows = conn.execute(text(query_str), params).fetchall()
            except Exception as e:
                logger.error("SQL query execution failed: %s", e)
                break

            batch = []
            for row in rows:
                if limit and total_items >= limit:
                    break

                # Convert Decimal to float for JSON compatibility
                row_dict = {
                    key: float(value) if isinstance(value, Decimal) else value
                    for key, value in row._mapping.items()
                }
                batch.append(row_dict)
                total_items += 1

                # Yield when batch is full
                if len(batch) >= batch_size:
                    yield batch
                    batch = []

            # Yield remaining items
            if batch:
                yield batch

            if limit and total_items >= limit:
                break

            # Single-query mode, or a short page: nothing more to fetch
            if not paginate or len(rows) < page_size:
                break

            offset += page_size
graflo/db/__init__.py ADDED
@@ -0,0 +1,44 @@
1
+ """Database connection and management components.
2
+
3
+ This package provides database connection implementations and management utilities
4
+ for different databases (ArangoDB, Neo4j, TigerGraph, PostgreSQL). It includes connection interfaces,
5
+ query execution, and database operations.
6
+
7
+ Key Components:
8
+ - Connection: Abstract database connection interface
9
+ - ConnectionManager: Database connection management
10
+ - ArangoDB: ArangoDB-specific implementation
11
+ - Neo4j: Neo4j-specific implementation
12
+ - TigerGraph: TigerGraph-specific implementation
13
+ - Query: Query generation and execution utilities
14
+
15
+ Example:
16
+ >>> from graflo.db import ConnectionManager
17
+ >>> from graflo.db.arango import ArangoConnection
18
+ >>> manager = ConnectionManager(
19
+ ... connection_config={"url": "http://localhost:8529"},
20
+ ... conn_class=ArangoConnection
21
+ ... )
22
+ >>> with manager as conn:
23
+ ... conn.init_db(schema)
24
+ """
25
+
26
+ from .arango.conn import ArangoConnection
27
+ from .conn import Connection, ConnectionType
28
+ from .connection import DBConfig, DBType
29
+ from .manager import ConnectionManager
30
+ from .neo4j.conn import Neo4jConnection
31
+ from .postgres.conn import PostgresConnection
32
+ from .tigergraph.conn import TigerGraphConnection
33
+
34
+ __all__ = [
35
+ "Connection",
36
+ "ConnectionType",
37
+ "DBType",
38
+ "DBConfig",
39
+ "ConnectionManager",
40
+ "ArangoConnection",
41
+ "Neo4jConnection",
42
+ "PostgresConnection",
43
+ "TigerGraphConnection",
44
+ ]
graflo/db/arango/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ """ArangoDB database implementation.
2
+
3
+ This package provides ArangoDB-specific implementations of the database interface,
4
+ including connection management, query execution, and utility functions.
5
+
6
+ Key Components:
7
+ - ArangoConnection: ArangoDB connection implementation
8
+ - Query: AQL query execution and profiling
9
+ - Util: ArangoDB-specific utility functions
10
+
11
+ Example:
12
+ >>> from graflo.db.arango import ArangoConnection
13
+ >>> conn = ArangoConnection(config)
14
+ >>> cursor = conn.execute("FOR doc IN users RETURN doc")
15
+ >>> results = cursor.batch()
16
+ """
17
+
18
+ from .conn import ArangoConnection
19
+
20
+ __all__ = [
21
+ "ArangoConnection",
22
+ ]