Flowfile 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- build_backends/__init__.py +0 -0
- build_backends/main.py +313 -0
- build_backends/main_prd.py +202 -0
- flowfile/__init__.py +71 -0
- flowfile/__main__.py +24 -0
- flowfile-0.2.2.dist-info/LICENSE +21 -0
- flowfile-0.2.2.dist-info/METADATA +225 -0
- flowfile-0.2.2.dist-info/RECORD +171 -0
- flowfile-0.2.2.dist-info/WHEEL +4 -0
- flowfile-0.2.2.dist-info/entry_points.txt +9 -0
- flowfile_core/__init__.py +13 -0
- flowfile_core/auth/__init__.py +0 -0
- flowfile_core/auth/jwt.py +140 -0
- flowfile_core/auth/models.py +40 -0
- flowfile_core/auth/secrets.py +178 -0
- flowfile_core/configs/__init__.py +35 -0
- flowfile_core/configs/flow_logger.py +433 -0
- flowfile_core/configs/node_store/__init__.py +0 -0
- flowfile_core/configs/node_store/nodes.py +98 -0
- flowfile_core/configs/settings.py +120 -0
- flowfile_core/database/__init__.py +0 -0
- flowfile_core/database/connection.py +51 -0
- flowfile_core/database/init_db.py +45 -0
- flowfile_core/database/models.py +41 -0
- flowfile_core/fileExplorer/__init__.py +0 -0
- flowfile_core/fileExplorer/funcs.py +259 -0
- flowfile_core/fileExplorer/utils.py +53 -0
- flowfile_core/flowfile/FlowfileFlow.py +1403 -0
- flowfile_core/flowfile/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
- flowfile_core/flowfile/analytics/__init__.py +0 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
- flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
- flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- flowfile_core/flowfile/analytics/utils.py +9 -0
- flowfile_core/flowfile/connection_manager/__init__.py +3 -0
- flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
- flowfile_core/flowfile/connection_manager/models.py +10 -0
- flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
- flowfile_core/flowfile/database_connection_manager/models.py +15 -0
- flowfile_core/flowfile/extensions.py +36 -0
- flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
- flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
- flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
- flowfile_core/flowfile/flow_node/__init__.py +0 -0
- flowfile_core/flowfile/flow_node/flow_node.py +771 -0
- flowfile_core/flowfile/flow_node/models.py +111 -0
- flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
- flowfile_core/flowfile/handler.py +123 -0
- flowfile_core/flowfile/manage/__init__.py +0 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
- flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- flowfile_core/flowfile/manage/open_flowfile.py +136 -0
- flowfile_core/flowfile/setting_generator/__init__.py +2 -0
- flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
- flowfile_core/flowfile/setting_generator/settings.py +176 -0
- flowfile_core/flowfile/sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
- flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
- flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
- flowfile_core/flowfile/util/__init__.py +0 -0
- flowfile_core/flowfile/util/calculate_layout.py +137 -0
- flowfile_core/flowfile/util/execution_orderer.py +141 -0
- flowfile_core/flowfile/utils.py +106 -0
- flowfile_core/main.py +138 -0
- flowfile_core/routes/__init__.py +0 -0
- flowfile_core/routes/auth.py +34 -0
- flowfile_core/routes/logs.py +163 -0
- flowfile_core/routes/public.py +10 -0
- flowfile_core/routes/routes.py +601 -0
- flowfile_core/routes/secrets.py +85 -0
- flowfile_core/run_lock.py +11 -0
- flowfile_core/schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
- flowfile_core/schemas/defaults.py +9 -0
- flowfile_core/schemas/external_sources/__init__.py +0 -0
- flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
- flowfile_core/schemas/input_schema.py +477 -0
- flowfile_core/schemas/models.py +193 -0
- flowfile_core/schemas/output_model.py +115 -0
- flowfile_core/schemas/schemas.py +106 -0
- flowfile_core/schemas/transform_schema.py +569 -0
- flowfile_core/secrets/__init__.py +0 -0
- flowfile_core/secrets/secrets.py +64 -0
- flowfile_core/utils/__init__.py +0 -0
- flowfile_core/utils/arrow_reader.py +247 -0
- flowfile_core/utils/excel_file_manager.py +18 -0
- flowfile_core/utils/fileManager.py +45 -0
- flowfile_core/utils/fl_executor.py +38 -0
- flowfile_core/utils/utils.py +8 -0
- flowfile_frame/__init__.py +56 -0
- flowfile_frame/__main__.py +12 -0
- flowfile_frame/adapters.py +17 -0
- flowfile_frame/expr.py +1163 -0
- flowfile_frame/flow_frame.py +2093 -0
- flowfile_frame/group_frame.py +199 -0
- flowfile_frame/join.py +75 -0
- flowfile_frame/selectors.py +242 -0
- flowfile_frame/utils.py +184 -0
- flowfile_worker/__init__.py +55 -0
- flowfile_worker/configs.py +95 -0
- flowfile_worker/create/__init__.py +37 -0
- flowfile_worker/create/funcs.py +146 -0
- flowfile_worker/create/models.py +86 -0
- flowfile_worker/create/pl_types.py +35 -0
- flowfile_worker/create/read_excel_tables.py +110 -0
- flowfile_worker/create/utils.py +84 -0
- flowfile_worker/external_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
- flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
- flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- flowfile_worker/external_sources/sql_source/main.py +56 -0
- flowfile_worker/external_sources/sql_source/models.py +72 -0
- flowfile_worker/flow_logger.py +58 -0
- flowfile_worker/funcs.py +327 -0
- flowfile_worker/main.py +108 -0
- flowfile_worker/models.py +95 -0
- flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
- flowfile_worker/polars_fuzzy_match/models.py +36 -0
- flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
- flowfile_worker/polars_fuzzy_match/process.py +86 -0
- flowfile_worker/polars_fuzzy_match/utils.py +50 -0
- flowfile_worker/process_manager.py +36 -0
- flowfile_worker/routes.py +440 -0
- flowfile_worker/secrets.py +148 -0
- flowfile_worker/spawner.py +187 -0
- flowfile_worker/utils.py +25 -0
- test_utils/__init__.py +3 -0
- test_utils/postgres/__init__.py +1 -0
- test_utils/postgres/commands.py +109 -0
- test_utils/postgres/fixtures.py +417 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
from typing import Any, Dict, Generator, List, Optional, Literal, Tuple
|
|
2
|
+
import polars as pl
|
|
3
|
+
from flowfile_core.configs import logger
|
|
4
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
5
|
+
from flowfile_core.schemas.input_schema import MinimalFieldInfo, DatabaseSettings
|
|
6
|
+
from sqlalchemy import Engine, inspect, create_engine, text
|
|
7
|
+
from flowfile_core.secrets.secrets import get_encrypted_secret, decrypt_secret
|
|
8
|
+
|
|
9
|
+
from flowfile_core.flowfile.sources.external_sources.base_class import ExternalDataSource
|
|
10
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source.utils import get_polars_type, construct_sql_uri
|
|
11
|
+
from flowfile_core.flowfile.database_connection_manager.db_connections import get_local_database_connection
|
|
12
|
+
|
|
13
|
+
QueryMode = Literal['table', 'query']
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_query_columns(engine: Engine, query_text: str) -> List[str]:
    """
    Get column names from a query by executing it and reading the result keys.

    All columns are later assumed to be strings by callers; this function
    itself only returns the names.

    Args:
        engine: SQLAlchemy engine object
        query_text: SQL query as a string

    Returns:
        List of column names produced by the query
    """
    with engine.connect() as connection:
        # Create a text object from the query
        query = text(query_text)

        # Execute the query to get column names
        result = connection.execute(query)
        column_names = result.keys()
        result.close()  # Close the result to avoid consuming the cursor

        return list(column_names)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_table_column_types(engine: Engine, table_name: str, schema: Optional[str] = None) -> List[Tuple[str, Any]]:
    """
    Get column types from a database table using a SQLAlchemy engine

    Args:
        engine: SQLAlchemy engine object
        table_name: Name of the table to inspect
        schema: Optional schema name (e.g., 'public' for PostgreSQL)

    Returns:
        List of (column name, SQLAlchemy type) tuples, in table order
    """
    inspector = inspect(engine)
    columns = inspector.get_columns(table_name, schema=schema)

    return [(column['name'], column['type']) for column in columns]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class BaseSqlSource:
    """
    Connection-agnostic SQL source.

    Holds the query text and table/schema metadata and knows how to build a
    SELECT statement, a one-row sample query, and how to parse
    schema-qualified table names. It never opens a database connection.
    """
    table_name: Optional[str] = None
    query: Optional[str] = None
    schema_name: Optional[str] = None
    query_mode: QueryMode = 'table'
    schema: Optional[List[FlowfileColumn]] = None

    def __init__(self,
                 query: str = None,
                 table_name: str = None,
                 schema_name: str = None,
                 fields: Optional[List[MinimalFieldInfo]] = None):
        """
        Initialize a BaseSqlSource object.

        Args:
            query: SQL query string (switches the source into 'query' mode)
            table_name: Name of the table to query (switches into 'table' mode)
            schema_name: Optional database schema name
            fields: Optional list of field information used to pre-seed the schema

        Raises:
            ValueError: if both or neither of `query` / `table_name` are given.
        """
        # An empty schema name means "no schema".
        if schema_name == '':
            schema_name = None

        # Exactly one of query / table_name must be supplied.
        if query is not None and table_name is not None:
            raise ValueError("Only one of table_name or query can be provided")
        if query is None and table_name is None:
            raise ValueError("Either table_name or query must be provided")

        if query is not None:
            self.query_mode = 'query'
            self.query = query
        else:
            self.query_mode = 'table'
            self.table_name = table_name
            self.schema_name = schema_name
            # Build the basic SELECT, schema-qualified when a schema is known.
            qualified = table_name if schema_name is None else f"{schema_name}.{table_name}"
            self.query = f"SELECT * FROM {qualified}"

        # Pre-seed the schema from caller-provided field metadata, if any.
        if fields:
            self.schema = [
                FlowfileColumn.from_input(column_name=field.name, data_type=field.data_type)
                for field in fields
            ]

    def get_sample_query(self) -> str:
        """
        Return a LIMIT-1 variant of the query, suitable for cheap schema probing.
        """
        if self.query_mode == 'query':
            # Arbitrary user SQL is wrapped in a subquery so LIMIT always applies.
            return f"select * from ({self.query}) as main_query LIMIT 1"
        return f"{self.query} LIMIT 1"

    @staticmethod
    def _parse_table_name(table_name: str) -> tuple[Optional[str], str]:
        """
        Parse a table name that may include a schema.

        Args:
            table_name: Table name possibly in the format 'schema.table'

        Returns:
            Tuple of (schema, table_name); schema is None when absent.
        """
        # Everything before the last dot is the schema (may itself contain dots).
        schema_part, sep, last_part = table_name.rpartition('.')
        if sep:
            return schema_part, last_part
        return None, table_name
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class SqlSource(BaseSqlSource, ExternalDataSource):
    """
    SQL data source backed by a live database connection.

    Combines the query-building behaviour of BaseSqlSource with the
    ExternalDataSource interface: validating the connection, inferring a
    schema, and reading the query result into a Polars DataFrame.
    """
    # Database URI used for every read (passed to SQLAlchemy and Polars).
    connection_string: Optional[str]
    # Cached full query result; populated lazily by get_pl_df().
    read_result: Optional[pl.DataFrame] = None

    def __init__(self,
                 connection_string: str,
                 query: str = None,
                 table_name: str = None,
                 schema_name: str = None,
                 fields: Optional[List[MinimalFieldInfo]] = None):
        """
        Initialize a SqlSource.

        Args:
            connection_string: Database URI for SQLAlchemy / Polars reads
            query: SQL query string (mutually exclusive with table_name)
            table_name: Table to read (mutually exclusive with query)
            schema_name: Optional database schema name (table mode only)
            fields: Optional list of pre-known field information
        """

        # Initialize the base class first
        BaseSqlSource.__init__(self, query=query, table_name=table_name, schema_name=schema_name, fields=fields)

        # Set connection-specific attributes
        self.connection_string = connection_string
        self.read_result = None

    def get_initial_data(self) -> List[Dict[str, Any]]:
        # No eager data: rows are fetched on demand by the data getters.
        return []

    def validate(self) -> None:
        """
        Check that the source is reachable and yields at least one column.

        Table-mode introspection failures are only logged (warning); the
        decisive check is that the one-row sample query returns columns.
        Any error is logged and re-raised.
        """
        try:
            engine = create_engine(self.connection_string)
            if self.query_mode == 'table':
                try:
                    # NOTE(review): when both schema_name and table_name are set,
                    # both introspection calls run (no elif); their results are
                    # discarded, so this only costs an extra round-trip.
                    if self.schema_name is not None:
                        self._get_columns_from_table_and_schema(engine, self.table_name, self.schema_name)
                    if self.table_name is not None:
                        self._get_columns_from_table(engine, self.table_name)
                except Exception as e:
                    logger.warning(f"Error getting column info for table {self.table_name}: {e}")
                c = self._get_columns_from_query(engine, self.get_sample_query())
                if len(c) == 0:
                    raise ValueError("No columns found in the query")
            else:
                c = self._get_columns_from_query(engine, self.get_sample_query())
                if len(c) == 0:
                    raise ValueError("No columns found in the query")
        except Exception as e:
            logger.error(f"Error validating SQL source: {e}")
            raise e

    def get_iter(self) -> Generator[Dict[str, Any], None, None]:
        """Yield every row as a dict (reads the full result first)."""
        logger.warning('Getting data in iteration, this is suboptimal')
        data = self.data_getter()
        for row in data:
            yield row

    def get_df(self):
        """Return the full query result as a pandas DataFrame."""
        df = self.get_pl_df()
        return df.to_pandas()

    def get_sample(self, n: int = 10000) -> Generator[Dict[str, Any], None, None]:
        """
        Return a generator over at most n rows.

        Table mode pushes the LIMIT into SQL; query mode reads the full
        (cached) result and slices locally.
        """
        if self.query_mode == 'table':
            query = f"{self.query} LIMIT {n}"
            try:
                df = pl.read_database_uri(query, self.connection_string)
                return (r for r in df.to_dicts())
            except Exception as e:
                logger.error(f"Error with query: {query}")
                raise e
        else:
            # NOTE(review): the whole query result is fetched before slicing —
            # acceptable because get_pl_df() caches it for later full reads.
            df = self.get_pl_df()
            rows = df.head(n).to_dicts()
            return (r for r in rows)

    def data_getter(self) -> Generator[Dict[str, Any], None, None]:
        """Return a generator over every row of the (cached) full result."""
        df = self.get_pl_df()
        rows = df.to_dicts()
        return (r for r in rows)

    def get_pl_df(self) -> pl.DataFrame:
        """Read the query into a Polars DataFrame, caching the result."""
        if self.read_result is None:
            self.read_result = pl.read_database_uri(self.query, self.connection_string)
        return self.read_result

    def get_flow_file_columns(self) -> List[FlowfileColumn]:
        """
        Get column information from the SQL source and convert to FlowfileColumn objects

        Table-mode introspection is tried first; on failure (or in query
        mode) it falls back to executing the one-row sample query and
        assuming string columns.

        Returns:
            List of FlowfileColumn objects representing the columns in the SQL source
        """
        engine = create_engine(self.connection_string)

        if self.query_mode == 'table':
            try:
                if self.schema_name is not None:
                    return self._get_columns_from_table_and_schema(engine, self.table_name, self.schema_name)
                if self.table_name is not None:
                    return self._get_columns_from_table(engine, self.table_name)
            except Exception as e:
                logger.error(f"Error getting column info for table {self.table_name}: {e}")

        return self._get_columns_from_query(engine, self.get_sample_query())

    @staticmethod
    def _get_columns_from_table(engine: Engine, table_name: str) -> List[FlowfileColumn]:
        """
        Get FlowfileColumn objects from a database table

        Args:
            engine: SQLAlchemy engine
            table_name: Name of the table (possibly including schema)

        Returns:
            List of FlowfileColumn objects
        """
        # Split any embedded 'schema.table' qualification before inspecting.
        schema_name, table = BaseSqlSource._parse_table_name(table_name)
        column_types = get_table_column_types(engine, table, schema=schema_name)
        columns = [FlowfileColumn.create_from_polars_dtype(column_name, get_polars_type(column_type))
                   for column_name, column_type in column_types]

        return columns

    @staticmethod
    def _get_columns_from_table_and_schema(engine: Engine, table_name: str, schema_name: str) -> List[FlowfileColumn]:
        """
        Get FlowfileColumn objects from a database table in an explicit schema

        Args:
            engine: SQLAlchemy engine
            table_name: Name of the table
            schema_name: Name of the schema
        Returns:
            List of FlowfileColumn objects
        """
        column_types = get_table_column_types(engine, table_name, schema=schema_name)
        columns = [FlowfileColumn.create_from_polars_dtype(column_name, get_polars_type(column_type))
                   for column_name, column_type in column_types]
        return columns

    @staticmethod
    def _get_columns_from_query(engine: Engine, query: str) -> List[FlowfileColumn]:
        """
        Get FlowfileColumn objects from a SQL query

        Every column is assumed to be a string, since only names are
        available without type introspection.

        Args:
            engine: SQLAlchemy engine
            query: SQL query string

        Returns:
            List of FlowfileColumn objects
        """
        try:
            column_names = get_query_columns(engine, query)

            columns = [FlowfileColumn.create_from_polars_dtype(column_name, pl.String()) for column_name in
                       column_names]
            return columns
        except Exception as e:
            logger.error(f"Error getting column info for query: {e}")
            raise e

    def parse_schema(self) -> List[FlowfileColumn]:
        """Alias for get_schema(), kept for the ExternalDataSource interface."""
        return self.get_schema()

    def get_schema(self) -> List[FlowfileColumn]:
        """Return the source schema, resolving and caching it on first use."""
        if self.schema is None:
            self.schema = self.get_flow_file_columns()
        return self.schema
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def create_sql_source_from_db_settings(database_settings: DatabaseSettings, user_id: int) -> SqlSource:
    """
    Build a SqlSource from stored database settings, resolving the password secret.

    Args:
        database_settings: Connection and query configuration.
        user_id: Owner of the secrets / stored connections to resolve against.

    Returns:
        A SqlSource configured with a fully constructed connection URI.

    Raises:
        ValueError: when the inline connection or its secret cannot be found.
    """
    conn = database_settings.database_connection
    if database_settings.connection_mode == 'inline':
        # Inline mode carries the connection details directly, with only a
        # reference to the password secret.
        if conn is None:
            raise ValueError("Database connection is required in inline mode")
        encrypted_secret = get_encrypted_secret(current_user_id=user_id,
                                                secret_name=conn.password_ref)
    else:
        # Stored mode: look up the saved connection, which carries its own
        # (encrypted) password value.
        conn = get_local_database_connection(database_settings.database_connection_name, user_id)
        encrypted_secret = conn.password.get_secret_value()
    if encrypted_secret is None:
        raise ValueError(f"Secret with name {conn.password_ref} not found for user {user_id}")

    uri = construct_sql_uri(database_type=conn.database_type,
                            host=conn.host,
                            port=conn.port,
                            database=conn.database,
                            username=conn.username,
                            password=decrypt_secret(encrypted_secret))
    return SqlSource(connection_string=uri,
                     query=None if database_settings.query_mode == 'table' else database_settings.query,
                     table_name=database_settings.table_name,
                     schema_name=database_settings.schema_name)
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
# Comprehensive mapping from SQLAlchemy types to Polars types
|
|
2
|
+
from typing import Dict, Type, Union, cast, TYPE_CHECKING, Any
|
|
3
|
+
from pydantic import SecretStr
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
from polars import DataType as PolarsType
|
|
7
|
+
from sqlalchemy.sql.sqltypes import (
|
|
8
|
+
_Binary, ARRAY, BIGINT, BigInteger, BINARY, BLOB, BOOLEAN, Boolean,
|
|
9
|
+
CHAR, CLOB, Concatenable, DATE, Date, DATETIME, DateTime,
|
|
10
|
+
DECIMAL, DOUBLE, Double, DOUBLE_PRECISION, Enum, FLOAT, Float,
|
|
11
|
+
Indexable, INT, INTEGER, Integer, Interval, JSON, LargeBinary,
|
|
12
|
+
MatchType, NCHAR, NULLTYPE, NullType, NUMERIC, Numeric, NVARCHAR,
|
|
13
|
+
PickleType, REAL, SchemaType, SMALLINT, SmallInteger, String,
|
|
14
|
+
STRINGTYPE, TEXT, Text, TIME, Time, TIMESTAMP, TupleType,
|
|
15
|
+
Unicode, UnicodeText, UUID, Uuid, VARBINARY, VARCHAR
|
|
16
|
+
)
|
|
17
|
+
from sqlalchemy.sql.type_api import (
|
|
18
|
+
ExternalType, TypeDecorator,
|
|
19
|
+
TypeEngine, UserDefinedType, Variant
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
from typing import Optional
|
|
24
|
+
from urllib.parse import quote_plus
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Type alias covering every SQLAlchemy type class we map to Polars.
# The full Union is only needed by static checkers; at runtime it is
# collapsed to Any to avoid evaluating the large Union object.
if TYPE_CHECKING:
    SqlType = Union[
        Type[_Binary], Type[ARRAY], Type[BIGINT], Type[BigInteger], Type[BINARY],
        Type[BLOB], Type[BOOLEAN], Type[Boolean], Type[CHAR], Type[CLOB],
        Type[Concatenable], Type[DATE], Type[Date], Type[DATETIME], Type[DateTime],
        Type[DECIMAL], Type[DOUBLE], Type[Double], Type[DOUBLE_PRECISION], Type[Enum],
        Type[FLOAT], Type[Float], Type[Indexable], Type[INT], Type[INTEGER],
        Type[Integer], Type[Interval], Type[JSON], Type[LargeBinary], Type[MatchType],
        Type[NCHAR], Type[NULLTYPE], Type[NullType], Type[NUMERIC], Type[Numeric],
        Type[NVARCHAR], Type[PickleType], Type[REAL], Type[SchemaType], Type[SMALLINT],
        Type[SmallInteger], Type[String], Type[STRINGTYPE], Type[TEXT], Type[Text],
        Type[TIME], Type[Time], Type[TIMESTAMP], Type[TupleType], Type[Unicode],
        Type[UnicodeText], Type[UUID], Type[Uuid], Type[VARBINARY], Type[VARCHAR],
        Type[TypeDecorator], Type[TypeEngine], Type[UserDefinedType], Type[Variant],
        Type[ExternalType]
    ]
else:
    SqlType = Any
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Comprehensive mapping from SQLAlchemy types to Polars types.
# Keys are SQLAlchemy type *classes* (looked up via type(instance) in
# get_polars_type); values of None mark abstract/base types with no
# meaningful Polars equivalent.
# NOTE(review): NULLTYPE and STRINGTYPE are singleton *instances*, not
# classes, so they only match by identity — confirm that is intended.
sqlalchemy_to_polars: Dict[SqlType, PolarsType] = {
    # Numeric types
    Integer: pl.Int64,
    INTEGER: pl.Int64,
    INT: pl.Int64,
    SmallInteger: pl.Int16,
    SMALLINT: pl.Int16,
    BigInteger: pl.Int64,
    BIGINT: pl.Int64,
    Float: pl.Float64,
    FLOAT: pl.Float64,
    REAL: pl.Float32,
    DOUBLE: pl.Float64,
    Double: pl.Float64,
    DOUBLE_PRECISION: pl.Float64,
    Numeric: pl.Decimal,
    NUMERIC: pl.Decimal,
    DECIMAL: pl.Decimal,
    Boolean: pl.Boolean,
    BOOLEAN: pl.Boolean,

    # String types
    String: pl.Utf8,
    VARCHAR: pl.Utf8,
    CHAR: pl.Utf8,
    NVARCHAR: pl.Utf8,
    NCHAR: pl.Utf8,
    Text: pl.Utf8,
    TEXT: pl.Utf8,
    CLOB: pl.Utf8,
    STRINGTYPE: pl.Utf8,
    Unicode: pl.Utf8,
    UnicodeText: pl.Utf8,

    # Date/Time types
    Date: pl.Date,
    DATE: pl.Date,
    DateTime: pl.Datetime,
    DATETIME: pl.Datetime,
    TIMESTAMP: pl.Datetime,
    Time: pl.Time,
    TIME: pl.Time,
    Interval: pl.Duration,

    # Binary types
    _Binary: pl.Binary,
    LargeBinary: pl.Binary,
    BINARY: pl.Binary,
    VARBINARY: pl.Binary,
    BLOB: pl.Binary,

    # JSON types
    JSON: pl.Utf8,  # Polars doesn't have a dedicated JSON type, using Utf8

    # UUID types
    UUID: pl.Utf8,  # Mapped to string
    Uuid: pl.Utf8,  # Mapped to string

    # Other types
    ARRAY: pl.List,  # Approx mapping
    Enum: pl.String,  # Approx mapping
    PickleType: pl.Object,  # For storing Python objects
    TupleType: pl.Struct,  # Mapped to struct

    # Special/Abstract types
    NULLTYPE: None,
    NullType: None,
    Concatenable: pl.Utf8,  # Default to string since it's a mixin
    Indexable: pl.List,  # Default to list since it's a mixin
    MatchType: pl.Utf8,  # Default to string
    SchemaType: None,  # Base class, not mappable directly
    TypeDecorator: None,  # Base class, not mappable directly
    TypeEngine: None,  # Base class, not mappable directly
    UserDefinedType: None,  # Base class, not mappable directly
    Variant: pl.Object,  # For variant data
    ExternalType: None,  # Abstract base class
}
|
|
125
|
+
|
|
126
|
+
# Create string mappings, filtering out None values.
# Maps SQLAlchemy class names (e.g. 'Integer') to Polars class names
# (e.g. 'Int64'); instance keys like NULLTYPE/STRINGTYPE are skipped by
# the hasattr(__name__) guard.
sqlalchemy_to_polars_str: Dict[str, str] = {
    k.__name__: v.__name__
    for k, v in sqlalchemy_to_polars.items()
    if v is not None and hasattr(k, '__name__') and hasattr(v, '__name__')
}
|
|
132
|
+
|
|
133
|
+
# Additional string mappings for common SQL type names.
# Lookup is case-insensitive (callers lower-case the key first).
# NOTE(review): several keys repeat across dialect sections ('bit', 'float',
# 'real', 'date', 'int', 'smallint', 'varchar', 'text', 'timestamp', ...).
# Python keeps the LAST occurrence, so earlier duplicates are dead entries
# and the effective value for a shared name comes from the last section
# that defines it (e.g. 'date' -> pl.Date from the SQL Server section,
# 'float' -> pl.Float64, 'real' -> pl.Float32). Confirm whether a single
# cross-dialect table is acceptable or per-dialect maps are needed.
sql_type_name_to_polars: Dict[str, PolarsType] = {
    # PostgreSQL types
    'integer': pl.Int64,
    'bigint': pl.Int64,
    'smallint': pl.Int64,  # NOTE(review): overridden below by SQL Server's pl.Int16
    'numeric': pl.Decimal,
    'real': pl.Float32,
    'double precision': pl.Float64,
    'boolean': pl.Boolean,
    'varchar': pl.Utf8,
    'character varying': pl.Utf8,
    'character': pl.Utf8,
    'text': pl.Utf8,
    'date': pl.Date,
    'timestamp': pl.Datetime,
    'timestamp without time zone': pl.Datetime,
    'timestamp with time zone': pl.Datetime,
    'time': pl.Time,
    'time without time zone': pl.Time,
    'time with time zone': pl.Time,
    'interval': pl.Duration,
    'bytea': pl.Binary,
    'jsonb': pl.Utf8,
    'json': pl.Utf8,
    'uuid': pl.Utf8,
    'cidr': pl.Utf8,
    'inet': pl.Utf8,
    'macaddr': pl.Utf8,
    'bit': pl.Utf8,  # NOTE(review): overridden below by MySQL/SQL Server pl.Boolean
    'bit varying': pl.Utf8,
    'money': pl.Decimal,
    'xml': pl.Utf8,
    'tsquery': pl.Utf8,
    'tsvector': pl.Utf8,
    'hstore': pl.Utf8,

    # MySQL types
    'int': pl.Int32,
    'int unsigned': pl.UInt64,
    'bigint unsigned': pl.UInt64,
    'smallint unsigned': pl.UInt16,
    'tinyint': pl.Int8,
    'tinyint unsigned': pl.UInt8,
    'mediumint': pl.Int32,
    'mediumint unsigned': pl.UInt32,
    'decimal': pl.Decimal,
    'float': pl.Float32,  # NOTE(review): overridden below by Oracle/SQL Server pl.Float64
    'double': pl.Float64,
    'bit': pl.Boolean,
    'char': pl.Utf8,
    'varchar': pl.Utf8,
    'binary': pl.Binary,
    'varbinary': pl.Binary,
    'tinyblob': pl.Binary,
    'blob': pl.Binary,
    'mediumblob': pl.Binary,
    'longblob': pl.Binary,
    'tinytext': pl.Utf8,
    'text': pl.Utf8,
    'mediumtext': pl.Utf8,
    'longtext': pl.Utf8,
    'datetime': pl.Datetime,
    'timestamp': pl.Datetime,
    'year': pl.Int16,
    'enum': pl.String,
    'set': pl.List,
    'json': pl.Utf8,

    # SQLite types
    'integer': pl.Int64,  # SQLite's INTEGER is 64-bit
    'real': pl.Float64,  # NOTE(review): overridden below by SQL Server pl.Float32
    'text': pl.Utf8,
    'blob': pl.Binary,
    'null': None,

    # Oracle types
    'number': pl.Decimal,
    'float': pl.Float64,
    'binary_float': pl.Float32,
    'binary_double': pl.Float64,
    'varchar2': pl.Utf8,
    'nvarchar2': pl.Utf8,
    'char': pl.Utf8,
    'nchar': pl.Utf8,
    'clob': pl.Utf8,
    'nclob': pl.Utf8,
    'long': pl.Utf8,
    'raw': pl.Binary,
    'long raw': pl.Binary,
    'rowid': pl.Utf8,
    'urowid': pl.Utf8,
    'date': pl.Datetime,  # Oracle DATE includes time; NOTE(review): overridden below by pl.Date
    'timestamp': pl.Datetime,
    'timestamp with time zone': pl.Datetime,
    'timestamp with local time zone': pl.Datetime,
    'interval year to month': pl.Duration,
    'interval day to second': pl.Duration,
    'bfile': pl.Binary,
    'xmltype': pl.Utf8,

    # SQL Server types
    'bit': pl.Boolean,
    'tinyint': pl.Int8,
    'smallint': pl.Int16,
    'int': pl.Int32,
    'bigint': pl.Int64,
    'numeric': pl.Decimal,
    'decimal': pl.Decimal,
    'smallmoney': pl.Decimal,
    'money': pl.Decimal,
    'float': pl.Float64,
    'real': pl.Float32,
    'datetime': pl.Datetime,
    'datetime2': pl.Datetime,
    'smalldatetime': pl.Datetime,
    'date': pl.Date,
    'time': pl.Time,
    'datetimeoffset': pl.Datetime,
    'char': pl.Utf8,
    'varchar': pl.Utf8,
    'text': pl.Utf8,
    'nchar': pl.Utf8,
    'nvarchar': pl.Utf8,
    'ntext': pl.Utf8,
    'binary': pl.Binary,
    'varbinary': pl.Binary,
    'image': pl.Binary,
    'uniqueidentifier': pl.Utf8,
    'xml': pl.Utf8,
    'sql_variant': pl.Object,
    'hierarchyid': pl.Utf8,
    'geometry': pl.Utf8,
    'geography': pl.Utf8,

    # Common abbreviations and aliases
    'int4': pl.Int32,
    'int8': pl.Int64,
    'float4': pl.Float32,
    'float8': pl.Float64,
    'bool': pl.Boolean,
    'serial': pl.Int32,  # PostgreSQL auto-incrementing integer
    'bigserial': pl.Int64,  # PostgreSQL auto-incrementing bigint
    'smallserial': pl.Int16,  # PostgreSQL auto-incrementing smallint
}
|
|
278
|
+
|
|
279
|
+
# String to string mapping: SQL type name -> Polars dtype class name
# (e.g. 'varchar' -> 'Utf8'); unmappable (None) entries are dropped.
sql_type_name_to_polars_str: Dict[str, str] = {
    k: v.__name__ for k, v in sql_type_name_to_polars.items() if v is not None
}
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def get_polars_type(sqlalchemy_type: Union[SqlType, str]):
    """
    Get the corresponding Polars type from a SQLAlchemy type or string type name.

    Parameters:
    -----------
    sqlalchemy_type : SQLAlchemy type class, SQLAlchemy type instance, or string
        The SQLAlchemy type or SQL type name string

    Returns:
    --------
    polars_type : polars.DataType
        The corresponding Polars data type; falls back to ``pl.Utf8`` when
        no mapping exists (the previous docstring incorrectly said ``None``).
    """
    if isinstance(sqlalchemy_type, str):
        # String type names: lowercase for case-insensitive matching
        return sql_type_name_to_polars.get(sqlalchemy_type.lower(), pl.Utf8)
    # Both SQLAlchemy type classes and type instances resolve through the same
    # class-keyed mapping, so normalize instances to their class first.
    lookup_type = sqlalchemy_type if isinstance(sqlalchemy_type, type) else type(sqlalchemy_type)
    return sqlalchemy_to_polars.get(cast(SqlType, lookup_type), pl.Utf8)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def construct_sql_uri(
    database_type: str = "postgresql",
    host: Optional[str] = None,
    port: Optional[int] = None,
    username: Optional[str] = None,
    password: Optional["SecretStr"] = None,
    database: Optional[str] = None,
    url: Optional[str] = None,
    **kwargs
) -> str:
    """
    Constructs a SQL URI string from the provided parameters.

    Args:
        database_type: Database type (postgresql, mysql, sqlite, etc.)
        host: Database host address
        port: Database port number
        username: Database username (percent-encoded into the URI)
        password: Database password as SecretStr (percent-encoded into the URI)
        database: Database name
        url: Complete database URL (overrides other parameters if provided)
        **kwargs: Additional connection parameters appended as a query string

    Returns:
        str: Formatted database URI

    Raises:
        ValueError: If insufficient information is provided
    """
    # If URL is explicitly provided, return it directly
    if url:
        return url

    # For SQLite, we handle differently since it uses a file path
    if database_type.lower() == "sqlite":
        # For SQLite, database is the path to the file
        path = database or "./database.db"
        return f"sqlite:///{path}"

    # Validate that minimum required fields are present for other databases
    if not host:
        raise ValueError("Host is required to create a URI")

    # Create credential part if username is provided.
    # NOTE: a password supplied without a username is silently ignored, matching
    # the URI userinfo grammar (password cannot appear alone).
    credentials = ""
    if username:
        # Encode the username as well as the password: characters such as
        # '@' or ':' in raw userinfo would otherwise corrupt the URI.
        credentials = quote_plus(username)
        if password:
            # Get raw password from SecretStr and encode it
            encoded_password = quote_plus(password.get_secret_value())
            credentials += f":{encoded_password}"
        credentials += "@"

    # Add port if specified
    port_section = f":{port}" if port else ""

    # Create base URI
    if database:
        base_uri = f"{database_type}://{credentials}{host}{port_section}/{database}"
    else:
        base_uri = f"{database_type}://{credentials}{host}{port_section}"

    # Add any additional connection parameters
    if kwargs:
        params = "&".join(f"{key}={quote_plus(str(value))}" for key, value in kwargs.items())
        base_uri += f"?{params}"

    return base_uri
|
|
File without changes
|