Flowfile 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- build_backends/__init__.py +0 -0
- build_backends/main.py +313 -0
- build_backends/main_prd.py +202 -0
- flowfile/__init__.py +71 -0
- flowfile/__main__.py +24 -0
- flowfile-0.2.2.dist-info/LICENSE +21 -0
- flowfile-0.2.2.dist-info/METADATA +225 -0
- flowfile-0.2.2.dist-info/RECORD +171 -0
- flowfile-0.2.2.dist-info/WHEEL +4 -0
- flowfile-0.2.2.dist-info/entry_points.txt +9 -0
- flowfile_core/__init__.py +13 -0
- flowfile_core/auth/__init__.py +0 -0
- flowfile_core/auth/jwt.py +140 -0
- flowfile_core/auth/models.py +40 -0
- flowfile_core/auth/secrets.py +178 -0
- flowfile_core/configs/__init__.py +35 -0
- flowfile_core/configs/flow_logger.py +433 -0
- flowfile_core/configs/node_store/__init__.py +0 -0
- flowfile_core/configs/node_store/nodes.py +98 -0
- flowfile_core/configs/settings.py +120 -0
- flowfile_core/database/__init__.py +0 -0
- flowfile_core/database/connection.py +51 -0
- flowfile_core/database/init_db.py +45 -0
- flowfile_core/database/models.py +41 -0
- flowfile_core/fileExplorer/__init__.py +0 -0
- flowfile_core/fileExplorer/funcs.py +259 -0
- flowfile_core/fileExplorer/utils.py +53 -0
- flowfile_core/flowfile/FlowfileFlow.py +1403 -0
- flowfile_core/flowfile/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
- flowfile_core/flowfile/analytics/__init__.py +0 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
- flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
- flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- flowfile_core/flowfile/analytics/utils.py +9 -0
- flowfile_core/flowfile/connection_manager/__init__.py +3 -0
- flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
- flowfile_core/flowfile/connection_manager/models.py +10 -0
- flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
- flowfile_core/flowfile/database_connection_manager/models.py +15 -0
- flowfile_core/flowfile/extensions.py +36 -0
- flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
- flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
- flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
- flowfile_core/flowfile/flow_node/__init__.py +0 -0
- flowfile_core/flowfile/flow_node/flow_node.py +771 -0
- flowfile_core/flowfile/flow_node/models.py +111 -0
- flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
- flowfile_core/flowfile/handler.py +123 -0
- flowfile_core/flowfile/manage/__init__.py +0 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
- flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- flowfile_core/flowfile/manage/open_flowfile.py +136 -0
- flowfile_core/flowfile/setting_generator/__init__.py +2 -0
- flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
- flowfile_core/flowfile/setting_generator/settings.py +176 -0
- flowfile_core/flowfile/sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
- flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
- flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
- flowfile_core/flowfile/util/__init__.py +0 -0
- flowfile_core/flowfile/util/calculate_layout.py +137 -0
- flowfile_core/flowfile/util/execution_orderer.py +141 -0
- flowfile_core/flowfile/utils.py +106 -0
- flowfile_core/main.py +138 -0
- flowfile_core/routes/__init__.py +0 -0
- flowfile_core/routes/auth.py +34 -0
- flowfile_core/routes/logs.py +163 -0
- flowfile_core/routes/public.py +10 -0
- flowfile_core/routes/routes.py +601 -0
- flowfile_core/routes/secrets.py +85 -0
- flowfile_core/run_lock.py +11 -0
- flowfile_core/schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
- flowfile_core/schemas/defaults.py +9 -0
- flowfile_core/schemas/external_sources/__init__.py +0 -0
- flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
- flowfile_core/schemas/input_schema.py +477 -0
- flowfile_core/schemas/models.py +193 -0
- flowfile_core/schemas/output_model.py +115 -0
- flowfile_core/schemas/schemas.py +106 -0
- flowfile_core/schemas/transform_schema.py +569 -0
- flowfile_core/secrets/__init__.py +0 -0
- flowfile_core/secrets/secrets.py +64 -0
- flowfile_core/utils/__init__.py +0 -0
- flowfile_core/utils/arrow_reader.py +247 -0
- flowfile_core/utils/excel_file_manager.py +18 -0
- flowfile_core/utils/fileManager.py +45 -0
- flowfile_core/utils/fl_executor.py +38 -0
- flowfile_core/utils/utils.py +8 -0
- flowfile_frame/__init__.py +56 -0
- flowfile_frame/__main__.py +12 -0
- flowfile_frame/adapters.py +17 -0
- flowfile_frame/expr.py +1163 -0
- flowfile_frame/flow_frame.py +2093 -0
- flowfile_frame/group_frame.py +199 -0
- flowfile_frame/join.py +75 -0
- flowfile_frame/selectors.py +242 -0
- flowfile_frame/utils.py +184 -0
- flowfile_worker/__init__.py +55 -0
- flowfile_worker/configs.py +95 -0
- flowfile_worker/create/__init__.py +37 -0
- flowfile_worker/create/funcs.py +146 -0
- flowfile_worker/create/models.py +86 -0
- flowfile_worker/create/pl_types.py +35 -0
- flowfile_worker/create/read_excel_tables.py +110 -0
- flowfile_worker/create/utils.py +84 -0
- flowfile_worker/external_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
- flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
- flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- flowfile_worker/external_sources/sql_source/main.py +56 -0
- flowfile_worker/external_sources/sql_source/models.py +72 -0
- flowfile_worker/flow_logger.py +58 -0
- flowfile_worker/funcs.py +327 -0
- flowfile_worker/main.py +108 -0
- flowfile_worker/models.py +95 -0
- flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
- flowfile_worker/polars_fuzzy_match/models.py +36 -0
- flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
- flowfile_worker/polars_fuzzy_match/process.py +86 -0
- flowfile_worker/polars_fuzzy_match/utils.py +50 -0
- flowfile_worker/process_manager.py +36 -0
- flowfile_worker/routes.py +440 -0
- flowfile_worker/secrets.py +148 -0
- flowfile_worker/spawner.py +187 -0
- flowfile_worker/utils.py +25 -0
- test_utils/__init__.py +3 -0
- test_utils/postgres/__init__.py +1 -0
- test_utils/postgres/commands.py +109 -0
- test_utils/postgres/fixtures.py +417 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Generator, Callable, List, Any, Optional, Dict
|
|
3
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
4
|
+
import polars as pl
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ExternalDataSource(ABC):
    """Abstract contract shared by the external data source implementations.

    The class only declares attribute annotations and abstract methods;
    every piece of behaviour has to be supplied by a concrete subclass.
    """

    # Attribute declarations only -- no values are assigned at this level.
    schema: Optional[List[FlowfileColumn]]
    data_getter: Optional[Callable]
    is_collected: bool
    cache_store: Any
    _type: str
    initial_data_getter: Optional[Callable]

    @abstractmethod
    def __init__(self):
        """Set up the source; must be overridden by subclasses."""

    @abstractmethod
    def get_initial_data(self) -> List[Dict[str, Any]]:
        """Return the initial data of the source as a list of dicts."""

    @abstractmethod
    def get_iter(self) -> Generator[Dict[str, Any], None, None]:
        """Return a generator that yields the records of the source as dicts."""

    @abstractmethod
    def get_sample(self, n: int = 10000) -> Generator[Dict[str, Any], None, None]:
        """Return a generator of sample records; ``n`` defaults to 10000."""

    @abstractmethod
    def get_pl_df(self) -> pl.DataFrame:
        """Return the data of the source as a polars DataFrame."""

    @staticmethod
    @abstractmethod
    def parse_schema(*args, **kwargs) -> List[FlowfileColumn]:
        """Convert a schema description into a list of FlowfileColumn objects."""
|
|
39
|
+
|
|
File without changes
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from typing import Any, Dict, Generator, List, Optional, Callable
|
|
2
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
3
|
+
from flowfile_core.schemas import input_schema
|
|
4
|
+
from flowfile_core.flowfile.sources.external_sources.base_class import ExternalDataSource
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CustomExternalSourceSettings:
    """Simple holder for the arguments that describe a custom external source.

    Attributes:
        data_getter: Generator supplying the data.
        initial_data_getter: Optional callable; ``None`` when not supplied.
        orientation: Orientation label, ``'row'`` unless overridden.
    """

    data_getter: Generator
    initial_data_getter: Optional[Callable] = None
    orientation: str = 'row'

    def __init__(
        self,
        data_getter: Generator,
        initial_data_getter: Optional[Callable] = None,
        orientation: str = 'row',
    ):
        """Store the three settings on the instance without any validation."""
        (
            self.data_getter,
            self.initial_data_getter,
            self.orientation,
        ) = (data_getter, initial_data_getter, orientation)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CustomExternalSource(ExternalDataSource):
    """External data source backed by a user supplied generator.

    Records pulled from ``data_getter`` are appended to ``cache_store`` so
    that they can be replayed by later calls to :meth:`get_iter`.
    ``is_collected`` becomes ``True`` once a :meth:`get_iter` run has drained
    ``data_getter`` completely.
    """

    data_getter: Generator = None
    schema: Optional[List[FlowfileColumn]] = None
    cache_store: List = None
    is_collected: bool = False

    def __init__(self, data_getter: Generator[Any, None, None],
                 initial_data_getter: Callable = None,
                 orientation: str = 'row',
                 schema: List = None,
                 **kwargs):
        """Create the source and decide how the initial data is obtained.

        Args:
            data_getter: Generator that yields the records of the source.
            initial_data_getter: Optional callable returning the initial data.
                When given it always takes precedence.
            orientation: With ``'row'`` (and no ``initial_data_getter``) the
                initial data is the first record pulled from ``data_getter``.
            schema: Optional schema in any form accepted by
                :meth:`parse_schema`; an unsupported form is treated as
                "no schema".
            **kwargs: Accepted and ignored.
        """
        self.cache_store = list()
        self.data_getter = data_getter
        self.is_collected = False

        self.schema = None
        if schema is not None:
            try:
                self.schema = self.parse_schema(schema)
            except ValueError:
                # An unsupported schema description is not fatal.
                self.schema = None

        if initial_data_getter:
            self.initial_data_getter = initial_data_getter
        elif orientation == 'row':
            exhausted = object()  # distinguishes "no record" from a record that is None

            def first_row_getter():
                # Pull one record into the cache so that get_iter replays it
                # later; an empty source must not leave a bogus None record.
                if len(self.cache_store) == 0:
                    first_row = next(data_getter, exhausted)
                    if first_row is not exhausted:
                        self.cache_store.append(first_row)
                return self.cache_store

            self.initial_data_getter = first_row_getter
        elif self.schema:
            def schema_row_getter():
                # One placeholder record holding every schema column.
                return [{d.column_name: None for d in self.schema}]

            self.initial_data_getter = schema_row_getter
        else:
            self.initial_data_getter = None

    @property
    def collected(self) -> bool:
        """Backward-compatible alias of ``is_collected``."""
        return self.is_collected

    @collected.setter
    def collected(self, value: bool) -> None:
        self.is_collected = value

    @staticmethod
    def parse_schema(schema: List[Any]) -> List[FlowfileColumn]:
        """Convert a schema description into a list of FlowfileColumn objects.

        The type of the first element decides how all elements are read:
        dicts are passed as keyword arguments to ``FlowfileColumn``,
        lists/tuples are read as ``(column_name, data_type)``, strings are
        column names of type ``'varchar'``, ``MinimalFieldInfo`` objects
        provide ``name`` and ``data_type``, and ``FlowfileColumn`` objects are
        returned unchanged.

        Raises:
            ValueError: If the first element has none of the supported types.
        """
        if len(schema) == 0:
            return []
        first_col = schema[0]
        if isinstance(first_col, dict):
            return [FlowfileColumn(**col) for col in schema]
        elif isinstance(first_col, (list, tuple)):
            return [FlowfileColumn.from_input(column_name=col[0], data_type=col[1]) for col in schema]
        elif isinstance(first_col, str):
            return [FlowfileColumn.from_input(column_name=col, data_type='varchar') for col in schema]
        elif isinstance(first_col, input_schema.MinimalFieldInfo):
            return [FlowfileColumn.from_input(column_name=col.name, data_type=col.data_type) for col in schema]
        elif isinstance(first_col, FlowfileColumn):
            return schema
        else:
            raise ValueError("Schema is not a valid type")

    def get_initial_data(self):
        """Return the result of ``initial_data_getter``, or ``[]`` when there is none."""
        if self.initial_data_getter:
            return self.initial_data_getter()
        return []

    def get_iter(self) -> Generator[Dict[str, Any], None, None]:
        """Yield all records: cached ones first, then the rest of ``data_getter``.

        Newly pulled records are appended to ``cache_store``. After the
        generator has been drained once, later calls replay the cache only.
        """
        for data in self.cache_store:
            yield data
        if self.is_collected:
            # Everything is already in the cache; do not touch data_getter again.
            return
        for data in self.data_getter:
            self.cache_store.append(data)
            yield data
        self.is_collected = True

    def get_sample(self, n: int = 10000):
        """Yield at most ``n`` records from :meth:`get_iter`."""
        data = self.get_iter()
        for _ in range(n):
            try:
                yield next(data)
            except StopIteration:
                break

    def get_pl_df(self) -> pl.DataFrame:
        """Materialise all records of :meth:`get_iter` into a polars DataFrame."""
        return pl.DataFrame(list(self.get_iter()))
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import gspread
|
|
2
|
+
from google.oauth2.credentials import Credentials
|
|
3
|
+
from googleapiclient.discovery import build
|
|
4
|
+
from typing import Dict, Any, Generator, List
|
|
5
|
+
from flowfile_core.schemas.input_schema import GoogleSheet
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def getter(data: GoogleSheet) -> Generator[Dict[str, Any], None, None]:
    """Yield the rows of a Google Sheets worksheet as dicts keyed by the header row.

    The first row returned by ``worksheet.get_values()`` is used as the header;
    every following row is zipped with it, so values beyond the shorter of the
    two are dropped. An empty worksheet yields nothing.

    Args:
        data: Settings providing ``access_token``, ``sheet_id`` and
            ``worksheet_name``.

    Yields:
        One dict per data row.
    """
    creds = Credentials(token=data.access_token.get_secret_value())
    gc = gspread.authorize(credentials=creds)
    worksheet = gc.open_by_key(data.sheet_id).worksheet(data.worksheet_name)
    all_values = worksheet.get_values()
    if not all_values:
        # No header row at all: nothing to yield (avoids an IndexError below).
        return
    headers, *rows = all_values
    for row in rows:
        yield dict(zip(headers, row))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def initial_getter(data: GoogleSheet):
    """Build a callable that fetches the first two rows of a worksheet.

    Args:
        data: Settings providing ``access_token``, ``sheet_id`` and
            ``worksheet_name``.

    Returns:
        A zero-argument function. When called it requests the range
        ``<worksheet_name>!1:2`` and returns a one-element list holding a dict
        that maps the header row to the first data row.
    """
    def inner_func():
        creds = Credentials(token=data.access_token.get_secret_value())
        service = build('sheets', 'v4', credentials=creds)
        _range = f'{data.worksheet_name}!1:2'
        result = service.spreadsheets().values().get(spreadsheetId=data.sheet_id, range=_range).execute().get('values', [])
        if not result:
            # Nothing came back for the range: a single empty record.
            return [{}]
        headers = result[0]
        if len(result) == 1:
            # Only the header row came back; pairing it with a missing data row
            # would fail, so expose the columns with empty values instead.
            # NOTE(review): '' mirrors the padding value used by align_list_len -- confirm.
            return [{header: '' for header in headers}]
        # Pads headers and the data row in place so both have the same length.
        align_data(headers, result[1:])
        return [dict(zip(headers, result[1]))]
    return inner_func
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def align_data(headers: List[str], values: List[List[str]]) -> None:
    """
    Ensures that the number of columns in 'headers' matches the maximum row length in 'values'.
    If 'headers' has fewer columns, it appends 'unknown_column_{i}' for the missing columns.
    Then aligns all rows in 'values' and 'headers' to have the same length.
    An empty 'values' list is allowed and leaves 'headers' unchanged.

    Args:
        headers (List[str]): A list of column names (headers).
        values (List[List[str]]): A list of rows, where each row is a list of strings.

    Returns:
        None: The function modifies 'headers' and 'values' in place.
    """
    # Widest data row; 0 when there are no rows at all.
    max_number_of_values = max((len(row) for row in values), default=0)

    # Name every column that has data but no header; the range is empty when
    # the headers already cover the widest row.
    headers.extend(f'unknown_column_{i}' for i in range(len(headers), max_number_of_values))

    # Pad the header list and all rows to one common length.
    align_list_len([headers] + values)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def align_list_len(values: List[List[str]], default_value: str = '') -> None:
    """
    Aligns the size of each sublist in a list of lists by appending the default value to shorter sublists.
    Modifies the sublists in place. An empty outer list is a no-op.

    Args:
        values (List[List[str]]): A list of lists where each sublist may have different lengths.
        default_value (str): The value to append to each sublist to align their lengths (default is an empty string).

    Returns:
        None
    """
    # Longest sublist; default=0 keeps an empty outer list from raising.
    max_len = max((len(sublist) for sublist in values), default=0)

    # Pad every shorter sublist up to that length.
    for sublist in values:
        sublist.extend([default_value] * (max_len - len(sublist)))
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from typing import Dict, Any, Generator
|
|
2
|
+
from time import sleep
|
|
3
|
+
from flowfile_core.schemas.input_schema import SampleUsers
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def getter(data: SampleUsers) -> Generator[Dict[str, Any], None, None]:
    """
    Sample users generator function. This is a minimal example of a generator function that yields user data and can
    be used in a flowfile. The function simulates a delay to mimic the behavior of an external data source.

    The users endpoint is requested ``data.size`` times; every user record of
    every response is yielded with an added ``'index'`` key that counts up
    from 0 across all requests.

    Args:
        data (SampleUsers): settings object; only ``size`` (number of requests) is read.

    Yields:
        Dict[str, Any]: one user record per iteration.
    """
    # Constant for every request, so built once outside the loop.
    headers = {
        'x-api-key': 'reqres-free-v1'
    }
    index_pos = 0
    for _ in range(data.size):
        sleep(0.01)
        # A timeout keeps an unresponsive server from blocking the flow forever.
        response = requests.get("https://reqres.in/api/users", headers=headers, timeout=30).json()
        for v in response['data']:
            v['index'] = index_pos
            index_pos += 1
            yield v
|
|
29
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from flowfile_core.flowfile.sources.external_sources.custom_external_sources.external_source import CustomExternalSource
|
|
2
|
+
from flowfile_core.flowfile.sources.external_sources.airbyte_sources.airbyte import AirbyteSource
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def data_source_factory(source_type: str, **kwargs) -> CustomExternalSource | AirbyteSource:
    """
    Build the data source object that belongs to ``source_type``.

    Args:
        source_type (str): Either "custom" or "airbyte".
        **kwargs: Forwarded unchanged to the constructor of the chosen class.

    Returns:
        CustomExternalSource | AirbyteSource: The newly created source.

    Raises:
        ValueError: If ``source_type`` is neither "custom" nor "airbyte".
    """
    if source_type == "custom":
        return CustomExternalSource(**kwargs)
    if source_type == "airbyte":
        return AirbyteSource(**kwargs)
    raise ValueError(f"Unknown source type: {source_type}")
|
|
22
|
+
|
|
File without changes
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from typing import Literal, Optional, TYPE_CHECKING
|
|
2
|
+
from pydantic import BaseModel, SecretStr
|
|
3
|
+
from flowfile_core.schemas.input_schema import (DatabaseConnection,
|
|
4
|
+
NodeDatabaseReader,
|
|
5
|
+
FullDatabaseConnection,
|
|
6
|
+
NodeDatabaseWriter)
|
|
7
|
+
import base64
|
|
8
|
+
import polars as pl
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ExtDatabaseConnection(DatabaseConnection):
    """Database connection configuration with password handling."""
    # Declared Optional so the annotation agrees with the None default; a
    # model holding the default can then be dumped and validated again.
    password: Optional[str] = None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DatabaseExternalWriteSettings(BaseModel):
    """Settings for SQL sink."""
    connection: ExtDatabaseConnection
    table_name: str
    if_exists: Optional[Literal['append', 'replace', 'fail']] = 'append'
    flowfile_flow_id: int = 1
    flowfile_node_id: int | str = -1
    # Base64 text of the serialized LazyFrame that has to be written.
    operation: str

    @classmethod
    def create_from_from_node_database_writer(cls, node_database_writer: NodeDatabaseWriter,
                                              password: str,
                                              table_name: str,
                                              lf: pl.LazyFrame,
                                              database_reference_settings: Optional[FullDatabaseConnection] = None,
                                              ) -> 'DatabaseExternalWriteSettings':
        """
        Create DatabaseExternalWriteSettings from NodeDatabaseWriter.

        In "inline" connection mode the connection is taken from the write
        settings of the node; in every other mode it is taken from
        ``database_reference_settings`` with its ``password`` entry removed.
        The explicit ``password`` argument is used in both cases.

        Args:
            node_database_writer (NodeDatabaseWriter): an instance of NodeDatabaseWriter
            password (str): the password for the database connection
            table_name (str): the table name to be used for writing
            lf (pl.LazyFrame): the LazyFrame to be written to the database
            database_reference_settings (FullDatabaseConnection): database reference settings,
                required when the connection mode is not "inline"
        Returns:
            DatabaseExternalWriteSettings: an instance of DatabaseExternalWriteSettings
        Raises:
            ValueError: if the connection mode is not "inline" and no
                database_reference_settings are given
        """
        write_settings = node_database_writer.database_write_settings
        if write_settings.connection_mode == "inline":
            # The inline connection is read from the same settings object as
            # connection_mode and if_exists.
            database_connection = write_settings.database_connection.model_dump()
        else:
            if database_reference_settings is None:
                raise ValueError("database_reference_settings is required when connection_mode is not 'inline'")
            database_connection = {k: v for k, v in database_reference_settings.model_dump().items() if k != "password"}

        ext_database_connection = ExtDatabaseConnection(**database_connection,
                                                        password=password)
        return cls(connection=ext_database_connection,
                   table_name=table_name,
                   if_exists=write_settings.if_exists,
                   flowfile_flow_id=node_database_writer.flow_id,
                   flowfile_node_id=node_database_writer.node_id,
                   operation=base64.b64encode(lf.serialize()).decode())
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class DatabaseExternalReadSettings(BaseModel):
    """Settings for SQL source."""
    connection: ExtDatabaseConnection
    query: str
    flowfile_flow_id: int = 1
    flowfile_node_id: int | str = -1

    @classmethod
    def create_from_from_node_database_reader(cls, node_database_reader: NodeDatabaseReader,
                                              password: str,
                                              query: str,
                                              database_reference_settings: Optional[FullDatabaseConnection] = None) -> 'DatabaseExternalReadSettings':
        """
        Create DatabaseExternalReadSettings from NodeDatabaseReader.

        In "inline" connection mode the connection is taken from the database
        settings of the node; in every other mode it is taken from
        ``database_reference_settings`` with its ``password`` entry removed.
        The explicit ``password`` argument is used in both cases.

        Args:
            node_database_reader (NodeDatabaseReader): an instance of NodeDatabaseReader
            password (str): the password for the database connection
            query (str): the SQL query to be executed
            database_reference_settings (FullDatabaseConnection): database reference settings,
                required when the connection mode is not "inline"
        Returns:
            DatabaseExternalReadSettings: an instance of DatabaseExternalReadSettings
        Raises:
            ValueError: if the connection mode is not "inline" and no
                database_reference_settings are given
        """
        read_settings = node_database_reader.database_settings
        if read_settings.connection_mode == "inline":
            database_connection = read_settings.database_connection.model_dump()
        else:
            if database_reference_settings is None:
                raise ValueError("database_reference_settings is required when connection_mode is not 'inline'")
            database_connection = {k: v for k, v in database_reference_settings.model_dump().items() if k != "password"}

        ext_database_connection = ExtDatabaseConnection(**database_connection,
                                                        password=password)
        return cls(connection=ext_database_connection,
                   query=query,
                   flowfile_flow_id=node_database_reader.flow_id,
                   flowfile_node_id=node_database_reader.node_id)
|