Flowfile 0.2.2 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of Flowfile might be problematic.
- build_backends/__init__.py +0 -0
- build_backends/main.py +313 -0
- build_backends/main_prd.py +202 -0
- flowfile/__init__.py +71 -0
- flowfile/__main__.py +24 -0
- flowfile-0.2.2.dist-info/LICENSE +21 -0
- flowfile-0.2.2.dist-info/METADATA +225 -0
- flowfile-0.2.2.dist-info/RECORD +171 -0
- flowfile-0.2.2.dist-info/WHEEL +4 -0
- flowfile-0.2.2.dist-info/entry_points.txt +9 -0
- flowfile_core/__init__.py +13 -0
- flowfile_core/auth/__init__.py +0 -0
- flowfile_core/auth/jwt.py +140 -0
- flowfile_core/auth/models.py +40 -0
- flowfile_core/auth/secrets.py +178 -0
- flowfile_core/configs/__init__.py +35 -0
- flowfile_core/configs/flow_logger.py +433 -0
- flowfile_core/configs/node_store/__init__.py +0 -0
- flowfile_core/configs/node_store/nodes.py +98 -0
- flowfile_core/configs/settings.py +120 -0
- flowfile_core/database/__init__.py +0 -0
- flowfile_core/database/connection.py +51 -0
- flowfile_core/database/init_db.py +45 -0
- flowfile_core/database/models.py +41 -0
- flowfile_core/fileExplorer/__init__.py +0 -0
- flowfile_core/fileExplorer/funcs.py +259 -0
- flowfile_core/fileExplorer/utils.py +53 -0
- flowfile_core/flowfile/FlowfileFlow.py +1403 -0
- flowfile_core/flowfile/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
- flowfile_core/flowfile/analytics/__init__.py +0 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
- flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
- flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- flowfile_core/flowfile/analytics/utils.py +9 -0
- flowfile_core/flowfile/connection_manager/__init__.py +3 -0
- flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
- flowfile_core/flowfile/connection_manager/models.py +10 -0
- flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
- flowfile_core/flowfile/database_connection_manager/models.py +15 -0
- flowfile_core/flowfile/extensions.py +36 -0
- flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
- flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
- flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
- flowfile_core/flowfile/flow_node/__init__.py +0 -0
- flowfile_core/flowfile/flow_node/flow_node.py +771 -0
- flowfile_core/flowfile/flow_node/models.py +111 -0
- flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
- flowfile_core/flowfile/handler.py +123 -0
- flowfile_core/flowfile/manage/__init__.py +0 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
- flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- flowfile_core/flowfile/manage/open_flowfile.py +136 -0
- flowfile_core/flowfile/setting_generator/__init__.py +2 -0
- flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
- flowfile_core/flowfile/setting_generator/settings.py +176 -0
- flowfile_core/flowfile/sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
- flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
- flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
- flowfile_core/flowfile/util/__init__.py +0 -0
- flowfile_core/flowfile/util/calculate_layout.py +137 -0
- flowfile_core/flowfile/util/execution_orderer.py +141 -0
- flowfile_core/flowfile/utils.py +106 -0
- flowfile_core/main.py +138 -0
- flowfile_core/routes/__init__.py +0 -0
- flowfile_core/routes/auth.py +34 -0
- flowfile_core/routes/logs.py +163 -0
- flowfile_core/routes/public.py +10 -0
- flowfile_core/routes/routes.py +601 -0
- flowfile_core/routes/secrets.py +85 -0
- flowfile_core/run_lock.py +11 -0
- flowfile_core/schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
- flowfile_core/schemas/defaults.py +9 -0
- flowfile_core/schemas/external_sources/__init__.py +0 -0
- flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
- flowfile_core/schemas/input_schema.py +477 -0
- flowfile_core/schemas/models.py +193 -0
- flowfile_core/schemas/output_model.py +115 -0
- flowfile_core/schemas/schemas.py +106 -0
- flowfile_core/schemas/transform_schema.py +569 -0
- flowfile_core/secrets/__init__.py +0 -0
- flowfile_core/secrets/secrets.py +64 -0
- flowfile_core/utils/__init__.py +0 -0
- flowfile_core/utils/arrow_reader.py +247 -0
- flowfile_core/utils/excel_file_manager.py +18 -0
- flowfile_core/utils/fileManager.py +45 -0
- flowfile_core/utils/fl_executor.py +38 -0
- flowfile_core/utils/utils.py +8 -0
- flowfile_frame/__init__.py +56 -0
- flowfile_frame/__main__.py +12 -0
- flowfile_frame/adapters.py +17 -0
- flowfile_frame/expr.py +1163 -0
- flowfile_frame/flow_frame.py +2093 -0
- flowfile_frame/group_frame.py +199 -0
- flowfile_frame/join.py +75 -0
- flowfile_frame/selectors.py +242 -0
- flowfile_frame/utils.py +184 -0
- flowfile_worker/__init__.py +55 -0
- flowfile_worker/configs.py +95 -0
- flowfile_worker/create/__init__.py +37 -0
- flowfile_worker/create/funcs.py +146 -0
- flowfile_worker/create/models.py +86 -0
- flowfile_worker/create/pl_types.py +35 -0
- flowfile_worker/create/read_excel_tables.py +110 -0
- flowfile_worker/create/utils.py +84 -0
- flowfile_worker/external_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
- flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
- flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- flowfile_worker/external_sources/sql_source/main.py +56 -0
- flowfile_worker/external_sources/sql_source/models.py +72 -0
- flowfile_worker/flow_logger.py +58 -0
- flowfile_worker/funcs.py +327 -0
- flowfile_worker/main.py +108 -0
- flowfile_worker/models.py +95 -0
- flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
- flowfile_worker/polars_fuzzy_match/models.py +36 -0
- flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
- flowfile_worker/polars_fuzzy_match/process.py +86 -0
- flowfile_worker/polars_fuzzy_match/utils.py +50 -0
- flowfile_worker/process_manager.py +36 -0
- flowfile_worker/routes.py +440 -0
- flowfile_worker/secrets.py +148 -0
- flowfile_worker/spawner.py +187 -0
- flowfile_worker/utils.py +25 -0
- test_utils/__init__.py +3 -0
- test_utils/postgres/__init__.py +1 -0
- test_utils/postgres/commands.py +109 -0
- test_utils/postgres/fixtures.py +417 -0
flowfile_worker/external_sources/sql_source/models.py
ADDED
@@ -0,0 +1,72 @@

from typing import Optional, Literal
from pydantic import BaseModel, SecretStr
from flowfile_worker.secrets import decrypt_secret


class DataBaseConnection(BaseModel):
    """Database connection configuration with secure password handling."""
    username: Optional[str] = None
    password: Optional[SecretStr] = None  # Encrypted password
    host: Optional[str] = None
    port: Optional[int] = None
    database: Optional[str] = None  # The database name
    database_type: str = "postgresql"  # Database type (postgresql, mysql, etc.)
    url: Optional[str] = None

    def get_decrypted_secret(self) -> SecretStr:
        return decrypt_secret(self.password.get_secret_value())

    def create_uri(self) -> str:
        """
        Creates a database URI based on the connection details.
        If url is provided, it returns that directly.
        Otherwise, it constructs a URI from the individual components.

        Returns:
            str: The database URI
        """
        # If URL is already provided, use it
        if self.url:
            return self.url

        # Validate that required fields are present
        if not all([self.host, self.database_type]):
            raise ValueError("Host and database type are required to create a URI")

        # Create credential part if username is provided
        credentials = ""
        if self.username:
            credentials = self.username
            if self.password:
                # Get the raw password string from SecretStr
                password_value = decrypt_secret(self.password.get_secret_value()).get_secret_value()
                credentials += f":{password_value}"
            credentials += "@"

        # Create port part if port is provided
        port_section = ""
        if self.port:
            port_section = f":{self.port}"
        if self.database:
            base_uri = f"{self.database_type}://{credentials}{self.host}{port_section}/{self.database}"
        else:
            base_uri = f"{self.database_type}://{credentials}{self.host}{port_section}"
        return base_uri


class DatabaseReadSettings(BaseModel):
    """Settings for SQL source."""
    connection: DataBaseConnection
    query: str
    flowfile_flow_id: int = 1
    flowfile_node_id: int | str = -1


class DatabaseWriteSettings(BaseModel):
    """Settings for SQL sink."""
    connection: DataBaseConnection
    table_name: str
    if_exists: Literal['append', 'replace', 'fail'] = 'append'
    flowfile_flow_id: int = 1
    flowfile_node_id: int | str = -1
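Note (editor's illustration, not part of the released code): a minimal sketch of how create_uri() assembles a URI from the individual fields, assuming no password is set so decrypt_secret is never called. All values below are placeholders.

# Editor's illustration with hypothetical values.
from flowfile_worker.external_sources.sql_source.models import DataBaseConnection

conn = DataBaseConnection(
    username="analyst",
    host="localhost",
    port=5432,
    database="warehouse",
)
# No password is set, so the credential part is just "analyst@".
print(conn.create_uri())  # postgresql://analyst@localhost:5432/warehouse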
flowfile_worker/flow_logger.py
ADDED
@@ -0,0 +1,58 @@

import logging
import requests
from flowfile_worker.models import RawLogInput
from flowfile_worker.configs import FLOWFILE_CORE_URI

LOGGING_URL = FLOWFILE_CORE_URI + "/raw_logs"


class FlowfileLogHandler(logging.Handler):
    def __init__(self, flowfile_flow_id: int = 1, flowfile_node_id: int | str = -1):
        super().__init__()
        self.flowfile_flow_id = flowfile_flow_id
        self.flowfile_node_id = flowfile_node_id

    def emit(self, record):
        try:
            log_message = self.format(record)

            extra = {"Node Id": self.flowfile_node_id}
            for k, v in extra.items():
                log_message = f"{k}: {v} - {log_message}"
            raw_log_input = RawLogInput(
                flowfile_flow_id=self.flowfile_flow_id,
                log_message=log_message,
                log_type=record.levelname.upper(),
                extra={
                }
            )
            if self.flowfile_flow_id != -1 and self.flowfile_node_id != -1:
                response = requests.post(LOGGING_URL, json=raw_log_input.__dict__,
                                         headers={"Content-Type": "application/json"})
                if response.status_code != 200:
                    raise Exception(f"Failed to send log: {response.text}")
        except Exception as e:
            print(f"Error sending log to {LOGGING_URL}: {e}")


def get_worker_logger(flowfile_flow_id: int, flowfile_node_id: int | str) -> logging.Logger:
    logger_name = f"NodeLog: {flowfile_node_id}"
    logger = logging.getLogger(logger_name)
    logger.propagate = False  # Prevent propagation to parent loggers
    logger.setLevel(logging.DEBUG)

    # Only add handlers if they don't already exist to avoid duplicates
    if not logger.handlers:
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(logging.DEBUG)
        stream_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        stream_handler.setFormatter(stream_formatter)
        logger.addHandler(stream_handler)

        http_handler = FlowfileLogHandler(flowfile_flow_id=flowfile_flow_id, flowfile_node_id=flowfile_node_id)
        http_handler.setLevel(logging.INFO)
        http_formatter = logging.Formatter('%(message)s')
        http_handler.setFormatter(http_formatter)
        logger.addHandler(http_handler)

    return logger
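Note (editor's illustration, not part of the released code): a minimal sketch of obtaining a node logger, assuming a core service is reachable at FLOWFILE_CORE_URI. The flow and node ids are placeholders.

# Editor's illustration with hypothetical ids.
from flowfile_worker.flow_logger import get_worker_logger

log = get_worker_logger(flowfile_flow_id=1, flowfile_node_id=7)
# Written to stderr by the StreamHandler and, because both ids differ from -1,
# also POSTed by FlowfileLogHandler to FLOWFILE_CORE_URI + "/raw_logs".
log.info("node started")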
flowfile_worker/funcs.py
ADDED
@@ -0,0 +1,327 @@

import polars as pl
import io
from typing import List, Dict, Callable
from multiprocessing import Array, Value, Queue
from flowfile_worker.polars_fuzzy_match.matcher import fuzzy_match_dfs
from flowfile_worker.polars_fuzzy_match.models import FuzzyMapping
from flowfile_worker.flow_logger import get_worker_logger
from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings
from flowfile_worker.external_sources.sql_source.main import write_serialized_df_to_database, write_df_to_database
from base64 import encodebytes
from logging import Logger
import logging
import os
from flowfile_worker.utils import collect_lazy_frame, collect_lazy_frame_and_get_streaming_info


# 'store', 'calculate_schema', 'calculate_number_of_records', 'write_output', 'fuzzy', 'store_sample']

logging.basicConfig(format='%(asctime)s: %(message)s')
logger = logging.getLogger('Spawner')
logger.setLevel(logging.INFO)


def fuzzy_join_task(left_serializable_object: bytes, right_serializable_object: bytes,
                    fuzzy_maps: List[FuzzyMapping], error_message: Array, file_path: str,
                    progress: Value,
                    queue: Queue, flowfile_flow_id: int, flowfile_node_id: int | str,
                    ):
    flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
    try:
        flowfile_logger.info("Starting fuzzy join operation")
        left_df = pl.LazyFrame.deserialize(io.BytesIO(left_serializable_object))
        right_df = pl.LazyFrame.deserialize(io.BytesIO(right_serializable_object))
        fuzzy_match_result = fuzzy_match_dfs(left_df, right_df, fuzzy_maps, flowfile_logger)
        flowfile_logger.info("Fuzzy join operation completed successfully")
        fuzzy_match_result.write_ipc(file_path)
        with progress.get_lock():
            progress.value = 100
    except Exception as e:
        error_msg = str(e).encode()[:256]
        with error_message.get_lock():
            error_message[:len(error_msg)] = error_msg
        with progress.get_lock():
            progress.value = -1
        flowfile_logger.error(f'Error during fuzzy join operation: {str(e)}')
    lf = pl.scan_ipc(file_path)
    number_of_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
    flowfile_logger.info(f'Number of records after fuzzy match: {number_of_records}')
    queue.put(encodebytes(lf.serialize()))


def process_and_cache(polars_serializable_object: io.BytesIO, progress: Value, error_message: Array,
                      file_path: str, flowfile_logger: Logger) -> bytes:
    try:
        lf = pl.LazyFrame.deserialize(polars_serializable_object)
        collect_lazy_frame(lf).write_ipc(file_path)
        flowfile_logger.info("Process operation completed successfully")
        with progress.get_lock():
            progress.value = 100
    except Exception as e:
        error_msg = str(e).encode()[:1024]  # Limit error message length
        flowfile_logger.error(f'Error during process and cache operation: {str(e)}')
        with error_message.get_lock():
            error_message[:len(error_msg)] = error_msg
        with progress.get_lock():
            progress.value = -1  # Indicate error
        return error_msg


def store_sample(polars_serializable_object: bytes,
                 progress: Value,
                 error_message: Array,
                 queue: Queue,
                 file_path: str,
                 sample_size: int,
                 flowfile_flow_id: int,
                 flowfile_node_id: int | str
                 ):
    flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
    flowfile_logger.info("Starting store sample operation")
    try:
        lf = pl.LazyFrame.deserialize(io.BytesIO(polars_serializable_object))
        collect_lazy_frame(lf.limit(sample_size)).write_ipc(file_path)
        flowfile_logger.info("Store sample operation completed successfully")
        with progress.get_lock():
            progress.value = 100
    except Exception as e:
        flowfile_logger.error(f'Error during store sample operation: {str(e)}')
        error_msg = str(e).encode()[:1024]  # Limit error message length
        with error_message.get_lock():
            error_message[:len(error_msg)] = error_msg
        with progress.get_lock():
            progress.value = -1  # Indicate error
        return error_msg


def store(polars_serializable_object: bytes, progress: Value, error_message: Array, queue: Queue, file_path: str,
          flowfile_flow_id: int, flowfile_node_id: int | str):
    flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
    flowfile_logger.info("Starting store operation")
    polars_serializable_object_io = io.BytesIO(polars_serializable_object)
    process_and_cache(polars_serializable_object_io, progress, error_message, file_path, flowfile_logger)
    lf = pl.scan_ipc(file_path)
    number_of_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
    flowfile_logger.info(f'Number of records processed: {number_of_records}')
    queue.put(encodebytes(lf.serialize()))


def calculate_schema_logic(df: pl.LazyFrame, optimize_memory: bool = True, flowfile_logger: Logger = None) -> List[Dict]:
    if flowfile_logger is None:
        raise ValueError('flowfile_logger is required')
    schema = df.collect_schema()
    schema_stats = [dict(column_name=k, pl_datatype=str(v), col_index=i) for i, (k, v) in
                    enumerate(schema.items())]
    flowfile_logger.info('Starting to calculate the number of records')
    collected_streaming_info = collect_lazy_frame_and_get_streaming_info(df.select(pl.len()))
    n_records = collected_streaming_info.df[0, 0]
    if n_records < 10_000:
        flowfile_logger.info('Collecting the whole dataset')
        df = collect_lazy_frame(df).lazy()
    if optimize_memory and n_records > 1_000_000:
        df = df.head(1_000_000)
    null_cols = [col for col, data_type in schema.items() if data_type is pl.Null]
    if not (n_records == 0 and df.width == 0):
        if len(null_cols) == 0:
            pl_stats = df.describe()
        else:
            df = df.drop(null_cols)
            pl_stats = df.describe()
        n_unique_per_cols = list(df.select(pl.all().approx_n_unique()).collect(
            engine="streaming" if collected_streaming_info.streaming_collect_available else "auto").to_dicts()[0].values()
        )
        stats_headers = pl_stats.drop_in_place('statistic').to_list()
        stats = {v['column_name']: v for v in pl_stats.transpose(include_header=True, header_name='column_name',
                                                                 column_names=stats_headers).to_dicts()}
        for i, (col_stat, n_unique_values) in enumerate(zip(stats.values(), n_unique_per_cols)):
            col_stat['n_unique'] = n_unique_values
            col_stat['examples'] = ', '.join({str(col_stat['min']), str(col_stat['max'])})
            col_stat['null_count'] = int(float(col_stat['null_count']))
            col_stat['count'] = int(float(col_stat['count']))

        for schema_stat in schema_stats:
            deep_stat = stats.get(schema_stat['column_name'])
            if deep_stat:
                schema_stat.update(deep_stat)
        del df
    else:
        schema_stats = []
    return schema_stats


def calculate_schema(polars_serializable_object: bytes, progress: Value, error_message: Array, queue: Queue,
                     flowfile_flow_id: int, flowfile_node_id: int | str, *args, **kwargs):
    polars_serializable_object_io = io.BytesIO(polars_serializable_object)
    flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
    flowfile_logger.info("Starting schema calculation")
    try:
        lf = pl.LazyFrame.deserialize(polars_serializable_object_io)
        schema_stats = calculate_schema_logic(lf, flowfile_logger=flowfile_logger)
        flowfile_logger.info('schema_stats', schema_stats)
        queue.put(schema_stats)
        flowfile_logger.info("Schema calculation completed successfully")
        with progress.get_lock():
            progress.value = 100
    except Exception as e:
        error_msg = str(e).encode()[:256]  # Limit error message length
        flowfile_logger.error('error', e)
        with error_message.get_lock():
            error_message[:len(error_msg)] = error_msg
        with progress.get_lock():
            progress.value = -1  # Indicate error


def calculate_number_of_records(polars_serializable_object: bytes, progress: Value, error_message: Array,
                                queue: Queue, flowfile_flow_id: int, *args, **kwargs):
    flowfile_logger = get_worker_logger(flowfile_flow_id, -1)
    flowfile_logger.info("Starting number of records calculation")
    polars_serializable_object_io = io.BytesIO(polars_serializable_object)
    try:
        lf = pl.LazyFrame.deserialize(polars_serializable_object_io)
        n_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
        queue.put(n_records)
        flowfile_logger.debug("Number of records calculation completed successfully")
        flowfile_logger.debug(f'n_records {n_records}')
        with progress.get_lock():
            progress.value = 100
    except Exception as e:
        flowfile_logger.error('error', e)
        error_msg = str(e).encode()[:256]  # Limit error message length
        with error_message.get_lock():
            error_message[:len(error_msg)] = error_msg
        with progress.get_lock():
            progress.value = -1  # Indicate error
        return b'error'


def execute_write_method(write_method: Callable, path: str, data_type: str = None, sheet_name: str = None,
                         delimiter: str = None,
                         write_mode: str = 'create', flowfile_logger: Logger = None):
    flowfile_logger.info('executing write method')
    if data_type == 'excel':
        logger.info('Writing as excel file')
        write_method(path, worksheet=sheet_name)
    elif data_type == 'csv':
        logger.info('Writing as csv file')
        if write_mode == 'append':
            with open(path, 'ab') as f:
                write_method(file=f, separator=delimiter, quote_style='always')
        else:
            write_method(file=path, separator=delimiter, quote_style='always')
    elif data_type == 'parquet':
        logger.info('Writing as parquet file')
        write_method(path)


def write_to_database(polars_serializable_object: bytes,
                      progress: Value,
                      error_message: Array,
                      queue: Queue,
                      file_path: str,
                      database_write_settings: DatabaseWriteSettings,
                      flowfile_flow_id: int = -1,
                      flowfile_node_id: int | str = -1
                      ):
    """
    Writes a Polars DataFrame to a SQL database.
    """
    flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
    flowfile_logger.info(f"Starting write operation to: {database_write_settings.table_name}")
    df = collect_lazy_frame(pl.LazyFrame.deserialize(io.BytesIO(polars_serializable_object)))
    flowfile_logger.info(f"Starting to write {len(df)} records")
    try:
        write_df_to_database(df, database_write_settings)
        flowfile_logger.info("Write operation completed successfully")
        with progress.get_lock():
            progress.value = 100
    except Exception as e:
        error_msg = str(e).encode()[:1024]
        flowfile_logger.error(f'Error during write operation: {str(e)}')
        with error_message.get_lock():
            error_message[:len(error_msg)] = error_msg
        with progress.get_lock():
            progress.value = -1


def write_output(polars_serializable_object: bytes,
                 progress: Value,
                 error_message: Array,
                 queue: Queue,
                 file_path: str,
                 data_type: str,
                 path: str,
                 write_mode: str,
                 sheet_name: str = None,
                 delimiter: str = None,
                 flowfile_flow_id: int = -1,
                 flowfile_node_id: int | str = -1
                 ):
    flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
    flowfile_logger.info(f"Starting write operation to: {path}")
    try:
        df = pl.LazyFrame.deserialize(io.BytesIO(polars_serializable_object))
        if isinstance(df, pl.LazyFrame):
            flowfile_logger.info(f'Execution plan explanation:\n{df.explain(format="plain")}')
        flowfile_logger.info("Successfully deserialized dataframe")
        is_lazy = False
        sink_method_str = 'sink_' + data_type
        write_method_str = 'write_' + data_type
        has_sink_method = hasattr(df, sink_method_str)
        write_method = None
        if os.path.exists(path) and write_mode == 'create':
            raise Exception('File already exists')
        if has_sink_method and is_lazy:
            write_method = getattr(df, 'sink_' + data_type)
        elif not is_lazy or not has_sink_method:
            if isinstance(df, pl.LazyFrame):
                df = collect_lazy_frame(df)
            write_method = getattr(df, write_method_str)
        if write_method is not None:
            execute_write_method(write_method, path=path, data_type=data_type, sheet_name=sheet_name,
                                 delimiter=delimiter, write_mode=write_mode, flowfile_logger=flowfile_logger)
            number_of_records_written = (collect_lazy_frame(df.select(pl.len()))[0, 0]
                                         if isinstance(df, pl.LazyFrame) else df.height)
            flowfile_logger.info(f'Number of records written: {number_of_records_written}')
        else:
            raise Exception('Write method not found')
        with progress.get_lock():
            progress.value = 100
    except Exception as e:
        logger.info(f'Error during write operation: {str(e)}')
        error_message[:len(str(e))] = str(e).encode()


def generic_task(func: Callable,
                 progress: Value,
                 error_message: Array,
                 queue: Queue,
                 file_path: str,
                 flowfile_flow_id: int,
                 flowfile_node_id: int | str,
                 *args, **kwargs):
    print(kwargs)
    flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
    flowfile_logger.info("Starting generic task")
    try:
        df = func(*args, **kwargs)
        if isinstance(df, pl.LazyFrame):
            collect_lazy_frame(df).write_ipc(file_path)
        elif isinstance(df, pl.DataFrame):
            df.write_ipc(file_path)
        else:
            raise Exception('Returned object is not a DataFrame or LazyFrame')
        with progress.get_lock():
            progress.value = 100
        flowfile_logger.info("Task completed successfully")
    except Exception as e:
        flowfile_logger.error(f'Error during task execution: {str(e)}')
        error_msg = str(e).encode()[:1024]
        with error_message.get_lock():
            error_message[:len(error_msg)] = error_msg
        with progress.get_lock():
            progress.value = -1

    lf = pl.scan_ipc(file_path)
    number_of_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
    flowfile_logger.info(f'Number of records processed: {number_of_records}')
    queue.put(encodebytes(lf.serialize()))
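Note (editor's illustration, not part of the released code): a minimal sketch of calling store() directly with the multiprocessing primitives the task functions expect. In the package these are normally supplied by the spawner; the output path and ids below are placeholders, and pl.LazyFrame.serialize() is assumed to return the binary bytes the worker deserializes.

# Editor's illustration with hypothetical values.
import io
import polars as pl
from base64 import decodebytes
from multiprocessing import Array, Value, Queue
from flowfile_worker.funcs import store

lf = pl.LazyFrame({"a": [1, 2, 3]})
serialized = lf.serialize()        # bytes accepted by pl.LazyFrame.deserialize
progress = Value('i', 0)           # set to 100 on success, -1 on error
error_message = Array('c', 1024)   # receives the truncated error string
queue = Queue()                    # receives base64-encoded bytes of the cached LazyFrame

store(serialized, progress, error_message, queue, "/tmp/result.ipc",
      flowfile_flow_id=1, flowfile_node_id=7)
result_lf = pl.LazyFrame.deserialize(io.BytesIO(decodebytes(queue.get())))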
flowfile_worker/main.py
ADDED
@@ -0,0 +1,108 @@

import asyncio
import uvicorn
import signal

from contextlib import asynccontextmanager
from fastapi import FastAPI
from flowfile_worker.routes import router
from flowfile_worker import mp_context, CACHE_DIR
from flowfile_worker.configs import logger, FLOWFILE_CORE_URI, SERVICE_HOST, SERVICE_PORT


should_exit = False
server_instance = None


@asynccontextmanager
async def shutdown_handler(app: FastAPI):
    """Handle application startup and shutdown"""
    logger.info('Starting application...')
    try:
        yield
    finally:
        logger.info('Shutting down application...')
        logger.info("Cleaning up worker resources...")
        for p in mp_context.active_children():
            try:
                p.terminate()
                p.join()
            except Exception as e:
                logger.error(f"Error cleaning up process: {e}")

        try:
            CACHE_DIR.cleanup()
        except Exception as e:
            print(f"Error cleaning up cache directory: {e}")

        await asyncio.sleep(0.1)


app = FastAPI(lifespan=shutdown_handler)
app.include_router(router)


@app.post("/shutdown")
async def shutdown():
    """Endpoint to handle graceful shutdown"""
    if server_instance:
        # Schedule the shutdown
        await asyncio.create_task(trigger_shutdown())
    return {"message": "Shutting down"}


async def trigger_shutdown():
    """Trigger the actual shutdown after responding to the client"""
    await asyncio.sleep(1)  # Give time for the response to be sent
    if server_instance:
        server_instance.should_exit = True


def signal_handler(signum, frame):
    """Handle shutdown signals"""
    logger.info(f"Received signal {signum}")
    if server_instance:
        server_instance.should_exit = True


def run(host: str = None, port: int = None):
    """Run the FastAPI app with graceful shutdown"""
    global server_instance

    # Use values from settings if not explicitly provided
    if host is None:
        host = SERVICE_HOST
    if port is None:
        port = SERVICE_PORT

    # Log service configuration
    logger.info(f"Starting worker service on {host}:{port}")
    logger.info(f"Core service configured at {FLOWFILE_CORE_URI}")

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    config = uvicorn.Config(
        app,
        host=host,
        port=port,
        loop="asyncio"
    )
    server = uvicorn.Server(config)
    server_instance = server  # Store server instance globally

    logger.info('Starting server...')
    logger.info('Server started')

    try:
        server.run()
    except KeyboardInterrupt:
        logger.info("Received interrupt signal, shutting down...")
    finally:
        server_instance = None
        logger.info("Server shutdown complete")


if __name__ == "__main__":
    import multiprocessing
    multiprocessing.freeze_support()
    run()
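Note (editor's illustration, not part of the released code): run() blocks until the uvicorn server exits, either on SIGTERM/SIGINT or after the /shutdown endpoint flips should_exit. The host and port below are placeholders for SERVICE_HOST/SERVICE_PORT.

# Editor's illustration with hypothetical host/port.
import requests
from flowfile_worker.main import run

# Blocks until the server exits:
# run(host="127.0.0.1", port=63579)

# From another process, request a graceful stop:
requests.post("http://127.0.0.1:63579/shutdown")  # responds with {"message": "Shutting down"}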
flowfile_worker/models.py
ADDED
@@ -0,0 +1,95 @@

from pydantic import BaseModel
from typing import Optional, Literal, Any
from base64 import decodebytes
from flowfile_worker.polars_fuzzy_match.models import FuzzyMapping
from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings


OperationType = Literal[
    'store', 'calculate_schema', 'calculate_number_of_records', 'write_output', 'fuzzy', 'store_sample',
    'write_to_database']
ResultType = Literal['polars', 'other']


class PolarsOperation(BaseModel):
    operation: bytes
    flowfile_flow_id: Optional[int] = 1
    flowfile_node_id: Optional[int | str] = -1

    def polars_serializable_object(self):
        return decodebytes(self.operation)


class PolarsScript(PolarsOperation):
    task_id: Optional[str] = None
    cache_dir: Optional[str] = None
    operation_type: OperationType


class PolarsScriptSample(PolarsScript):
    sample_size: Optional[int] = 100


class PolarsScriptWrite(BaseModel):
    operation: bytes
    data_type: str
    path: str
    write_mode: str
    sheet_name: Optional[str] = None
    delimiter: Optional[str] = None
    flowfile_flow_id: Optional[int] = -1
    flowfile_node_id: Optional[int | str] = -1

    def polars_serializable_object(self):
        return decodebytes(self.operation)


class DatabaseScriptWrite(DatabaseWriteSettings):
    operation: bytes

    def polars_serializable_object(self):
        return decodebytes(self.operation)

    def get_database_write_settings(self) -> DatabaseWriteSettings:
        """
        Converts the current instance to a DatabaseWriteSettings object.
        Returns:
            DatabaseWriteSettings: The corresponding DatabaseWriteSettings object.
        """
        return DatabaseWriteSettings(
            connection=self.connection,
            table_name=self.table_name,
            if_exists=self.if_exists,
            flowfile_flow_id=self.flowfile_flow_id,
            flowfile_node_id=self.flowfile_node_id
        )


class FuzzyJoinInput(BaseModel):
    task_id: Optional[str] = None
    cache_dir: Optional[str] = None
    left_df_operation: PolarsOperation
    right_df_operation: PolarsOperation
    fuzzy_maps: list[FuzzyMapping]
    flowfile_flow_id: Optional[int] = 1
    flowfile_node_id: Optional[int | str] = -1


class Status(BaseModel):
    background_task_id: str
    status: Literal['Processing', 'Completed', 'Error', 'Unknown Error', 'Starting']  # Type alias for status
    file_ref: str
    progress: Optional[int] = 0
    error_message: Optional[str] = None  # Add error_message field
    results: Optional[Any] = None
    result_type: Optional[ResultType] = 'polars'

    def __hash__(self):
        return hash(self.file_ref)


class RawLogInput(BaseModel):
    flowfile_flow_id: int
    log_message: str
    log_type: Literal["INFO", "ERROR"]
    extra: Optional[dict] = None
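Note (editor's illustration, not part of the released code): a minimal sketch of building a PolarsScript payload, assuming the operation field carries base64-encoded bytes of a serialized LazyFrame, which polars_serializable_object() decodes again for the worker tasks.

# Editor's illustration with hypothetical values.
import io
import polars as pl
from base64 import encodebytes
from flowfile_worker.models import PolarsScript

lf = pl.LazyFrame({"a": [1, 2, 3]})
script = PolarsScript(
    operation=encodebytes(lf.serialize()),  # base64 bytes; decoded by polars_serializable_object()
    operation_type='store',
    flowfile_flow_id=1,
    flowfile_node_id=7,
)
restored = pl.LazyFrame.deserialize(io.BytesIO(script.polars_serializable_object()))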