flowfile-0.3.9-py3-none-any.whl → flowfile-0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. flowfile/__init__.py +8 -1
  2. flowfile/api.py +1 -3
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
  8. flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
  9. flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
  10. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  11. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  12. flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
  13. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  14. flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
  15. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  16. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
  17. flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
  18. flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
  19. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
  20. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
  21. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  22. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
  23. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
  24. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  25. flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
  26. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
  27. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
  28. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  29. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
  30. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  31. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  32. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
  33. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
  34. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  35. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
  36. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  37. flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
  38. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  39. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  40. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
  41. flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
  42. flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
  43. flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
  44. flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
  45. flowfile/web/static/assets/Output-283fe388.css +37 -0
  46. flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
  47. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
  48. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  49. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  50. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  51. flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
  52. flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
  53. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
  54. flowfile/web/static/assets/Read-64a3f259.js +218 -0
  55. flowfile/web/static/assets/Read-e808b239.css +62 -0
  56. flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
  57. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
  58. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  59. flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
  60. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
  61. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
  62. flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
  63. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  64. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  65. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  66. flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
  67. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
  68. flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
  69. flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
  70. flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
  71. flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
  72. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
  73. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  74. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
  75. flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
  76. flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
  77. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  78. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
  79. flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
  80. flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
  81. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
  82. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  83. flowfile/web/static/assets/Union-bfe9b996.js +77 -0
  84. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
  85. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  86. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  87. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
  88. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  89. flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
  90. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
  91. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  92. flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
  93. flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
  94. flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
  95. flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
  96. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
  97. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
  98. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
  99. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
  100. flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
  101. flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
  102. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  103. flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
  104. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  105. flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
  106. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  107. flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
  108. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  109. flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
  110. flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
  111. flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
  112. flowfile/web/static/assets/readExcel-806d2826.css +64 -0
  113. flowfile/web/static/assets/readParquet-48c81530.css +19 -0
  114. flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
  115. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
  116. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
  117. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  118. flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
  119. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
  120. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
  121. flowfile/web/static/index.html +2 -2
  122. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
  123. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
  124. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  125. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  126. flowfile_core/__init__.py +3 -0
  127. flowfile_core/configs/flow_logger.py +5 -13
  128. flowfile_core/configs/node_store/__init__.py +30 -0
  129. flowfile_core/configs/node_store/nodes.py +383 -99
  130. flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
  131. flowfile_core/configs/settings.py +2 -1
  132. flowfile_core/database/connection.py +5 -21
  133. flowfile_core/fileExplorer/funcs.py +239 -121
  134. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  135. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  136. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  137. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  138. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
  139. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
  140. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
  141. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  142. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  143. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  144. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
  145. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  146. flowfile_core/flowfile/flow_graph.py +240 -54
  147. flowfile_core/flowfile/flow_node/flow_node.py +48 -13
  148. flowfile_core/flowfile/flow_node/models.py +2 -1
  149. flowfile_core/flowfile/handler.py +24 -5
  150. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  151. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  152. flowfile_core/flowfile/node_designer/__init__.py +47 -0
  153. flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
  154. flowfile_core/flowfile/node_designer/custom_node.py +371 -0
  155. flowfile_core/flowfile/node_designer/ui_components.py +277 -0
  156. flowfile_core/flowfile/schema_callbacks.py +17 -10
  157. flowfile_core/flowfile/setting_generator/settings.py +15 -10
  158. flowfile_core/main.py +5 -1
  159. flowfile_core/routes/routes.py +73 -30
  160. flowfile_core/routes/user_defined_components.py +55 -0
  161. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  162. flowfile_core/schemas/input_schema.py +228 -65
  163. flowfile_core/schemas/output_model.py +5 -2
  164. flowfile_core/schemas/schemas.py +153 -35
  165. flowfile_core/schemas/transform_schema.py +1083 -412
  166. flowfile_core/schemas/yaml_types.py +103 -0
  167. flowfile_core/types.py +156 -0
  168. flowfile_core/utils/validate_setup.py +3 -1
  169. flowfile_frame/__init__.py +3 -1
  170. flowfile_frame/flow_frame.py +31 -24
  171. flowfile_frame/flow_frame_methods.py +12 -9
  172. flowfile_worker/__init__.py +9 -35
  173. flowfile_worker/create/__init__.py +3 -21
  174. flowfile_worker/create/funcs.py +68 -56
  175. flowfile_worker/create/models.py +130 -62
  176. flowfile_worker/main.py +5 -2
  177. flowfile_worker/routes.py +52 -13
  178. shared/__init__.py +15 -0
  179. shared/storage_config.py +258 -0
  180. tools/migrate/README.md +56 -0
  181. tools/migrate/__init__.py +12 -0
  182. tools/migrate/__main__.py +131 -0
  183. tools/migrate/legacy_schemas.py +621 -0
  184. tools/migrate/migrate.py +598 -0
  185. tools/migrate/tests/__init__.py +0 -0
  186. tools/migrate/tests/conftest.py +23 -0
  187. tools/migrate/tests/test_migrate.py +627 -0
  188. tools/migrate/tests/test_migration_e2e.py +1010 -0
  189. tools/migrate/tests/test_node_migrations.py +813 -0
  190. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  191. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  192. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  193. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  194. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  195. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  196. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  197. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  198. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  199. flowfile_core/flowfile/manage/open_flowfile.py +0 -135
  200. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
  201. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
flowfile_worker/create/funcs.py CHANGED
@@ -1,94 +1,100 @@
  import polars as pl
  import os

- from flowfile_worker.create.models import ReceivedCsvTable, ReceivedParquetTable, ReceivedExcelTable
+ from flowfile_worker.create.models import ReceivedTable, InputCsvTable, InputJsonTable, InputExcelTable, InputParquetTable
  from flowfile_worker.create.utils import create_fake_data
  from flowfile_worker.create.read_excel_tables import df_from_openpyxl, df_from_calamine_xlsx


- def create_from_path_json(received_table: ReceivedCsvTable):
+ def create_from_path_json(received_table: ReceivedTable):
+     if not isinstance(received_table.table_settings, InputJsonTable):
+         raise ValueError("Received table settings are not of type InputJsonTable")
+     input_table_settings: InputJsonTable = received_table.table_settings
      f = received_table.abs_file_path
      gbs_to_load = os.path.getsize(f) / 1024 / 1000 / 1000
      low_mem = gbs_to_load > 10
-     if received_table.encoding.upper() == 'UTF8' or received_table.encoding.upper() == 'UTF-8':
+     if input_table_settings.encoding.upper() == 'UTF8' or input_table_settings.encoding.upper() == 'UTF-8':
          try:
              df = pl.scan_csv(f,
                               low_memory=low_mem,
                               try_parse_dates=True,
-                              separator=received_table.delimiter,
-                              has_header=received_table.has_headers,
-                              skip_rows=received_table.starting_from_line,
+                              separator=input_table_settings.delimiter,
+                              has_header=input_table_settings.has_headers,
+                              skip_rows=input_table_settings.starting_from_line,
                               encoding='utf8',
-                              infer_schema_length=received_table.infer_schema_length)
+                              infer_schema_length=input_table_settings.infer_schema_length)
              df.head(1).collect()
              return df
          except:
              try:
                  df = pl.scan_csv(f, low_memory=low_mem,
-                                  separator=received_table.delimiter,
-                                  has_header=received_table.has_headers,
-                                  skip_rows=received_table.starting_from_line,
+                                  separator=input_table_settings.delimiter,
+                                  has_header=input_table_settings.has_headers,
+                                  skip_rows=input_table_settings.starting_from_line,
                                   encoding='utf8-lossy',
                                   ignore_errors=True)
                  return df
              except:
                  df = pl.scan_csv(f, low_memory=low_mem,
-                                  separator=received_table.delimiter,
-                                  has_header=received_table.has_headers,
-                                  skip_rows=received_table.starting_from_line,
+                                  separator=input_table_settings.delimiter,
+                                  has_header=input_table_settings.has_headers,
+                                  skip_rows=input_table_settings.starting_from_line,
                                   encoding='utf8',
                                   ignore_errors=True)
                  return df
      else:
          df = pl.read_csv(f, low_memory=low_mem,
-                          separator=received_table.delimiter,
-                          has_header=received_table.has_headers,
-                          skip_rows=received_table.starting_from_line,
-                          encoding=received_table.encoding,
+                          separator=input_table_settings.delimiter,
+                          has_header=input_table_settings.has_headers,
+                          skip_rows=input_table_settings.starting_from_line,
+                          encoding=input_table_settings.encoding,
                           ignore_errors=True)
          return df


- def create_from_path_csv(received_table: ReceivedCsvTable) -> pl.DataFrame:
+ def create_from_path_csv(received_table: ReceivedTable) -> pl.DataFrame:
      f = received_table.abs_file_path
+     if not isinstance(received_table.table_settings, InputCsvTable):
+         raise ValueError("Received table settings are not of type InputCsvTable")
+     input_table_settings: InputCsvTable = received_table.table_settings
      gbs_to_load = os.path.getsize(f) / 1024 / 1000 / 1000
      low_mem = gbs_to_load > 10
-     if received_table.encoding.upper() == 'UTF8' or received_table.encoding.upper() == 'UTF-8':
+     if input_table_settings.encoding.upper() == 'UTF8' or input_table_settings.encoding.upper() == 'UTF-8':
          try:
              df = pl.scan_csv(f,
                               low_memory=low_mem,
                               try_parse_dates=True,
-                              separator=received_table.delimiter,
-                              has_header=received_table.has_headers,
-                              skip_rows=received_table.starting_from_line,
+                              separator=input_table_settings.delimiter,
+                              has_header=input_table_settings.has_headers,
+                              skip_rows=input_table_settings.starting_from_line,
                               encoding='utf8',
-                              infer_schema_length=received_table.infer_schema_length)
+                              infer_schema_length=input_table_settings.infer_schema_length)
              df.head(1).collect()
              return df
          except:
              try:
                  df = pl.scan_csv(f, low_memory=low_mem,
-                                  separator=received_table.delimiter,
-                                  has_header=received_table.has_headers,
-                                  skip_rows=received_table.starting_from_line,
+                                  separator=input_table_settings.delimiter,
+                                  has_header=input_table_settings.has_headers,
+                                  skip_rows=input_table_settings.starting_from_line,
                                   encoding='utf8-lossy',
                                   ignore_errors=True)
                  return df
              except:
                  df = pl.scan_csv(f, low_memory=low_mem,
-                                  separator=received_table.delimiter,
-                                  has_header=received_table.has_headers,
-                                  skip_rows=received_table.starting_from_line,
+                                  separator=input_table_settings.delimiter,
+                                  has_header=input_table_settings.has_headers,
+                                  skip_rows=input_table_settings.starting_from_line,
                                   encoding='utf8',
                                   ignore_errors=True)
                  return df
      else:
          df = pl.read_csv(f,
                           low_memory=low_mem,
-                          separator=received_table.delimiter,
-                          has_header=received_table.has_headers,
-                          skip_rows=received_table.starting_from_line,
-                          encoding=received_table.encoding,
+                          separator=input_table_settings.delimiter,
+                          has_header=input_table_settings.has_headers,
+                          skip_rows=input_table_settings.starting_from_line,
+                          encoding=input_table_settings.encoding,
                           ignore_errors=True)
          return df

@@ -97,50 +103,56 @@ def create_random(number_of_records: int = 1000) -> pl.LazyFrame:
      return create_fake_data(number_of_records).lazy()


- def create_from_path_parquet(received_table: ReceivedParquetTable):
+ def create_from_path_parquet(received_table: ReceivedTable):
+     if not isinstance(received_table.table_settings, InputParquetTable):
+         raise ValueError("Received table settings are not of type InputParquetTable")
      low_mem = (os.path.getsize(received_table.abs_file_path) / 1024 / 1000 / 1000) > 2
      return pl.scan_parquet(source=received_table.abs_file_path, low_memory=low_mem)


- def create_from_path_excel(received_table: ReceivedExcelTable):
-     if received_table.type_inference:
+ def create_from_path_excel(received_table: ReceivedTable):
+     if not isinstance(received_table.table_settings, InputExcelTable):
+         raise ValueError("Received table settings are not of type InputExcelTable")
+     input_table_settings: InputExcelTable = received_table.table_settings
+
+     if input_table_settings.type_inference:
          engine = 'openpyxl'
-     elif received_table.start_row > 0 and received_table.start_column == 0:
-         engine = 'calamine' if received_table.has_headers else 'xlsx2csv'
-     elif received_table.start_column > 0 or received_table.start_row > 0:
+     elif input_table_settings.start_row > 0 and input_table_settings.start_column == 0:
+         engine = 'calamine' if input_table_settings.has_headers else 'xlsx2csv'
+     elif input_table_settings.start_column > 0 or input_table_settings.start_row > 0:
          engine = 'openpyxl'
      else:
          engine = 'calamine'

-     sheet_name = received_table.sheet_name
+     sheet_name = input_table_settings.sheet_name

      if engine == 'calamine':
          df = df_from_calamine_xlsx(file_path=received_table.abs_file_path, sheet_name=sheet_name,
-                                    start_row=received_table.start_row, end_row=received_table.end_row)
-         if received_table.end_column > 0:
-             end_col_index = received_table.end_column
-             cols_to_select = [df.columns[i] for i in range(received_table.start_column, end_col_index)]
+                                    start_row=input_table_settings.start_row, end_row=input_table_settings.end_row)
+         if input_table_settings.end_column > 0:
+             end_col_index = input_table_settings.end_column
+             cols_to_select = [df.columns[i] for i in range(input_table_settings.start_column, end_col_index)]
              df = df.select(cols_to_select)

      elif engine == 'xlsx2csv':
-         csv_options = {'has_header': received_table.has_headers, 'skip_rows': received_table.start_row}
+         csv_options = {'has_header': input_table_settings.has_headers, 'skip_rows': input_table_settings.start_row}
          df = pl.read_excel(source=received_table.abs_file_path,
                             read_options=csv_options,
                             engine='xlsx2csv',
-                            sheet_name=received_table.sheet_name)
-         end_col_index = received_table.end_column if received_table.end_column > 0 else len(df.columns)
-         cols_to_select = [df.columns[i] for i in range(received_table.start_column, end_col_index)]
+                            sheet_name=input_table_settings.sheet_name)
+         end_col_index = input_table_settings.end_column if input_table_settings.end_column > 0 else len(df.columns)
+         cols_to_select = [df.columns[i] for i in range(input_table_settings.start_column, end_col_index)]
          df = df.select(cols_to_select)
-         if 0 < received_table.end_row < len(df):
-             df = df.head(received_table.end_row)
+         if 0 < input_table_settings.end_row < len(df):
+             df = df.head(input_table_settings.end_row)

      else:
-         max_col = received_table.end_column if received_table.end_column > 0 else None
-         max_row = received_table.end_row + 1 if received_table.end_row > 0 else None
+         max_col = input_table_settings.end_column if input_table_settings.end_column > 0 else None
+         max_row = input_table_settings.end_row + 1 if input_table_settings.end_row > 0 else None
          df = df_from_openpyxl(file_path=received_table.abs_file_path,
-                               sheet_name=received_table.sheet_name,
-                               min_row=received_table.start_row + 1,
-                               min_col=received_table.start_column + 1,
+                               sheet_name=input_table_settings.sheet_name,
+                               min_row=input_table_settings.start_row + 1,
+                               min_col=input_table_settings.start_column + 1,
                                max_row=max_row,
-                               max_col=max_col, has_headers=received_table.has_headers)
+                               max_col=max_col, has_headers=input_table_settings.has_headers)
      return df
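The pattern above is consistent across all four readers: each function now accepts the generic `ReceivedTable`, narrows `table_settings` with an `isinstance` guard, and only then touches format-specific options. A minimal usage sketch (the file path is hypothetical and must exist on disk; names are taken from this diff):

```python
# Usage sketch for the refactored reader API (assumes flowfile 0.5.1 is installed).
from flowfile_worker.create.models import ReceivedTable, InputCsvTable
from flowfile_worker.create.funcs import create_from_path_csv

# table_settings now carries the format-specific options; the reader narrows
# the union with an isinstance check before touching CSV-only fields.
received = ReceivedTable(
    path="data/sales.csv",  # hypothetical file; the size check reads it from disk
    file_type="csv",
    table_settings=InputCsvTable(delimiter=";", has_headers=True),
)
df = create_from_path_csv(received)  # raises ValueError if settings are not InputCsvTable
```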
flowfile_worker/create/models.py CHANGED
@@ -1,5 +1,5 @@
- from pydantic import BaseModel, Field, model_validator
- from typing import List, Optional
+ from pydantic import BaseModel, Field, model_validator, field_validator
+ from typing import List, Optional, Literal, Annotated
  import os
  from pathlib import Path

@@ -9,78 +9,146 @@ class MinimalFieldInfo(BaseModel):
      data_type: str


- class ReceivedTableBase(BaseModel):
-     id: Optional[int] = None
-     name: str
-     path: str
-     directory: Optional[str] = None
-     analysis_file_available: Optional[bool] = False
-     status: Optional[str] = None
-     file_type: Optional[str] = None
-     fields: List[MinimalFieldInfo] = Field(default_factory=list)
-     abs_file_path: Optional[str] = None
+ class InputTableBase(BaseModel):
+     """Base settings for input file operations."""
+     file_type: str  # Will be overridden with Literal in subclasses

-     @classmethod
-     def create_from_path(cls, path: str):
-         filename = os.path.basename(path)
-         return cls(name=filename, path=path)

-     @property
-     def file_path(self) -> str:
-         if self.name not in self.path:
-             return os.path.join(self.path, self.name)
-         return self.path
-
-     @model_validator(mode="after")
-     def set_abs_file_path(cls, values):
-         abs_file_path = getattr(values, "abs_file_path", None)
-         if abs_file_path is None:
-             path = getattr(values, "path", None)
-             if not path:
-                 raise ValueError("Field 'path' is required to compute abs_file_path")
-             setattr(values, "abs_file_path", str(Path(path).absolute()))
-         return values
-
-
- class ReceivedCsvTable(ReceivedTableBase):
-     file_type: Optional[str] = 'csv'
-     reference: Optional[str] = ''
-     starting_from_line: Optional[int] = 0
-     delimiter: Optional[str] = ','
-     has_headers: Optional[bool] = True
-     encoding: Optional[str] = 'utf-8'
+ class InputCsvTable(InputTableBase):
+     """Defines settings for reading a CSV file."""
+     file_type: Literal['csv'] = 'csv'
+     reference: str = ''
+     starting_from_line: int = 0
+     delimiter: str = ','
+     has_headers: bool = True
+     encoding: str = 'utf-8'
      parquet_ref: Optional[str] = None
-     row_delimiter: Optional[str] = '\n'
-     quote_char: Optional[str] = '"'
-     infer_schema_length: Optional[int] = 10_000
-     truncate_ragged_lines: Optional[bool] = False
-     ignore_errors: Optional[bool] = False
+     row_delimiter: str = '\n'
+     quote_char: str = '"'
+     infer_schema_length: int = 10_000
+     truncate_ragged_lines: bool = False
+     ignore_errors: bool = False


- class ReceivedJsonTable(ReceivedCsvTable):
-     pass
+ class InputJsonTable(InputCsvTable):
+     """Defines settings for reading a JSON file."""
+     file_type: Literal['json'] = 'json'


- class ReceivedParquetTable(ReceivedTableBase):
-     file_type: Optional[str] = 'parquet'
+ class InputParquetTable(InputTableBase):
+     """Defines settings for reading a Parquet file."""
+     file_type: Literal['parquet'] = 'parquet'


- class ReceivedExcelTable(ReceivedTableBase):
+ class InputExcelTable(InputTableBase):
+     """Defines settings for reading an Excel file."""
+     file_type: Literal['excel'] = 'excel'
      sheet_name: Optional[str] = None
-     start_row: Optional[int] = 0  # optional
-     start_column: Optional[int] = 0  # optional
-     end_row: Optional[int] = 0  # optional
-     end_column: Optional[int] = 0  # optional
-     has_headers: Optional[bool] = True  # optional
-     type_inference: Optional[bool] = False  # optional
-
+     start_row: int = 0
+     start_column: int = 0
+     end_row: int = 0
+     end_column: int = 0
+     has_headers: bool = True
+     type_inference: bool = False
+
+     @model_validator(mode='after')
      def validate_range_values(self):
-         # Validate that start and end rows/columns are non-negative integers
+         """Validates that the Excel cell range is logical."""
          for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
              if not isinstance(attribute, int) or attribute < 0:
                  raise ValueError("Row and column indices must be non-negative integers")
+         if (self.end_row > 0 and self.start_row > self.end_row) or \
+                 (self.end_column > 0 and self.start_column > self.end_column):
+             raise ValueError("Start row/column must not be greater than end row/column")
+         return self
+
+
+ # Create the discriminated union (similar to OutputTableSettings)
+ InputTableSettings = Annotated[
+     InputCsvTable | InputJsonTable | InputParquetTable | InputExcelTable,
+     Field(discriminator='file_type')
+ ]
+
+
+ # Now create the main ReceivedTable model
+ class ReceivedTable(BaseModel):
+     """Model for defining a table received from an external source."""
+     # Metadata fields
+     id: Optional[int] = None
+     name: Optional[str] = None
+     path: str  # This can be an absolute or relative path
+     directory: Optional[str] = None
+     analysis_file_available: bool = False
+     status: Optional[str] = None
+     fields: List[MinimalFieldInfo] = Field(default_factory=list)
+     abs_file_path: Optional[str] = None
+
+     file_type: Literal['csv', 'json', 'parquet', 'excel']
+
+     table_settings: InputTableSettings
+
+     @classmethod
+     def create_from_path(cls, path: str, file_type: Literal['csv', 'json', 'parquet', 'excel'] = 'csv'):
+         """Creates an instance from a file path string."""
+         filename = Path(path).name
+
+         # Create appropriate table_settings based on file_type
+         settings_map = {
+             'csv': InputCsvTable(),
+             'json': InputJsonTable(),
+             'parquet': InputParquetTable(),
+             'excel': InputExcelTable(),
+         }
+
+         return cls(
+             name=filename,
+             path=path,
+             file_type=file_type,
+             table_settings=settings_map.get(file_type, InputCsvTable())
+         )
+
+     @property
+     def file_path(self) -> str:
+         """Constructs the full file path from the directory and name."""
+         if self.name and self.name not in self.path:
+             return os.path.join(self.path, self.name)
+         else:
+             return self.path
+
+     def set_absolute_filepath(self):
+         """Resolves the path to an absolute file path."""
+         base_path = Path(self.path).expanduser()
+         if not base_path.is_absolute():
+             base_path = Path.cwd() / base_path
+         if self.name and self.name not in base_path.name:
+             base_path = base_path / self.name
+         self.abs_file_path = str(base_path.resolve())
+
+     @field_validator('table_settings', mode='before')
+     @classmethod
+     def validate_table_settings(cls, v, info):
+         """Ensures table_settings matches the file_type."""
+         if v is None:
+             file_type = info.data.get('file_type', 'csv')
+             # Create default based on file_type
+             settings_map = {
+                 'csv': InputCsvTable(),
+                 'json': InputJsonTable(),
+                 'parquet': InputParquetTable(),
+                 'excel': InputExcelTable(),
+             }
+             return settings_map.get(file_type, InputCsvTable())
+
+         # If it's a dict, add file_type if missing
+         if isinstance(v, dict) and 'file_type' not in v:
+             v['file_type'] = info.data.get('file_type', 'csv')
+
+         return v
+
+     @model_validator(mode='after')
+     def populate_abs_file_path(self):
+         """Ensures the absolute file path is populated after validation."""
+         if not self.abs_file_path:
+             self.set_absolute_filepath()
+         return self

-     # Validate that start is before end if end is specified (non-zero)
-     if (0 < self.end_row < self.start_row) or \
-        (0 < self.end_column < self.start_column):
-         raise ValueError("Start row/column must not be greater than end row/column if specified")
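Here `file_type` doubles as the discriminator of the `InputTableSettings` union, so pydantic v2 resolves the concrete settings class directly from a dict payload, and the before-validator injects the discriminator when the nested dict omits it. A small sketch of the expected behaviour, assuming only the models shown above:

```python
# Sketch of discriminated-union resolution from a plain dict payload
# (pydantic v2; the 'file_type' key selects the settings class).
from flowfile_worker.create.models import ReceivedTable, InputExcelTable

payload = {
    "path": "reports/q1.xlsx",  # hypothetical path; resolution does not require the file to exist
    "file_type": "excel",
    # 'file_type' is injected into table_settings by the before-validator,
    # so the nested dict does not need to repeat it:
    "table_settings": {"sheet_name": "Sheet1", "start_row": 2},
}
received = ReceivedTable.model_validate(payload)
assert isinstance(received.table_settings, InputExcelTable)
assert received.abs_file_path is not None  # populated by the after-validator
```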
flowfile_worker/main.py CHANGED
@@ -4,8 +4,11 @@ import signal

  from contextlib import asynccontextmanager
  from fastapi import FastAPI
+
+ from shared.storage_config import storage
+
  from flowfile_worker.routes import router
- from flowfile_worker import mp_context, CACHE_DIR
+ from flowfile_worker import mp_context
  from flowfile_worker.configs import logger, FLOWFILE_CORE_URI, SERVICE_HOST, SERVICE_PORT


@@ -30,7 +33,7 @@ async def shutdown_handler(app: FastAPI):
          logger.error(f"Error cleaning up process: {e}")

      try:
-         CACHE_DIR.cleanup()
+         storage.cleanup_directories()
      except Exception as e:
          print(f"Error cleaning up cache directory: {e}")

flowfile_worker/routes.py CHANGED
@@ -8,22 +8,30 @@ from base64 import encodebytes
  from flowfile_worker import status_dict, CACHE_DIR, PROCESS_MEMORY_USAGE, status_dict_lock
  from flowfile_worker import models
  from flowfile_worker.spawner import start_process, start_fuzzy_process, start_generic_process, process_manager
- from flowfile_worker.create import table_creator_factory_method, received_table_parser, FileType
+ from flowfile_worker.create import table_creator_factory_method, FileType
  from flowfile_worker.configs import logger
  from flowfile_worker.external_sources.sql_source.models import DatabaseReadSettings
- from flowfile_worker.external_sources.sql_source.main import read_sql_source, write_serialized_df_to_database
-
+ from flowfile_worker.external_sources.sql_source.main import read_sql_source
+ from flowfile_worker.create.models import ReceivedTable

  router = APIRouter()


+ def create_and_get_default_cache_dir(flowfile_flow_id: int) -> str:
+     default_cache_dir = CACHE_DIR / str(flowfile_flow_id)
+     default_cache_dir.mkdir(parents=True, exist_ok=True)
+     return str(default_cache_dir)
+
+
  @router.post("/submit_query/")
  def submit_query(polars_script: models.PolarsScript, background_tasks: BackgroundTasks) -> models.Status:
      logger.info(f"Processing query with operation: {polars_script.operation_type}")

      try:
          polars_script.task_id = str(uuid.uuid4()) if polars_script.task_id is None else polars_script.task_id
-         polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else CACHE_DIR.name
+         default_cache_dir = create_and_get_default_cache_dir(polars_script.flowfile_flow_id)
+
+         polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else default_cache_dir
          polars_serializable_object = polars_script.polars_serializable_object()
          file_path = os.path.join(polars_script.cache_dir, f"{polars_script.task_id}.arrow")
          result_type = "polars" if polars_script.operation_type == "store" else "other"
@@ -49,8 +57,9 @@ def store_sample(polars_script: models.PolarsScriptSample, background_tasks: Bac
      logger.info(f"Processing sample storage with size: {polars_script.sample_size}")

      try:
+         default_cache_dir = create_and_get_default_cache_dir(polars_script.flowfile_flow_id)
          polars_script.task_id = str(uuid.uuid4()) if polars_script.task_id is None else polars_script.task_id
-         polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else CACHE_DIR.name
+         polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else default_cache_dir
          polars_serializable_object = polars_script.polars_serializable_object()

          file_path = os.path.join(polars_script.cache_dir, f"{polars_script.task_id}.arrow")
@@ -210,7 +219,8 @@ def store_sql_db_result(database_read_settings: DatabaseReadSettings, background

      try:
          task_id = str(uuid.uuid4())
-         file_path = os.path.join(CACHE_DIR.name, f"{task_id}.arrow")
+         file_path = os.path.join(create_and_get_default_cache_dir(database_read_settings.flowfile_flow_id),
+                                  f"{task_id}.arrow")
          status = models.Status(background_task_id=task_id, status="Starting", file_ref=file_path,
                                 result_type="polars")
          status_dict[task_id] = status
@@ -227,7 +237,7 @@


  @router.post('/create_table/{file_type}')
- def create_table(file_type: FileType, received_table: Dict, background_tasks: BackgroundTasks,
+ def create_table(file_type: FileType, received_table: ReceivedTable, background_tasks: BackgroundTasks,
                   flowfile_flow_id: int = 1, flowfile_node_id: int | str = -1) -> models.Status:
      """
      Create a Polars table from received dictionary data based on specified file type.
@@ -243,18 +253,15 @@ def create_table(file_type: FileType, received_table: Dict, background_tasks: Ba
          models.Status: Status object tracking the table creation
      """
      logger.info(f"Creating table of type: {file_type}")
-
      try:
          task_id = str(uuid.uuid4())
-         file_ref = os.path.join(CACHE_DIR.name, f"{task_id}.arrow")
-
+         file_ref = os.path.join(create_and_get_default_cache_dir(flowfile_flow_id), f"{task_id}.arrow")
          status = models.Status(background_task_id=task_id, status="Starting", file_ref=file_ref,
                                 result_type="polars")
          status_dict[task_id] = status
          func_ref = table_creator_factory_method(file_type)
-         received_table_parsed = received_table_parser(received_table, file_type)
          background_tasks.add_task(start_generic_process, func_ref=func_ref, file_ref=file_ref,
-                                   task_id=task_id, kwargs={'received_table': received_table_parsed},
+                                   task_id=task_id, kwargs={'received_table': received_table},
                                    flowfile_flow_id=flowfile_flow_id,
                                    flowfile_node_id=flowfile_node_id)
          logger.info(f"Started table creation task: {task_id}")
@@ -382,8 +389,9 @@ async def add_fuzzy_join(polars_script: models.FuzzyJoinInput, background_tasks:
      """
      logger.info("Starting fuzzy join operation")
      try:
+         default_cache_dir = create_and_get_default_cache_dir(polars_script.flowfile_flow_id)
          polars_script.task_id = str(uuid.uuid4()) if polars_script.task_id is None else polars_script.task_id
-         polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else CACHE_DIR.name
+         polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else default_cache_dir
          left_serializable_object = polars_script.left_df_operation.polars_serializable_object()
          right_serializable_object = polars_script.right_df_operation.polars_serializable_object()

@@ -405,6 +413,37 @@
          raise HTTPException(status_code=500, detail=str(e))


+ @router.delete("/clear_task/{task_id}")
+ def clear_task(task_id: str):
+     """
+     Clear task data and status by ID.
+
+     Args:
+         task_id: Unique identifier of the task to clear
+     Returns:
+         dict: Success message
+     Raises:
+         HTTPException: If task not found
+     """
+
+     logger.info(f"Clearing task: {task_id}")
+     status = status_dict.get(task_id)
+     if not status:
+         logger.warning(f"Task not found for clearing: {task_id}")
+         raise HTTPException(status_code=404, detail="Task not found")
+     try:
+         if os.path.exists(status.file_ref):
+             os.remove(status.file_ref)
+             logger.debug(f"Removed file: {status.file_ref}")
+     except Exception as e:
+         logger.error(f"Error removing file {status.file_ref}: {str(e)}", exc_info=True)
+     with status_dict_lock:
+         status_dict.pop(task_id, None)
+         PROCESS_MEMORY_USAGE.pop(task_id, None)
+     logger.info(f"Successfully cleared task: {task_id}")
+     return {"message": f"Task {task_id} has been cleared."}
+
+
  @router.post("/cancel_task/{task_id}")
  def cancel_task(task_id: str):
      """Cancel a running task by ID.
shared/__init__.py ADDED
@@ -0,0 +1,15 @@
+ """
+ Shared utilities for Flowfile services.
+ This package contains common functionality that can be used across
+ flowfile_core, flowfile_worker, and other components without creating
+ circular dependencies.
+ """
+
+ from .storage_config import storage, get_cache_directory, get_temp_directory, get_flows_directory
+
+ __all__ = [
+     'storage',
+     'get_cache_directory',
+     'get_temp_directory',
+     'get_flows_directory'
+ ]
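`shared/storage_config.py` itself (258 new lines) is not shown in this excerpt. A minimal sketch of what the facade could look like, consistent only with the names visible in this diff (`storage.cleanup_directories()` in `flowfile_worker/main.py` plus the three getters re-exported above); the base directory layout and cleanup policy are assumptions, not the real implementation:

```python
# Hypothetical reconstruction of shared/storage_config.py; the actual module
# is not included in this diff and is certainly richer than this.
import shutil
import tempfile
from pathlib import Path


class _Storage:
    """Owns the on-disk directories shared by core and worker (assumed layout)."""

    def __init__(self) -> None:
        base = Path(tempfile.gettempdir()) / "flowfile"  # assumed location
        self.cache_dir = base / "cache"
        self.temp_dir = base / "tmp"
        self.flows_dir = base / "flows"
        for d in (self.cache_dir, self.temp_dir, self.flows_dir):
            d.mkdir(parents=True, exist_ok=True)

    def cleanup_directories(self) -> None:
        """Remove transient data on shutdown, keeping saved flows."""
        for d in (self.cache_dir, self.temp_dir):
            shutil.rmtree(d, ignore_errors=True)


storage = _Storage()


def get_cache_directory() -> Path:
    return storage.cache_dir


def get_temp_directory() -> Path:
    return storage.temp_dir


def get_flows_directory() -> Path:
    return storage.flows_dir
```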