atlan-application-sdk 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. application_sdk/activities/common/sql_utils.py +308 -0
  2. application_sdk/activities/common/utils.py +1 -45
  3. application_sdk/activities/metadata_extraction/sql.py +110 -353
  4. application_sdk/activities/query_extraction/sql.py +12 -11
  5. application_sdk/application/__init__.py +1 -1
  6. application_sdk/clients/sql.py +167 -1
  7. application_sdk/clients/temporal.py +6 -6
  8. application_sdk/common/types.py +8 -0
  9. application_sdk/common/utils.py +1 -8
  10. application_sdk/constants.py +1 -1
  11. application_sdk/handlers/sql.py +10 -25
  12. application_sdk/interceptors/events.py +1 -1
  13. application_sdk/io/__init__.py +654 -0
  14. application_sdk/io/json.py +429 -0
  15. application_sdk/{outputs → io}/parquet.py +358 -47
  16. application_sdk/io/utils.py +307 -0
  17. application_sdk/observability/observability.py +23 -12
  18. application_sdk/server/fastapi/middleware/logmiddleware.py +23 -17
  19. application_sdk/server/fastapi/middleware/metrics.py +27 -24
  20. application_sdk/server/fastapi/models.py +1 -1
  21. application_sdk/server/fastapi/routers/server.py +1 -1
  22. application_sdk/server/fastapi/utils.py +10 -0
  23. application_sdk/services/eventstore.py +4 -4
  24. application_sdk/services/objectstore.py +30 -7
  25. application_sdk/services/secretstore.py +1 -1
  26. application_sdk/test_utils/hypothesis/strategies/outputs/json_output.py +0 -1
  27. application_sdk/test_utils/hypothesis/strategies/server/fastapi/__init__.py +1 -1
  28. application_sdk/version.py +1 -1
  29. application_sdk/worker.py +1 -1
  30. {atlan_application_sdk-1.1.0.dist-info → atlan_application_sdk-2.0.0.dist-info}/METADATA +9 -11
  31. {atlan_application_sdk-1.1.0.dist-info → atlan_application_sdk-2.0.0.dist-info}/RECORD +36 -43
  32. application_sdk/common/dataframe_utils.py +0 -42
  33. application_sdk/events/__init__.py +0 -5
  34. application_sdk/inputs/.cursor/BUGBOT.md +0 -250
  35. application_sdk/inputs/__init__.py +0 -168
  36. application_sdk/inputs/iceberg.py +0 -75
  37. application_sdk/inputs/json.py +0 -136
  38. application_sdk/inputs/parquet.py +0 -272
  39. application_sdk/inputs/sql_query.py +0 -271
  40. application_sdk/outputs/.cursor/BUGBOT.md +0 -295
  41. application_sdk/outputs/__init__.py +0 -445
  42. application_sdk/outputs/iceberg.py +0 -139
  43. application_sdk/outputs/json.py +0 -268
  44. /application_sdk/{events → interceptors}/models.py +0 -0
  45. /application_sdk/{common/dapr_utils.py → services/_utils.py} +0 -0
  46. {atlan_application_sdk-1.1.0.dist-info → atlan_application_sdk-2.0.0.dist-info}/WHEEL +0 -0
  47. {atlan_application_sdk-1.1.0.dist-info → atlan_application_sdk-2.0.0.dist-info}/licenses/LICENSE +0 -0
  48. {atlan_application_sdk-1.1.0.dist-info → atlan_application_sdk-2.0.0.dist-info}/licenses/NOTICE +0 -0
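The listing above amounts to a module reorganization: the 1.x inputs/outputs packages are removed in favor of a consolidated application_sdk.io package, events/models.py moves to interceptors/models.py, and common/dapr_utils.py becomes services/_utils.py. A minimal sketch of the import changes this implies for downstream code, assuming the 2.0.0 layout shown above (ParquetFileWriter and the moved Event models are confirmed by the hunks below; the 1.x names are shown only for contrast):

    # 1.1.0 (modules removed in 2.0.0)
    # from application_sdk.outputs.parquet import ParquetOutput
    # from application_sdk.events.models import Event

    # 2.0.0 (new locations, per the hunks in this diff)
    from application_sdk.io.parquet import ParquetFileWriter   # replaces ParquetOutput
    from application_sdk.interceptors.models import Event      # moved from events.models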
@@ -0,0 +1,307 @@
+ import glob
+ import os
+ from datetime import datetime
+ from typing import TYPE_CHECKING, Any, List, Optional, Union
+
+ from application_sdk.activities.common.utils import get_object_store_prefix
+ from application_sdk.common.error_codes import IOError
+ from application_sdk.constants import TEMPORARY_PATH
+ from application_sdk.observability.logger_adaptor import get_logger
+ from application_sdk.services.objectstore import ObjectStore
+
+ JSON_FILE_EXTENSION = ".json"
+ PARQUET_FILE_EXTENSION = ".parquet"
+
+ if TYPE_CHECKING:
+     import daft
+     import pandas as pd
+
+
+ logger = get_logger(__name__)
+
+
+ def find_local_files_by_extension(
+     path: str,
+     extension: str,
+     file_names: Optional[List[str]] = None,
+ ) -> List[str]:
+     """Find local files at the specified local path, optionally filtering by file names.
+
+     Args:
+         path (str): Local path to search in (file or directory).
+         extension (str): File extension to filter by (e.g., '.parquet', '.json').
+         file_names (Optional[List[str]]): List of file names (basenames) to filter by; paths are not supported.
+
+     Returns:
+         List[str]: List of matching file paths.
+
+     Example:
+         >>> find_local_files_by_extension("/data", ".parquet", ["file1.parquet", "file2.parquet"])
+         ['/data/file1.parquet', '/data/file2.parquet']
+
+         >>> find_local_files_by_extension("/data/single.json", ".json")
+         ['/data/single.json']
+     """
+     if os.path.isfile(path) and path.endswith(extension):
+         # Single file - return it directly
+         return [path]
+
+     elif os.path.isdir(path):
+         # Directory - find all matching files recursively
+         all_files = glob.glob(
+             os.path.join(path, "**", f"*{extension}"),
+             recursive=True,
+         )
+
+         # Filter by file names if specified
+         if file_names:
+             file_names_set = set(file_names)  # Convert to set for O(1) lookup
+             return [f for f in all_files if os.path.basename(f) in file_names_set]
+         else:
+             return all_files
+
+     return []
+
+
+ async def download_files(
+     path: str, file_extension: str, file_names: Optional[List[str]] = None
+ ) -> List[str]:
+     """Download files from the object store if they are not available locally.
+
+     Flow:
+         1. Check if files exist locally at `path`
+         2. If not, try to download from the object store
+         3. Filter by `file_names` if provided
+         4. Return the list of file paths for logging purposes
+
+     Returns:
+         List[str]: List of file paths
+
+     Raises:
+         IOError: When no files are found locally or in the object store
+     """
+     # Step 1: Check if files exist locally
+     local_files: List[str] = find_local_files_by_extension(
+         path, file_extension, file_names
+     )
+     if local_files:
+         logger.info(
+             f"Found {len(local_files)} {file_extension} files locally at: {path}"
+         )
+         return local_files
+
+     # Step 2: Try to download from object store
+     logger.info(
+         f"No local {file_extension} files found at {path}, checking object store..."
+     )
+
+     try:
+         # Determine what to download based on path type and filters
+         downloaded_paths: List[str] = []
+
+         if path.endswith(file_extension):
+             # Single file case (file_names validation already ensures this is valid)
+             source_path = get_object_store_prefix(path)
+             destination_path = os.path.join(TEMPORARY_PATH, source_path)
+             await ObjectStore.download_file(
+                 source=source_path,
+                 destination=destination_path,
+             )
+             downloaded_paths.append(destination_path)
+
+         elif file_names:
+             # Directory with specific files - download each file individually
+             for file_name in file_names:
+                 file_path = os.path.join(path, file_name)
+                 source_path = get_object_store_prefix(file_path)
+                 destination_path = os.path.join(TEMPORARY_PATH, source_path)
+                 await ObjectStore.download_file(
+                     source=source_path,
+                     destination=destination_path,
+                 )
+                 downloaded_paths.append(destination_path)
+         else:
+             # Download entire directory
+             source_path = get_object_store_prefix(path)
+             destination_path = os.path.join(TEMPORARY_PATH, source_path)
+             await ObjectStore.download_prefix(
+                 source=source_path,
+                 destination=destination_path,
+             )
+             # Find the actual files in the downloaded directory
+             found_files = find_local_files_by_extension(
+                 destination_path, file_extension, file_names
+             )
+             downloaded_paths.extend(found_files)
+
+         # Check results
+         if downloaded_paths:
+             logger.info(
+                 f"Successfully downloaded {len(downloaded_paths)} {file_extension} files from object store"
+             )
+             return downloaded_paths
+         else:
+             raise IOError(
+                 f"{IOError.OBJECT_STORE_READ_ERROR}: Downloaded from object store but no {file_extension} files found"
+             )
+
+     except Exception as e:
+         logger.error(f"Failed to download from object store: {str(e)}")
+         raise IOError(
+             f"{IOError.OBJECT_STORE_DOWNLOAD_ERROR}: No {file_extension} files found locally at '{path}' and failed to download from object store. "
+             f"Error: {str(e)}"
+         )
+
+
+ def estimate_dataframe_record_size(
+     dataframe: "pd.DataFrame", file_extension: str
+ ) -> int:
+     """Estimate the average serialized record size of a DataFrame by sampling a few records.
+
+     Args:
+         dataframe (pd.DataFrame): The DataFrame to estimate the record size of.
+         file_extension (str): The extension of the file the DataFrame will be written to.
+
+     Returns:
+         int: The estimated average record size in bytes.
+     """
+     if len(dataframe) == 0:
+         return 0
+
+     # Sample up to 10 records to estimate average size
+     sample_size = min(10, len(dataframe))
+     sample = dataframe.head(sample_size)
+     compression_factor = 1
+     if file_extension == JSON_FILE_EXTENSION:
+         sample_file = sample.to_json(orient="records", lines=True)
+     elif file_extension == PARQUET_FILE_EXTENSION:
+         sample_file = sample.to_parquet(index=False, compression="snappy")
+         compression_factor = 0.01
+     else:
+         raise ValueError(f"Unsupported file extension: {file_extension}")
+
+     if sample_file is not None:
+         avg_record_size = len(sample_file) / sample_size * compression_factor
+         return int(avg_record_size)
+
+     return 0
+
+
+ def path_gen(
+     chunk_count: Optional[int] = None,
+     chunk_part: int = 0,
+     start_marker: Optional[str] = None,
+     end_marker: Optional[str] = None,
+     extension: str = ".json",
+ ) -> str:
+     """Generate a file path for a chunk.
+
+     Args:
+         chunk_count (Optional[int]): Total number of chunks, or None for a single chunk.
+         chunk_part (int): Index of the part within the chunk.
+         start_marker (Optional[str]): Start marker for query extraction.
+         end_marker (Optional[str]): End marker for query extraction.
+         extension (str): File extension for the generated path.
+
+     Returns:
+         str: Generated file path for the chunk.
+     """
+     # For Query Extraction - use start and end markers without chunk count
+     if start_marker and end_marker:
+         return f"{start_marker}_{end_marker}{extension}"
+
+     # For regular chunking - include chunk count
+     if chunk_count is None:
+         return f"{str(chunk_part)}{extension}"
+     else:
+         return f"chunk-{str(chunk_count)}-part{str(chunk_part)}{extension}"
+
+
+ def process_null_fields(
+     obj: Any,
+     preserve_fields: Optional[List[str]] = None,
+     null_to_empty_dict_fields: Optional[List[str]] = None,
+ ) -> Any:
+     """Recursively remove null-valued fields from dictionaries.
+
+     Fields listed in preserve_fields are kept even when null, and fields listed in
+     null_to_empty_dict_fields are replaced with an empty dict when null. Non-dict
+     values are returned unchanged.
+
+     Args:
+         obj: The object to clean (dict, list, or other value)
+         preserve_fields: Optional list of field names that should be preserved even if they contain null values
+         null_to_empty_dict_fields: Optional list of field names that should be replaced with an empty dict if null
+
+     Returns:
+         The cleaned object with null values removed
+     """
+     if isinstance(obj, dict):
+         result = {}
+         for k, v in obj.items():
+             # Handle null fields that should be converted to empty dicts
+             if k in (null_to_empty_dict_fields or []) and v is None:
+                 result[k] = {}
+                 continue
+
+             # Process the value recursively
+             processed_value = process_null_fields(
+                 v, preserve_fields, null_to_empty_dict_fields
+             )
+
+             # Keep the field if it's in preserve_fields or has a non-None processed value
+             if k in (preserve_fields or []) or processed_value is not None:
+                 result[k] = processed_value
+
+         return result
+     return obj
+
+
+ def convert_datetime_to_epoch(data: Any) -> Any:
+     """Convert datetime objects to epoch timestamps in milliseconds.
+
+     Args:
+         data: The data to convert
+
+     Returns:
+         The converted data with datetime fields as epoch timestamps
+     """
+     if isinstance(data, datetime):
+         return int(data.timestamp() * 1000)
+     elif isinstance(data, dict):
+         return {k: convert_datetime_to_epoch(v) for k, v in data.items()}
+     elif isinstance(data, list):
+         return [convert_datetime_to_epoch(item) for item in data]
+     return data
+
+
+ def is_empty_dataframe(dataframe: Union["pd.DataFrame", "daft.DataFrame"]) -> bool:  # noqa: F821
+     """Check if a DataFrame is empty.
+
+     This function determines whether a DataFrame has any rows, supporting both
+     pandas and daft DataFrame types. For pandas DataFrames, it uses the `empty`
+     property, and for daft DataFrames, it checks if the row count is 0.
+
+     Args:
+         dataframe (Union[pd.DataFrame, daft.DataFrame]): The DataFrame to check,
+             can be either a pandas DataFrame or a daft DataFrame.
+
+     Returns:
+         bool: True if the DataFrame has no rows, False otherwise.
+
+     Note:
+         If daft is not available and a daft DataFrame is passed, the function
+         will log a warning and return True.
+     """
+     import pandas as pd
+
+     if isinstance(dataframe, pd.DataFrame):
+         return dataframe.empty
+
+     try:
+         import daft
+
+         if isinstance(dataframe, daft.DataFrame):
+             return dataframe.count_rows() == 0
+     except Exception:
+         logger.warning("Module daft not found")
+     return True
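The helpers added in this hunk are plain module-level functions, so they can be exercised directly. A minimal sketch based only on the signatures shown above, assuming they live in application_sdk/io/utils.py (the only +307-line file in the listing); the sample record is hypothetical:

    from datetime import datetime
    from application_sdk.io.utils import (  # assumed module path
        convert_datetime_to_epoch,
        path_gen,
        process_null_fields,
    )

    # Chunked file naming: "chunk-<count>-part<part><ext>", or "<start>_<end><ext>"
    # when start/end markers are given (query extraction).
    path_gen(chunk_count=3, chunk_part=1, extension=".parquet")  # 'chunk-3-part1.parquet'
    path_gen(start_marker="100", end_marker="200")               # '100_200.json'

    # Null handling and datetime conversion for metadata payloads.
    record = {
        "name": "orders",
        "comment": None,        # dropped
        "attributes": None,     # becomes {}
        "updated_at": datetime(2024, 1, 1),
    }
    cleaned = process_null_fields(record, null_to_empty_dict_fields=["attributes"])
    # {'name': 'orders', 'attributes': {}, 'updated_at': datetime(2024, 1, 1, 0, 0)}
    convert_datetime_to_epoch(cleaned)
    # 'updated_at' becomes epoch milliseconds (timezone-dependent for naive datetimes)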
@@ -363,14 +363,14 @@ class AtlanObservability(Generic[T], ABC):
              logging.error(f"Error buffering log: {e}")
 
      async def _flush_records(self, records: List[Dict[str, Any]]):
-         """Flush records to parquet file and object store using ParquetOutput abstraction.
+         """Flush records to parquet file and object store using ParquetFileWriter.
 
          Args:
              records: List of records to flush
 
          This method:
          - Groups records by partition (year/month/day)
-         - Uses ParquetOutput abstraction for efficient writing
+         - Uses ParquetFileWriter for efficient writing
          - Automatically handles chunking, compression, and dual upload
          - Provides robust error handling per partition
          - Cleans up old records if enabled
@@ -395,7 +395,7 @@ class AtlanObservability(Generic[T], ABC):
                  partition_records[partition_path] = []
              partition_records[partition_path].append(record)
 
-         # Write records to each partition using ParquetOutput abstraction
+         # Write records to each partition using ParquetFileWriter
          for partition_path, partition_data in partition_records.items():
              # Create new dataframe from current records
              new_df = pd.DataFrame(partition_data)
@@ -412,23 +412,34 @@ class AtlanObservability(Generic[T], ABC):
                  elif part.startswith("day="):
                      new_df["day"] = int(part.split("=")[1])
 
-             # Use new data directly - let ParquetOutput handle consolidation and merging
+             # Use new data directly - let ParquetFileWriter handle consolidation and merging
              df = new_df
 
-             # Use ParquetOutput abstraction for efficient writing and uploading
+             # Use ParquetFileWriter for efficient writing and uploading
              # Set the output path for this partition
              try:
-                 # Lazy import and instantiation of ParquetOutput
-                 from application_sdk.outputs.parquet import ParquetOutput
+                 # Lazy import and instantiation of ParquetFileWriter
+                 from application_sdk.io.parquet import ParquetFileWriter
 
-                 parquet_output = ParquetOutput(
-                     output_path=partition_path,
+                 parquet_writer = ParquetFileWriter(
+                     path=partition_path,
                      chunk_start=0,
                      chunk_part=int(time()),
                  )
-                 await parquet_output.write_dataframe(dataframe=df)
-             except Exception as e:
-                 print(f"Error writing records to partition: {str(e)}")
+                 logging.info(
+                     f"Successfully instantiated ParquetFileWriter for partition: {partition_path}"
+                 )
+
+                 await parquet_writer._write_dataframe(dataframe=df)
+
+                 logging.info(
+                     f"Successfully wrote {len(df)} records to partition: {partition_path}"
+                 )
+
+             except Exception as partition_error:
+                 logging.error(
+                     f"Error processing partition {partition_path}: {str(partition_error)}"
+                 )
 
          # Clean up old records if enabled
          if self._cleanup_enabled:
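For reference, the write pattern the observability code now relies on, sketched outside the class (constructor arguments and the _write_dataframe call are copied from the hunk above; _write_dataframe is private and used here only to mirror that internal call, and the partition path is hypothetical):

    import asyncio
    from time import time

    import pandas as pd

    from application_sdk.io.parquet import ParquetFileWriter

    async def flush_partition() -> None:
        df = pd.DataFrame([{"year": 2025, "month": 1, "day": 1, "level": "INFO"}])
        writer = ParquetFileWriter(
            path="observability/year=2025/month=1/day=1",  # hypothetical partition path
            chunk_start=0,
            chunk_part=int(time()),
        )
        # Private method, mirrored from the diff above; 2.0.0 may expose a public wrapper.
        await writer._write_dataframe(dataframe=df)

    asyncio.run(flush_partition())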
@@ -8,6 +8,7 @@ from starlette.types import ASGIApp
 
  from application_sdk.observability.context import request_context
  from application_sdk.observability.logger_adaptor import get_logger
+ from application_sdk.server.fastapi.utils import EXCLUDED_LOG_PATHS
 
  logger = get_logger(__name__)
 
@@ -29,31 +30,36 @@ class LogMiddleware(BaseHTTPMiddleware):
          token = request_context.set({"request_id": request_id})
          start_time = time.time()
 
-         self.logger.info(
-             f"Request started for {request.method} {request.url.path}",
-             extra={
-                 "method": request.method,
-                 "path": request.url.path,
-                 "request_id": request_id,
-                 "url": str(request.url),
-                 "client_host": request.client.host if request.client else None,
-             },
-         )
-
-         try:
-             response = await call_next(request)
-             duration = time.time() - start_time
+         # Skip logging for health check endpoints
+         should_log = request.url.path not in EXCLUDED_LOG_PATHS
 
+         if should_log:
              self.logger.info(
-                 f"Request completed for {request.method} {request.url.path} {response.status_code}",
+                 f"Request started for {request.method} {request.url.path}",
                  extra={
                      "method": request.method,
                      "path": request.url.path,
-                     "status_code": response.status_code,
-                     "duration_ms": round(duration * 1000, 2),
                      "request_id": request_id,
+                     "url": str(request.url),
+                     "client_host": request.client.host if request.client else None,
                  },
              )
+
+         try:
+             response = await call_next(request)
+             duration = time.time() - start_time
+
+             if should_log:
+                 self.logger.info(
+                     f"Request completed for {request.method} {request.url.path} {response.status_code}",
+                     extra={
+                         "method": request.method,
+                         "path": request.url.path,
+                         "status_code": response.status_code,
+                         "duration_ms": round(duration * 1000, 2),
+                         "request_id": request_id,
+                     },
+                 )
              return response
 
          except Exception as e:
@@ -4,6 +4,7 @@ from fastapi import Request, Response
  from starlette.middleware.base import BaseHTTPMiddleware
 
  from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
+ from application_sdk.server.fastapi.utils import EXCLUDED_LOG_PATHS
 
  metrics = get_metrics()
 
@@ -24,29 +25,31 @@ class MetricsMiddleware(BaseHTTPMiddleware):
          method = request.method
          status_code = response.status_code
 
-         labels = {
-             "path": path,
-             "method": method,
-             "status": str(status_code),
-         }
-
-         # Record request count
-         metrics.record_metric(
-             name="http_requests_total",
-             value=1,
-             metric_type=MetricType.COUNTER,
-             labels=labels,
-             description="Total number of HTTP requests",
-         )
-
-         # Record request latency
-         metrics.record_metric(
-             name="http_request_duration_seconds",
-             value=process_time,
-             metric_type=MetricType.HISTOGRAM,
-             labels=labels,
-             description="Duration of HTTP requests",
-             unit="seconds",
-         )
+         # Skip metrics for health check endpoints
+         if path not in EXCLUDED_LOG_PATHS:
+             labels = {
+                 "path": path,
+                 "method": method,
+                 "status": str(status_code),
+             }
+
+             # Record request count
+             metrics.record_metric(
+                 name="http_requests_total",
+                 value=1,
+                 metric_type=MetricType.COUNTER,
+                 labels=labels,
+                 description="Total number of HTTP requests",
+             )
+
+             # Record request latency
+             metrics.record_metric(
+                 name="http_request_duration_seconds",
+                 value=process_time,
+                 metric_type=MetricType.HISTOGRAM,
+                 labels=labels,
+                 description="Duration of HTTP requests",
+                 unit="seconds",
+             )
 
          return response
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Type
 
  from pydantic import BaseModel, Field, RootModel
 
- from application_sdk.events.models import Event, EventFilter
+ from application_sdk.interceptors.models import Event, EventFilter
  from application_sdk.workflows import WorkflowInterface
 
 
@@ -68,7 +68,7 @@ async def health():
          "processor": platform.processor(),
          "ram": str(round(psutil.virtual_memory().total / (1024.0**3))) + " GB",
      }
-     logger.info("Health check passed")
+     logger.debug("Health check passed")
      return info
 
 
@@ -7,6 +7,16 @@ error handlers and response formatters.
  from fastapi import status
  from fastapi.responses import JSONResponse
 
+ # Paths to exclude from logging and metrics (health checks and event ingress)
+ EXCLUDED_LOG_PATHS: frozenset[str] = frozenset(
+     {
+         "/server/health",
+         "/server/ready",
+         "/api/eventingress/",
+         "/api/eventingress",
+     }
+ )
+
 
  def internal_server_error_handler(_, exc: Exception) -> JSONResponse:
      """Handle internal server errors in FastAPI applications.
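Both middlewares import this same constant, so the skip logic reduces to a set-membership check. A minimal sketch of the pattern introduced above (the non-excluded route shown is hypothetical):

    from application_sdk.server.fastapi.utils import EXCLUDED_LOG_PATHS

    def should_record(path: str) -> bool:
        # Health checks and event-ingress traffic are excluded from logs and metrics.
        return path not in EXCLUDED_LOG_PATHS

    should_record("/server/health")       # False
    should_record("/workflows/v1/start")  # hypothetical route -> True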
@@ -10,14 +10,14 @@ from datetime import datetime
  from dapr import clients
  from temporalio import activity, workflow
 
- from application_sdk.common.dapr_utils import is_component_registered
  from application_sdk.constants import (
      APPLICATION_NAME,
      DAPR_BINDING_OPERATION_CREATE,
      EVENT_STORE_NAME,
  )
- from application_sdk.events.models import Event, EventMetadata, WorkflowStates
+ from application_sdk.interceptors.models import Event, EventMetadata, WorkflowStates
  from application_sdk.observability.logger_adaptor import get_logger
+ from application_sdk.services._utils import is_component_registered
 
  logger = get_logger(__name__)
  activity.logger = logger
@@ -47,7 +47,7 @@ class EventStore:
      a Temporal workflow or activity context.
 
      Examples:
-         >>> from application_sdk.events.models import Event
+         >>> from application_sdk.interceptors.models import Event
 
          >>> # Create basic event
          >>> event = Event(event_type="data.processed", data={"count": 100})
@@ -109,7 +109,7 @@ class EventStore:
          Exception: If there's an error during event publishing (logged but not re-raised).
 
      Examples:
-         >>> from application_sdk.events.models import Event
+         >>> from application_sdk.interceptors.models import Event
 
          >>> # Publish workflow status event
          >>> status_event = Event(
@@ -28,6 +28,21 @@ class ObjectStore:
      OBJECT_LIST_OPERATION = "list"
      OBJECT_DELETE_OPERATION = "delete"
 
+     @staticmethod
+     def _normalize_object_store_key(path: str) -> str:
+         """Normalize a path to use forward slashes for object store keys.
+
+         Object store keys (S3, Azure Blob, GCS, local file bindings) always use
+         forward slashes as the path separator regardless of the operating system.
+
+         Args:
+             path: The path to normalize.
+
+         Returns:
+             The normalized path (forward slashes) for object store keys.
+         """
+         return path.replace(os.sep, "/")
+
      @classmethod
      def _create_file_metadata(cls, key: str) -> dict[str, str]:
          """Create metadata for file operations (get, delete, create).
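The normalization helper only rewrites separators, which is what makes the prefix comparison in the next hunk behave the same on Windows and POSIX. A small standalone sketch of the same idea (the paths are illustrative):

    import os

    def normalize_object_store_key(path: str) -> str:
        # Mirrors ObjectStore._normalize_object_store_key: object store keys always
        # use forward slashes, regardless of os.sep on the local machine.
        return path.replace(os.sep, "/")

    # On Windows (os.sep == "\\"):
    #   normalize_object_store_key(r"artifacts\apps\sql\state.json")
    #   -> "artifacts/apps/sql/state.json"
    # On POSIX the path is already normalized and is returned unchanged.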
@@ -101,18 +116,26 @@ class ObjectStore:
          else:
              return []
 
+         # Normalize prefix for cross-platform path comparison
+         normalized_prefix = (
+             cls._normalize_object_store_key(prefix) if prefix else ""
+         )
+
          valid_list = []
          for path in paths:
              if not isinstance(path, str):
                  logger.warning(f"Skipping non-string path: {path}")
                  continue
 
+             # Normalize path separators for cross-platform compatibility
+             normalized_path = cls._normalize_object_store_key(path)
+
              valid_list.append(
-                 path[path.find(prefix) :]
-                 if prefix and prefix in path
-                 else os.path.basename(path)
-                 if prefix
-                 else path
+                 normalized_path[normalized_path.find(normalized_prefix) :]
+                 if normalized_prefix and normalized_prefix in normalized_path
+                 else os.path.basename(normalized_path)
+                 if normalized_prefix
+                 else normalized_path
              )
 
          return valid_list
@@ -357,8 +380,8 @@ class ObjectStore:
                  # Calculate relative path from the base directory
                  relative_path = os.path.relpath(file_path, source)
                  # Create store key by combining prefix with relative path
-                 store_key = os.path.join(destination, relative_path).replace(
-                     os.sep, "/"
+                 store_key = cls._normalize_object_store_key(
+                     os.path.join(destination, relative_path)
                  )
                  await cls.upload_file(
                      file_path, store_key, store_name, retain_local_copy
@@ -20,7 +20,6 @@ from typing import Any, Dict
 
  from dapr.clients import DaprClient
 
- from application_sdk.common.dapr_utils import is_component_registered
  from application_sdk.common.error_codes import CommonError
  from application_sdk.constants import (
      DEPLOYMENT_NAME,
@@ -30,6 +29,7 @@ from application_sdk.constants import (
      SECRET_STORE_NAME,
  )
  from application_sdk.observability.logger_adaptor import get_logger
+ from application_sdk.services._utils import is_component_registered
  from application_sdk.services.statestore import StateStore, StateType
 
  logger = get_logger(__name__)
@@ -62,7 +62,6 @@ def dataframe_strategy(draw) -> pd.DataFrame:
  json_output_config_strategy = st.fixed_dictionaries(
      {
          "output_path": safe_path_strategy,
-         "output_suffix": st.builds(lambda x: f"/{x}", safe_path_strategy),
          "output_prefix": output_prefix_strategy,
          "chunk_size": chunk_size_strategy,
      }
@@ -2,7 +2,7 @@ import json
 
  from hypothesis import strategies as st
 
- from application_sdk.events.models import Event
+ from application_sdk.interceptors.models import Event
 
  # Strategy for generating auth credentials
  auth_credentials_strategy = st.fixed_dictionaries(