atlan-application-sdk 1.1.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- application_sdk/activities/common/sql_utils.py +312 -0
- application_sdk/activities/common/utils.py +1 -45
- application_sdk/activities/metadata_extraction/sql.py +110 -353
- application_sdk/activities/query_extraction/sql.py +12 -11
- application_sdk/application/__init__.py +1 -1
- application_sdk/clients/sql.py +167 -1
- application_sdk/clients/temporal.py +6 -6
- application_sdk/common/types.py +8 -0
- application_sdk/common/utils.py +1 -8
- application_sdk/constants.py +1 -1
- application_sdk/handlers/sql.py +10 -25
- application_sdk/interceptors/events.py +1 -1
- application_sdk/io/__init__.py +749 -0
- application_sdk/io/json.py +473 -0
- application_sdk/{outputs → io}/parquet.py +414 -47
- application_sdk/io/utils.py +307 -0
- application_sdk/observability/observability.py +16 -12
- application_sdk/server/fastapi/middleware/logmiddleware.py +23 -17
- application_sdk/server/fastapi/middleware/metrics.py +27 -24
- application_sdk/server/fastapi/models.py +1 -1
- application_sdk/server/fastapi/routers/server.py +1 -1
- application_sdk/server/fastapi/utils.py +10 -0
- application_sdk/services/eventstore.py +4 -4
- application_sdk/services/objectstore.py +14 -1
- application_sdk/services/secretstore.py +1 -1
- application_sdk/test_utils/hypothesis/strategies/outputs/json_output.py +0 -1
- application_sdk/test_utils/hypothesis/strategies/server/fastapi/__init__.py +1 -1
- application_sdk/version.py +1 -1
- application_sdk/worker.py +1 -1
- {atlan_application_sdk-1.1.1.dist-info → atlan_application_sdk-2.1.0.dist-info}/METADATA +9 -11
- {atlan_application_sdk-1.1.1.dist-info → atlan_application_sdk-2.1.0.dist-info}/RECORD +36 -43
- application_sdk/common/dataframe_utils.py +0 -42
- application_sdk/events/__init__.py +0 -5
- application_sdk/inputs/.cursor/BUGBOT.md +0 -250
- application_sdk/inputs/__init__.py +0 -168
- application_sdk/inputs/iceberg.py +0 -75
- application_sdk/inputs/json.py +0 -136
- application_sdk/inputs/parquet.py +0 -272
- application_sdk/inputs/sql_query.py +0 -271
- application_sdk/outputs/.cursor/BUGBOT.md +0 -295
- application_sdk/outputs/__init__.py +0 -453
- application_sdk/outputs/iceberg.py +0 -139
- application_sdk/outputs/json.py +0 -268
- /application_sdk/{events → interceptors}/models.py +0 -0
- /application_sdk/{common/dapr_utils.py → services/_utils.py} +0 -0
- {atlan_application_sdk-1.1.1.dist-info → atlan_application_sdk-2.1.0.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-1.1.1.dist-info → atlan_application_sdk-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-1.1.1.dist-info → atlan_application_sdk-2.1.0.dist-info}/licenses/NOTICE +0 -0
application_sdk/io/utils.py
ADDED

@@ -0,0 +1,307 @@
+import glob
+import os
+from datetime import datetime
+from typing import TYPE_CHECKING, Any, List, Optional, Union
+
+from application_sdk.activities.common.utils import get_object_store_prefix
+from application_sdk.common.error_codes import IOError
+from application_sdk.constants import TEMPORARY_PATH
+from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.objectstore import ObjectStore
+
+JSON_FILE_EXTENSION = ".json"
+PARQUET_FILE_EXTENSION = ".parquet"
+
+if TYPE_CHECKING:
+    import daft
+    import pandas as pd
+
+
+logger = get_logger(__name__)
+
+
+def find_local_files_by_extension(
+    path: str,
+    extension: str,
+    file_names: Optional[List[str]] = None,
+) -> List[str]:
+    """Find local files at the specified local path, optionally filtering by file names.
+
+    Args:
+        path (str): Local path to search in (file or directory)
+        extension (str): File extension to filter by (e.g., '.parquet', '.json')
+        file_names (Optional[List[str]]): List of file names (basenames) to filter by, paths are not supported
+
+    Returns:
+        List[str]: List of matching file paths
+
+    Example:
+        >>> find_local_files_by_extension("/data", ".parquet", ["file1.parquet", "file2.parquet"])
+        ['file1.parquet', 'file2.parquet']
+
+        >>> find_local_files_by_extension("/data/single.json", ".json")
+        ['single.json']
+    """
+    if os.path.isfile(path) and path.endswith(extension):
+        # Single file - return it directly
+        return [path]
+
+    elif os.path.isdir(path):
+        # Directory - find all files in directory
+        all_files = glob.glob(
+            os.path.join(path, "**", f"*{extension}"),
+            recursive=True,
+        )
+
+        # Filter by file names if specified
+        if file_names:
+            file_names_set = set(file_names)  # Convert to set for O(1) lookup
+            return [f for f in all_files if os.path.basename(f) in file_names_set]
+        else:
+            return all_files
+
+    return []
+
+
+async def download_files(
+    path: str, file_extension: str, file_names: Optional[List[str]] = None
+) -> List[str]:
+    """Download files from object store if not available locally.
+
+    Flow:
+    1. Check if files exist locally at self.path
+    2. If not, try to download from object store
+    3. Filter by self.file_names if provided
+    4. Return list of file paths for logging purposes
+
+    Returns:
+        List[str]: List of file paths
+
+    Raises:
+        AttributeError: When the reader class doesn't support file operations or _extension
+        IOError: When no files found locally or in object store
+    """
+    # Step 1: Check if files exist locally
+    local_files: List[str] = find_local_files_by_extension(
+        path, file_extension, file_names
+    )
+    if local_files:
+        logger.info(
+            f"Found {len(local_files)} {file_extension} files locally at: {path}"
+        )
+        return local_files
+
+    # Step 2: Try to download from object store
+    logger.info(
+        f"No local {file_extension} files found at {path}, checking object store..."
+    )
+
+    try:
+        # Determine what to download based on path type and filters
+        downloaded_paths: List[str] = []
+
+        if path.endswith(file_extension):
+            # Single file case (file_names validation already ensures this is valid)
+            source_path = get_object_store_prefix(path)
+            destination_path = os.path.join(TEMPORARY_PATH, source_path)
+            await ObjectStore.download_file(
+                source=source_path,
+                destination=destination_path,
+            )
+            downloaded_paths.append(destination_path)
+
+        elif file_names:
+            # Directory with specific files - download each file individually
+            for file_name in file_names:
+                file_path = os.path.join(path, file_name)
+                source_path = get_object_store_prefix(file_path)
+                destination_path = os.path.join(TEMPORARY_PATH, source_path)
+                await ObjectStore.download_file(
+                    source=source_path,
+                    destination=destination_path,
+                )
+                downloaded_paths.append(destination_path)
+        else:
+            # Download entire directory
+            source_path = get_object_store_prefix(path)
+            destination_path = os.path.join(TEMPORARY_PATH, source_path)
+            await ObjectStore.download_prefix(
+                source=source_path,
+                destination=destination_path,
+            )
+            # Find the actual files in the downloaded directory
+            found_files = find_local_files_by_extension(
+                destination_path, file_extension, file_names
+            )
+            downloaded_paths.extend(found_files)
+
+        # Check results
+        if downloaded_paths:
+            logger.info(
+                f"Successfully downloaded {len(downloaded_paths)} {file_extension} files from object store"
+            )
+            return downloaded_paths
+        else:
+            raise IOError(
+                f"{IOError.OBJECT_STORE_READ_ERROR}: Downloaded from object store but no {file_extension} files found"
+            )
+
+    except Exception as e:
+        logger.error(f"Failed to download from object store: {str(e)}")
+        raise IOError(
+            f"{IOError.OBJECT_STORE_DOWNLOAD_ERROR}: No {file_extension} files found locally at '{path}' and failed to download from object store. "
+            f"Error: {str(e)}"
+        )
+
+
+def estimate_dataframe_record_size(
+    dataframe: "pd.DataFrame", file_extension: str
+) -> int:
+    """Estimate File size of a DataFrame by sampling a few records.
+
+    Args:
+        dataframe (pd.DataFrame): The DataFrame to estimate the size of.
+        file_extension (str): The extension of the file to estimate the size of.
+
+    Returns:
+        int: The estimated size of the DataFrame in bytes.
+    """
+    if len(dataframe) == 0:
+        return 0
+
+    # Sample up to 10 records to estimate average size
+    sample_size = min(10, len(dataframe))
+    sample = dataframe.head(sample_size)
+    compression_factor = 1
+    if file_extension == JSON_FILE_EXTENSION:
+        sample_file = sample.to_json(orient="records", lines=True)
+    elif file_extension == PARQUET_FILE_EXTENSION:
+        sample_file = sample.to_parquet(index=False, compression="snappy")
+        compression_factor = 0.01
+    else:
+        raise ValueError(f"Unsupported file extension: {file_extension}")
+
+    if sample_file is not None:
+        avg_record_size = len(sample_file) / sample_size * compression_factor
+        return int(avg_record_size)
+
+    return 0
+
+
+def path_gen(
+    chunk_count: Optional[int] = None,
+    chunk_part: int = 0,
+    start_marker: Optional[str] = None,
+    end_marker: Optional[str] = None,
+    extension: str = ".json",
+) -> str:
+    """Generate a file path for a chunk.
+
+    Args:
+        chunk_start (Optional[int]): Starting index of the chunk, or None for single chunk.
+        chunk_count (int): Total number of chunks.
+        start_marker (Optional[str]): Start marker for query extraction.
+        end_marker (Optional[str]): End marker for query extraction.
+
+    Returns:
+        str: Generated file path for the chunk.
+    """
+    # For Query Extraction - use start and end markers without chunk count
+    if start_marker and end_marker:
+        return f"{start_marker}_{end_marker}{extension}"
+
+    # For regular chunking - include chunk count
+    if chunk_count is None:
+        return f"{str(chunk_part)}{extension}"
+    else:
+        return f"chunk-{str(chunk_count)}-part{str(chunk_part)}{extension}"
+
+
+def process_null_fields(
+    obj: Any,
+    preserve_fields: Optional[List[str]] = None,
+    null_to_empty_dict_fields: Optional[List[str]] = None,
+) -> Any:
+    """
+    By default the method removes null values from dictionaries and lists.
+    Except for the fields specified in preserve_fields.
+    And fields in null_to_empty_dict_fields are replaced with empty dict if null.
+
+    Args:
+        obj: The object to clean (dict, list, or other value)
+        preserve_fields: Optional list of field names that should be preserved even if they contain null values
+        null_to_empty_dict_fields: Optional list of field names that should be replaced with empty dict if null
+
+    Returns:
+        The cleaned object with null values removed
+    """
+    if isinstance(obj, dict):
+        result = {}
+        for k, v in obj.items():
+            # Handle null fields that should be converted to empty dicts
+            if k in (null_to_empty_dict_fields or []) and v is None:
+                result[k] = {}
+                continue
+
+            # Process the value recursively
+            processed_value = process_null_fields(
+                v, preserve_fields, null_to_empty_dict_fields
+            )
+
+            # Keep the field if it's in preserve_fields or has a non-None processed value
+            if k in (preserve_fields or []) or processed_value is not None:
+                result[k] = processed_value
+
+        return result
+    return obj
+
+
+def convert_datetime_to_epoch(data: Any) -> Any:
+    """Convert datetime objects to epoch timestamps in milliseconds.
+
+    Args:
+        data: The data to convert
+
+    Returns:
+        The converted data with datetime fields as epoch timestamps
+    """
+    if isinstance(data, datetime):
+        return int(data.timestamp() * 1000)
+    elif isinstance(data, dict):
+        return {k: convert_datetime_to_epoch(v) for k, v in data.items()}
+    elif isinstance(data, list):
+        return [convert_datetime_to_epoch(item) for item in data]
+    return data
+
+
+def is_empty_dataframe(dataframe: Union["pd.DataFrame", "daft.DataFrame"]) -> bool:  # noqa: F821
+    """Check if a DataFrame is empty.
+
+    This function determines whether a DataFrame has any rows, supporting both
+    pandas and daft DataFrame types. For pandas DataFrames, it uses the `empty`
+    property, and for daft DataFrames, it checks if the row count is 0.
+
+    Args:
+        dataframe (Union[pd.DataFrame, daft.DataFrame]): The DataFrame to check,
+            can be either a pandas DataFrame or a daft DataFrame.
+
+    Returns:
+        bool: True if the DataFrame has no rows, False otherwise.
+
+    Note:
+        If daft is not available and a daft DataFrame is passed, the function
+        will log a warning and return True.
+    """
+    import pandas as pd
+
+    if isinstance(dataframe, pd.DataFrame):
+        return dataframe.empty
+
+    try:
+        import daft
+
+        if isinstance(dataframe, daft.DataFrame):
+            return dataframe.count_rows() == 0
+    except Exception:
+        logger.warning("Module daft not found")
+    return True
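
The helpers in the new application_sdk/io/utils.py module are plain functions, so their behaviour can be checked directly. A minimal usage sketch (illustrative only, not part of the diff; it assumes the 2.1.0 wheel is installed and relies only on the signatures shown in the hunk above):

from datetime import datetime, timezone

from application_sdk.io.utils import (
    convert_datetime_to_epoch,
    path_gen,
    process_null_fields,
)

# Chunked file names: "chunk-<count>-part<part><ext>" when chunk_count is given.
print(path_gen(chunk_count=3, chunk_part=1, extension=".parquet"))   # chunk-3-part1.parquet
# Query-extraction style names use the start/end markers instead.
print(path_gen(start_marker="1700000000", end_marker="1700003600"))  # 1700000000_1700003600.json

# Nulls are dropped unless preserved or mapped to an empty dict.
record = {"name": "orders", "comment": None, "columns": None}
print(process_null_fields(record, preserve_fields=["comment"], null_to_empty_dict_fields=["columns"]))
# {'name': 'orders', 'comment': None, 'columns': {}}

# Naive datetimes use the local timezone; a tz-aware value gives a stable epoch in milliseconds.
print(convert_datetime_to_epoch({"updated_at": datetime(2024, 1, 1, tzinfo=timezone.utc)}))
# {'updated_at': 1704067200000}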
application_sdk/observability/observability.py
CHANGED

@@ -363,14 +363,14 @@ class AtlanObservability(Generic[T], ABC):
             logging.error(f"Error buffering log: {e}")
 
     async def _flush_records(self, records: List[Dict[str, Any]]):
-        """Flush records to parquet file and object store using
+        """Flush records to parquet file and object store using ParquetFileWriter.
 
         Args:
             records: List of records to flush
 
         This method:
         - Groups records by partition (year/month/day)
-        - Uses
+        - Uses ParquetFileWriter for efficient writing
         - Automatically handles chunking, compression, and dual upload
         - Provides robust error handling per partition
         - Cleans up old records if enabled
@@ -395,7 +395,7 @@ class AtlanObservability(Generic[T], ABC):
                 partition_records[partition_path] = []
             partition_records[partition_path].append(record)
 
-
+        # Write records to each partition using ParquetFileWriter
        for partition_path, partition_data in partition_records.items():
            # Create new dataframe from current records
            new_df = pd.DataFrame(partition_data)
@@ -412,23 +412,27 @@ class AtlanObservability(Generic[T], ABC):
                elif part.startswith("day="):
                    new_df["day"] = int(part.split("=")[1])
 
-            # Use new data directly - let
+            # Use new data directly - let ParquetFileWriter handle consolidation and merging
            df = new_df
 
-            # Use
+            # Use ParquetFileWriter for efficient writing and uploading
            # Set the output path for this partition
            try:
-                # Lazy import and instantiation of
-                from application_sdk.
+                # Lazy import and instantiation of ParquetFileWriter
+                from application_sdk.io.parquet import ParquetFileWriter
 
-
-
+                parquet_writer = ParquetFileWriter(
+                    path=partition_path,
                    chunk_start=0,
                    chunk_part=int(time()),
                )
-
-
-
+
+                await parquet_writer._write_dataframe(dataframe=df)
+
+            except Exception as partition_error:
+                logging.error(
+                    f"Error processing partition {partition_path}: {str(partition_error)}"
+                )
 
        # Clean up old records if enabled
        if self._cleanup_enabled:
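
Code that previously wrote these partitions through the removed application_sdk/outputs module now goes through the relocated writer. A condensed sketch of the pattern used in the hunk above (not part of the diff; the constructor arguments shown are only those visible here, and _write_dataframe is an internal helper of this release):

import asyncio
from time import time
from typing import Any, Dict, List

import pandas as pd


async def flush_partition(partition_path: str, records: List[Dict[str, Any]]) -> None:
    # Lazy import, mirroring the hunk above, so parquet dependencies stay off module import time.
    from application_sdk.io.parquet import ParquetFileWriter

    writer = ParquetFileWriter(
        path=partition_path,     # e.g. a "year=.../month=.../day=..." partition directory
        chunk_start=0,
        chunk_part=int(time()),  # timestamp-based part id, as in the diff
    )
    # Internal method used by AtlanObservability._flush_records in this release.
    await writer._write_dataframe(dataframe=pd.DataFrame(records))


# asyncio.run(flush_partition("/tmp/obs/year=2024/month=01/day=15", [{"level": "INFO"}]))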
application_sdk/server/fastapi/middleware/logmiddleware.py
CHANGED

@@ -8,6 +8,7 @@ from starlette.types import ASGIApp
 
 from application_sdk.observability.context import request_context
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.server.fastapi.utils import EXCLUDED_LOG_PATHS
 
 logger = get_logger(__name__)
 
@@ -29,31 +30,36 @@ class LogMiddleware(BaseHTTPMiddleware):
        token = request_context.set({"request_id": request_id})
        start_time = time.time()
 
-
-
-            extra={
-                "method": request.method,
-                "path": request.url.path,
-                "request_id": request_id,
-                "url": str(request.url),
-                "client_host": request.client.host if request.client else None,
-            },
-        )
-
-        try:
-            response = await call_next(request)
-            duration = time.time() - start_time
+        # Skip logging for health check endpoints
+        should_log = request.url.path not in EXCLUDED_LOG_PATHS
 
+        if should_log:
            self.logger.info(
-                f"Request
+                f"Request started for {request.method} {request.url.path}",
                extra={
                    "method": request.method,
                    "path": request.url.path,
-                    "status_code": response.status_code,
-                    "duration_ms": round(duration * 1000, 2),
                    "request_id": request_id,
+                    "url": str(request.url),
+                    "client_host": request.client.host if request.client else None,
                },
            )
+
+        try:
+            response = await call_next(request)
+            duration = time.time() - start_time
+
+            if should_log:
+                self.logger.info(
+                    f"Request completed for {request.method} {request.url.path} {response.status_code}",
+                    extra={
+                        "method": request.method,
+                        "path": request.url.path,
+                        "status_code": response.status_code,
+                        "duration_ms": round(duration * 1000, 2),
+                        "request_id": request_id,
+                    },
+                )
            return response
 
        except Exception as e:
application_sdk/server/fastapi/middleware/metrics.py
CHANGED

@@ -4,6 +4,7 @@ from fastapi import Request, Response
 from starlette.middleware.base import BaseHTTPMiddleware
 
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
+from application_sdk.server.fastapi.utils import EXCLUDED_LOG_PATHS
 
 metrics = get_metrics()
 
@@ -24,29 +25,31 @@ class MetricsMiddleware(BaseHTTPMiddleware):
        method = request.method
        status_code = response.status_code
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Skip metrics for health check endpoints
+        if path not in EXCLUDED_LOG_PATHS:
+            labels = {
+                "path": path,
+                "method": method,
+                "status": str(status_code),
+            }
+
+            # Record request count
+            metrics.record_metric(
+                name="http_requests_total",
+                value=1,
+                metric_type=MetricType.COUNTER,
+                labels=labels,
+                description="Total number of HTTP requests",
+            )
+
+            # Record request latency
+            metrics.record_metric(
+                name="http_request_duration_seconds",
+                value=process_time,
+                metric_type=MetricType.HISTOGRAM,
+                labels=labels,
+                description="Duration of HTTP requests",
+                unit="seconds",
+            )
 
        return response
application_sdk/server/fastapi/models.py
CHANGED

@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Type
 
 from pydantic import BaseModel, Field, RootModel
 
-from application_sdk.
+from application_sdk.interceptors.models import Event, EventFilter
 from application_sdk.workflows import WorkflowInterface
 
 
application_sdk/server/fastapi/utils.py
CHANGED

@@ -7,6 +7,16 @@ error handlers and response formatters.
 from fastapi import status
 from fastapi.responses import JSONResponse
 
+# Paths to exclude from logging and metrics (health checks and event ingress)
+EXCLUDED_LOG_PATHS: frozenset[str] = frozenset(
+    {
+        "/server/health",
+        "/server/ready",
+        "/api/eventingress/",
+        "/api/eventingress",
+    }
+)
+
 
 def internal_server_error_handler(_, exc: Exception) -> JSONResponse:
     """Handle internal server errors in FastAPI applications.
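
The exclusion applied by both middlewares shown earlier is a plain set-membership test on the request path, so only exact matches are skipped (which is why both /api/eventingress variants are listed). A quick illustrative check, assuming the 2.1.0 package is installed; the sample paths other than the excluded ones are hypothetical:

from application_sdk.server.fastapi.utils import EXCLUDED_LOG_PATHS

for path in ("/server/health", "/server/health/", "/api/eventingress", "/any/other/route"):
    # Same gate as LogMiddleware's should_log flag and MetricsMiddleware's metrics block.
    print(path, "->", "skipped" if path in EXCLUDED_LOG_PATHS else "logged")
# /server/health -> skipped
# /server/health/ -> logged (exact match only; the trailing-slash form is not in the set)
# /api/eventingress -> skipped
# /any/other/route -> logged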
application_sdk/services/eventstore.py
CHANGED

@@ -10,14 +10,14 @@ from datetime import datetime
 from dapr import clients
 from temporalio import activity, workflow
 
-from application_sdk.common.dapr_utils import is_component_registered
 from application_sdk.constants import (
     APPLICATION_NAME,
     DAPR_BINDING_OPERATION_CREATE,
     EVENT_STORE_NAME,
 )
-from application_sdk.
+from application_sdk.interceptors.models import Event, EventMetadata, WorkflowStates
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services._utils import is_component_registered
 
 logger = get_logger(__name__)
 activity.logger = logger
@@ -47,7 +47,7 @@ class EventStore:
     a Temporal workflow or activity context.
 
     Examples:
-        >>> from application_sdk.
+        >>> from application_sdk.interceptors.models import Event
 
         >>> # Create basic event
         >>> event = Event(event_type="data.processed", data={"count": 100})
@@ -109,7 +109,7 @@ class EventStore:
         Exception: If there's an error during event publishing (logged but not re-raised).
 
     Examples:
-        >>> from application_sdk.
+        >>> from application_sdk.interceptors.models import Event
 
         >>> # Publish workflow status event
        >>> status_event = Event(
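
The removed import lines above are truncated in this view, but the rename entry in the file list (/application_sdk/{events → interceptors}/models.py) shows where the event models went. A hedged before/after sketch for downstream code (not part of the diff):

# atlan-application-sdk 1.1.x (module path removed in 2.1.0)
# from application_sdk.events.models import Event, EventMetadata, WorkflowStates

# atlan-application-sdk 2.1.0
from application_sdk.interceptors.models import Event, EventMetadata, WorkflowStates

# Constructor usage is unchanged; this call is taken from the EventStore docstring above.
event = Event(event_type="data.processed", data={"count": 100})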
application_sdk/services/objectstore.py
CHANGED

@@ -459,9 +459,22 @@ class ObjectStore:
 
        logger.info(f"Found {len(file_list)} files to download from: {source}")
 
+        # Normalize source prefix to use forward slashes for comparison
+        normalized_source = cls._normalize_object_store_key(source)
+
        # Download each file
        for file_path in file_list:
-
+            normalized_file_path = cls._normalize_object_store_key(file_path)
+            if normalized_file_path.startswith(normalized_source):
+                # Extract relative path after the prefix
+                relative_path = normalized_file_path[
+                    len(normalized_source) :
+                ].lstrip("/")
+            else:
+                # Fallback to just the filename
+                relative_path = os.path.basename(normalized_file_path)
+
+            local_file_path = os.path.join(destination, relative_path)
            await cls.download_file(file_path, local_file_path, store_name)
 
        logger.info(f"Successfully downloaded all files from: {source}")
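
The new download_prefix logic above preserves the sub-directory structure under the prefix instead of flattening every object into the destination. A standalone sketch of the same computation (not part of the diff; _normalize here is a stand-in for the private ObjectStore._normalize_object_store_key helper, which the diff references but does not show):

import os


def _normalize(key: str) -> str:
    # Stand-in (assumption): normalize separators so the prefix comparison also works for Windows-style keys.
    return key.replace("\\", "/")


def local_path_for(source: str, file_path: str, destination: str) -> str:
    normalized_source = _normalize(source)
    normalized_file_path = _normalize(file_path)
    if normalized_file_path.startswith(normalized_source):
        # Keep everything after the prefix so nested directories survive the download.
        relative_path = normalized_file_path[len(normalized_source):].lstrip("/")
    else:
        # Fallback to just the filename, as in the diff.
        relative_path = os.path.basename(normalized_file_path)
    return os.path.join(destination, relative_path)


print(local_path_for("artifacts/raw", "artifacts/raw/tables/chunk-1-part0.parquet", "/tmp/dl"))
# /tmp/dl/tables/chunk-1-part0.parquet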
application_sdk/services/secretstore.py
CHANGED

@@ -20,7 +20,6 @@ from typing import Any, Dict
 
 from dapr.clients import DaprClient
 
-from application_sdk.common.dapr_utils import is_component_registered
 from application_sdk.common.error_codes import CommonError
 from application_sdk.constants import (
     DEPLOYMENT_NAME,
@@ -30,6 +29,7 @@ from application_sdk.constants import (
     SECRET_STORE_NAME,
 )
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services._utils import is_component_registered
 from application_sdk.services.statestore import StateStore, StateType
 
 logger = get_logger(__name__)
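
Together with the eventstore hunk above, this reflects the helper move listed as /application_sdk/{common/dapr_utils.py → services/_utils.py}. Any external caller of is_component_registered would follow the same one-line change (sketch, not part of the diff):

# atlan-application-sdk 1.1.x
# from application_sdk.common.dapr_utils import is_component_registered

# atlan-application-sdk 2.1.0 (note the leading underscore: the module is now package-internal)
from application_sdk.services._utils import is_component_registered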
application_sdk/test_utils/hypothesis/strategies/outputs/json_output.py
CHANGED

@@ -62,7 +62,6 @@ def dataframe_strategy(draw) -> pd.DataFrame:
 json_output_config_strategy = st.fixed_dictionaries(
     {
         "output_path": safe_path_strategy,
-        "output_suffix": st.builds(lambda x: f"/{x}", safe_path_strategy),
         "output_prefix": output_prefix_strategy,
         "chunk_size": chunk_size_strategy,
     }
application_sdk/version.py
CHANGED
application_sdk/worker.py
CHANGED

@@ -15,7 +15,7 @@ from temporalio.worker import Worker as TemporalWorker
 
 from application_sdk.clients.workflow import WorkflowClient
 from application_sdk.constants import DEPLOYMENT_NAME, MAX_CONCURRENT_ACTIVITIES
-from application_sdk.
+from application_sdk.interceptors.models import (
     ApplicationEventNames,
     Event,
     EventTypes,