atlan-application-sdk 0.1.1rc58__py3-none-any.whl → 0.1.1rc60__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- application_sdk/constants.py +2 -1
- application_sdk/observability/observability.py +31 -56
- application_sdk/observability/utils.py +7 -6
- application_sdk/outputs/__init__.py +1 -0
- application_sdk/outputs/parquet.py +14 -2
- application_sdk/version.py +1 -1
- {atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/RECORD +11 -11
- {atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/licenses/NOTICE +0 -0
application_sdk/constants.py
CHANGED

@@ -75,7 +75,7 @@ STATE_STORE_PATH_TEMPLATE = (
 
 # Observability Constants
 #: Directory for storing observability data
-OBSERVABILITY_DIR = "artifacts/apps/{application_name}/observability"
+OBSERVABILITY_DIR = "artifacts/apps/{application_name}/{deployment_name}/observability"
 
 # Workflow Client Constants
 #: Host address for the Temporal server
@@ -88,6 +88,7 @@ WORKFLOW_NAMESPACE = os.getenv("ATLAN_WORKFLOW_NAMESPACE", "default")
 WORKFLOW_UI_HOST = os.getenv("ATLAN_WORKFLOW_UI_HOST", "localhost")
 #: Port number for the Temporal UI
 WORKFLOW_UI_PORT = os.getenv("ATLAN_WORKFLOW_UI_PORT", "8233")
+
 #: Maximum timeout duration for workflows
 WORKFLOW_MAX_TIMEOUT_HOURS = timedelta(
     hours=int(os.getenv("ATLAN_WORKFLOW_MAX_TIMEOUT_HOURS", "1"))
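The only functional change here is the extra `{deployment_name}` segment in `OBSERVABILITY_DIR`, which keeps observability artifacts from different deployments of the same application in separate directories. A minimal sketch of how the new template expands; the application and deployment names below are hypothetical (the real values come from `APPLICATION_NAME` and `DEPLOYMENT_NAME`):

# Hypothetical values for illustration only.
OBSERVABILITY_DIR = "artifacts/apps/{application_name}/{deployment_name}/observability"
print(OBSERVABILITY_DIR.format(application_name="postgres-app", deployment_name="prod"))
# -> artifacts/apps/postgres-app/prod/observability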
application_sdk/observability/observability.py
CHANGED

@@ -16,13 +16,11 @@ from dapr.clients import DaprClient
 from pydantic import BaseModel
 
 from application_sdk.constants import (
-    DAPR_BINDING_OPERATION_CREATE,
     DEPLOYMENT_OBJECT_STORE_NAME,
     ENABLE_OBSERVABILITY_DAPR_SINK,
     LOG_FILE_NAME,
     METRICS_FILE_NAME,
     STATE_STORE_NAME,
-    TEMPORARY_PATH,
     TRACES_FILE_NAME,
 )
 from application_sdk.observability.utils import get_observability_dir
@@ -365,16 +363,23 @@ class AtlanObservability(Generic[T], ABC):
             logging.error(f"Error buffering log: {e}")
 
     async def _flush_records(self, records: List[Dict[str, Any]]):
-        """Flush records to parquet file and object store.
+        """Flush records to parquet file and object store using ParquetOutput abstraction.
 
         Args:
             records: List of records to flush
 
         This method:
-        - Groups records by partition
-        -
-        -
+        - Groups records by partition (year/month/day)
+        - Uses ParquetOutput abstraction for efficient writing
+        - Automatically handles chunking, compression, and dual upload
+        - Provides robust error handling per partition
         - Cleans up old records if enabled
+
+        Features:
+        - Automatic chunking for large datasets
+        - Dual upload support (primary + upstream if enabled)
+        - Advanced consolidation for optimal performance
+        - Fault-tolerant processing (continues on partition errors)
         """
         if not ENABLE_OBSERVABILITY_DAPR_SINK:
             return
@@ -390,30 +395,15 @@ class AtlanObservability(Generic[T], ABC):
                 partition_records[partition_path] = []
             partition_records[partition_path].append(record)
 
-        # Write records to each partition
+        # Write records to each partition using ParquetOutput abstraction
         for partition_path, partition_data in partition_records.items():
-            os.makedirs(partition_path, exist_ok=True)
-            # Use a consistent file name for each partition
-            parquet_path = os.path.join(partition_path, "data.parquet")
-
-            # Read existing data if any
-            existing_df = None
-            if os.path.exists(parquet_path):
-                try:
-                    # Read the entire parquet file without excluding any columns
-                    existing_df = pd.read_parquet(parquet_path)
-                except Exception as e:
-                    logging.error(f"Error reading existing parquet file: {e}")
-                    # If there's an error reading the existing file, we'll overwrite it
-                    existing_df = None
-
             # Create new dataframe from current records
             new_df = pd.DataFrame(partition_data)
 
-            # Extract partition values from path
-            partition_parts = os.path.basename(
-                os.
-            )
+            # Extract partition values from path and add to dataframe
+            partition_parts = os.path.basename(
+                os.path.dirname(partition_path)
+            ).split(os.sep)
             for part in partition_parts:
                 if part.startswith("year="):
                     new_df["year"] = int(part.split("=")[1])
@@ -422,38 +412,23 @@ class AtlanObservability(Generic[T], ABC):
                 elif part.startswith("day="):
                     new_df["day"] = int(part.split("=")[1])
 
-            #
-
-                df = pd.concat([existing_df, new_df], ignore_index=True)
-            else:
-                df = new_df
-
-            # Sort by timestamp to maintain order
-            df = df.sort_values("timestamp")
+            # Use new data directly - let ParquetOutput handle consolidation and merging
+            df = new_df
 
-            #
-
-
-
-
-            )
+            # Use ParquetOutput abstraction for efficient writing and uploading
+            # Set the output path for this partition
+            try:
+                # Lazy import and instantiation of ParquetOutput
+                from application_sdk.outputs.parquet import ParquetOutput
 
-
-
-
-
-
-
-
-            }
-            with DaprClient() as client:
-                client.invoke_binding(
-                    binding_name=DEPLOYMENT_OBJECT_STORE_NAME,
-                    operation=DAPR_BINDING_OPERATION_CREATE,
-                    data=file_content,
-                    binding_metadata=metadata,
-                )
+                parquet_output = ParquetOutput(
+                    output_path=partition_path,
+                    chunk_start=0,
+                    chunk_part=int(time()),
+                )
+                await parquet_output.write_dataframe(dataframe=df)
+            except Exception as e:
+                print(f"Error writing records to partition: {str(e)}")
 
         # Clean up old records if enabled
         if self._cleanup_enabled:
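The net effect of these hunks: `_flush_records` no longer reads back the partition's `data.parquet`, concatenates, sorts, and pushes bytes through a raw Dapr binding; it builds a dataframe per partition and delegates writing, consolidation, and uploading to `ParquetOutput`. A condensed sketch of the new per-partition flow, assuming records shaped like the buffered observability records; the helper name `flush_partition` is invented for illustration, and the partition-value extraction is simplified relative to the hunk above:

import os
from time import time
from typing import Any, Dict, List

import pandas as pd

from application_sdk.outputs.parquet import ParquetOutput


async def flush_partition(partition_path: str, partition_data: List[Dict[str, Any]]) -> None:
    # New records only; ParquetOutput reconciles them with previously written chunks.
    df = pd.DataFrame(partition_data)

    # Recover Hive-style partition values (year=/month=/day=) from the path.
    for part in partition_path.split(os.sep):
        for key in ("year", "month", "day"):
            if part.startswith(f"{key}="):
                df[key] = int(part.split("=")[1])

    # chunk_part=int(time()) gives each flush a distinct file name, so repeated
    # flushes into the same partition never clobber one another.
    output = ParquetOutput(output_path=partition_path, chunk_start=0, chunk_part=int(time()))
    await output.write_dataframe(dataframe=df)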
application_sdk/observability/utils.py
CHANGED

@@ -5,6 +5,7 @@ from temporalio import activity, workflow
 
 from application_sdk.constants import (
     APPLICATION_NAME,
+    DEPLOYMENT_NAME,
     OBSERVABILITY_DIR,
     TEMPORARY_PATH,
 )
@@ -26,16 +27,16 @@ class WorkflowContext(BaseModel):
 
 
 def get_observability_dir() -> str:
-    """Build the observability path.
-
-    Args:
-        path: The path to build the observability path from.
+    """Build the observability path using deployment name.
 
     Returns:
-        str: The built observability path.
+        str: The built observability path using deployment name.
     """
     return os.path.join(
-        TEMPORARY_PATH,
+        TEMPORARY_PATH,
+        OBSERVABILITY_DIR.format(
+            application_name=APPLICATION_NAME, deployment_name=DEPLOYMENT_NAME
+        ),
     )
 
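With `DEPLOYMENT_NAME` folded into the format call, the helper now returns a deployment-scoped directory. A rough illustration with hypothetical constant values (the real values come from `application_sdk.constants`):

import os

TEMPORARY_PATH = "./local/tmp"     # hypothetical
APPLICATION_NAME = "postgres-app"  # hypothetical
DEPLOYMENT_NAME = "tenant-a"       # hypothetical
OBSERVABILITY_DIR = "artifacts/apps/{application_name}/{deployment_name}/observability"

print(os.path.join(TEMPORARY_PATH, OBSERVABILITY_DIR.format(
    application_name=APPLICATION_NAME, deployment_name=DEPLOYMENT_NAME
)))
# -> ./local/tmp/artifacts/apps/postgres-app/tenant-a/observability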
application_sdk/outputs/parquet.py
CHANGED

@@ -8,7 +8,11 @@ from temporalio import activity
 
 from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.common.dataframe_utils import is_empty_dataframe
-from application_sdk.constants import
+from application_sdk.constants import (
+    DAPR_MAX_GRPC_MESSAGE_LENGTH,
+    ENABLE_ATLAN_UPLOAD,
+    UPSTREAM_OBJECT_STORE_NAME,
+)
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.outputs import Output
@@ -61,6 +65,7 @@ class ParquetOutput(Output):
         buffer_size: int = 5000,
         total_record_count: int = 0,
         chunk_count: int = 0,
+        chunk_part: int = 0,
         chunk_start: Optional[int] = None,
         start_marker: Optional[str] = None,
         end_marker: Optional[str] = None,
@@ -101,7 +106,7 @@ class ParquetOutput(Output):
             DAPR_MAX_GRPC_MESSAGE_LENGTH * 0.75
         )  # 75% of DAPR limit as safety buffer
         self.chunk_start = chunk_start
-        self.chunk_part =
+        self.chunk_part = chunk_part
         self.start_marker = start_marker
         self.end_marker = end_marker
         self.partitions = []
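The new `chunk_part` parameter threads a caller-supplied starting part number into the instance instead of leaving it fixed at construction; the observability flush above exploits this by passing a timestamp so each flush writes uniquely named files. A sketch of both call styles, assuming the constructor signature shown in the hunk (the output paths are hypothetical):

from time import time

from application_sdk.outputs.parquet import ParquetOutput

# Default: part numbering starts at 0.
output = ParquetOutput(output_path="/tmp/metrics")

# Timestamp-seeded: each flush produces distinct chunk file names.
output = ParquetOutput(output_path="/tmp/metrics", chunk_start=0, chunk_part=int(time()))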
@@ -269,6 +274,13 @@ class ParquetOutput(Output):
                 f"No files found under prefix {get_object_store_prefix(self.output_path)}: {str(e)}"
             )
         for path in file_paths:
+            if ENABLE_ATLAN_UPLOAD:
+                await ObjectStore.upload_file(
+                    source=path,
+                    store_name=UPSTREAM_OBJECT_STORE_NAME,
+                    destination=get_object_store_prefix(path),
+                    retain_local_copy=True,
+                )
             await ObjectStore.upload_file(
                 source=path,
                 destination=get_object_store_prefix(path),
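Design note on the dual upload: the upstream (Atlan) copy is written first with `retain_local_copy=True` because the pre-existing upload to the primary store still needs the local file afterwards. Shown in isolation, with the same calls as the hunk above, the loop now behaves roughly like this:

for path in file_paths:
    if ENABLE_ATLAN_UPLOAD:
        # Extra copy to the upstream store; keep the local file so the
        # primary upload below can still read it.
        await ObjectStore.upload_file(
            source=path,
            store_name=UPSTREAM_OBJECT_STORE_NAME,
            destination=get_object_store_prefix(path),
            retain_local_copy=True,
        )
    # Primary object store upload, unchanged from rc58.
    await ObjectStore.upload_file(source=path, destination=get_object_store_prefix(path))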
application_sdk/version.py
CHANGED
{atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: atlan-application-sdk
-Version: 0.1.1rc58
+Version: 0.1.1rc60
 Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
 Project-URL: Repository, https://github.com/atlanhq/application-sdk
 Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
{atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/RECORD
RENAMED

@@ -1,6 +1,6 @@
 application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
-application_sdk/constants.py,sha256=
-application_sdk/version.py,sha256=
+application_sdk/constants.py,sha256=ySrjME6CSoiyjVLgQt0s4dIdgJ0JWIMmMs7WBZBVUpA,11013
+application_sdk/version.py,sha256=2aOWC-Y08sXYBsPMJehGpskX5CTH1TNdtzY-QR-Ub8o,88
 application_sdk/worker.py,sha256=i5f0AeKI39IfsLO05QkwC6uMz0zDPSJqP7B2byri1VI,7489
 application_sdk/activities/__init__.py,sha256=L5WXkTwOwGtjWAlXrUJRCKGwyIyp3z8fBv8BZVCRFQI,11175
 application_sdk/activities/lock_management.py,sha256=6Wdf3jMKitoarHQP91PIJOoGFz4aaOLS_40c7n1yAOA,3902
@@ -73,14 +73,14 @@ application_sdk/interceptors/.cursor/BUGBOT.md,sha256=pxmUF2c7dtaXAX8yAa1-LBa6FC
 application_sdk/observability/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/observability/logger_adaptor.py,sha256=00c0F7maDkp1xrHttW6VQbWFDGr6NkXeDPjmf97ojlY,29989
 application_sdk/observability/metrics_adaptor.py,sha256=5Oz02lUED60duryoVDF9mbD11fpxhbXi7P1609n_15Y,16446
-application_sdk/observability/observability.py,sha256=
+application_sdk/observability/observability.py,sha256=MGxNFPx6pOdpWrpNXZp44NPk3SG4xjA9cKrTeZ1ENK8,23681
 application_sdk/observability/traces_adaptor.py,sha256=0eQJPN-tYA_dV8D3uEa5ZiX9g12NDuLnPaFuQMVDdL0,18242
-application_sdk/observability/utils.py,sha256=
+application_sdk/observability/utils.py,sha256=JoHEA68cjkXTnAXHzgiULYOzRTk8rG4kPZRvFYah3aU,2505
 application_sdk/observability/decorators/observability_decorator.py,sha256=yd6qfrg1MmH5KcZ5Ydzb0RaBzmxx5FrmiI9qwvZx3EU,8963
-application_sdk/outputs/__init__.py,sha256=
+application_sdk/outputs/__init__.py,sha256=dekaEqJEVAmXQYSy_AohXOHNNI56OXG3Xn27FFRmoPQ,15926
 application_sdk/outputs/iceberg.py,sha256=TdppOMEMfojMhGyBmhWeu1AJQexRyHM-huAYeJmhjdY,5533
 application_sdk/outputs/json.py,sha256=gYDDNOVb8EFxxeOkb6zKWZWjTEVgZLoapFM97_roK4A,10883
-application_sdk/outputs/parquet.py,sha256=
+application_sdk/outputs/parquet.py,sha256=DxcKh1IXPdiXNQJS1HIn6-JRdLkmN4At8uF1zppiZX0,20762
 application_sdk/outputs/.cursor/BUGBOT.md,sha256=KxEC3CIyRSK1YftZou5BgKc6PRXT3qQmBNFJp-HSyYE,11496
 application_sdk/server/__init__.py,sha256=KTqE1YPw_3WDVMWatJUuf9OOiobLM2K5SMaBrI62sCo,1568
 application_sdk/server/.cursor/BUGBOT.md,sha256=p_MMoWUW5G1894WfOKYReZKWCuyJT_OJz3rL5g21NbI,16566
@@ -157,8 +157,8 @@ application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bg
 application_sdk/workflows/metadata_extraction/sql.py,sha256=6ZaVt84n-8U2ZvR9GR7uIJKv5v8CuyQjhlnoRJvDszc,12435
 application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
 application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
+atlan_application_sdk-0.1.1rc60.dist-info/METADATA,sha256=vbNpysrGHIBYdJGSe-s8V3km-Amr5xeXIbGunIDHnnI,5730
+atlan_application_sdk-0.1.1rc60.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+atlan_application_sdk-0.1.1rc60.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+atlan_application_sdk-0.1.1rc60.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
+atlan_application_sdk-0.1.1rc60.dist-info/RECORD,,
{atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/WHEEL
RENAMED
File without changes

{atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/licenses/LICENSE
RENAMED
File without changes

{atlan_application_sdk-0.1.1rc58.dist-info → atlan_application_sdk-0.1.1rc60.dist-info}/licenses/NOTICE
RENAMED
File without changes