atlan-application-sdk 0.1.1rc58__py3-none-any.whl → 0.1.1rc59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -75,7 +75,7 @@ STATE_STORE_PATH_TEMPLATE = (
 
 # Observability Constants
 #: Directory for storing observability data
-OBSERVABILITY_DIR = "artifacts/apps/{application_name}/observability"
+OBSERVABILITY_DIR = "artifacts/apps/{application_name}/{deployment_name}/observability"
 
 # Workflow Client Constants
 #: Host address for the Temporal server
@@ -88,6 +88,7 @@ WORKFLOW_NAMESPACE = os.getenv("ATLAN_WORKFLOW_NAMESPACE", "default")
 WORKFLOW_UI_HOST = os.getenv("ATLAN_WORKFLOW_UI_HOST", "localhost")
 #: Port number for the Temporal UI
 WORKFLOW_UI_PORT = os.getenv("ATLAN_WORKFLOW_UI_PORT", "8233")
+
 #: Maximum timeout duration for workflows
 WORKFLOW_MAX_TIMEOUT_HOURS = timedelta(
     hours=int(os.getenv("ATLAN_WORKFLOW_MAX_TIMEOUT_HOURS", "1"))
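The observability directory template now includes a {deployment_name} segment, so artifacts from different deployments of the same application land in separate paths. A minimal sketch of how the new template resolves; the application and deployment names below are placeholder examples, not values shipped by the SDK:

    # Template copied from the new constants.py; the names passed to format() are hypothetical.
    OBSERVABILITY_DIR = "artifacts/apps/{application_name}/{deployment_name}/observability"

    path = OBSERVABILITY_DIR.format(application_name="my-app", deployment_name="prod")
    # -> "artifacts/apps/my-app/prod/observability"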
@@ -10,19 +10,18 @@ from pathlib import Path
 from time import time
 from typing import Any, Dict, Generic, List, TypeVar
 
+import daft
 import duckdb
 import pandas as pd
 from dapr.clients import DaprClient
 from pydantic import BaseModel
 
 from application_sdk.constants import (
-    DAPR_BINDING_OPERATION_CREATE,
     DEPLOYMENT_OBJECT_STORE_NAME,
     ENABLE_OBSERVABILITY_DAPR_SINK,
     LOG_FILE_NAME,
     METRICS_FILE_NAME,
     STATE_STORE_NAME,
-    TEMPORARY_PATH,
     TRACES_FILE_NAME,
 )
 from application_sdk.observability.utils import get_observability_dir
@@ -365,16 +364,23 @@ class AtlanObservability(Generic[T], ABC):
             logging.error(f"Error buffering log: {e}")
 
     async def _flush_records(self, records: List[Dict[str, Any]]):
-        """Flush records to parquet file and object store.
+        """Flush records to parquet file and object store using ParquetOutput abstraction.
 
         Args:
             records: List of records to flush
 
         This method:
-        - Groups records by partition
-        - Writes records to each partition
-        - Uploads to object store if enabled
+        - Groups records by partition (year/month/day)
+        - Uses ParquetOutput abstraction for efficient writing
+        - Automatically handles chunking, compression, and dual upload
+        - Provides robust error handling per partition
         - Cleans up old records if enabled
+
+        Features:
+        - Automatic chunking for large datasets
+        - Dual upload support (primary + upstream if enabled)
+        - Advanced consolidation for optimal performance
+        - Fault-tolerant processing (continues on partition errors)
         """
         if not ENABLE_OBSERVABILITY_DAPR_SINK:
             return
@@ -390,30 +396,15 @@ class AtlanObservability(Generic[T], ABC):
                 partition_records[partition_path] = []
             partition_records[partition_path].append(record)
 
-        # Write records to each partition
+        # Write records to each partition using ParquetOutput abstraction
         for partition_path, partition_data in partition_records.items():
-            os.makedirs(partition_path, exist_ok=True)
-            # Use a consistent file name for each partition
-            parquet_path = os.path.join(partition_path, "data.parquet")
-
-            # Read existing data if any
-            existing_df = None
-            if os.path.exists(parquet_path):
-                try:
-                    # Read the entire parquet file without excluding any columns
-                    existing_df = pd.read_parquet(parquet_path)
-                except Exception as e:
-                    logging.error(f"Error reading existing parquet file: {e}")
-                    # If there's an error reading the existing file, we'll overwrite it
-                    existing_df = None
-
             # Create new dataframe from current records
             new_df = pd.DataFrame(partition_data)
 
-            # Extract partition values from path
-            partition_parts = os.path.basename(os.path.dirname(parquet_path)).split(
-                os.sep
-            )
+            # Extract partition values from path and add to dataframe
+            partition_parts = os.path.basename(
+                os.path.dirname(partition_path)
+            ).split(os.sep)
             for part in partition_parts:
                 if part.startswith("year="):
                     new_df["year"] = int(part.split("=")[1])
@@ -422,38 +413,37 @@ class AtlanObservability(Generic[T], ABC):
                 elif part.startswith("day="):
                     new_df["day"] = int(part.split("=")[1])
 
-            # Merge with existing data if any
-            if existing_df is not None:
-                df = pd.concat([existing_df, new_df], ignore_index=True)
-            else:
-                df = new_df
+            # Use new data directly - let ParquetOutput handle consolidation and merging
+            df = new_df
 
-            # Sort by timestamp to maintain order
-            df = df.sort_values("timestamp")
+            # Use ParquetOutput abstraction for efficient writing and uploading
+            # Set the output path for this partition
+            try:
+                # Lazy import and instantiation of ParquetOutput
+                from application_sdk.outputs.parquet import ParquetOutput
 
-            # Write to parquet file
-            df.to_parquet(
-                parquet_path,
-                compression="snappy",
-                index=False,
-            )
+                parquet_output = ParquetOutput(output_path=partition_path)
+                logging.info(
+                    f"Successfully instantiated ParquetOutput for partition: {partition_path}"
+                )
 
-            # Upload to object store
-            with open(parquet_path, "rb") as f:
-                file_content = f.read()
-            relative_path = os.path.relpath(parquet_path, TEMPORARY_PATH)
-            metadata = {
-                "key": relative_path,
-                "blobName": relative_path,
-                "fileName": relative_path,
-            }
-            with DaprClient() as client:
-                client.invoke_binding(
-                    binding_name=DEPLOYMENT_OBJECT_STORE_NAME,
-                    operation=DAPR_BINDING_OPERATION_CREATE,
-                    data=file_content,
-                    binding_metadata=metadata,
-                )
+                # Use write_daft_dataframe with the DataFrame we have
+                from application_sdk.outputs.parquet import WriteMode
+
+                daft_df = daft.from_pandas(df)
+                await parquet_output.write_daft_dataframe(
+                    dataframe=daft_df,
+                    write_mode=WriteMode.APPEND,  # Append mode to merge with existing data
+                )
+
+                logging.info(
+                    f"Successfully wrote {len(df)} records to partition: {partition_path}"
+                )
+
+            except Exception as partition_error:
+                logging.error(
+                    f"Error processing partition {partition_path}: {str(partition_error)}"
+                )
 
         # Clean up old records if enabled
         if self._cleanup_enabled:
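With this change, per-partition flushing no longer reads, merges, and rewrites parquet files by hand or invokes the Dapr binding directly; each partition's records are handed to ParquetOutput, which handles chunking, compression, and upload. A simplified sketch of the new per-partition path, using only the calls visible in this hunk; the flush_partition wrapper itself is hypothetical:

    import daft
    import pandas as pd

    from application_sdk.outputs.parquet import ParquetOutput, WriteMode


    async def flush_partition(partition_path: str, partition_data: list) -> None:
        # Build a pandas frame from the buffered records for this partition
        df = pd.DataFrame(partition_data)

        # Hand the frame to ParquetOutput; APPEND merges with data already on disk
        parquet_output = ParquetOutput(output_path=partition_path)
        await parquet_output.write_daft_dataframe(
            dataframe=daft.from_pandas(df),
            write_mode=WriteMode.APPEND,
        )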
@@ -5,6 +5,7 @@ from temporalio import activity, workflow
 
 from application_sdk.constants import (
     APPLICATION_NAME,
+    DEPLOYMENT_NAME,
     OBSERVABILITY_DIR,
     TEMPORARY_PATH,
 )
@@ -26,16 +27,16 @@ class WorkflowContext(BaseModel):
 
 
 def get_observability_dir() -> str:
-    """Build the observability path.
-
-    Args:
-        path: The path to build the observability path from.
+    """Build the observability path using deployment name.
 
     Returns:
-        str: The built observability path.
+        str: The built observability path using deployment name.
     """
     return os.path.join(
-        TEMPORARY_PATH, OBSERVABILITY_DIR.format(application_name=APPLICATION_NAME)
+        TEMPORARY_PATH,
+        OBSERVABILITY_DIR.format(
+            application_name=APPLICATION_NAME, deployment_name=DEPLOYMENT_NAME
+        ),
     )
 
 
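Callers of get_observability_dir do not need to change; the deployment segment is picked up from the SDK constants. A usage sketch, assuming APPLICATION_NAME and DEPLOYMENT_NAME resolve from the environment the SDK reads:

    from application_sdk.observability.utils import get_observability_dir

    # Resolves under TEMPORARY_PATH to
    # artifacts/apps/<application_name>/<deployment_name>/observability
    obs_dir = get_observability_dir()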
@@ -8,7 +8,11 @@ from temporalio import activity
 
 from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.common.dataframe_utils import is_empty_dataframe
-from application_sdk.constants import DAPR_MAX_GRPC_MESSAGE_LENGTH
+from application_sdk.constants import (
+    DAPR_MAX_GRPC_MESSAGE_LENGTH,
+    ENABLE_ATLAN_UPLOAD,
+    UPSTREAM_OBJECT_STORE_NAME,
+)
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.outputs import Output
@@ -269,6 +273,13 @@ class ParquetOutput(Output):
                 f"No files found under prefix {get_object_store_prefix(self.output_path)}: {str(e)}"
             )
         for path in file_paths:
+            if ENABLE_ATLAN_UPLOAD:
+                await ObjectStore.upload_file(
+                    source=path,
+                    store_name=UPSTREAM_OBJECT_STORE_NAME,
+                    destination=get_object_store_prefix(path),
+                    retain_local_copy=True,
+                )
             await ObjectStore.upload_file(
                 source=path,
                 destination=get_object_store_prefix(path),
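When ENABLE_ATLAN_UPLOAD is set, each file is now pushed twice: first to the upstream (Atlan) object store, then to the default store. A condensed view of the resulting loop; the trailing arguments of the second upload_file call are cut off in the hunk above, so only the visible ones appear here:

    for path in file_paths:
        if ENABLE_ATLAN_UPLOAD:
            # Copy to the upstream store, keeping the local file for the next upload
            await ObjectStore.upload_file(
                source=path,
                store_name=UPSTREAM_OBJECT_STORE_NAME,
                destination=get_object_store_prefix(path),
                retain_local_copy=True,
            )
        # Upload to the default object store (further arguments omitted)
        await ObjectStore.upload_file(
            source=path,
            destination=get_object_store_prefix(path),
        )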
@@ -2,4 +2,4 @@
 Version information for the application_sdk package.
 """
 
-__version__ = "0.1.1rc58"
+__version__ = "0.1.1rc59"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: atlan-application-sdk
-Version: 0.1.1rc58
+Version: 0.1.1rc59
 Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
 Project-URL: Repository, https://github.com/atlanhq/application-sdk
 Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
@@ -1,6 +1,6 @@
 application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
-application_sdk/constants.py,sha256=WDw0I9u_OoXIE79IFDFBHJYmIS9KAsghMeGOxfshnpg,10994
-application_sdk/version.py,sha256=GfLIvCy7o6Bup3dK-TlbQNu-KhQHzePlpsQ45bXgCqg,88
+application_sdk/constants.py,sha256=ySrjME6CSoiyjVLgQt0s4dIdgJ0JWIMmMs7WBZBVUpA,11013
+application_sdk/version.py,sha256=sXMApD_x-2esC_2hKV1Jo0L6_WWdwAw9zFGgtDsWblQ,88
 application_sdk/worker.py,sha256=i5f0AeKI39IfsLO05QkwC6uMz0zDPSJqP7B2byri1VI,7489
 application_sdk/activities/__init__.py,sha256=L5WXkTwOwGtjWAlXrUJRCKGwyIyp3z8fBv8BZVCRFQI,11175
 application_sdk/activities/lock_management.py,sha256=6Wdf3jMKitoarHQP91PIJOoGFz4aaOLS_40c7n1yAOA,3902
@@ -73,14 +73,14 @@ application_sdk/interceptors/.cursor/BUGBOT.md,sha256=pxmUF2c7dtaXAX8yAa1-LBa6FC
 application_sdk/observability/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/observability/logger_adaptor.py,sha256=00c0F7maDkp1xrHttW6VQbWFDGr6NkXeDPjmf97ojlY,29989
 application_sdk/observability/metrics_adaptor.py,sha256=5Oz02lUED60duryoVDF9mbD11fpxhbXi7P1609n_15Y,16446
-application_sdk/observability/observability.py,sha256=DP0I4bHyg3TA4hxCqDFy2IiRmBGOpZ7449m7BUoc_RA,24530
+application_sdk/observability/observability.py,sha256=7gRws1Lg1bM6WWL5Y-ZJOxDMHDx1tKqo8Qw-xxSkDEs,24312
 application_sdk/observability/traces_adaptor.py,sha256=0eQJPN-tYA_dV8D3uEa5ZiX9g12NDuLnPaFuQMVDdL0,18242
-application_sdk/observability/utils.py,sha256=MKEpT0WYtpATUgLgJDkGQaAP_t-jpDYMUKDfEvr8Phg,2448
+application_sdk/observability/utils.py,sha256=JoHEA68cjkXTnAXHzgiULYOzRTk8rG4kPZRvFYah3aU,2505
 application_sdk/observability/decorators/observability_decorator.py,sha256=yd6qfrg1MmH5KcZ5Ydzb0RaBzmxx5FrmiI9qwvZx3EU,8963
 application_sdk/outputs/__init__.py,sha256=hrOPw0xuG9xP720Bt309TfbY2Qq_i51R8Xt3ZjwWDUY,15906
 application_sdk/outputs/iceberg.py,sha256=TdppOMEMfojMhGyBmhWeu1AJQexRyHM-huAYeJmhjdY,5533
 application_sdk/outputs/json.py,sha256=gYDDNOVb8EFxxeOkb6zKWZWjTEVgZLoapFM97_roK4A,10883
-application_sdk/outputs/parquet.py,sha256=lEUosbdLfLhZZPS9lZJxnQHEbLc5e1O7JeiDX958frw,20330
+application_sdk/outputs/parquet.py,sha256=bAv-IrWLNFqF-j8D7jSDi0ug1qV_qBTA3dDxK2eiFcA,20724
 application_sdk/outputs/.cursor/BUGBOT.md,sha256=KxEC3CIyRSK1YftZou5BgKc6PRXT3qQmBNFJp-HSyYE,11496
 application_sdk/server/__init__.py,sha256=KTqE1YPw_3WDVMWatJUuf9OOiobLM2K5SMaBrI62sCo,1568
 application_sdk/server/.cursor/BUGBOT.md,sha256=p_MMoWUW5G1894WfOKYReZKWCuyJT_OJz3rL5g21NbI,16566
@@ -157,8 +157,8 @@ application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bg
 application_sdk/workflows/metadata_extraction/sql.py,sha256=6ZaVt84n-8U2ZvR9GR7uIJKv5v8CuyQjhlnoRJvDszc,12435
 application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
 application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
-atlan_application_sdk-0.1.1rc58.dist-info/METADATA,sha256=sLmqBn4MVdNZQluPCk4z_iItSspSM4W_W1RZGqyPL1c,5730
-atlan_application_sdk-0.1.1rc58.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-atlan_application_sdk-0.1.1rc58.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-atlan_application_sdk-0.1.1rc58.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
-atlan_application_sdk-0.1.1rc58.dist-info/RECORD,,
+atlan_application_sdk-0.1.1rc59.dist-info/METADATA,sha256=f39Mtuyd4ASvIaEwAGtaGyWlSx0JqbFFfa2fNA20P_Y,5730
+atlan_application_sdk-0.1.1rc59.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+atlan_application_sdk-0.1.1rc59.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+atlan_application_sdk-0.1.1rc59.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
+atlan_application_sdk-0.1.1rc59.dist-info/RECORD,,