atlan-application-sdk 0.1.1rc58__py3-none-any.whl → 0.1.1rc60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
application_sdk/constants.py

@@ -75,7 +75,7 @@ STATE_STORE_PATH_TEMPLATE = (
 
 # Observability Constants
 #: Directory for storing observability data
-OBSERVABILITY_DIR = "artifacts/apps/{application_name}/observability"
+OBSERVABILITY_DIR = "artifacts/apps/{application_name}/{deployment_name}/observability"
 
 # Workflow Client Constants
 #: Host address for the Temporal server
@@ -88,6 +88,7 @@ WORKFLOW_NAMESPACE = os.getenv("ATLAN_WORKFLOW_NAMESPACE", "default")
 WORKFLOW_UI_HOST = os.getenv("ATLAN_WORKFLOW_UI_HOST", "localhost")
 #: Port number for the Temporal UI
 WORKFLOW_UI_PORT = os.getenv("ATLAN_WORKFLOW_UI_PORT", "8233")
+
 #: Maximum timeout duration for workflows
 WORKFLOW_MAX_TIMEOUT_HOURS = timedelta(
     hours=int(os.getenv("ATLAN_WORKFLOW_MAX_TIMEOUT_HOURS", "1"))
application_sdk/observability/observability.py

@@ -16,13 +16,11 @@ from dapr.clients import DaprClient
 from pydantic import BaseModel
 
 from application_sdk.constants import (
-    DAPR_BINDING_OPERATION_CREATE,
     DEPLOYMENT_OBJECT_STORE_NAME,
     ENABLE_OBSERVABILITY_DAPR_SINK,
     LOG_FILE_NAME,
     METRICS_FILE_NAME,
     STATE_STORE_NAME,
-    TEMPORARY_PATH,
     TRACES_FILE_NAME,
 )
 from application_sdk.observability.utils import get_observability_dir
@@ -365,16 +363,23 @@ class AtlanObservability(Generic[T], ABC):
             logging.error(f"Error buffering log: {e}")
 
     async def _flush_records(self, records: List[Dict[str, Any]]):
-        """Flush records to parquet file and object store.
+        """Flush records to parquet file and object store using ParquetOutput abstraction.
 
         Args:
             records: List of records to flush
 
         This method:
-        - Groups records by partition
-        - Writes records to each partition
-        - Uploads to object store if enabled
+        - Groups records by partition (year/month/day)
+        - Uses ParquetOutput abstraction for efficient writing
+        - Automatically handles chunking, compression, and dual upload
+        - Provides robust error handling per partition
         - Cleans up old records if enabled
+
+        Features:
+        - Automatic chunking for large datasets
+        - Dual upload support (primary + upstream if enabled)
+        - Advanced consolidation for optimal performance
+        - Fault-tolerant processing (continues on partition errors)
         """
         if not ENABLE_OBSERVABILITY_DAPR_SINK:
             return
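The partition grouping the docstring refers to keys records by a year/month/day directory layout. The following is only a hypothetical illustration of what such a partition directory looks like; the SDK's actual partition-path helper is not part of this diff:

    import os
    from datetime import datetime, timezone

    # Hypothetical sketch: map a record's timestamp to a year/month/day partition directory.
    def partition_dir(base_dir: str, timestamp: float) -> str:
        ts = datetime.fromtimestamp(timestamp, tz=timezone.utc)
        return os.path.join(
            base_dir, f"year={ts.year}", f"month={ts.month:02d}", f"day={ts.day:02d}"
        )

    # e.g. partition_dir("/tmp/observability", 1718000000.0)
    #   -> "/tmp/observability/year=2024/month=06/day=10"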
@@ -390,30 +395,15 @@ class AtlanObservability(Generic[T], ABC):
                 partition_records[partition_path] = []
             partition_records[partition_path].append(record)
 
-        # Write records to each partition
+        # Write records to each partition using ParquetOutput abstraction
         for partition_path, partition_data in partition_records.items():
-            os.makedirs(partition_path, exist_ok=True)
-            # Use a consistent file name for each partition
-            parquet_path = os.path.join(partition_path, "data.parquet")
-
-            # Read existing data if any
-            existing_df = None
-            if os.path.exists(parquet_path):
-                try:
-                    # Read the entire parquet file without excluding any columns
-                    existing_df = pd.read_parquet(parquet_path)
-                except Exception as e:
-                    logging.error(f"Error reading existing parquet file: {e}")
-                    # If there's an error reading the existing file, we'll overwrite it
-                    existing_df = None
-
             # Create new dataframe from current records
             new_df = pd.DataFrame(partition_data)
 
-            # Extract partition values from path
-            partition_parts = os.path.basename(os.path.dirname(parquet_path)).split(
-                os.sep
-            )
+            # Extract partition values from path and add to dataframe
+            partition_parts = os.path.basename(
+                os.path.dirname(partition_path)
+            ).split(os.sep)
             for part in partition_parts:
                 if part.startswith("year="):
                     new_df["year"] = int(part.split("=")[1])
@@ -422,38 +412,23 @@ class AtlanObservability(Generic[T], ABC):
                 elif part.startswith("day="):
                     new_df["day"] = int(part.split("=")[1])
 
-            # Merge with existing data if any
-            if existing_df is not None:
-                df = pd.concat([existing_df, new_df], ignore_index=True)
-            else:
-                df = new_df
-
-            # Sort by timestamp to maintain order
-            df = df.sort_values("timestamp")
+            # Use new data directly - let ParquetOutput handle consolidation and merging
+            df = new_df
 
-            # Write to parquet file
-            df.to_parquet(
-                parquet_path,
-                compression="snappy",
-                index=False,
-            )
+            # Use ParquetOutput abstraction for efficient writing and uploading
+            # Set the output path for this partition
+            try:
+                # Lazy import and instantiation of ParquetOutput
+                from application_sdk.outputs.parquet import ParquetOutput
 
-            # Upload to object store
-            with open(parquet_path, "rb") as f:
-                file_content = f.read()
-                relative_path = os.path.relpath(parquet_path, TEMPORARY_PATH)
-                metadata = {
-                    "key": relative_path,
-                    "blobName": relative_path,
-                    "fileName": relative_path,
-                }
-                with DaprClient() as client:
-                    client.invoke_binding(
-                        binding_name=DEPLOYMENT_OBJECT_STORE_NAME,
-                        operation=DAPR_BINDING_OPERATION_CREATE,
-                        data=file_content,
-                        binding_metadata=metadata,
-                    )
+                parquet_output = ParquetOutput(
+                    output_path=partition_path,
+                    chunk_start=0,
+                    chunk_part=int(time()),
+                )
+                await parquet_output.write_dataframe(dataframe=df)
+            except Exception as e:
+                print(f"Error writing records to partition: {str(e)}")
 
         # Clean up old records if enabled
         if self._cleanup_enabled:
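Taken together, the hunks above replace the manual read/merge/write and the Dapr binding call with one ParquetOutput call per partition. A minimal, hedged sketch of that per-partition flow, using only the constructor arguments and method visible in this diff (the wrapper function and its values are illustrative):

    from time import time
    from typing import Any, Dict, List

    import pandas as pd

    from application_sdk.outputs.parquet import ParquetOutput

    async def flush_partition(partition_path: str, partition_data: List[Dict[str, Any]]) -> None:
        # One dataframe per partition, built from the buffered records.
        df = pd.DataFrame(partition_data)
        # A time-based chunk_part keeps each flush distinct; writing, chunking,
        # and object-store upload are delegated to ParquetOutput.
        output = ParquetOutput(output_path=partition_path, chunk_start=0, chunk_part=int(time()))
        await output.write_dataframe(dataframe=df)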
application_sdk/observability/utils.py

@@ -5,6 +5,7 @@ from temporalio import activity, workflow
 
 from application_sdk.constants import (
     APPLICATION_NAME,
+    DEPLOYMENT_NAME,
     OBSERVABILITY_DIR,
     TEMPORARY_PATH,
 )
@@ -26,16 +27,16 @@ class WorkflowContext(BaseModel):
 
 
 def get_observability_dir() -> str:
-    """Build the observability path.
-
-    Args:
-        path: The path to build the observability path from.
+    """Build the observability path using deployment name.
 
     Returns:
-        str: The built observability path.
+        str: The built observability path using deployment name.
     """
     return os.path.join(
-        TEMPORARY_PATH, OBSERVABILITY_DIR.format(application_name=APPLICATION_NAME)
+        TEMPORARY_PATH,
+        OBSERVABILITY_DIR.format(
+            application_name=APPLICATION_NAME, deployment_name=DEPLOYMENT_NAME
+        ),
     )
 
 
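With illustrative values for the constants involved, the updated helper resolves to a deployment-scoped directory; a minimal sketch (all values below are hypothetical):

    import os

    TEMPORARY_PATH = "./local/tmp"      # hypothetical
    APPLICATION_NAME = "my-app"         # hypothetical
    DEPLOYMENT_NAME = "prod"            # hypothetical
    OBSERVABILITY_DIR = "artifacts/apps/{application_name}/{deployment_name}/observability"

    path = os.path.join(
        TEMPORARY_PATH,
        OBSERVABILITY_DIR.format(
            application_name=APPLICATION_NAME, deployment_name=DEPLOYMENT_NAME
        ),
    )
    # path == "./local/tmp/artifacts/apps/my-app/prod/observability"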
application_sdk/outputs/__init__.py

@@ -64,6 +64,7 @@ class Output(ABC):
     output_prefix: str
     total_record_count: int
     chunk_count: int
+    chunk_part: int
     buffer_size: int
     max_file_size_bytes: int
     current_buffer_size: int
application_sdk/outputs/parquet.py

@@ -8,7 +8,11 @@ from temporalio import activity
 
 from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.common.dataframe_utils import is_empty_dataframe
-from application_sdk.constants import DAPR_MAX_GRPC_MESSAGE_LENGTH
+from application_sdk.constants import (
+    DAPR_MAX_GRPC_MESSAGE_LENGTH,
+    ENABLE_ATLAN_UPLOAD,
+    UPSTREAM_OBJECT_STORE_NAME,
+)
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.outputs import Output
@@ -61,6 +65,7 @@ class ParquetOutput(Output):
         buffer_size: int = 5000,
         total_record_count: int = 0,
         chunk_count: int = 0,
+        chunk_part: int = 0,
         chunk_start: Optional[int] = None,
         start_marker: Optional[str] = None,
         end_marker: Optional[str] = None,
@@ -101,7 +106,7 @@ class ParquetOutput(Output):
             DAPR_MAX_GRPC_MESSAGE_LENGTH * 0.75
         )  # 75% of DAPR limit as safety buffer
         self.chunk_start = chunk_start
-        self.chunk_part = 0
+        self.chunk_part = chunk_part
         self.start_marker = start_marker
         self.end_marker = end_marker
         self.partitions = []
@@ -269,6 +274,13 @@ class ParquetOutput(Output):
                 f"No files found under prefix {get_object_store_prefix(self.output_path)}: {str(e)}"
             )
         for path in file_paths:
+            if ENABLE_ATLAN_UPLOAD:
+                await ObjectStore.upload_file(
+                    source=path,
+                    store_name=UPSTREAM_OBJECT_STORE_NAME,
+                    destination=get_object_store_prefix(path),
+                    retain_local_copy=True,
+                )
             await ObjectStore.upload_file(
                 source=path,
                 destination=get_object_store_prefix(path),
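The added branch fans each file out to a second store before the existing upload. A hedged sketch of the resulting per-file behavior (call shapes are taken from the hunk above; the wrapper name is hypothetical, and the ObjectStore import is omitted because its module path is not shown in this diff):

    from application_sdk.activities.common.utils import get_object_store_prefix
    from application_sdk.constants import ENABLE_ATLAN_UPLOAD, UPSTREAM_OBJECT_STORE_NAME

    async def upload_with_optional_atlan_copy(path: str) -> None:
        if ENABLE_ATLAN_UPLOAD:
            # Push to the upstream (Atlan) store first, retaining the local copy
            # so the primary upload below still finds the file on disk.
            await ObjectStore.upload_file(
                source=path,
                store_name=UPSTREAM_OBJECT_STORE_NAME,
                destination=get_object_store_prefix(path),
                retain_local_copy=True,
            )
        # Primary upload to the default deployment object store (pre-existing behavior).
        await ObjectStore.upload_file(source=path, destination=get_object_store_prefix(path))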
application_sdk/version.py

@@ -2,4 +2,4 @@
 Version information for the application_sdk package.
 """
 
-__version__ = "0.1.1rc58"
+__version__ = "0.1.1rc60"
atlan_application_sdk-0.1.1rc58.dist-info/METADATA → atlan_application_sdk-0.1.1rc60.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: atlan-application-sdk
-Version: 0.1.1rc58
+Version: 0.1.1rc60
 Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
 Project-URL: Repository, https://github.com/atlanhq/application-sdk
 Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
atlan_application_sdk-0.1.1rc58.dist-info/RECORD → atlan_application_sdk-0.1.1rc60.dist-info/RECORD

@@ -1,6 +1,6 @@
 application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
-application_sdk/constants.py,sha256=WDw0I9u_OoXIE79IFDFBHJYmIS9KAsghMeGOxfshnpg,10994
-application_sdk/version.py,sha256=GfLIvCy7o6Bup3dK-TlbQNu-KhQHzePlpsQ45bXgCqg,88
+application_sdk/constants.py,sha256=ySrjME6CSoiyjVLgQt0s4dIdgJ0JWIMmMs7WBZBVUpA,11013
+application_sdk/version.py,sha256=2aOWC-Y08sXYBsPMJehGpskX5CTH1TNdtzY-QR-Ub8o,88
 application_sdk/worker.py,sha256=i5f0AeKI39IfsLO05QkwC6uMz0zDPSJqP7B2byri1VI,7489
 application_sdk/activities/__init__.py,sha256=L5WXkTwOwGtjWAlXrUJRCKGwyIyp3z8fBv8BZVCRFQI,11175
 application_sdk/activities/lock_management.py,sha256=6Wdf3jMKitoarHQP91PIJOoGFz4aaOLS_40c7n1yAOA,3902
@@ -73,14 +73,14 @@ application_sdk/interceptors/.cursor/BUGBOT.md,sha256=pxmUF2c7dtaXAX8yAa1-LBa6FC
 application_sdk/observability/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/observability/logger_adaptor.py,sha256=00c0F7maDkp1xrHttW6VQbWFDGr6NkXeDPjmf97ojlY,29989
 application_sdk/observability/metrics_adaptor.py,sha256=5Oz02lUED60duryoVDF9mbD11fpxhbXi7P1609n_15Y,16446
-application_sdk/observability/observability.py,sha256=DP0I4bHyg3TA4hxCqDFy2IiRmBGOpZ7449m7BUoc_RA,24530
+application_sdk/observability/observability.py,sha256=MGxNFPx6pOdpWrpNXZp44NPk3SG4xjA9cKrTeZ1ENK8,23681
 application_sdk/observability/traces_adaptor.py,sha256=0eQJPN-tYA_dV8D3uEa5ZiX9g12NDuLnPaFuQMVDdL0,18242
-application_sdk/observability/utils.py,sha256=MKEpT0WYtpATUgLgJDkGQaAP_t-jpDYMUKDfEvr8Phg,2448
+application_sdk/observability/utils.py,sha256=JoHEA68cjkXTnAXHzgiULYOzRTk8rG4kPZRvFYah3aU,2505
 application_sdk/observability/decorators/observability_decorator.py,sha256=yd6qfrg1MmH5KcZ5Ydzb0RaBzmxx5FrmiI9qwvZx3EU,8963
-application_sdk/outputs/__init__.py,sha256=hrOPw0xuG9xP720Bt309TfbY2Qq_i51R8Xt3ZjwWDUY,15906
+application_sdk/outputs/__init__.py,sha256=dekaEqJEVAmXQYSy_AohXOHNNI56OXG3Xn27FFRmoPQ,15926
 application_sdk/outputs/iceberg.py,sha256=TdppOMEMfojMhGyBmhWeu1AJQexRyHM-huAYeJmhjdY,5533
 application_sdk/outputs/json.py,sha256=gYDDNOVb8EFxxeOkb6zKWZWjTEVgZLoapFM97_roK4A,10883
-application_sdk/outputs/parquet.py,sha256=lEUosbdLfLhZZPS9lZJxnQHEbLc5e1O7JeiDX958frw,20330
+application_sdk/outputs/parquet.py,sha256=DxcKh1IXPdiXNQJS1HIn6-JRdLkmN4At8uF1zppiZX0,20762
 application_sdk/outputs/.cursor/BUGBOT.md,sha256=KxEC3CIyRSK1YftZou5BgKc6PRXT3qQmBNFJp-HSyYE,11496
 application_sdk/server/__init__.py,sha256=KTqE1YPw_3WDVMWatJUuf9OOiobLM2K5SMaBrI62sCo,1568
 application_sdk/server/.cursor/BUGBOT.md,sha256=p_MMoWUW5G1894WfOKYReZKWCuyJT_OJz3rL5g21NbI,16566
@@ -157,8 +157,8 @@ application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bg
 application_sdk/workflows/metadata_extraction/sql.py,sha256=6ZaVt84n-8U2ZvR9GR7uIJKv5v8CuyQjhlnoRJvDszc,12435
 application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
 application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
-atlan_application_sdk-0.1.1rc58.dist-info/METADATA,sha256=sLmqBn4MVdNZQluPCk4z_iItSspSM4W_W1RZGqyPL1c,5730
-atlan_application_sdk-0.1.1rc58.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-atlan_application_sdk-0.1.1rc58.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-atlan_application_sdk-0.1.1rc58.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
-atlan_application_sdk-0.1.1rc58.dist-info/RECORD,,
+atlan_application_sdk-0.1.1rc60.dist-info/METADATA,sha256=vbNpysrGHIBYdJGSe-s8V3km-Amr5xeXIbGunIDHnnI,5730
+atlan_application_sdk-0.1.1rc60.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+atlan_application_sdk-0.1.1rc60.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+atlan_application_sdk-0.1.1rc60.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
+atlan_application_sdk-0.1.1rc60.dist-info/RECORD,,