monocle-apptrace 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of monocle-apptrace might be problematic.

Files changed (88)
  1. monocle_apptrace/__init__.py +1 -0
  2. monocle_apptrace/__main__.py +19 -0
  3. monocle_apptrace/exporters/aws/s3_exporter.py +181 -0
  4. monocle_apptrace/exporters/aws/s3_exporter_opendal.py +137 -0
  5. monocle_apptrace/exporters/azure/blob_exporter.py +146 -0
  6. monocle_apptrace/exporters/azure/blob_exporter_opendal.py +162 -0
  7. monocle_apptrace/exporters/base_exporter.py +48 -0
  8. monocle_apptrace/exporters/exporter_processor.py +144 -0
  9. monocle_apptrace/exporters/file_exporter.py +16 -0
  10. monocle_apptrace/exporters/monocle_exporters.py +55 -0
  11. monocle_apptrace/exporters/okahu/okahu_exporter.py +117 -0
  12. monocle_apptrace/instrumentation/__init__.py +1 -0
  13. monocle_apptrace/instrumentation/common/__init__.py +2 -0
  14. monocle_apptrace/instrumentation/common/constants.py +70 -0
  15. monocle_apptrace/instrumentation/common/instrumentor.py +362 -0
  16. monocle_apptrace/instrumentation/common/span_handler.py +220 -0
  17. monocle_apptrace/instrumentation/common/utils.py +356 -0
  18. monocle_apptrace/instrumentation/common/wrapper.py +92 -0
  19. monocle_apptrace/instrumentation/common/wrapper_method.py +72 -0
  20. monocle_apptrace/instrumentation/metamodel/__init__.py +0 -0
  21. monocle_apptrace/instrumentation/metamodel/botocore/__init__.py +0 -0
  22. monocle_apptrace/instrumentation/metamodel/botocore/_helper.py +95 -0
  23. monocle_apptrace/instrumentation/metamodel/botocore/entities/__init__.py +0 -0
  24. monocle_apptrace/instrumentation/metamodel/botocore/entities/inference.py +65 -0
  25. monocle_apptrace/instrumentation/metamodel/botocore/handlers/botocore_span_handler.py +26 -0
  26. monocle_apptrace/instrumentation/metamodel/botocore/methods.py +16 -0
  27. monocle_apptrace/instrumentation/metamodel/flask/__init__.py +0 -0
  28. monocle_apptrace/instrumentation/metamodel/flask/_helper.py +29 -0
  29. monocle_apptrace/instrumentation/metamodel/flask/methods.py +13 -0
  30. monocle_apptrace/instrumentation/metamodel/haystack/__init__.py +0 -0
  31. monocle_apptrace/instrumentation/metamodel/haystack/_helper.py +127 -0
  32. monocle_apptrace/instrumentation/metamodel/haystack/entities/__init__.py +0 -0
  33. monocle_apptrace/instrumentation/metamodel/haystack/entities/inference.py +76 -0
  34. monocle_apptrace/instrumentation/metamodel/haystack/entities/retrieval.py +61 -0
  35. monocle_apptrace/instrumentation/metamodel/haystack/methods.py +43 -0
  36. monocle_apptrace/instrumentation/metamodel/langchain/__init__.py +0 -0
  37. monocle_apptrace/instrumentation/metamodel/langchain/_helper.py +127 -0
  38. monocle_apptrace/instrumentation/metamodel/langchain/entities/__init__.py +0 -0
  39. monocle_apptrace/instrumentation/metamodel/langchain/entities/inference.py +72 -0
  40. monocle_apptrace/instrumentation/metamodel/langchain/entities/retrieval.py +58 -0
  41. monocle_apptrace/{metamodel/maps/lang_chain_methods.json → instrumentation/metamodel/langchain/methods.py} +48 -43
  42. monocle_apptrace/instrumentation/metamodel/langgraph/__init__.py +0 -0
  43. monocle_apptrace/instrumentation/metamodel/langgraph/_helper.py +48 -0
  44. monocle_apptrace/instrumentation/metamodel/langgraph/entities/__init__.py +0 -0
  45. monocle_apptrace/instrumentation/metamodel/langgraph/entities/inference.py +56 -0
  46. monocle_apptrace/instrumentation/metamodel/langgraph/methods.py +14 -0
  47. monocle_apptrace/instrumentation/metamodel/llamaindex/__init__.py +0 -0
  48. monocle_apptrace/instrumentation/metamodel/llamaindex/_helper.py +172 -0
  49. monocle_apptrace/instrumentation/metamodel/llamaindex/entities/__init__.py +0 -0
  50. monocle_apptrace/instrumentation/metamodel/llamaindex/entities/agent.py +47 -0
  51. monocle_apptrace/instrumentation/metamodel/llamaindex/entities/inference.py +73 -0
  52. monocle_apptrace/instrumentation/metamodel/llamaindex/entities/retrieval.py +57 -0
  53. monocle_apptrace/instrumentation/metamodel/llamaindex/methods.py +101 -0
  54. monocle_apptrace/instrumentation/metamodel/openai/__init__.py +0 -0
  55. monocle_apptrace/instrumentation/metamodel/openai/_helper.py +112 -0
  56. monocle_apptrace/instrumentation/metamodel/openai/entities/__init__.py +0 -0
  57. monocle_apptrace/instrumentation/metamodel/openai/entities/inference.py +71 -0
  58. monocle_apptrace/instrumentation/metamodel/openai/entities/retrieval.py +43 -0
  59. monocle_apptrace/instrumentation/metamodel/openai/methods.py +45 -0
  60. monocle_apptrace/instrumentation/metamodel/requests/__init__.py +4 -0
  61. monocle_apptrace/instrumentation/metamodel/requests/_helper.py +31 -0
  62. monocle_apptrace/instrumentation/metamodel/requests/methods.py +12 -0
  63. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0.dist-info}/METADATA +23 -2
  64. monocle_apptrace-0.3.0.dist-info/RECORD +68 -0
  65. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0.dist-info}/WHEEL +1 -1
  66. monocle_apptrace/constants.py +0 -22
  67. monocle_apptrace/haystack/__init__.py +0 -9
  68. monocle_apptrace/haystack/wrap_node.py +0 -27
  69. monocle_apptrace/haystack/wrap_openai.py +0 -44
  70. monocle_apptrace/haystack/wrap_pipeline.py +0 -62
  71. monocle_apptrace/instrumentor.py +0 -124
  72. monocle_apptrace/langchain/__init__.py +0 -6
  73. monocle_apptrace/llamaindex/__init__.py +0 -15
  74. monocle_apptrace/metamodel/README.md +0 -47
  75. monocle_apptrace/metamodel/entities/README.md +0 -54
  76. monocle_apptrace/metamodel/entities/entity_types.json +0 -157
  77. monocle_apptrace/metamodel/entities/entity_types.py +0 -51
  78. monocle_apptrace/metamodel/maps/haystack_methods.json +0 -25
  79. monocle_apptrace/metamodel/maps/llama_index_methods.json +0 -70
  80. monocle_apptrace/metamodel/spans/README.md +0 -121
  81. monocle_apptrace/metamodel/spans/span_example.json +0 -140
  82. monocle_apptrace/metamodel/spans/span_format.json +0 -55
  83. monocle_apptrace/utils.py +0 -93
  84. monocle_apptrace/wrap_common.py +0 -311
  85. monocle_apptrace/wrapper.py +0 -24
  86. monocle_apptrace-0.1.1.dist-info/RECORD +0 -29
  87. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0.dist-info}/licenses/LICENSE +0 -0
  88. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1 @@
+ from .instrumentation import *
@@ -0,0 +1,19 @@
+ import sys, os
+ import runpy
+ from monocle_apptrace.instrumentation.common.instrumentor import setup_monocle_telemetry
+
+ def main():
+     if len(sys.argv) < 2 or not sys.argv[1].endswith(".py"):
+         print("Usage: python -m monocle_apptrace <your-main-module-file> <args>")
+         sys.exit(1)
+     file_name = os.path.basename(sys.argv[1])
+     workflow_name = file_name[:-3]
+     setup_monocle_telemetry(workflow_name=workflow_name)
+     sys.argv.pop(0)
+
+     try:
+         runpy.run_path(path_name=sys.argv[0], run_name="__main__")
+     except Exception as e:
+         print(e)
+ if __name__ == "__main__":
+     main()
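
The `__main__` module above makes `python -m monocle_apptrace <script>.py <args>` a zero-code way to trace an existing application: the script's base name (minus `.py`) becomes the workflow name. A minimal sketch of the equivalent programmatic setup, assuming a hypothetical script named `chat_app.py`:

    from monocle_apptrace.instrumentation.common.instrumentor import setup_monocle_telemetry

    # Equivalent to: python -m monocle_apptrace chat_app.py
    # ("chat_app" is a hypothetical workflow name derived from the file name)
    setup_monocle_telemetry(workflow_name="chat_app")
    # ... the rest of the application runs with instrumentation enabled ...
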
@@ -0,0 +1,181 @@
+ import os
+ import time
+ import random
+ import datetime
+ import logging
+ import asyncio
+ import boto3
+ from botocore.exceptions import ClientError
+ from botocore.exceptions import (
+     BotoCoreError,
+     ConnectionClosedError,
+     ConnectTimeoutError,
+     EndpointConnectionError,
+     ReadTimeoutError,
+ )
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+ from monocle_apptrace.exporters.base_exporter import SpanExporterBase
+ from monocle_apptrace.exporters.exporter_processor import ExportTaskProcessor
+ from typing import Sequence, Optional
+ import json
+ logger = logging.getLogger(__name__)
+
+ class S3SpanExporter(SpanExporterBase):
+     def __init__(self, bucket_name=None, region_name=None, task_processor: Optional[ExportTaskProcessor] = None):
+         super().__init__()
+         # Use environment variables if credentials are not provided
+         DEFAULT_FILE_PREFIX = "monocle_trace_"
+         DEFAULT_TIME_FORMAT = "%Y-%m-%d__%H.%M.%S"
+         self.max_batch_size = 500
+         self.export_interval = 1
+         if os.getenv('MONOCLE_AWS_ACCESS_KEY_ID') and os.getenv('MONOCLE_AWS_SECRET_ACCESS_KEY'):
+             self.s3_client = boto3.client(
+                 's3',
+                 aws_access_key_id=os.getenv('MONOCLE_AWS_ACCESS_KEY_ID'),
+                 aws_secret_access_key=os.getenv('MONOCLE_AWS_SECRET_ACCESS_KEY'),
+                 region_name=region_name,
+             )
+         else:
+             self.s3_client = boto3.client(
+                 's3',
+                 aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
+                 aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
+                 region_name=region_name,
+             )
+         self.bucket_name = bucket_name or os.getenv('MONOCLE_S3_BUCKET_NAME', 'default-bucket')
+         self.file_prefix = os.getenv('MONOCLE_S3_KEY_PREFIX', DEFAULT_FILE_PREFIX)
+         self.time_format = DEFAULT_TIME_FORMAT
+         self.export_queue = []
+         self.last_export_time = time.time()
+         self.task_processor = task_processor
+         if self.task_processor is not None:
+             self.task_processor.start()
+
+         # Check if the bucket exists, or create it
+         if not self.__bucket_exists(self.bucket_name):
+             try:
+                 self.s3_client.create_bucket(
+                     Bucket=self.bucket_name,
+                     CreateBucketConfiguration={'LocationConstraint': region_name}
+                 )
+                 logger.info(f"Bucket {self.bucket_name} created successfully.")
+             except ClientError as e:
+                 logger.error(f"Error creating bucket {self.bucket_name}: {e}")
+                 raise e
+
+     def __bucket_exists(self, bucket_name):
+         try:
+             # Check if the bucket exists by calling head_bucket
+             self.s3_client.head_bucket(Bucket=bucket_name)
+             return True
+         except ClientError as e:
+             error_code = e.response['Error']['Code']
+             if error_code == '404':
+                 # Bucket not found
+                 logger.error(f"Bucket {bucket_name} does not exist (404).")
+                 return False
+             elif error_code == '403':
+                 # Permission denied
+                 logger.error(f"Access to bucket {bucket_name} is forbidden (403).")
+                 raise PermissionError(f"Access to bucket {bucket_name} is forbidden.")
+             elif error_code == '400':
+                 # Bad request or malformed input
+                 logger.error(f"Bad request for bucket {bucket_name} (400).")
+                 raise ValueError(f"Bad request for bucket {bucket_name}.")
+             else:
+                 # Other client errors
+                 logger.error(f"Unexpected error when accessing bucket {bucket_name}: {e}")
+                 raise e
+         except TypeError as e:
+             # Handle TypeError separately
+             logger.error(f"Type error while checking bucket existence: {e}")
+             raise e
+
+     def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         """Synchronous export method that internally handles async logic."""
+         try:
+             # Run the asynchronous export logic in an event loop
+             logger.info(f"Exporting {len(spans)} spans to S3.")
+             asyncio.run(self.__export_async(spans))
+             return SpanExportResult.SUCCESS
+         except Exception as e:
+             logger.error(f"Error exporting spans: {e}")
+             return SpanExportResult.FAILURE
+
+     async def __export_async(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         try:
+             logger.info(f"__export_async {len(spans)} spans to S3.")
+             # Add spans to the export queue
+             for span in spans:
+                 self.export_queue.append(span)
+                 # If the queue reaches max_batch_size, export the spans
+                 if len(self.export_queue) >= self.max_batch_size:
+                     await self.__export_spans()
+
+             # Check if it's time to force a flush
+             current_time = time.time()
+             if current_time - self.last_export_time >= self.export_interval:
+                 await self.__export_spans()  # Export spans if the time interval has passed
+                 self.last_export_time = current_time  # Reset the last export time
+
+             return SpanExportResult.SUCCESS
+         except Exception as e:
+             logger.error(f"Error exporting spans: {e}")
+             return SpanExportResult.FAILURE
+
+     def __serialize_spans(self, spans: Sequence[ReadableSpan]) -> str:
+         try:
+             # Serialize spans to newline-delimited JSON (NDJSON)
+             valid_json_list = []
+             for span in spans:
+                 try:
+                     valid_json_list.append(span.to_json(indent=0).replace("\n", ""))
+                 except json.JSONDecodeError as e:
+                     logger.warning(f"Invalid JSON format in span data: {span.context.span_id}. Error: {e}")
+                     continue
+             ndjson_data = "\n".join(valid_json_list) + "\n"
+             return ndjson_data
+         except Exception as e:
+             logger.warning(f"Error serializing spans: {e}")
+
+
+     async def __export_spans(self):
+         if len(self.export_queue) == 0:
+             return
+
+         # Take a batch of spans from the queue
+         batch_to_export = self.export_queue[:self.max_batch_size]
+         serialized_data = self.__serialize_spans(batch_to_export)
+         self.export_queue = self.export_queue[self.max_batch_size:]
+         # The batch counts as containing a root span if any span in it has no parent
+         is_root_span = any(not span.parent for span in batch_to_export)
+         logger.info(f"Exporting {len(batch_to_export)} spans to S3 is_root_span : {is_root_span}.")
+         if self.task_processor is not None and callable(getattr(self.task_processor, 'queue_task', None)):
+             self.task_processor.queue_task(self.__upload_to_s3, serialized_data, is_root_span)
+         else:
+             try:
+                 self.__upload_to_s3(serialized_data)
+             except Exception as e:
+                 logger.error(f"Failed to upload span batch: {e}")
+
+     @SpanExporterBase.retry_with_backoff(exceptions=(EndpointConnectionError, ConnectionClosedError, ReadTimeoutError, ConnectTimeoutError))
+     def __upload_to_s3(self, span_data_batch: str):
+         current_time = datetime.datetime.now().strftime(self.time_format)
+         prefix = self.file_prefix + os.environ.get('MONOCLE_S3_KEY_PREFIX_CURRENT', '')
+         file_name = f"{prefix}{current_time}.ndjson"
+         self.s3_client.put_object(
+             Bucket=self.bucket_name,
+             Key=file_name,
+             Body=span_data_batch
+         )
+         logger.info(f"Span batch uploaded to AWS S3 as {file_name}.")
+
+     async def force_flush(self, timeout_millis: int = 30000) -> bool:
+         await self.__export_spans()  # Export any remaining spans in the queue
+         return True
+
+     def shutdown(self) -> None:
+         if hasattr(self, 'task_processor') and self.task_processor is not None:
+             self.task_processor.stop()
+         logger.info("S3SpanExporter has been shut down.")
@@ -0,0 +1,137 @@
+ import os
+ import time
+ import datetime
+ import logging
+ import asyncio
+ from typing import Sequence, Optional
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.sdk.trace.export import SpanExportResult
+ from monocle_apptrace.exporters.base_exporter import SpanExporterBase
+ from monocle_apptrace.exporters.exporter_processor import ExportTaskProcessor
+ from opendal import Operator
+ from opendal.exceptions import PermissionDenied, ConfigInvalid, Unexpected
+
+ import json
+
+ logger = logging.getLogger(__name__)
+ class OpenDALS3Exporter(SpanExporterBase):
+     def __init__(self, bucket_name=None, region_name=None, task_processor: Optional[ExportTaskProcessor] = None):
+         super().__init__()
+         DEFAULT_FILE_PREFIX = "monocle_trace_"
+         DEFAULT_TIME_FORMAT = "%Y-%m-%d__%H.%M.%S"
+         self.max_batch_size = 500
+         self.export_interval = 1
+         self.file_prefix = DEFAULT_FILE_PREFIX
+         self.time_format = DEFAULT_TIME_FORMAT
+         self.export_queue = []
+         self.last_export_time = time.time()
+         self.bucket_name = bucket_name or os.getenv("MONOCLE_S3_BUCKET_NAME", "default-bucket")
+
+         # Initialize the OpenDAL S3 operator
+         self.op = Operator(
+             "s3",
+             root="/",
+             region=os.getenv("AWS_REGION", region_name),
+             bucket=self.bucket_name,
+             access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+             secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
+         )
+
+         self.task_processor = task_processor
+         if self.task_processor is not None:
+             self.task_processor.start()
+
+     def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         """Synchronous export method that internally handles async logic."""
+         try:
+             # Run the asynchronous export logic in an event loop
+             asyncio.run(self.__export_async(spans))
+             return SpanExportResult.SUCCESS
+         except Exception as e:
+             logger.error(f"Error exporting spans: {e}")
+             return SpanExportResult.FAILURE
+
+     async def __export_async(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         try:
+             # Add spans to the export queue
+             for span in spans:
+                 self.export_queue.append(span)
+                 if len(self.export_queue) >= self.max_batch_size:
+                     await self.__export_spans()
+
+             # Check if it's time to force a flush
+             current_time = time.time()
+             if current_time - self.last_export_time >= self.export_interval:
+                 await self.__export_spans()
+                 self.last_export_time = current_time
+
+             return SpanExportResult.SUCCESS
+         except Exception as e:
+             logger.error(f"Error exporting spans: {e}")
+             return SpanExportResult.FAILURE
+
+     def __serialize_spans(self, spans: Sequence[ReadableSpan]) -> str:
+         try:
+             # Serialize spans to newline-delimited JSON (NDJSON)
+             valid_json_list = []
+             for span in spans:
+                 try:
+                     valid_json_list.append(span.to_json(indent=0).replace("\n", ""))
+                 except json.JSONDecodeError as e:
+                     logger.warning(f"Invalid JSON format in span data: {span.context.span_id}. Error: {e}")
+                     continue
+             return "\n".join(valid_json_list) + "\n"
+         except Exception as e:
+             logger.warning(f"Error serializing spans: {e}")
+
+     async def __export_spans(self):
+         if not self.export_queue:
+             return
+         # Take a batch of spans from the queue
+         batch_to_export = self.export_queue[:self.max_batch_size]
+         serialized_data = self.__serialize_spans(batch_to_export)
+         self.export_queue = self.export_queue[self.max_batch_size:]
+
+         # The batch counts as containing a root span if any span in it has no parent
+         is_root_span = any(not span.parent for span in batch_to_export)
+
+         if self.task_processor is not None and callable(getattr(self.task_processor, 'queue_task', None)):
+             self.task_processor.queue_task(self.__upload_to_s3, serialized_data, is_root_span)
+         else:
+             try:
+                 self.__upload_to_s3(serialized_data, is_root_span)
+             except Exception as e:
+                 logger.error(f"Failed to upload span batch: {e}")
+
+     @SpanExporterBase.retry_with_backoff(exceptions=(Unexpected,))
+     def __upload_to_s3(self, span_data_batch: str, is_root_span: bool = False):
+         current_time = datetime.datetime.now().strftime(self.time_format)
+         file_name = f"{self.file_prefix}{current_time}.ndjson"
+         try:
+             # Attempt to write the span data batch to S3
+             self.op.write(file_name, span_data_batch.encode("utf-8"))
+             logger.info(f"Span batch uploaded to S3 as {file_name}. Is root span: {is_root_span}")
+
+         except PermissionDenied as e:
+             # Access to the S3 bucket is forbidden.
+             logger.error(f"Access to bucket {self.bucket_name} is forbidden (403).")
+             raise PermissionError(f"Access to bucket {self.bucket_name} is forbidden.")
+
+         except ConfigInvalid as e:
+             # Bucket does not exist.
+             if "404" in str(e):
+                 logger.error("Bucket does not exist. Please check the bucket name and region.")
+                 raise Exception(f"Bucket does not exist. Error: {e}")
+             else:
+                 logger.error(f"Unexpected error when accessing bucket {self.bucket_name}: {e}")
+                 raise e
+
+
+     async def force_flush(self, timeout_millis: int = 30000) -> bool:
+         await self.__export_spans()
+         return True
+
+     def shutdown(self) -> None:
+         if hasattr(self, 'task_processor') and self.task_processor is not None:
+             self.task_processor.stop()
+         logger.info("OpenDALS3Exporter has been shut down.")
@@ -0,0 +1,146 @@
+ import os
+ import time
+ import random
+ import datetime
+ import logging
+ import asyncio
+ from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
+ from azure.core.exceptions import ResourceNotFoundError, ClientAuthenticationError, ServiceRequestError
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+ from typing import Sequence, Optional
+ from monocle_apptrace.exporters.base_exporter import SpanExporterBase
+ from monocle_apptrace.exporters.exporter_processor import ExportTaskProcessor
+ import json
+ from monocle_apptrace.instrumentation.common.constants import MONOCLE_SDK_VERSION
+ logger = logging.getLogger(__name__)
+
+ class AzureBlobSpanExporter(SpanExporterBase):
+     def __init__(self, connection_string=None, container_name=None, task_processor: Optional[ExportTaskProcessor] = None):
+         super().__init__()
+         DEFAULT_FILE_PREFIX = "monocle_trace_"
+         DEFAULT_TIME_FORMAT = "%Y-%m-%d_%H.%M.%S"
+         self.max_batch_size = 500
+         self.export_interval = 1
+         # Fall back to environment variables if no values are provided
+         if not connection_string:
+             connection_string = os.getenv('MONOCLE_BLOB_CONNECTION_STRING')
+             if not connection_string:
+                 raise ValueError("Azure Storage connection string is not provided or set in environment variables.")
+
+         if not container_name:
+             container_name = os.getenv('MONOCLE_BLOB_CONTAINER_NAME', 'default-container')
+
+         self.blob_service_client = BlobServiceClient.from_connection_string(connection_string)
+         self.container_name = container_name
+         self.file_prefix = DEFAULT_FILE_PREFIX
+         self.time_format = DEFAULT_TIME_FORMAT
+
+         # Check if the container exists, or create it
+         if not self.__container_exists(container_name):
+             try:
+                 self.blob_service_client.create_container(container_name)
+                 logger.info(f"Container {container_name} created successfully.")
+             except Exception as e:
+                 logger.error(f"Error creating container {container_name}: {e}")
+                 raise e
+
+         self.task_processor = task_processor
+         if self.task_processor is not None:
+             self.task_processor.start()
+
+     def __container_exists(self, container_name):
+         try:
+             container_client = self.blob_service_client.get_container_client(container_name)
+             container_client.get_container_properties()
+             return True
+         except ResourceNotFoundError:
+             logger.error(f"Container {container_name} not found (404).")
+             return False
+         except ClientAuthenticationError:
+             logger.error(f"Access to container {container_name} is forbidden (403).")
+             raise PermissionError(f"Access to container {container_name} is forbidden.")
+         except Exception as e:
+             logger.error(f"Unexpected error when checking if container {container_name} exists: {e}")
+             raise e
+
+     def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         """Synchronous export method that internally handles async logic."""
+         try:
+             # Run the asynchronous export logic in an event loop
+             asyncio.run(self._export_async(spans))
+             return SpanExportResult.SUCCESS
+         except Exception as e:
+             logger.error(f"Error exporting spans: {e}")
+             return SpanExportResult.FAILURE
+
+     async def _export_async(self, spans: Sequence[ReadableSpan]):
+         """The actual async export logic runs here."""
+         # Add spans to the export queue
+         for span in spans:
+             # The Azure blob library generates its own span when OpenTelemetry is loaded and the Azure tracing package is installed (e.g. pip install azure-core-tracing-opentelemetry).
+             # With Monocle, OpenTelemetry is always loaded, so if the Azure tracing package is installed, every blob operation triggers blob trace generation.
+             # The Monocle span write would then generate a blob span that comes back to this exporter, resulting in an infinite loop.
+             # To avoid this, skip any span that lacks the Monocle SDK version attribute; that way the blob spans generated by the Azure library are not exported.
+             if not span.attributes.get(MONOCLE_SDK_VERSION):
+                 continue  # TODO: All exporters should use the same base class and check this there
+             self.export_queue.append(span)
+             if len(self.export_queue) >= self.max_batch_size:
+                 await self.__export_spans()
+
+         # Force a flush if the interval has passed
+         current_time = time.time()
+         if current_time - self.last_export_time >= self.export_interval:
+             await self.__export_spans()
+             self.last_export_time = current_time
+
+     def __serialize_spans(self, spans: Sequence[ReadableSpan]) -> str:
+         try:
+             valid_json_list = []
+             for span in spans:
+                 try:
+                     valid_json_list.append(span.to_json(indent=0).replace("\n", ""))
+                 except json.JSONDecodeError as e:
+                     logger.warning(f"Invalid JSON format in span data: {span.context.span_id}. Error: {e}")
+                     continue
+
+             ndjson_data = "\n".join(valid_json_list) + "\n"
+             return ndjson_data
+         except Exception as e:
+             logger.warning(f"Error serializing spans: {e}")
+
+     async def __export_spans(self):
+         if len(self.export_queue) == 0:
+             return
+
+         batch_to_export = self.export_queue[:self.max_batch_size]
+         serialized_data = self.__serialize_spans(batch_to_export)
+         self.export_queue = self.export_queue[self.max_batch_size:]
+
+         # The batch counts as containing a root span if any span in it has no parent
+         is_root_span = any(not span.parent for span in batch_to_export)
+
+         if self.task_processor is not None and callable(getattr(self.task_processor, 'queue_task', None)):
+             self.task_processor.queue_task(self.__upload_to_blob, serialized_data, is_root_span)
+         else:
+             try:
+                 self.__upload_to_blob(serialized_data, is_root_span)
+             except Exception as e:
+                 logger.error(f"Failed to upload span batch: {e}")
+
+     @SpanExporterBase.retry_with_backoff(exceptions=(ResourceNotFoundError, ClientAuthenticationError, ServiceRequestError))
+     def __upload_to_blob(self, span_data_batch: str, is_root_span: bool = False):
+         current_time = datetime.datetime.now().strftime(self.time_format)
+         file_name = f"{self.file_prefix}{current_time}.ndjson"
+         blob_client = self.blob_service_client.get_blob_client(container=self.container_name, blob=file_name)
+         blob_client.upload_blob(span_data_batch, overwrite=True)
+         logger.info(f"Span batch uploaded to Azure Blob Storage as {file_name}. Is root span: {is_root_span}")
+
+     async def force_flush(self, timeout_millis: int = 30000) -> bool:
+         await self.__export_spans()
+         return True
+
+     def shutdown(self) -> None:
+         if hasattr(self, 'task_processor') and self.task_processor is not None:
+             self.task_processor.stop()
+         logger.info("AzureBlobSpanExporter has been shut down.")
@@ -0,0 +1,162 @@
+ import os
+ import time
+ import datetime
+ import logging
+ import asyncio
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.sdk.trace.export import SpanExportResult
+ from typing import Sequence, Optional
+ from opendal import Operator
+ from monocle_apptrace.exporters.base_exporter import SpanExporterBase
+ from monocle_apptrace.exporters.exporter_processor import ExportTaskProcessor
+ from opendal.exceptions import Unexpected, PermissionDenied, NotFound
+ import json
+
+ logger = logging.getLogger(__name__)
+
+ class OpenDALAzureExporter(SpanExporterBase):
+     def __init__(self, connection_string=None, container_name=None, task_processor: Optional[ExportTaskProcessor] = None):
+         super().__init__()
+         DEFAULT_FILE_PREFIX = "monocle_trace_"
+         DEFAULT_TIME_FORMAT = "%Y-%m-%d_%H.%M.%S"
+         self.max_batch_size = 500
+         self.export_interval = 1
+
+         # Default values
+         self.file_prefix = DEFAULT_FILE_PREFIX
+         self.time_format = DEFAULT_TIME_FORMAT
+         self.export_queue = []
+         self.last_export_time = time.time()
+
+         # Validate input
+         if not connection_string:
+             connection_string = os.getenv('MONOCLE_BLOB_CONNECTION_STRING')
+             if not connection_string:
+                 raise ValueError("Azure Storage connection string is not provided or set in environment variables.")
+
+         if not container_name:
+             container_name = os.getenv('MONOCLE_BLOB_CONTAINER_NAME', 'default-container')
+         self.container_name = container_name
+         endpoint, account_name, account_key = self.parse_connection_string(connection_string)
+
+         if not account_name or not account_key:
+             raise ValueError("AccountName or AccountKey missing in the connection string.")
+
+         try:
+             # Initialize the OpenDAL operator with explicit credentials
+             self.operator = Operator(
+                 "azblob",
+                 endpoint=endpoint,
+                 account_name=account_name,
+                 account_key=account_key,
+                 container=container_name
+             )
+         except Exception as e:
+             raise RuntimeError(f"Failed to initialize OpenDAL operator: {e}")
+
+         self.task_processor = task_processor
+         if self.task_processor is not None:
+             self.task_processor.start()
+
+     def parse_connection_string(self, connection_string):
+         connection_params = dict(item.split('=', 1) for item in connection_string.split(';') if '=' in item)
+
+         account_name = connection_params.get('AccountName')
+         account_key = connection_params.get('AccountKey')
+         endpoint_suffix = connection_params.get('EndpointSuffix')
+
+         if not all([account_name, account_key, endpoint_suffix]):
+             raise ValueError("Invalid connection string. Ensure it contains AccountName, AccountKey, and EndpointSuffix.")
+
+         endpoint = f"https://{account_name}.blob.{endpoint_suffix}"
+         return endpoint, account_name, account_key
+
+
+     def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         """Synchronous export method that internally handles async logic."""
+         try:
+             # Run the asynchronous export logic in an event loop
+             asyncio.run(self._export_async(spans))
+             return SpanExportResult.SUCCESS
+         except Exception as e:
+             logger.error(f"Error exporting spans: {e}")
+             return SpanExportResult.FAILURE
+
+     async def _export_async(self, spans: Sequence[ReadableSpan]):
+         """The actual async export logic runs here."""
+         # Add spans to the export queue
+         for span in spans:
+             self.export_queue.append(span)
+             if len(self.export_queue) >= self.max_batch_size:
+                 await self.__export_spans()
+
+         # Force a flush if the interval has passed
+         current_time = time.time()
+         if current_time - self.last_export_time >= self.export_interval:
+             await self.__export_spans()
+             self.last_export_time = current_time
+
+     def __serialize_spans(self, spans: Sequence[ReadableSpan]) -> str:
+         try:
+             valid_json_list = []
+             for span in spans:
+                 try:
+                     valid_json_list.append(span.to_json(indent=0).replace("\n", ""))
+                 except json.JSONDecodeError as e:
+                     logger.warning(f"Invalid JSON format in span data: {span.context.span_id}. Error: {e}")
+                     continue
+
+             ndjson_data = "\n".join(valid_json_list) + "\n"
+             return ndjson_data
+         except Exception as e:
+             logger.warning(f"Error serializing spans: {e}")
+
+     async def __export_spans(self):
+         if len(self.export_queue) == 0:
+             return
+
+         batch_to_export = self.export_queue[:self.max_batch_size]
+         serialized_data = self.__serialize_spans(batch_to_export)
+         self.export_queue = self.export_queue[self.max_batch_size:]
+
+         # The batch counts as containing a root span if any span in it has no parent
+         is_root_span = any(not span.parent for span in batch_to_export)
+
+         if self.task_processor is not None and callable(getattr(self.task_processor, 'queue_task', None)):
+             self.task_processor.queue_task(self.__upload_to_opendal, serialized_data, is_root_span)
+         else:
+             try:
+                 self.__upload_to_opendal(serialized_data, is_root_span)
+             except Exception as e:
+                 logger.error(f"Failed to upload span batch: {e}")
+
+     @SpanExporterBase.retry_with_backoff(exceptions=(Unexpected,))
+     def __upload_to_opendal(self, span_data_batch: str, is_root_span: bool = False):
+         current_time = datetime.datetime.now().strftime(self.time_format)
+         file_name = f"{self.file_prefix}{current_time}.ndjson"
+
+         try:
+             self.operator.write(file_name, span_data_batch.encode('utf-8'))
+             logger.info(f"Span batch uploaded to Azure Blob Storage as {file_name}. Is root span: {is_root_span}")
+         except PermissionDenied as e:
+             # Access to the Azure container is forbidden.
+             logger.error(f"Access to container {self.container_name} is forbidden (403).")
+             raise PermissionError(f"Access to container {self.container_name} is forbidden.")
+
+         except NotFound as e:
+             # Container does not exist.
+             if "404" in str(e):
+                 logger.error("Container does not exist. Please check the container name.")
+                 raise Exception(f"Container does not exist. Error: {e}")
+             else:
+                 logger.error(f"Unexpected NotFound error when accessing container {self.container_name}: {e}")
+                 raise e
+
+     async def force_flush(self, timeout_millis: int = 30000) -> bool:
+         await self.__export_spans()
+         return True
+
+     def shutdown(self) -> None:
+         if hasattr(self, 'task_processor') and self.task_processor is not None:
+             self.task_processor.stop()
+         logger.info("OpenDALAzureExporter has been shut down.")