monocle-apptrace 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of monocle-apptrace has been flagged as possibly problematic.
- monocle_apptrace/exporters/aws/s3_exporter.py +158 -0
- monocle_apptrace/exporters/azure/blob_exporter.py +128 -0
- monocle_apptrace/exporters/base_exporter.py +47 -0
- monocle_apptrace/exporters/exporter_processor.py +19 -0
- monocle_apptrace/exporters/monocle_exporters.py +27 -0
- monocle_apptrace/exporters/okahu/okahu_exporter.py +115 -0
- monocle_apptrace/haystack/__init__.py +4 -4
- monocle_apptrace/haystack/wrap_pipeline.py +3 -2
- monocle_apptrace/instrumentor.py +12 -15
- monocle_apptrace/langchain/__init__.py +6 -3
- monocle_apptrace/llamaindex/__init__.py +8 -7
- monocle_apptrace/metamodel/entities/README.md +33 -10
- monocle_apptrace/metamodel/entities/app_hosting_types.json +29 -0
- monocle_apptrace/metamodel/entities/entities.json +49 -0
- monocle_apptrace/metamodel/entities/inference_types.json +33 -0
- monocle_apptrace/metamodel/entities/model_types.json +41 -0
- monocle_apptrace/metamodel/entities/vector_store_types.json +25 -0
- monocle_apptrace/metamodel/entities/workflow_types.json +22 -0
- monocle_apptrace/metamodel/maps/attributes/inference/langchain_entities.json +35 -0
- monocle_apptrace/metamodel/maps/attributes/inference/llamaindex_entities.json +35 -0
- monocle_apptrace/metamodel/maps/attributes/retrieval/langchain_entities.json +27 -0
- monocle_apptrace/metamodel/maps/attributes/retrieval/llamaindex_entities.json +27 -0
- monocle_apptrace/metamodel/maps/{lang_chain_methods.json → langchain_methods.json} +31 -8
- monocle_apptrace/metamodel/maps/{llama_index_methods.json → llamaindex_methods.json} +12 -8
- monocle_apptrace/metamodel/spans/span_example.json +1 -1
- monocle_apptrace/metamodel/spans/span_types.json +16 -0
- monocle_apptrace/utils.py +90 -11
- monocle_apptrace/wrap_common.py +228 -122
- monocle_apptrace/wrapper.py +3 -1
- {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.2.0.dist-info}/METADATA +5 -1
- monocle_apptrace-0.2.0.dist-info/RECORD +44 -0
- monocle_apptrace/metamodel/entities/entity_types.json +0 -157
- monocle_apptrace/metamodel/entities/entity_types.py +0 -51
- monocle_apptrace-0.1.1.dist-info/RECORD +0 -29
- {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.2.0.dist-info}/WHEEL +0 -0
- {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.2.0.dist-info}/licenses/NOTICE +0 -0
monocle_apptrace/exporters/aws/s3_exporter.py ADDED
@@ -0,0 +1,158 @@
+import os
+import time
+import random
+import datetime
+import logging
+import asyncio
+import boto3
+from botocore.exceptions import ClientError
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from monocle_apptrace.exporters.base_exporter import SpanExporterBase
+from typing import Sequence
+import json
+logger = logging.getLogger(__name__)
+
+class S3SpanExporter(SpanExporterBase):
+    def __init__(self, bucket_name=None, region_name="us-east-1"):
+        super().__init__()
+        # Use environment variables if credentials are not provided
+        DEFAULT_FILE_PREFIX = "monocle_trace__"
+        DEFAULT_TIME_FORMAT = "%Y-%m-%d__%H.%M.%S"
+        self.max_batch_size = 500
+        self.export_interval = 1
+        self.s3_client = boto3.client(
+            's3',
+            aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
+            aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
+            region_name=region_name,
+        )
+        self.bucket_name = bucket_name or os.getenv('MONOCLE_S3_BUCKET_NAME', 'default-bucket')
+        self.file_prefix = DEFAULT_FILE_PREFIX
+        self.time_format = DEFAULT_TIME_FORMAT
+        self.export_queue = []
+        self.last_export_time = time.time()
+
+        # Check if bucket exists or create it
+        if not self.__bucket_exists(self.bucket_name):
+            try:
+                if region_name == "us-east-1":
+                    self.s3_client.create_bucket(Bucket=self.bucket_name)
+                else:
+                    self.s3_client.create_bucket(
+                        Bucket=self.bucket_name,
+                        CreateBucketConfiguration={'LocationConstraint': region_name}
+                    )
+                logger.info(f"Bucket {self.bucket_name} created successfully.")
+            except ClientError as e:
+                logger.error(f"Error creating bucket {self.bucket_name}: {e}")
+                raise e
+
+    def __bucket_exists(self, bucket_name):
+        try:
+            # Check if the bucket exists by calling head_bucket
+            self.s3_client.head_bucket(Bucket=bucket_name)
+            return True
+        except ClientError as e:
+            error_code = e.response['Error']['Code']
+            if error_code == '404':
+                # Bucket not found
+                logger.error(f"Bucket {bucket_name} does not exist (404).")
+                return False
+            elif error_code == '403':
+                # Permission denied
+                logger.error(f"Access to bucket {bucket_name} is forbidden (403).")
+                raise PermissionError(f"Access to bucket {bucket_name} is forbidden.")
+            elif error_code == '400':
+                # Bad request or malformed input
+                logger.error(f"Bad request for bucket {bucket_name} (400).")
+                raise ValueError(f"Bad request for bucket {bucket_name}.")
+            else:
+                # Other client errors
+                logger.error(f"Unexpected error when accessing bucket {bucket_name}: {e}")
+                raise e
+        except TypeError as e:
+            # Handle TypeError separately
+            logger.error(f"Type error while checking bucket existence: {e}")
+            raise e
+
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        """Synchronous export method that internally handles async logic."""
+        try:
+            # Run the asynchronous export logic in an event loop
+            asyncio.run(self.__export_async(spans))
+            return SpanExportResult.SUCCESS
+        except Exception as e:
+            logger.error(f"Error exporting spans: {e}")
+            return SpanExportResult.FAILURE
+
+    async def __export_async(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        try:
+            # Add spans to the export queue
+            for span in spans:
+                self.export_queue.append(span)
+                # If the queue reaches MAX_BATCH_SIZE, export the spans
+                if len(self.export_queue) >= self.max_batch_size:
+                    await self.__export_spans()
+
+            # Check if it's time to force a flush
+            current_time = time.time()
+            if current_time - self.last_export_time >= self.export_interval:
+                await self.__export_spans()  # Export spans if time interval has passed
+                self.last_export_time = current_time  # Reset the last export time
+
+            return SpanExportResult.SUCCESS
+        except Exception as e:
+            logger.error(f"Error exporting spans: {e}")
+            return SpanExportResult.FAILURE
+
+    def __serialize_spans(self, spans: Sequence[ReadableSpan]) -> str:
+        try:
+            # Serialize spans to JSON or any other format you prefer
+            valid_json_list = []
+            for span in spans:
+                try:
+                    valid_json_list.append(span.to_json(indent=0).replace("\n", ""))
+                except json.JSONDecodeError as e:
+                    logger.warning(f"Invalid JSON format in span data: {span.context.span_id}. Error: {e}")
+                    continue
+            ndjson_data = "\n".join(valid_json_list) + "\n"
+            return ndjson_data
+        except Exception as e:
+            logger.warning(f"Error serializing spans: {e}")
+
+
+    async def __export_spans(self):
+        if len(self.export_queue) == 0:
+            return
+
+        # Take a batch of spans from the queue
+        batch_to_export = self.export_queue[:self.max_batch_size]
+        serialized_data = self.__serialize_spans(batch_to_export)
+        self.export_queue = self.export_queue[self.max_batch_size:]
+        try:
+            if asyncio.get_event_loop().is_running():
+                task = asyncio.create_task(self._retry_with_backoff(self.__upload_to_s3, serialized_data))
+                await task
+            else:
+                await self._retry_with_backoff(self.__upload_to_s3, serialized_data)
+
+        except Exception as e:
+            logger.error(f"Failed to upload span batch: {e}")
+
+    def __upload_to_s3(self, span_data_batch: str):
+        current_time = datetime.datetime.now().strftime(self.time_format)
+        file_name = f"{self.file_prefix}{current_time}.ndjson"
+        self.s3_client.put_object(
+            Bucket=self.bucket_name,
+            Key=file_name,
+            Body=span_data_batch
+        )
+        logger.info(f"Span batch uploaded to AWS S3 as {file_name}.")
+
+    async def force_flush(self, timeout_millis: int = 30000) -> bool:
+        await self.__export_spans()  # Export any remaining spans in the queue
+        return True
+
+    def shutdown(self) -> None:
+        logger.info("S3SpanExporter has been shut down.")
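For reference, a minimal sketch of wiring this exporter into a standard OpenTelemetry pipeline; the bucket name is a placeholder, and the AWS credentials are assumed to be in the environment, as the constructor above expects.

# Hypothetical wiring of S3SpanExporter into an OpenTelemetry pipeline.
# "my-trace-bucket" is a placeholder; AWS_ACCESS_KEY_ID and
# AWS_SECRET_ACCESS_KEY are read from the environment by the constructor.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from monocle_apptrace.exporters.aws.s3_exporter import S3SpanExporter

exporter = S3SpanExporter(bucket_name="my-trace-bucket", region_name="us-east-1")
provider = TracerProvider()
provider.add_span_processor(BatchSpanProcessor(exporter))
trace.set_tracer_provider(provider)

Note that export() already batches internally (max_batch_size, export_interval), so the BatchSpanProcessor adds a second layer of buffering on top.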
monocle_apptrace/exporters/azure/blob_exporter.py ADDED
@@ -0,0 +1,128 @@
+import os
+import time
+import random
+import datetime
+import logging
+import asyncio
+from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
+from azure.core.exceptions import ResourceNotFoundError, ClientAuthenticationError, ServiceRequestError
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from typing import Sequence
+from monocle_apptrace.exporters.base_exporter import SpanExporterBase
+import json
+logger = logging.getLogger(__name__)
+
+class AzureBlobSpanExporter(SpanExporterBase):
+    def __init__(self, connection_string=None, container_name=None):
+        super().__init__()
+        DEFAULT_FILE_PREFIX = "monocle_trace_"
+        DEFAULT_TIME_FORMAT = "%Y-%m-%d_%H.%M.%S"
+        self.max_batch_size = 500
+        self.export_interval = 1
+        # Use default values if none are provided
+        if not connection_string:
+            connection_string = os.getenv('MONOCLE_BLOB_CONNECTION_STRING')
+            if not connection_string:
+                raise ValueError("Azure Storage connection string is not provided or set in environment variables.")
+
+        if not container_name:
+            container_name = os.getenv('MONOCLE_BLOB_CONTAINER_NAME', 'default-container')
+
+        self.blob_service_client = BlobServiceClient.from_connection_string(connection_string)
+        self.container_name = container_name
+        self.file_prefix = DEFAULT_FILE_PREFIX
+        self.time_format = DEFAULT_TIME_FORMAT
+
+        # Check if container exists or create it
+        if not self.__container_exists(container_name):
+            try:
+                self.blob_service_client.create_container(container_name)
+                logger.info(f"Container {container_name} created successfully.")
+            except Exception as e:
+                logger.error(f"Error creating container {container_name}: {e}")
+                raise e
+
+    def __container_exists(self, container_name):
+        try:
+            container_client = self.blob_service_client.get_container_client(container_name)
+            container_client.get_container_properties()
+            return True
+        except ResourceNotFoundError:
+            logger.error(f"Container {container_name} not found (404).")
+            return False
+        except ClientAuthenticationError:
+            logger.error(f"Access to container {container_name} is forbidden (403).")
+            raise PermissionError(f"Access to container {container_name} is forbidden.")
+        except Exception as e:
+            logger.error(f"Unexpected error when checking if container {container_name} exists: {e}")
+            raise e
+
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        """Synchronous export method that internally handles async logic."""
+        try:
+            # Run the asynchronous export logic in an event loop
+            asyncio.run(self._export_async(spans))
+            return SpanExportResult.SUCCESS
+        except Exception as e:
+            logger.error(f"Error exporting spans: {e}")
+            return SpanExportResult.FAILURE
+
+    async def _export_async(self, spans: Sequence[ReadableSpan]):
+        """The actual async export logic is run here."""
+        # Add spans to the export queue
+        for span in spans:
+            self.export_queue.append(span)
+            if len(self.export_queue) >= self.max_batch_size:
+                await self.__export_spans()
+
+        # Force a flush if the interval has passed
+        current_time = time.time()
+        if current_time - self.last_export_time >= self.export_interval:
+            await self.__export_spans()
+            self.last_export_time = current_time
+
+    def __serialize_spans(self, spans: Sequence[ReadableSpan]) -> str:
+        try:
+            valid_json_list = []
+            for span in spans:
+                try:
+                    valid_json_list.append(span.to_json(indent=0).replace("\n", ""))
+                except json.JSONDecodeError as e:
+                    logger.warning(f"Invalid JSON format in span data: {span.context.span_id}. Error: {e}")
+                    continue
+
+            ndjson_data = "\n".join(valid_json_list) + "\n"
+            return ndjson_data
+        except Exception as e:
+            logger.warning(f"Error serializing spans: {e}")
+
+    async def __export_spans(self):
+        if len(self.export_queue) == 0:
+            return
+
+        batch_to_export = self.export_queue[:self.max_batch_size]
+        serialized_data = self.__serialize_spans(batch_to_export)
+        self.export_queue = self.export_queue[self.max_batch_size:]
+        try:
+            if asyncio.get_event_loop().is_running():
+                task = asyncio.create_task(self._retry_with_backoff(self.__upload_to_blob, serialized_data))
+                await task
+            else:
+                await self._retry_with_backoff(self.__upload_to_blob, serialized_data)
+        except Exception as e:
+            logger.error(f"Failed to upload span batch: {e}")
+
+    def __upload_to_blob(self, span_data_batch: str):
+        current_time = datetime.datetime.now().strftime(self.time_format)
+        file_name = f"{self.file_prefix}{current_time}.ndjson"
+        blob_client = self.blob_service_client.get_blob_client(container=self.container_name, blob=file_name)
+        blob_client.upload_blob(span_data_batch, overwrite=True)
+        logger.info(f"Span batch uploaded to Azure Blob Storage as {file_name}.")
+
+    async def force_flush(self, timeout_millis: int = 30000) -> bool:
+        await self.__export_spans()
+        return True
+
+    def shutdown(self) -> None:
+        logger.info("AzureBlobSpanExporter has been shut down.")
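The Azure exporter mirrors the S3 one. A minimal sketch, assuming the connection string is supplied via the MONOCLE_BLOB_CONNECTION_STRING fallback the constructor reads; the container name and connection string below are placeholders.

# Hypothetical setup relying on the environment-variable fallbacks above.
import os
from monocle_apptrace.exporters.azure.blob_exporter import AzureBlobSpanExporter

os.environ["MONOCLE_BLOB_CONNECTION_STRING"] = "<your-storage-connection-string>"
exporter = AzureBlobSpanExporter(container_name="my-trace-container")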
monocle_apptrace/exporters/base_exporter.py ADDED
@@ -0,0 +1,47 @@
+import time
+import random
+import logging
+from abc import ABC, abstractmethod
+from azure.core.exceptions import ServiceRequestError, ClientAuthenticationError
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from typing import Sequence
+import asyncio
+
+logger = logging.getLogger(__name__)
+
+class SpanExporterBase(ABC):
+    def __init__(self):
+        self.backoff_factor = 2
+        self.max_retries = 10
+        self.export_queue = []
+        self.last_export_time = time.time()
+
+    @abstractmethod
+    async def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        pass
+
+    @abstractmethod
+    async def force_flush(self, timeout_millis: int = 30000) -> bool:
+        pass
+
+    def shutdown(self) -> None:
+        pass
+
+    async def _retry_with_backoff(self, func, *args, **kwargs):
+        """Handle retries with exponential backoff."""
+        attempt = 0
+        while attempt < self.max_retries:
+            try:
+                return func(*args, **kwargs)
+            except ServiceRequestError as e:
+                logger.warning(f"Network connectivity error: {e}. Retrying in {self.backoff_factor ** attempt} seconds...")
+                sleep_time = self.backoff_factor * (2 ** attempt) + random.uniform(0, 1)
+                await asyncio.sleep(sleep_time)
+                attempt += 1
+            except ClientAuthenticationError as e:
+                logger.error(f"Failed to authenticate: {str(e)}")
+                break
+
+        logger.error("Max retries exceeded.")
+        raise ServiceRequestError(message="Max retries exceeded.")
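Note that _retry_with_backoff actually sleeps for backoff_factor * 2**attempt seconds plus up to one second of jitter (the log message quotes backoff_factor ** attempt, which differs from the real delay). A standalone illustration of the resulting schedule with the defaults above:

# Standalone illustration of the retry delay schedule; not part of the package.
import random

backoff_factor, max_retries = 2, 10
for attempt in range(max_retries):
    sleep_time = backoff_factor * (2 ** attempt) + random.uniform(0, 1)
    print(f"attempt {attempt}: sleep ~{sleep_time:.1f}s")  # ~2s, ~4s, ..., ~1024s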
monocle_apptrace/exporters/exporter_processor.py ADDED
@@ -0,0 +1,19 @@
+from abc import ABC, abstractmethod
+import logging
+from typing import Callable
+
+logger = logging.getLogger(__name__)
+
+class ExportTaskProcessor(ABC):
+
+    @abstractmethod
+    def start(self):
+        return
+
+    @abstractmethod
+    def stop(self):
+        return
+
+    @abstractmethod
+    def queue_task(self, async_task: Callable[[Callable, any], any] = None, args: any = None):
+        return
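ExportTaskProcessor lets an exporter hand uploads off to a background worker; OkahuSpanExporter (below) calls queue_task(send_spans_to_okahu, span_list) when a processor is supplied. A minimal thread-backed implementation might look like the following; the class name and queue details are illustrative, not part of the package.

# Illustrative thread-backed ExportTaskProcessor; not shipped with the package.
import queue
import threading

class ThreadedExportTaskProcessor(ExportTaskProcessor):
    def start(self):
        self._queue = queue.Queue()
        self._worker = threading.Thread(target=self._run, daemon=True)
        self._worker.start()

    def stop(self):
        self._queue.put(None)  # sentinel: ends the worker loop

    def queue_task(self, async_task=None, args=None):
        self._queue.put((async_task, args))

    def _run(self):
        while (item := self._queue.get()) is not None:
            task, args = item
            task(args)  # matches how OkahuSpanExporter invokes queued tasks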
monocle_apptrace/exporters/monocle_exporters.py ADDED
@@ -0,0 +1,27 @@
+from typing import Dict, Any
+import os, warnings
+from importlib import import_module
+from opentelemetry.sdk.trace.export import SpanExporter, ConsoleSpanExporter
+from monocle_apptrace.exporters.file_exporter import FileSpanExporter
+
+monocle_exporters:Dict[str, Any] = {
+    "s3": {"module": "monocle_apptrace.exporters.aws.s3_exporter", "class": "S3SpanExporter"},
+    "blob": {"module":"monocle_apptrace.exporters.azure.blob_exporter", "class": "AzureBlobSpanExporter"},
+    "okahu": {"module":"monocle_apptrace.exporters.okahu.okahu_exporter", "class": "OkahuSpanExporter"},
+    "file": {"module":"monocle_apptrace.exporters.file_exporter", "class": "FileSpanExporter"}
+}
+
+def get_monocle_exporter() -> SpanExporter:
+    exporter_name = os.environ.get("MONOCLE_EXPORTER", "file")
+    try:
+        exporter_class_path = monocle_exporters[exporter_name]
+    except Exception as ex:
+        warnings.warn(f"Unsupported Monocle span exporter setting {exporter_name}, using default FileSpanExporter.")
+        return FileSpanExporter()
+    try:
+        exporter_module = import_module(exporter_class_path.get("module"))
+        exporter_class = getattr(exporter_module, exporter_class_path.get("class"))
+        return exporter_class()
+    except Exception as ex:
+        warnings.warn(f"Unable to set Monocle span exporter to {exporter_name}, error {ex}. Using ConsoleSpanExporter")
+        return ConsoleSpanExporter()
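Exporter selection is driven entirely by the MONOCLE_EXPORTER environment variable: unknown names fall back to FileSpanExporter, and a failure to instantiate the chosen class falls back to ConsoleSpanExporter. For example:

# Selecting the S3 exporter via the environment variable read above.
import os
from monocle_apptrace.exporters.monocle_exporters import get_monocle_exporter

os.environ["MONOCLE_EXPORTER"] = "s3"
exporter = get_monocle_exporter()  # an S3SpanExporter, assuming boto3 and credentials are available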
monocle_apptrace/exporters/okahu/okahu_exporter.py ADDED
@@ -0,0 +1,115 @@
+import json
+import logging
+import os
+from typing import Callable, Optional, Sequence
+import requests
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult, ConsoleSpanExporter
+from requests.exceptions import ReadTimeout
+
+from monocle_apptrace.exporters.exporter_processor import ExportTaskProcessor
+
+REQUESTS_SUCCESS_STATUS_CODES = (200, 202)
+OKAHU_PROD_INGEST_ENDPOINT = "https://ingest.okahu.co/api/v1/trace/ingest"
+
+logger = logging.getLogger(__name__)
+
+
+class OkahuSpanExporter(SpanExporter):
+    def __init__(
+        self,
+        endpoint: Optional[str] = None,
+        timeout: Optional[int] = None,
+        session: Optional[requests.Session] = None,
+        task_processor: ExportTaskProcessor = None
+    ):
+        """Okahu exporter."""
+        okahu_endpoint: str = os.environ.get("OKAHU_INGESTION_ENDPOINT", OKAHU_PROD_INGEST_ENDPOINT)
+        self.endpoint = endpoint or okahu_endpoint
+        api_key: str = os.environ.get("OKAHU_API_KEY")
+        self._closed = False
+        if not api_key:
+            raise ValueError("OKAHU_API_KEY not set.")
+        self.timeout = timeout or 15
+        self.session = session or requests.Session()
+        self.session.headers.update(
+            {"Content-Type": "application/json", "x-api-key": api_key}
+        )
+
+        self.task_processor = task_processor or None
+        if task_processor is not None:
+            task_processor.start()
+
+    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+        # After the call to Shutdown subsequent calls to Export are
+        # not allowed and should return a Failure result
+        if not hasattr(self, 'session'):
+            return self.exporter.export(spans)
+
+        if self._closed:
+            logger.warning("Exporter already shutdown, ignoring batch")
+            return SpanExportResult.FAILURE
+        if len(spans) == 0:
+            return
+
+        span_list = {
+            "batch": []
+        }
+
+        # append the batch object with all the spans object
+        for span in spans:
+            # create a object from serialized span
+            obj = json.loads(span.to_json())
+            if obj["parent_id"] is None:
+                obj["parent_id"] = "None"
+            else:
+                obj["parent_id"] = remove_0x_from_start(obj["parent_id"])
+            if obj["context"] is not None:
+                obj["context"]["trace_id"] = remove_0x_from_start(obj["context"]["trace_id"])
+                obj["context"]["span_id"] = remove_0x_from_start(obj["context"]["span_id"])
+            span_list["batch"].append(obj)
+
+        def send_spans_to_okahu(span_list_local=None):
+            try:
+                result = self.session.post(
+                    url=self.endpoint,
+                    data=json.dumps(span_list_local),
+                    timeout=self.timeout,
+                )
+                if result.status_code not in REQUESTS_SUCCESS_STATUS_CODES:
+                    logger.error(
+                        "Traces cannot be uploaded; status code: %s, message %s",
+                        result.status_code,
+                        result.text,
+                    )
+                    return SpanExportResult.FAILURE
+                logger.warning("spans successfully exported to okahu")
+                return SpanExportResult.SUCCESS
+            except ReadTimeout as e:
+                logger.warning("Trace export timed out: %s", str(e))
+                return SpanExportResult.FAILURE
+
+        # if async task function is present, then push the request to asnc task
+
+        if self.task_processor is not None and callable(self.task_processor.queue_task):
+            self.task_processor.queue_task(send_spans_to_okahu, span_list)
+            return SpanExportResult.SUCCESS
+        return send_spans_to_okahu(span_list)
+
+    def shutdown(self) -> None:
+        if self._closed:
+            logger.warning("Exporter already shutdown, ignoring call")
+            return
+        if hasattr(self, 'session'):
+            self.session.close()
+        self._closed = True
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        return True
+
+
+# only removes the first occurrence of 0x from the string
+def remove_0x_from_start(my_str: str):
+    if my_str.startswith("0x"):
+        return my_str.replace("0x", "", 1)
+    return my_str
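Before upload, the exporter rewrites OpenTelemetry's hex-encoded ids: a null parent_id becomes the string "None", and remove_0x_from_start strips only a leading 0x prefix:

# Behavior of the id normalization applied to trace/span/parent ids above.
print(remove_0x_from_start("0xabc123"))  # abc123
print(remove_0x_from_start("abc123"))    # abc123 (unchanged)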
monocle_apptrace/haystack/__init__.py CHANGED
@@ -1,9 +1,9 @@
-
 import os
 import logging
-from monocle_apptrace.utils import
+from monocle_apptrace.utils import get_wrapper_methods_config
 
 logger = logging.getLogger(__name__)
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-HAYSTACK_METHODS =
-os.path.join(parent_dir, 'metamodel', 'maps', 'haystack_methods.json')
+HAYSTACK_METHODS = get_wrapper_methods_config(
+    wrapper_methods_config_path=os.path.join(parent_dir, 'metamodel', 'maps', 'haystack_methods.json'),
+    attributes_config_base_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
monocle_apptrace/haystack/wrap_pipeline.py CHANGED
@@ -4,8 +4,8 @@ from opentelemetry.context import attach, set_value
 from opentelemetry.instrumentation.utils import (
     _SUPPRESS_INSTRUMENTATION_KEY,
 )
-from monocle_apptrace.wrap_common import PROMPT_INPUT_KEY, PROMPT_OUTPUT_KEY, WORKFLOW_TYPE_MAP, with_tracer_wrapper
-from monocle_apptrace.utils import set_embedding_model
+from monocle_apptrace.wrap_common import PROMPT_INPUT_KEY, PROMPT_OUTPUT_KEY, WORKFLOW_TYPE_MAP, with_tracer_wrapper, DATA_INPUT_KEY
+from monocle_apptrace.utils import set_embedding_model, set_attribute
 
 logger = logging.getLogger(__name__)
 
@@ -20,6 +20,7 @@ def wrap(tracer, to_wrap, wrapped, instance, args, kwargs):
     workflow_input = get_workflow_input(args, inputs)
     embedding_model = get_embedding_model(instance)
     set_embedding_model(embedding_model)
+    set_attribute(DATA_INPUT_KEY, workflow_input)
 
 
     with tracer.start_as_current_span(f"{name}.workflow") as span:
monocle_apptrace/instrumentor.py CHANGED
@@ -1,7 +1,5 @@
-
-
-import logging
-from typing import Collection,List
+import logging, os
+from typing import Collection, List
 from wrapt import wrap_function_wrapper
 from opentelemetry.trace import get_tracer
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
@@ -11,17 +9,16 @@ from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanProcessor
 from opentelemetry.sdk.resources import SERVICE_NAME, Resource
 from opentelemetry import trace
 from opentelemetry.context import get_value, attach, set_value
+from monocle_apptrace.utils import process_wrapper_method_config
 from monocle_apptrace.wrap_common import SESSION_PROPERTIES_KEY
 from monocle_apptrace.wrapper import INBUILT_METHODS_LIST, WrapperMethod
-from monocle_apptrace.exporters.
-
+from monocle_apptrace.exporters.monocle_exporters import get_monocle_exporter
 
 logger = logging.getLogger(__name__)
 
 _instruments = ()
 
 class MonocleInstrumentor(BaseInstrumentor):
-
     workflow_name: str = ""
     user_wrapper_methods: list[WrapperMethod] = []
     instrumented_method_list: list[object] = []
@@ -37,7 +34,7 @@ class MonocleInstrumentor(BaseInstrumentor):
 
     def _instrument(self, **kwargs):
         tracer_provider = kwargs.get("tracer_provider")
-        tracer = get_tracer(instrumenting_module_name=
+        tracer = get_tracer(instrumenting_module_name=__name__, tracer_provider=tracer_provider)
 
         user_method_list = [
             {
@@ -46,8 +43,9 @@ class MonocleInstrumentor(BaseInstrumentor):
                 "method": method.method,
                 "span_name": method.span_name,
                 "wrapper": method.wrapper,
+                "output_processor": method.output_processor
             } for method in self.user_wrapper_methods]
-
+        process_wrapper_method_config(user_method_list)
         final_method_list = user_method_list + INBUILT_METHODS_LIST
 
         for wrapped_method in final_method_list:
@@ -69,7 +67,6 @@ class MonocleInstrumentor(BaseInstrumentor):
                     object:{wrap_object},
                     method:{wrap_method}""")
 
-
     def _uninstrument(self, **kwargs):
         for wrapped_method in self.instrumented_method_list:
             try:
@@ -93,8 +90,9 @@ def setup_monocle_telemetry(
     resource = Resource(attributes={
        SERVICE_NAME: workflow_name
     })
-    span_processors = span_processors or [BatchSpanProcessor(
+    span_processors = span_processors or [BatchSpanProcessor(get_monocle_exporter())]
     trace_provider = TracerProvider(resource=resource)
+    attach(set_value("workflow_name", workflow_name))
     tracer_provider_default = trace.get_tracer_provider()
     provider_type = type(tracer_provider_default).__name__
     is_proxy_provider = "Proxy" in provider_type
@@ -102,16 +100,15 @@ def setup_monocle_telemetry(
         processor.on_start = on_processor_start
         if not is_proxy_provider:
             tracer_provider_default.add_span_processor(processor)
-        else
+        else:
             trace_provider.add_span_processor(processor)
-    if is_proxy_provider
+    if is_proxy_provider:
         trace.set_tracer_provider(trace_provider)
     instrumentor = MonocleInstrumentor(user_wrapper_methods=wrapper_methods or [])
     # instrumentor.app_name = workflow_name
     if not instrumentor.is_instrumented_by_opentelemetry:
         instrumentor.instrument()
 
-
 def on_processor_start(span: Span, parent_context):
     context_properties = get_value(SESSION_PROPERTIES_KEY)
     if context_properties is not None:
@@ -121,4 +118,4 @@ def on_processor_start(span: Span, parent_context):
     )
 
 def set_context_properties(properties: dict) -> None:
-    attach(set_value(SESSION_PROPERTIES_KEY, properties))
+    attach(set_value(SESSION_PROPERTIES_KEY, properties))
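With these changes, setup_monocle_telemetry defaults its span processor to a BatchSpanProcessor wrapping whatever get_monocle_exporter resolves, and records the workflow name in the OpenTelemetry context. A minimal sketch, assuming the keyword arguments visible in the diff (workflow_name; span_processors and wrapper_methods appear optional given their or-fallbacks):

# Hypothetical application setup; exporter choice comes from MONOCLE_EXPORTER.
import os
from monocle_apptrace.instrumentor import setup_monocle_telemetry

os.environ["MONOCLE_EXPORTER"] = "file"  # or "s3", "blob", "okahu"
setup_monocle_telemetry(workflow_name="my-rag-app")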
monocle_apptrace/langchain/__init__.py CHANGED
@@ -1,6 +1,9 @@
 import os
-from monocle_apptrace.utils import
+from monocle_apptrace.utils import get_wrapper_methods_config
 
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-LANGCHAIN_METHODS =
-os.path.join(parent_dir, 'metamodel', 'maps', '
+LANGCHAIN_METHODS = get_wrapper_methods_config(
+    wrapper_methods_config_path=os.path.join(parent_dir, 'metamodel', 'maps', 'langchain_methods.json'),
+    attributes_config_base_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+
monocle_apptrace/llamaindex/__init__.py CHANGED
@@ -1,15 +1,16 @@
-
-#pylint: disable=protected-access
+# pylint: disable=protected-access
 import os
-from monocle_apptrace.utils import
+from monocle_apptrace.utils import get_wrapper_methods_config
+
 
 def get_llm_span_name_for_openai(instance):
     if (hasattr(instance, "_is_azure_client")
-
-
+            and callable(getattr(instance, "_is_azure_client"))
+            and instance._is_azure_client()):
         return "llamaindex.azure_openai"
     return "llamaindex.openai"
 
 parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-LLAMAINDEX_METHODS =
-os.path.join(parent_dir, 'metamodel', 'maps', '
+LLAMAINDEX_METHODS = get_wrapper_methods_config(
+    wrapper_methods_config_path=os.path.join(parent_dir, 'metamodel', 'maps', 'llamaindex_methods.json'),
+    attributes_config_base_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))