monocle-apptrace 0.1.1__py3-none-any.whl → 0.3.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of monocle-apptrace has been flagged as possibly problematic.

Files changed (47):
  1. monocle_apptrace/botocore/__init__.py +9 -0
  2. monocle_apptrace/constants.py +18 -4
  3. monocle_apptrace/exporters/aws/s3_exporter.py +158 -0
  4. monocle_apptrace/exporters/azure/blob_exporter.py +125 -0
  5. monocle_apptrace/exporters/base_exporter.py +48 -0
  6. monocle_apptrace/exporters/exporter_processor.py +19 -0
  7. monocle_apptrace/exporters/monocle_exporters.py +27 -0
  8. monocle_apptrace/exporters/okahu/okahu_exporter.py +115 -0
  9. monocle_apptrace/haystack/__init__.py +4 -4
  10. monocle_apptrace/haystack/wrap_pipeline.py +3 -2
  11. monocle_apptrace/instrumentor.py +14 -17
  12. monocle_apptrace/langchain/__init__.py +6 -3
  13. monocle_apptrace/llamaindex/__init__.py +8 -7
  14. monocle_apptrace/message_processing.py +80 -0
  15. monocle_apptrace/metamodel/entities/README.md +33 -10
  16. monocle_apptrace/metamodel/entities/app_hosting_types.json +29 -0
  17. monocle_apptrace/metamodel/entities/entities.json +49 -0
  18. monocle_apptrace/metamodel/entities/inference_types.json +33 -0
  19. monocle_apptrace/metamodel/entities/model_types.json +41 -0
  20. monocle_apptrace/metamodel/entities/vector_store_types.json +25 -0
  21. monocle_apptrace/metamodel/entities/workflow_types.json +22 -0
  22. monocle_apptrace/metamodel/maps/attributes/inference/botocore_entities.json +27 -0
  23. monocle_apptrace/metamodel/maps/attributes/inference/haystack_entities.json +57 -0
  24. monocle_apptrace/metamodel/maps/attributes/inference/langchain_entities.json +57 -0
  25. monocle_apptrace/metamodel/maps/attributes/inference/llamaindex_entities.json +57 -0
  26. monocle_apptrace/metamodel/maps/attributes/retrieval/haystack_entities.json +31 -0
  27. monocle_apptrace/metamodel/maps/attributes/retrieval/langchain_entities.json +31 -0
  28. monocle_apptrace/metamodel/maps/attributes/retrieval/llamaindex_entities.json +31 -0
  29. monocle_apptrace/metamodel/maps/botocore_methods.json +13 -0
  30. monocle_apptrace/metamodel/maps/haystack_methods.json +26 -6
  31. monocle_apptrace/metamodel/maps/{lang_chain_methods.json → langchain_methods.json} +31 -8
  32. monocle_apptrace/metamodel/maps/{llama_index_methods.json → llamaindex_methods.json} +30 -8
  33. monocle_apptrace/metamodel/spans/span_example.json +1 -1
  34. monocle_apptrace/metamodel/spans/span_types.json +16 -0
  35. monocle_apptrace/utils.py +179 -20
  36. monocle_apptrace/wrap_common.py +350 -150
  37. monocle_apptrace/wrapper.py +5 -2
  38. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0b1.dist-info}/METADATA +8 -3
  39. monocle_apptrace-0.3.0b1.dist-info/RECORD +48 -0
  40. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0b1.dist-info}/WHEEL +1 -1
  41. monocle_apptrace/haystack/wrap_node.py +0 -27
  42. monocle_apptrace/haystack/wrap_openai.py +0 -44
  43. monocle_apptrace/metamodel/entities/entity_types.json +0 -157
  44. monocle_apptrace/metamodel/entities/entity_types.py +0 -51
  45. monocle_apptrace-0.1.1.dist-info/RECORD +0 -29
  46. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0b1.dist-info}/licenses/LICENSE +0 -0
  47. {monocle_apptrace-0.1.1.dist-info → monocle_apptrace-0.3.0b1.dist-info}/licenses/NOTICE +0 -0
monocle_apptrace/botocore/__init__.py
@@ -0,0 +1,9 @@
+ import os
+ from monocle_apptrace.utils import get_wrapper_methods_config
+
+ parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+ BOTOCORE_METHODS = get_wrapper_methods_config(
+     wrapper_methods_config_path=os.path.join(parent_dir, 'metamodel', 'maps', 'botocore_methods.json'),
+     attributes_config_base_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+
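get_wrapper_methods_config (added to utils.py in this release, summarized but not shown in full in this diff) loads a method-map JSON and resolves each entry's attribute configuration. The schema of botocore_methods.json is likewise not visible here; going by the user_method_list construction in instrumentor.py further down, a resolved entry plausibly carries fields along these lines — every value below is an illustrative placeholder, not the actual map:

    # Hypothetical shape of one resolved wrapper-method entry (placeholders).
    example_entry = {
        "package": "botocore.client",    # module to patch (placeholder)
        "object": "ClientCreator",       # class owning the method (placeholder)
        "method": "create_client",       # method to wrap (placeholder)
        "span_name": "botocore_client",  # span name to emit (placeholder)
        "wrapper": None,                 # wrapper callable, resolved by the loader
        "output_processor": None,        # attribute map from metamodel/maps/attributes
    }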
monocle_apptrace/constants.py
@@ -3,20 +3,34 @@ AZURE_ML_ENDPOINT_ENV_NAME = "AZUREML_ENTRY_SCRIPT"
  AZURE_FUNCTION_WORKER_ENV_NAME = "FUNCTIONS_WORKER_RUNTIME"
  AZURE_APP_SERVICE_ENV_NAME = "WEBSITE_SITE_NAME"
  AWS_LAMBDA_ENV_NAME = "AWS_LAMBDA_RUNTIME_API"
+ GITHUB_CODESPACE_ENV_NAME = "CODESPACES"

+ AWS_LAMBDA_FUNCTION_IDENTIFIER_ENV_NAME = "AWS_LAMBDA_FUNCTION_NAME"
+ AZURE_FUNCTION_IDENTIFIER_ENV_NAME = "WEBSITE_SITE_NAME"
+ AZURE_APP_SERVICE_IDENTIFIER_ENV_NAME = "WEBSITE_DEPLOYMENT_ID"
+ GITHUB_CODESPACE_IDENTIFIER_ENV_NAME = "GITHUB_REPOSITORY"
  # Azure naming reference can be found here
  # https://learn.microsoft.com/en-us/azure/cloud-adoption-framework/ready/azure-best-practices/resource-abbreviations
  AZURE_FUNCTION_NAME = "azure.func"
  AZURE_APP_SERVICE_NAME = "azure.asp"
  AZURE_ML_SERVICE_NAME = "azure.mlw"
  AWS_LAMBDA_SERVICE_NAME = "aws.lambda"
+ GITHUB_CODESPACE_SERVICE_NAME = "github_codespace"

- azure_service_map = {
+ # Env variables to identify infra service type
+ service_type_map = {
      AZURE_ML_ENDPOINT_ENV_NAME: AZURE_ML_SERVICE_NAME,
      AZURE_APP_SERVICE_ENV_NAME: AZURE_APP_SERVICE_NAME,
-     AZURE_FUNCTION_WORKER_ENV_NAME: AZURE_FUNCTION_NAME
+     AZURE_FUNCTION_WORKER_ENV_NAME: AZURE_FUNCTION_NAME,
+     AWS_LAMBDA_ENV_NAME: AWS_LAMBDA_SERVICE_NAME,
+     GITHUB_CODESPACE_ENV_NAME: GITHUB_CODESPACE_SERVICE_NAME
  }

- aws_service_map = {
-     AWS_LAMBDA_ENV_NAME: AWS_LAMBDA_SERVICE_NAME
+ # Env variables to identify infra service name
+ service_name_map = {
+     AZURE_APP_SERVICE_NAME: AZURE_APP_SERVICE_IDENTIFIER_ENV_NAME,
+     AZURE_FUNCTION_NAME: AZURE_FUNCTION_IDENTIFIER_ENV_NAME,
+     AZURE_ML_SERVICE_NAME: AZURE_ML_ENDPOINT_ENV_NAME,
+     AWS_LAMBDA_SERVICE_NAME: AWS_LAMBDA_FUNCTION_IDENTIFIER_ENV_NAME,
+     GITHUB_CODESPACE_SERVICE_NAME: GITHUB_CODESPACE_IDENTIFIER_ENV_NAME
  }
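The detection code that consumes these maps lives in utils.py and wrap_common.py, which this diff only summarizes. As a minimal sketch, assuming the detector simply scans the process environment, the two maps compose like this (illustrative helper, not the package's actual implementation):

    import os
    from monocle_apptrace.constants import service_type_map, service_name_map

    def detect_infra_service():
        # The first marker env variable that is set determines the service
        # type; service_name_map then names the env variable that holds the
        # concrete instance identifier.
        for marker_env, service_type in service_type_map.items():
            if marker_env in os.environ:
                name_env = service_name_map.get(service_type)
                return service_type, os.environ.get(name_env) if name_env else None
        return None, None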
monocle_apptrace/exporters/aws/s3_exporter.py
@@ -0,0 +1,158 @@
+ import os
+ import time
+ import random
+ import datetime
+ import logging
+ import asyncio
+ import boto3
+ from botocore.exceptions import ClientError
+ from botocore.exceptions import (
+     BotoCoreError,
+     ConnectionClosedError,
+     ConnectTimeoutError,
+     EndpointConnectionError,
+     ReadTimeoutError,
+ )
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+ from monocle_apptrace.exporters.base_exporter import SpanExporterBase
+ from typing import Sequence
+ import json
+ logger = logging.getLogger(__name__)
+
+ class S3SpanExporter(SpanExporterBase):
+     def __init__(self, bucket_name=None, region_name=None):
+         super().__init__()
+         # Use environment variables if credentials are not provided
+         DEFAULT_FILE_PREFIX = "monocle_trace_"
+         DEFAULT_TIME_FORMAT = "%Y-%m-%d_%H.%M.%S"
+         self.max_batch_size = 500
+         self.export_interval = 1
+         self.s3_client = boto3.client(
+             's3',
+             aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
+             aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
+             region_name=region_name,
+         )
+         self.bucket_name = bucket_name or os.getenv('MONOCLE_S3_BUCKET_NAME', 'default-bucket')
+         self.file_prefix = DEFAULT_FILE_PREFIX
+         self.time_format = DEFAULT_TIME_FORMAT
+         self.export_queue = []
+         self.last_export_time = time.time()
+
+         # Check if bucket exists or create it
+         if not self.__bucket_exists(self.bucket_name):
+             try:
+                 self.s3_client.create_bucket(
+                     Bucket=self.bucket_name,
+                     CreateBucketConfiguration={'LocationConstraint': region_name}
+                 )
+                 logger.info(f"Bucket {self.bucket_name} created successfully.")
+             except ClientError as e:
+                 logger.error(f"Error creating bucket {self.bucket_name}: {e}")
+                 raise e
+
+     def __bucket_exists(self, bucket_name):
+         try:
+             # Check if the bucket exists by calling head_bucket
+             self.s3_client.head_bucket(Bucket=bucket_name)
+             return True
+         except ClientError as e:
+             error_code = e.response['Error']['Code']
+             if error_code == '404':
+                 # Bucket not found
+                 logger.error(f"Bucket {bucket_name} does not exist (404).")
+                 return False
+             elif error_code == '403':
+                 # Permission denied
+                 logger.error(f"Access to bucket {bucket_name} is forbidden (403).")
+                 raise PermissionError(f"Access to bucket {bucket_name} is forbidden.")
+             elif error_code == '400':
+                 # Bad request or malformed input
+                 logger.error(f"Bad request for bucket {bucket_name} (400).")
+                 raise ValueError(f"Bad request for bucket {bucket_name}.")
+             else:
+                 # Other client errors
+                 logger.error(f"Unexpected error when accessing bucket {bucket_name}: {e}")
+                 raise e
+         except TypeError as e:
+             # Handle TypeError separately
+             logger.error(f"Type error while checking bucket existence: {e}")
+             raise e
+
+     def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         """Synchronous export method that internally handles async logic."""
+         try:
+             # Run the asynchronous export logic in an event loop
+             asyncio.run(self.__export_async(spans))
+             return SpanExportResult.SUCCESS
+         except Exception as e:
+             logger.error(f"Error exporting spans: {e}")
+             return SpanExportResult.FAILURE
+
+     async def __export_async(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         try:
+             # Add spans to the export queue
+             for span in spans:
+                 self.export_queue.append(span)
+                 # If the queue reaches MAX_BATCH_SIZE, export the spans
+                 if len(self.export_queue) >= self.max_batch_size:
+                     await self.__export_spans()
+
+             # Check if it's time to force a flush
+             current_time = time.time()
+             if current_time - self.last_export_time >= self.export_interval:
+                 await self.__export_spans()  # Export spans if time interval has passed
+                 self.last_export_time = current_time  # Reset the last export time
+
+             return SpanExportResult.SUCCESS
+         except Exception as e:
+             logger.error(f"Error exporting spans: {e}")
+             return SpanExportResult.FAILURE
+
+     def __serialize_spans(self, spans: Sequence[ReadableSpan]) -> str:
+         try:
+             # Serialize spans to JSON or any other format you prefer
+             valid_json_list = []
+             for span in spans:
+                 try:
+                     valid_json_list.append(span.to_json(indent=0).replace("\n", ""))
+                 except json.JSONDecodeError as e:
+                     logger.warning(f"Invalid JSON format in span data: {span.context.span_id}. Error: {e}")
+                     continue
+             ndjson_data = "\n".join(valid_json_list) + "\n"
+             return ndjson_data
+         except Exception as e:
+             logger.warning(f"Error serializing spans: {e}")
+
+
+     async def __export_spans(self):
+         if len(self.export_queue) == 0:
+             return
+
+         # Take a batch of spans from the queue
+         batch_to_export = self.export_queue[:self.max_batch_size]
+         serialized_data = self.__serialize_spans(batch_to_export)
+         self.export_queue = self.export_queue[self.max_batch_size:]
+         try:
+             self.__upload_to_s3(serialized_data)
+         except Exception as e:
+             logger.error(f"Failed to upload span batch: {e}")
+
+     @SpanExporterBase.retry_with_backoff(exceptions=(EndpointConnectionError, ConnectionClosedError, ReadTimeoutError, ConnectTimeoutError))
+     def __upload_to_s3(self, span_data_batch: str):
+         current_time = datetime.datetime.now().strftime(self.time_format)
+         file_name = f"{self.file_prefix}{current_time}.ndjson"
+         self.s3_client.put_object(
+             Bucket=self.bucket_name,
+             Key=file_name,
+             Body=span_data_batch
+         )
+         logger.info(f"Span batch uploaded to AWS S3 as {file_name}.")
+
+     async def force_flush(self, timeout_millis: int = 30000) -> bool:
+         await self.__export_spans()  # Export any remaining spans in the queue
+         return True
+
+     def shutdown(self) -> None:
+         logger.info("S3SpanExporter has been shut down.")
monocle_apptrace/exporters/azure/blob_exporter.py
@@ -0,0 +1,125 @@
+ import os
+ import time
+ import random
+ import datetime
+ import logging
+ import asyncio
+ from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
+ from azure.core.exceptions import ResourceNotFoundError, ClientAuthenticationError, ServiceRequestError
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+ from typing import Sequence
+ from monocle_apptrace.exporters.base_exporter import SpanExporterBase
+ import json
+ logger = logging.getLogger(__name__)
+
+ class AzureBlobSpanExporter(SpanExporterBase):
+     def __init__(self, connection_string=None, container_name=None):
+         super().__init__()
+         DEFAULT_FILE_PREFIX = "monocle_trace_"
+         DEFAULT_TIME_FORMAT = "%Y-%m-%d_%H.%M.%S"
+         self.max_batch_size = 500
+         self.export_interval = 1
+         # Use default values if none are provided
+         if not connection_string:
+             connection_string = os.getenv('MONOCLE_BLOB_CONNECTION_STRING')
+         if not connection_string:
+             raise ValueError("Azure Storage connection string is not provided or set in environment variables.")
+
+         if not container_name:
+             container_name = os.getenv('MONOCLE_BLOB_CONTAINER_NAME', 'default-container')
+
+         self.blob_service_client = BlobServiceClient.from_connection_string(connection_string)
+         self.container_name = container_name
+         self.file_prefix = DEFAULT_FILE_PREFIX
+         self.time_format = DEFAULT_TIME_FORMAT
+
+         # Check if container exists or create it
+         if not self.__container_exists(container_name):
+             try:
+                 self.blob_service_client.create_container(container_name)
+                 logger.info(f"Container {container_name} created successfully.")
+             except Exception as e:
+                 logger.error(f"Error creating container {container_name}: {e}")
+                 raise e
+
+     def __container_exists(self, container_name):
+         try:
+             container_client = self.blob_service_client.get_container_client(container_name)
+             container_client.get_container_properties()
+             return True
+         except ResourceNotFoundError:
+             logger.error(f"Container {container_name} not found (404).")
+             return False
+         except ClientAuthenticationError:
+             logger.error(f"Access to container {container_name} is forbidden (403).")
+             raise PermissionError(f"Access to container {container_name} is forbidden.")
+         except Exception as e:
+             logger.error(f"Unexpected error when checking if container {container_name} exists: {e}")
+             raise e
+
+     def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         """Synchronous export method that internally handles async logic."""
+         try:
+             # Run the asynchronous export logic in an event loop
+             asyncio.run(self._export_async(spans))
+             return SpanExportResult.SUCCESS
+         except Exception as e:
+             logger.error(f"Error exporting spans: {e}")
+             return SpanExportResult.FAILURE
+
+     async def _export_async(self, spans: Sequence[ReadableSpan]):
+         """The actual async export logic is run here."""
+         # Add spans to the export queue
+         for span in spans:
+             self.export_queue.append(span)
+             if len(self.export_queue) >= self.max_batch_size:
+                 await self.__export_spans()
+
+         # Force a flush if the interval has passed
+         current_time = time.time()
+         if current_time - self.last_export_time >= self.export_interval:
+             await self.__export_spans()
+             self.last_export_time = current_time
+
+     def __serialize_spans(self, spans: Sequence[ReadableSpan]) -> str:
+         try:
+             valid_json_list = []
+             for span in spans:
+                 try:
+                     valid_json_list.append(span.to_json(indent=0).replace("\n", ""))
+                 except json.JSONDecodeError as e:
+                     logger.warning(f"Invalid JSON format in span data: {span.context.span_id}. Error: {e}")
+                     continue
+
+             ndjson_data = "\n".join(valid_json_list) + "\n"
+             return ndjson_data
+         except Exception as e:
+             logger.warning(f"Error serializing spans: {e}")
+
+     async def __export_spans(self):
+         if len(self.export_queue) == 0:
+             return
+
+         batch_to_export = self.export_queue[:self.max_batch_size]
+         serialized_data = self.__serialize_spans(batch_to_export)
+         self.export_queue = self.export_queue[self.max_batch_size:]
+         try:
+             self.__upload_to_blob(serialized_data)
+         except Exception as e:
+             logger.error(f"Failed to upload span batch: {e}")
+
+     @SpanExporterBase.retry_with_backoff(exceptions=(ResourceNotFoundError, ClientAuthenticationError, ServiceRequestError))
+     def __upload_to_blob(self, span_data_batch: str):
+         current_time = datetime.datetime.now().strftime(self.time_format)
+         file_name = f"{self.file_prefix}{current_time}.ndjson"
+         blob_client = self.blob_service_client.get_blob_client(container=self.container_name, blob=file_name)
+         blob_client.upload_blob(span_data_batch, overwrite=True)
+         logger.info(f"Span batch uploaded to Azure Blob Storage as {file_name}.")
+
+     async def force_flush(self, timeout_millis: int = 30000) -> bool:
+         await self.__export_spans()
+         return True
+
+     def shutdown(self) -> None:
+         logger.info("AzureBlobSpanExporter has been shut down.")
monocle_apptrace/exporters/base_exporter.py
@@ -0,0 +1,48 @@
+ import time
+ import random
+ import logging
+ from abc import ABC, abstractmethod
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+ from typing import Sequence
+ import asyncio
+
+ logger = logging.getLogger(__name__)
+
+ class SpanExporterBase(ABC):
+     def __init__(self):
+         self.backoff_factor = 2
+         self.max_retries = 10
+         self.export_queue = []
+         self.last_export_time = time.time()
+
+     @abstractmethod
+     async def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         pass
+
+     @abstractmethod
+     async def force_flush(self, timeout_millis: int = 30000) -> bool:
+         pass
+
+     def shutdown(self) -> None:
+         pass
+
+     @staticmethod
+     def retry_with_backoff(retries=3, backoff_in_seconds=1, max_backoff_in_seconds=32, exceptions=(Exception,)):
+         def decorator(func):
+             def wrapper(*args, **kwargs):
+                 attempt = 0
+                 while attempt < retries:
+                     try:
+                         return func(*args, **kwargs)
+                     except exceptions as e:
+                         attempt += 1
+                         sleep_time = min(max_backoff_in_seconds, backoff_in_seconds * (2 ** (attempt - 1)))
+                         sleep_time = sleep_time * (1 + random.uniform(-0.1, 0.1))  # Add jitter
+                         logger.warning(f"Network connectivity error, Attempt {attempt} failed: {e}. Retrying in {sleep_time:.2f} seconds...")
+                         time.sleep(sleep_time)
+                 raise Exception(f"Failed after {retries} attempts")
+
+             return wrapper
+
+         return decorator
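Note that retry_with_backoff is a parameterized decorator, so it is applied with a call even when only exceptions is overridden, as both exporters above do. A small self-contained example of the retry behavior (the flaky function is illustrative):

    from monocle_apptrace.exporters.base_exporter import SpanExporterBase

    calls = {"count": 0}

    @SpanExporterBase.retry_with_backoff(retries=3, backoff_in_seconds=0.1,
                                         exceptions=(ConnectionError,))
    def flaky_upload():
        # Fails twice, then succeeds; each failure sleeps with jittered
        # exponential backoff before the next attempt.
        calls["count"] += 1
        if calls["count"] < 3:
            raise ConnectionError("transient network error")
        return "ok"

    print(flaky_upload())  # "ok" after two retried failures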
monocle_apptrace/exporters/exporter_processor.py
@@ -0,0 +1,19 @@
+ from abc import ABC, abstractmethod
+ import logging
+ from typing import Callable
+
+ logger = logging.getLogger(__name__)
+
+ class ExportTaskProcessor(ABC):
+
+     @abstractmethod
+     def start(self):
+         return
+
+     @abstractmethod
+     def stop(self):
+         return
+
+     @abstractmethod
+     def queue_task(self, async_task: Callable[[Callable, any], any] = None, args: any = None):
+         return
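This diff does not ship a concrete ExportTaskProcessor; a minimal sketch of a conforming subclass, assuming a single background worker thread is acceptable (illustrative, not part of the package):

    import queue
    import threading
    from monocle_apptrace.exporters.exporter_processor import ExportTaskProcessor

    class ThreadedTaskProcessor(ExportTaskProcessor):
        def start(self):
            self._queue = queue.Queue()
            self._worker = threading.Thread(target=self._run, daemon=True)
            self._worker.start()

        def stop(self):
            self._queue.put(None)  # sentinel that ends the worker loop
            self._worker.join()

        def queue_task(self, async_task=None, args=None):
            self._queue.put((async_task, args))

        def _run(self):
            while True:
                item = self._queue.get()
                if item is None:
                    break
                task, args = item
                task(args)  # e.g. send_spans_to_okahu(span_list); see okahu_exporter.py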
monocle_apptrace/exporters/monocle_exporters.py
@@ -0,0 +1,27 @@
+ from typing import Dict, Any
+ import os, warnings
+ from importlib import import_module
+ from opentelemetry.sdk.trace.export import SpanExporter, ConsoleSpanExporter
+ from monocle_apptrace.exporters.file_exporter import FileSpanExporter
+
+ monocle_exporters: Dict[str, Any] = {
+     "s3": {"module": "monocle_apptrace.exporters.aws.s3_exporter", "class": "S3SpanExporter"},
+     "blob": {"module": "monocle_apptrace.exporters.azure.blob_exporter", "class": "AzureBlobSpanExporter"},
+     "okahu": {"module": "monocle_apptrace.exporters.okahu.okahu_exporter", "class": "OkahuSpanExporter"},
+     "file": {"module": "monocle_apptrace.exporters.file_exporter", "class": "FileSpanExporter"}
+ }
+
+ def get_monocle_exporter() -> SpanExporter:
+     exporter_name = os.environ.get("MONOCLE_EXPORTER", "file")
+     try:
+         exporter_class_path = monocle_exporters[exporter_name]
+     except Exception as ex:
+         warnings.warn(f"Unsupported Monocle span exporter setting {exporter_name}, using default FileSpanExporter.")
+         return FileSpanExporter()
+     try:
+         exporter_module = import_module(exporter_class_path.get("module"))
+         exporter_class = getattr(exporter_module, exporter_class_path.get("class"))
+         return exporter_class()
+     except Exception as ex:
+         warnings.warn(f"Unable to set Monocle span exporter to {exporter_name}, error {ex}. Using ConsoleSpanExporter")
+         return ConsoleSpanExporter()
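Exporter selection is driven entirely by the MONOCLE_EXPORTER environment variable: unknown names fall back to FileSpanExporter and construction failures fall back to ConsoleSpanExporter, so get_monocle_exporter() always returns something usable. For example:

    import os
    from monocle_apptrace.exporters.monocle_exporters import get_monocle_exporter

    os.environ["MONOCLE_EXPORTER"] = "s3"  # or "blob", "okahu", "file"
    exporter = get_monocle_exporter()      # S3SpanExporter, if construction succeeds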
monocle_apptrace/exporters/okahu/okahu_exporter.py
@@ -0,0 +1,115 @@
+ import json
+ import logging
+ import os
+ from typing import Callable, Optional, Sequence
+ import requests
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult, ConsoleSpanExporter
+ from requests.exceptions import ReadTimeout
+
+ from monocle_apptrace.exporters.exporter_processor import ExportTaskProcessor
+
+ REQUESTS_SUCCESS_STATUS_CODES = (200, 202)
+ OKAHU_PROD_INGEST_ENDPOINT = "https://ingest.okahu.co/api/v1/trace/ingest"
+
+ logger = logging.getLogger(__name__)
+
+
+ class OkahuSpanExporter(SpanExporter):
+     def __init__(
+         self,
+         endpoint: Optional[str] = None,
+         timeout: Optional[int] = None,
+         session: Optional[requests.Session] = None,
+         task_processor: ExportTaskProcessor = None
+     ):
+         """Okahu exporter."""
+         okahu_endpoint: str = os.environ.get("OKAHU_INGESTION_ENDPOINT", OKAHU_PROD_INGEST_ENDPOINT)
+         self.endpoint = endpoint or okahu_endpoint
+         api_key: str = os.environ.get("OKAHU_API_KEY")
+         self._closed = False
+         if not api_key:
+             raise ValueError("OKAHU_API_KEY not set.")
+         self.timeout = timeout or 15
+         self.session = session or requests.Session()
+         self.session.headers.update(
+             {"Content-Type": "application/json", "x-api-key": api_key}
+         )
+
+         self.task_processor = task_processor or None
+         if task_processor is not None:
+             task_processor.start()
+
+     def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
+         # After the call to Shutdown subsequent calls to Export are
+         # not allowed and should return a Failure result
+         if not hasattr(self, 'session'):
+             return self.exporter.export(spans)
+
+         if self._closed:
+             logger.warning("Exporter already shutdown, ignoring batch")
+             return SpanExportResult.FAILURE
+         if len(spans) == 0:
+             return
+
+         span_list = {
+             "batch": []
+         }
+
+         # append all the span objects to the batch
+         for span in spans:
+             # create an object from the serialized span
+             obj = json.loads(span.to_json())
+             if obj["parent_id"] is None:
+                 obj["parent_id"] = "None"
+             else:
+                 obj["parent_id"] = remove_0x_from_start(obj["parent_id"])
+             if obj["context"] is not None:
+                 obj["context"]["trace_id"] = remove_0x_from_start(obj["context"]["trace_id"])
+                 obj["context"]["span_id"] = remove_0x_from_start(obj["context"]["span_id"])
+             span_list["batch"].append(obj)
+
+         def send_spans_to_okahu(span_list_local=None):
+             try:
+                 result = self.session.post(
+                     url=self.endpoint,
+                     data=json.dumps(span_list_local),
+                     timeout=self.timeout,
+                 )
+                 if result.status_code not in REQUESTS_SUCCESS_STATUS_CODES:
+                     logger.error(
+                         "Traces cannot be uploaded; status code: %s, message %s",
+                         result.status_code,
+                         result.text,
+                     )
+                     return SpanExportResult.FAILURE
+                 logger.warning("spans successfully exported to okahu")
+                 return SpanExportResult.SUCCESS
+             except ReadTimeout as e:
+                 logger.warning("Trace export timed out: %s", str(e))
+                 return SpanExportResult.FAILURE
+
+         # if an async task processor is present, push the request to the async task
+
+         if self.task_processor is not None and callable(self.task_processor.queue_task):
+             self.task_processor.queue_task(send_spans_to_okahu, span_list)
+             return SpanExportResult.SUCCESS
+         return send_spans_to_okahu(span_list)
+
+     def shutdown(self) -> None:
+         if self._closed:
+             logger.warning("Exporter already shutdown, ignoring call")
+             return
+         if hasattr(self, 'session'):
+             self.session.close()
+         self._closed = True
+
+     def force_flush(self, timeout_millis: int = 30000) -> bool:
+         return True
+
+
+ # only removes the first occurrence of 0x from the string
+ def remove_0x_from_start(my_str: str):
+     if my_str.startswith("0x"):
+         return my_str.replace("0x", "", 1)
+     return my_str
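The ID normalization above strips only a leading 0x from the hex trace/span IDs as ReadableSpan.to_json renders them; a 0x elsewhere in the string is left alone:

    from monocle_apptrace.exporters.okahu.okahu_exporter import remove_0x_from_start

    assert remove_0x_from_start("0xabc123") == "abc123"
    assert remove_0x_from_start("abc0x123") == "abc0x123"  # no leading 0x, unchanged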
monocle_apptrace/haystack/__init__.py
@@ -1,9 +1,9 @@
-
  import os
  import logging
- from monocle_apptrace.utils import load_wrapper_from_config
+ from monocle_apptrace.utils import get_wrapper_methods_config

  logger = logging.getLogger(__name__)
  parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
- HAYSTACK_METHODS = load_wrapper_from_config(
-     os.path.join(parent_dir, 'metamodel', 'maps', 'haystack_methods.json'))
+ HAYSTACK_METHODS = get_wrapper_methods_config(
+     wrapper_methods_config_path=os.path.join(parent_dir, 'metamodel', 'maps', 'haystack_methods.json'),
+     attributes_config_base_path=os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
monocle_apptrace/haystack/wrap_pipeline.py
@@ -4,8 +4,8 @@ from opentelemetry.context import attach, set_value
  from opentelemetry.instrumentation.utils import (
      _SUPPRESS_INSTRUMENTATION_KEY,
  )
- from monocle_apptrace.wrap_common import PROMPT_INPUT_KEY, PROMPT_OUTPUT_KEY, WORKFLOW_TYPE_MAP, with_tracer_wrapper
- from monocle_apptrace.utils import set_embedding_model
+ from monocle_apptrace.wrap_common import PROMPT_INPUT_KEY, PROMPT_OUTPUT_KEY, WORKFLOW_TYPE_MAP, with_tracer_wrapper, DATA_INPUT_KEY
+ from monocle_apptrace.utils import set_embedding_model, set_attribute

  logger = logging.getLogger(__name__)

@@ -20,6 +20,7 @@ def wrap(tracer, to_wrap, wrapped, instance, args, kwargs):
      workflow_input = get_workflow_input(args, inputs)
      embedding_model = get_embedding_model(instance)
      set_embedding_model(embedding_model)
+     set_attribute(DATA_INPUT_KEY, workflow_input)


      with tracer.start_as_current_span(f"{name}.workflow") as span:
monocle_apptrace/instrumentor.py
@@ -1,7 +1,5 @@
-
-
- import logging
- from typing import Collection,List
+ import logging, os
+ from typing import Collection, List
  from wrapt import wrap_function_wrapper
  from opentelemetry.trace import get_tracer
  from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
@@ -11,17 +9,16 @@ from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanProcessor
  from opentelemetry.sdk.resources import SERVICE_NAME, Resource
  from opentelemetry import trace
  from opentelemetry.context import get_value, attach, set_value
+ from monocle_apptrace.utils import process_wrapper_method_config
  from monocle_apptrace.wrap_common import SESSION_PROPERTIES_KEY
  from monocle_apptrace.wrapper import INBUILT_METHODS_LIST, WrapperMethod
- from monocle_apptrace.exporters.file_exporter import FileSpanExporter
-
+ from monocle_apptrace.exporters.monocle_exporters import get_monocle_exporter

  logger = logging.getLogger(__name__)

  _instruments = ()

  class MonocleInstrumentor(BaseInstrumentor):
-
      workflow_name: str = ""
      user_wrapper_methods: list[WrapperMethod] = []
      instrumented_method_list: list[object] = []
@@ -36,8 +33,8 @@ class MonocleInstrumentor(BaseInstrumentor):
          return _instruments

      def _instrument(self, **kwargs):
-         tracer_provider = kwargs.get("tracer_provider")
-         tracer = get_tracer(instrumenting_module_name= __name__, tracer_provider= tracer_provider)
+         tracer_provider: TracerProvider = kwargs.get("tracer_provider")
+         tracer = get_tracer(instrumenting_module_name="monocle_apptrace", tracer_provider=tracer_provider)

          user_method_list = [
              {
@@ -46,8 +43,9 @@ class MonocleInstrumentor(BaseInstrumentor):
                  "method": method.method,
                  "span_name": method.span_name,
                  "wrapper": method.wrapper,
+                 "output_processor": method.output_processor
              } for method in self.user_wrapper_methods]
-
+         process_wrapper_method_config(user_method_list)
          final_method_list = user_method_list + INBUILT_METHODS_LIST

          for wrapped_method in final_method_list:
@@ -69,7 +67,6 @@
                      object:{wrap_object},
                      method:{wrap_method}""")

-
      def _uninstrument(self, **kwargs):
          for wrapped_method in self.instrumented_method_list:
              try:
@@ -93,8 +90,9 @@ def setup_monocle_telemetry(
      resource = Resource(attributes={
          SERVICE_NAME: workflow_name
      })
-     span_processors = span_processors or [BatchSpanProcessor(FileSpanExporter())]
+     span_processors = span_processors or [BatchSpanProcessor(get_monocle_exporter())]
      trace_provider = TracerProvider(resource=resource)
+     attach(set_value("workflow_name", workflow_name))
      tracer_provider_default = trace.get_tracer_provider()
      provider_type = type(tracer_provider_default).__name__
      is_proxy_provider = "Proxy" in provider_type
@@ -102,15 +100,14 @@
          processor.on_start = on_processor_start
          if not is_proxy_provider:
              tracer_provider_default.add_span_processor(processor)
-         else :
+         else:
              trace_provider.add_span_processor(processor)
-     if is_proxy_provider :
+     if is_proxy_provider:
          trace.set_tracer_provider(trace_provider)
      instrumentor = MonocleInstrumentor(user_wrapper_methods=wrapper_methods or [])
      # instrumentor.app_name = workflow_name
      if not instrumentor.is_instrumented_by_opentelemetry:
-         instrumentor.instrument()
-
+         instrumentor.instrument(trace_provider=trace_provider)

  def on_processor_start(span: Span, parent_context):
      context_properties = get_value(SESSION_PROPERTIES_KEY)
@@ -121,4 +118,4 @@ def on_processor_start(span: Span, parent_context):
      )

  def set_context_properties(properties: dict) -> None:
-     attach(set_value(SESSION_PROPERTIES_KEY, properties))
+     attach(set_value(SESSION_PROPERTIES_KEY, properties))
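Taken together, the instrumentor changes mean a default setup no longer hard-codes the file exporter. A minimal sketch of initializing tracing after this release, assuming the signature shown above: with no span_processors argument, the processor is built from get_monocle_exporter(), so MONOCLE_EXPORTER picks the destination.

    import os
    from monocle_apptrace.instrumentor import setup_monocle_telemetry

    os.environ["MONOCLE_EXPORTER"] = "file"  # or "s3", "blob", "okahu"
    setup_monocle_telemetry(workflow_name="my_workflow")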