monocle-apptrace 0.3.1b1__py3-none-any.whl → 0.4.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of monocle-apptrace might be problematic.

Files changed (46)
  1. monocle_apptrace/exporters/aws/s3_exporter.py +3 -1
  2. monocle_apptrace/exporters/azure/blob_exporter.py +2 -2
  3. monocle_apptrace/exporters/base_exporter.py +10 -4
  4. monocle_apptrace/exporters/file_exporter.py +19 -4
  5. monocle_apptrace/exporters/monocle_exporters.py +8 -5
  6. monocle_apptrace/exporters/okahu/okahu_exporter.py +5 -2
  7. monocle_apptrace/instrumentation/common/__init__.py +1 -1
  8. monocle_apptrace/instrumentation/common/constants.py +12 -5
  9. monocle_apptrace/instrumentation/common/instrumentor.py +44 -22
  10. monocle_apptrace/instrumentation/common/span_handler.py +100 -50
  11. monocle_apptrace/instrumentation/common/tracing.md +68 -0
  12. monocle_apptrace/instrumentation/common/utils.py +101 -63
  13. monocle_apptrace/instrumentation/common/wrapper.py +223 -48
  14. monocle_apptrace/instrumentation/common/wrapper_method.py +15 -7
  15. monocle_apptrace/instrumentation/metamodel/aiohttp/__init__.py +0 -0
  16. monocle_apptrace/instrumentation/metamodel/aiohttp/_helper.py +66 -0
  17. monocle_apptrace/instrumentation/metamodel/aiohttp/entities/http.py +51 -0
  18. monocle_apptrace/instrumentation/metamodel/aiohttp/methods.py +13 -0
  19. monocle_apptrace/instrumentation/metamodel/anthropic/methods.py +4 -2
  20. monocle_apptrace/instrumentation/metamodel/flask/_helper.py +50 -3
  21. monocle_apptrace/instrumentation/metamodel/flask/entities/http.py +48 -0
  22. monocle_apptrace/instrumentation/metamodel/flask/methods.py +10 -1
  23. monocle_apptrace/instrumentation/metamodel/haystack/_helper.py +17 -4
  24. monocle_apptrace/instrumentation/metamodel/haystack/entities/inference.py +5 -2
  25. monocle_apptrace/instrumentation/metamodel/haystack/methods.py +8 -4
  26. monocle_apptrace/instrumentation/metamodel/langchain/_helper.py +12 -4
  27. monocle_apptrace/instrumentation/metamodel/langchain/entities/inference.py +1 -1
  28. monocle_apptrace/instrumentation/metamodel/langchain/methods.py +6 -14
  29. monocle_apptrace/instrumentation/metamodel/llamaindex/_helper.py +13 -9
  30. monocle_apptrace/instrumentation/metamodel/llamaindex/entities/inference.py +1 -1
  31. monocle_apptrace/instrumentation/metamodel/llamaindex/methods.py +16 -15
  32. monocle_apptrace/instrumentation/metamodel/openai/_helper.py +10 -2
  33. monocle_apptrace/instrumentation/metamodel/openai/entities/inference.py +174 -26
  34. monocle_apptrace/instrumentation/metamodel/openai/methods.py +6 -8
  35. monocle_apptrace/instrumentation/metamodel/requests/_helper.py +31 -0
  36. monocle_apptrace/instrumentation/metamodel/requests/entities/http.py +51 -0
  37. monocle_apptrace/instrumentation/metamodel/requests/methods.py +2 -1
  38. monocle_apptrace/instrumentation/metamodel/teamsai/_helper.py +55 -5
  39. monocle_apptrace/instrumentation/metamodel/teamsai/entities/inference/actionplanner_output_processor.py +13 -33
  40. monocle_apptrace/instrumentation/metamodel/teamsai/entities/inference/teamsai_output_processor.py +24 -20
  41. monocle_apptrace/instrumentation/metamodel/teamsai/methods.py +42 -8
  42. {monocle_apptrace-0.3.1b1.dist-info → monocle_apptrace-0.4.0b2.dist-info}/METADATA +2 -1
  43. {monocle_apptrace-0.3.1b1.dist-info → monocle_apptrace-0.4.0b2.dist-info}/RECORD +46 -39
  44. {monocle_apptrace-0.3.1b1.dist-info → monocle_apptrace-0.4.0b2.dist-info}/WHEEL +0 -0
  45. {monocle_apptrace-0.3.1b1.dist-info → monocle_apptrace-0.4.0b2.dist-info}/licenses/LICENSE +0 -0
  46. {monocle_apptrace-0.3.1b1.dist-info → monocle_apptrace-0.4.0b2.dist-info}/licenses/NOTICE +0 -0
@@ -1,13 +1,22 @@
  from monocle_apptrace.instrumentation.common.wrapper import task_wrapper
+ from monocle_apptrace.instrumentation.metamodel.flask.entities.http import FLASK_HTTP_PROCESSOR, FLASK_RESPONSE_PROCESSOR

  FLASK_METHODS = [
  {
  "package": "flask.app",
  "object": "Flask",
  "method": "wsgi_app",
- "span_name": "Flask.wsgi_app",
  "wrapper_method": task_wrapper,
  "span_handler": "flask_handler",
+ "output_processor": FLASK_HTTP_PROCESSOR,
+ },
+ {
+ "package": "werkzeug.wrappers.response",
+ "object": "Response",
+ "method": "__call__",
+ "wrapper_method": task_wrapper,
+ "span_handler": "flask_response_handler",
+ "output_processor": FLASK_RESPONSE_PROCESSOR,
  "skip_span": True
  }
  ]
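Note on the Flask hunk above: the wsgi_app entry now carries FLASK_HTTP_PROCESSOR, and a second entry wraps werkzeug's Response.__call__ (with skip_span) so response details can be attached to the request span. As a rough, hedged illustration of what registering such an entry amounts to at runtime (the real wiring is done by monocle_apptrace's instrumentor; record_span below is a hypothetical stand-in for task_wrapper, shown with the wrapt library):

    # Illustration only - not monocle_apptrace's actual instrumentor code.
    # record_span is a hypothetical stand-in for task_wrapper; the real wrapper
    # also runs the configured span_handler and output_processor.
    import wrapt

    def record_span(wrapped, instance, args, kwargs):
        # a span would be started here, then the original WSGI call runs
        try:
            return wrapped(*args, **kwargs)
        finally:
            pass  # span close and HTTP attribute extraction would happen here

    # Roughly what the {"package": "flask.app", "object": "Flask",
    # "method": "wsgi_app", ...} entry asks the instrumentor to do:
    wrapt.wrap_function_wrapper("flask.app", "Flask.wsgi_app", record_span)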
@@ -1,4 +1,5 @@
  import logging
+
  from monocle_apptrace.instrumentation.common.utils import (
  Option,
  get_keys_as_tuple,
@@ -11,13 +12,19 @@ logger = logging.getLogger(__name__)
  def extract_messages(kwargs):
  try:
  messages = []
+ system_message, user_message = None,None
  if isinstance(kwargs, dict):
  if 'system_prompt' in kwargs and kwargs['system_prompt']:
  system_message = kwargs['system_prompt']
- messages.append({"system" : system_message})
  if 'prompt' in kwargs and kwargs['prompt']:
  user_message = extract_question_from_prompt(kwargs['prompt'])
+ if 'messages' in kwargs and len(kwargs['messages'])>1:
+ system_message = kwargs['messages'][0].text
+ user_message = kwargs['messages'][1].text
+ if system_message and user_message:
+ messages.append({"system": system_message})
  messages.append({"user": user_message})
+
  return [str(message) for message in messages]
  except Exception as e:
  logger.warning("Warning: Error occurred in extract_messages: %s", str(e))
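In the haystack helper above, extract_messages now also reads a chat-style 'messages' list and appends the system entry only when both a system and a user message were found. A minimal, hedged sketch of the expected behaviour (SimpleNamespace stands in for Haystack's chat message objects, which expose a .text attribute):

    # Not part of the package; shows what the new 'messages' branch should yield.
    from types import SimpleNamespace

    kwargs = {
        "messages": [
            SimpleNamespace(text="You are a helpful assistant."),   # taken as the system message
            SimpleNamespace(text="Summarize this wheel diff."),     # taken as the user message
        ]
    }
    # extract_messages(kwargs) is expected to return both entries as strings:
    # ["{'system': 'You are a helpful assistant.'}", "{'user': 'Summarize this wheel diff.'}"]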
@@ -52,6 +59,8 @@ def extract_assistant_message(response):
  reply = response["replies"][0]
  if hasattr(reply, 'content'):
  return [reply.content]
+ if hasattr(reply, 'text'):
+ return [reply.text]
  return [reply]
  except Exception as e:
  logger.warning("Warning: Error occurred in extract_assistant_message: %s", str(e))
@@ -108,15 +117,19 @@ def extract_embeding_model(instance)

  def update_span_from_llm_response(response, instance):
  meta_dict = {}
- if response is not None and isinstance(response, dict) and "meta" in response:
- token_usage = response["meta"][0]["usage"]
+ token_usage = None
+ if response is not None and isinstance(response, dict):
+ if "meta" in response:
+ token_usage = response["meta"][0]["usage"]
+ elif "replies" in response: # and "meta" in response["replies"][0]:
+ token_usage = response["replies"][0].meta["usage"]
  if token_usage is not None:
  temperature = instance.__dict__.get("temperature", None)
  meta_dict.update({"temperature": temperature})
  meta_dict.update(
  {"completion_tokens": token_usage.get("completion_tokens") or token_usage.get("output_tokens")})
  meta_dict.update({"prompt_tokens": token_usage.get("prompt_tokens") or token_usage.get("input_tokens")})
- meta_dict.update({"total_tokens": token_usage.get("total_tokens")})
+ meta_dict.update({"total_tokens": token_usage.get("total_tokens") or token_usage.get("completion_tokens")+token_usage.get("prompt_tokens")})
  return meta_dict
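The behavioural change in the usage block above is the total_tokens fallback: when the provider response carries no total, it is derived by summing the completion and prompt counts. A hedged worked example with made-up numbers:

    # Illustrative numbers only.
    token_usage = {"completion_tokens": 83, "prompt_tokens": 25}   # no total_tokens key

    total = token_usage.get("total_tokens") \
        or token_usage.get("completion_tokens") + token_usage.get("prompt_tokens")
    # total == 108, reported in the span's metadata event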
 
@@ -1,15 +1,18 @@
  from monocle_apptrace.instrumentation.metamodel.haystack import (
  _helper,
  )
+ from monocle_apptrace.instrumentation.common.utils import get_llm_type

  INFERENCE = {
- "type": "inference",
+ "type": "inference.framework",
  "attributes": [
  [
  {
  "_comment": "provider type ,name , deployment , inference_endpoint",
  "attribute": "type",
- "accessor": lambda arguments: 'inference.azure_openai'
+ # "accessor": lambda arguments: 'inference.azure_openai'
+ "accessor": lambda arguments: 'inference.' + (get_llm_type(arguments['instance']) or 'generic')
+
  },
  {
  "attribute": "provider_name",
@@ -7,7 +7,6 @@ HAYSTACK_METHODS = [
  "package": "haystack.components.retrievers.in_memory",
  "object": "InMemoryEmbeddingRetriever",
  "method": "run",
- "span_name": "haystack.retriever",
  "wrapper_method": task_wrapper,
  "output_processor": RETRIEVAL
  },
@@ -15,7 +14,6 @@ HAYSTACK_METHODS = [
  "package": "haystack_integrations.components.retrievers.opensearch",
  "object": "OpenSearchEmbeddingRetriever",
  "method": "run",
- "span_name": "haystack.retriever",
  "wrapper_method": task_wrapper,
  "output_processor": RETRIEVAL
  },
@@ -37,7 +35,13 @@
  "package": "haystack.core.pipeline.pipeline",
  "object": "Pipeline",
  "method": "run",
+ "wrapper_method": task_wrapper
+ },
+ {
+ "package": "haystack_integrations.components.generators.anthropic",
+ "object": "AnthropicChatGenerator",
+ "method": "run",
  "wrapper_method": task_wrapper,
- "span_type": "workflow"
- }
+ "output_processor": INFERENCE
+ },
  ]
@@ -50,14 +50,22 @@ def extract_assistant_message(response):


  def extract_provider_name(instance):
- provider_url: Option[str] = try_option(getattr, instance.client._client.base_url, 'host')
+ provider_url: Option[str] = None
+ if hasattr(instance,'client'):
+ provider_url: Option[str] = try_option(getattr, instance.client._client.base_url, 'host')
+ if hasattr(instance, '_client'):
+ provider_url = try_option(getattr, instance._client.base_url, 'host')
  return provider_url.unwrap_or(None)


  def extract_inference_endpoint(instance):
- inference_endpoint: Option[str] = try_option(getattr, instance.client._client, 'base_url').map(str)
- if inference_endpoint.is_none() and "meta" in instance.client.__dict__:
- inference_endpoint = try_option(getattr, instance.client.meta, 'endpoint_url').map(str)
+ inference_endpoint: Option[str] = None
+ if hasattr(instance,'client'):
+ inference_endpoint: Option[str] = try_option(getattr, instance.client._client, 'base_url').map(str)
+ if inference_endpoint.is_none() and "meta" in instance.client.__dict__:
+ inference_endpoint = try_option(getattr, instance.client.meta, 'endpoint_url').map(str)
+ if hasattr(instance,'_client'):
+ inference_endpoint = try_option(getattr, instance._client, 'base_url').map(str)

  return inference_endpoint.unwrap_or(extract_provider_name(instance))

@@ -4,7 +4,7 @@ from monocle_apptrace.instrumentation.metamodel.langchain import (
  from monocle_apptrace.instrumentation.common.utils import resolve_from_alias, get_llm_type

  INFERENCE = {
- "type": "inference",
+ "type": "inference.framework",
  "attributes": [
  [
  {
@@ -11,15 +11,13 @@ LANGCHAIN_METHODS = [
  "package": "langchain.prompts.base",
  "object": "BasePromptTemplate",
  "method": "invoke",
- "wrapper_method": task_wrapper,
- "span_type": "workflow"
+ "wrapper_method": task_wrapper
  },
  {
  "package": "langchain.prompts.base",
  "object": "BasePromptTemplate",
  "method": "ainvoke",
- "wrapper_method": atask_wrapper,
- "span_type": "workflow"
+ "wrapper_method": atask_wrapper
  },
  {
  "package": "langchain.chat_models.base",
@@ -82,30 +80,24 @@ LANGCHAIN_METHODS = [
  "package": "langchain.schema",
  "object": "BaseOutputParser",
  "method": "invoke",
- "wrapper_method": task_wrapper,
- "span_type": "workflow"
+ "wrapper_method": task_wrapper
  },
  {
  "package": "langchain.schema",
  "object": "BaseOutputParser",
  "method": "ainvoke",
- "wrapper_method": atask_wrapper,
- "span_type": "workflow"
+ "wrapper_method": atask_wrapper
  },
  {
  "package": "langchain.schema.runnable",
  "object": "RunnableSequence",
  "method": "invoke",
- "span_name": "langchain.workflow",
- "wrapper_method": task_wrapper,
- "span_type": "workflow"
+ "wrapper_method": task_wrapper
  },
  {
  "package": "langchain.schema.runnable",
  "object": "RunnableSequence",
  "method": "ainvoke",
- "span_name": "langchain.workflow",
- "wrapper_method": atask_wrapper,
- "span_type": "workflow"
+ "wrapper_method": atask_wrapper
  }
  ]
@@ -96,12 +96,19 @@ def extract_query_from_content(content)


  def extract_provider_name(instance):
- provider_url = try_option(getattr, instance, 'api_base').and_then(lambda url: urlparse(url).hostname)
- return provider_url
+ if hasattr(instance,'api_base'):
+ provider_url: Option[str]= try_option(getattr, instance, 'api_base').and_then(lambda url: urlparse(url).hostname)
+ if hasattr(instance,'_client'):
+ provider_url:Option[str] = try_option(getattr, instance._client.base_url,'host')
+ return provider_url.unwrap_or(None)


  def extract_inference_endpoint(instance):
- inference_endpoint = try_option(getattr, instance._client.sdk_configuration, 'server_url').map(str)
+ if hasattr(instance,'_client'):
+ if hasattr(instance._client,'sdk_configuration'):
+ inference_endpoint: Option[str] = try_option(getattr, instance._client.sdk_configuration, 'server_url').map(str)
+ if hasattr(instance._client,'base_url'):
+ inference_endpoint: Option[str] = try_option(getattr, instance._client, 'base_url').map(str)
  return inference_endpoint.unwrap_or(extract_provider_name(instance))

@@ -163,10 +170,7 @@ def update_span_from_llm_response(response, instance):
  if token_usage is not None:
  temperature = instance.__dict__.get("temperature", None)
  meta_dict.update({"temperature": temperature})
- if getattr(token_usage, "completion_tokens", None):
- meta_dict.update({"completion_tokens": getattr(token_usage, "completion_tokens")})
- if getattr(token_usage, "prompt_tokens", None):
- meta_dict.update({"prompt_tokens": getattr(token_usage, "prompt_tokens")})
- if getattr(token_usage, "total_tokens", None):
- meta_dict.update({"total_tokens": getattr(token_usage, "total_tokens")})
+ meta_dict.update({"completion_tokens": getattr(token_usage, "completion_tokens",None) or getattr(token_usage,"output_tokens",None)})
+ meta_dict.update({"prompt_tokens": getattr(token_usage, "prompt_tokens",None) or getattr(token_usage,"input_tokens",None)})
+ meta_dict.update({"total_tokens": getattr(token_usage, "total_tokens",None) or getattr(token_usage,"output_tokens",None)+getattr(token_usage,"input_tokens",None)})
  return meta_dict
@@ -4,7 +4,7 @@ from monocle_apptrace.instrumentation.metamodel.llamaindex import (
  from monocle_apptrace.instrumentation.common.utils import resolve_from_alias, get_llm_type

  INFERENCE = {
- "type": "inference",
+ "type": "inference.framework",
  "attributes": [
  [
  {
@@ -13,7 +13,6 @@ LLAMAINDEX_METHODS = [
  "package": "llama_index.core.indices.base_retriever",
  "object": "BaseRetriever",
  "method": "retrieve",
- "span_name": "llamaindex.retrieve",
  "wrapper_method": task_wrapper,
  "output_processor": RETRIEVAL
  },
@@ -21,7 +20,6 @@ LLAMAINDEX_METHODS = [
  "package": "llama_index.core.indices.base_retriever",
  "object": "BaseRetriever",
  "method": "aretrieve",
- "span_name": "llamaindex.retrieve",
  "wrapper_method": atask_wrapper,
  "output_processor": RETRIEVAL
  },
@@ -29,23 +27,18 @@ LLAMAINDEX_METHODS = [
  "package": "llama_index.core.base.base_query_engine",
  "object": "BaseQueryEngine",
  "method": "query",
- "span_name": "llamaindex.query",
- "wrapper_method": task_wrapper,
- "span_type": "workflow"
+ "wrapper_method": task_wrapper
  },
  {
  "package": "llama_index.core.base.base_query_engine",
  "object": "BaseQueryEngine",
  "method": "aquery",
- "span_name": "llamaindex.query",
- "wrapper_method": atask_wrapper,
- "span_type": "workflow"
+ "wrapper_method": atask_wrapper
  },
  {
  "package": "llama_index.core.llms.custom",
  "object": "CustomLLM",
  "method": "chat",
- "span_name": "llamaindex.llmchat",
  "wrapper_method": task_wrapper,
  "output_processor": INFERENCE
  },
@@ -53,7 +46,6 @@ LLAMAINDEX_METHODS = [
  "package": "llama_index.core.llms.custom",
  "object": "CustomLLM",
  "method": "achat",
- "span_name": "llamaindex.llmchat",
  "wrapper_method": atask_wrapper,
  "output_processor": INFERENCE,

@@ -62,7 +54,6 @@ LLAMAINDEX_METHODS = [
  "package": "llama_index.llms.openai.base",
  "object": "OpenAI",
  "method": "chat",
- "span_name": "llamaindex.openai",
  "wrapper_method": task_wrapper,
  "output_processor": INFERENCE
  },
@@ -70,7 +61,6 @@ LLAMAINDEX_METHODS = [
  "package": "llama_index.llms.openai.base",
  "object": "OpenAI",
  "method": "achat",
- "span_name": "llamaindex.openai",
  "wrapper_method": atask_wrapper,
  "output_processor": INFERENCE
  },
@@ -78,7 +68,6 @@ LLAMAINDEX_METHODS = [
  "package": "llama_index.llms.mistralai.base",
  "object": "MistralAI",
  "method": "chat",
- "span_name": "llamaindex.mistralai",
  "wrapper_method": task_wrapper,
  "output_processor": INFERENCE
  },
@@ -86,7 +75,6 @@ LLAMAINDEX_METHODS = [
  "package": "llama_index.llms.mistralai.base",
  "object": "MistralAI",
  "method": "achat",
- "span_name": "llamaindex.mistralai",
  "wrapper_method": atask_wrapper,
  "output_processor": INFERENCE
  },
@@ -94,8 +82,21 @@ LLAMAINDEX_METHODS = [
  "package": "llama_index.core.agent",
  "object": "ReActAgent",
  "method": "chat",
- "span_name": "react.agent",
  "wrapper_method": task_wrapper,
  "output_processor": AGENT
+ },
+ {
+ "package": "llama_index.llms.anthropic",
+ "object": "Anthropic",
+ "method": "chat",
+ "wrapper_method": task_wrapper,
+ "output_processor": INFERENCE
+ },
+ {
+ "package": "llama_index.llms.anthropic",
+ "object": "Anthropic",
+ "method": "achat",
+ "wrapper_method": atask_wrapper,
+ "output_processor": INFERENCE
  }
  ]
@@ -10,7 +10,7 @@ from monocle_apptrace.instrumentation.common.utils import (
  get_nested_value,
  try_option,
  )
-
+ from monocle_apptrace.instrumentation.common.span_handler import NonFrameworkSpanHandler

  logger = logging.getLogger(__name__)

@@ -114,4 +114,12 @@ def get_inference_type(instance)
  if inference_type.unwrap_or(None):
  return 'azure_openai'
  else:
- return 'openai'
+ return 'openai'
+
+ class OpenAISpanHandler(NonFrameworkSpanHandler):
+ # If openAI is being called by Teams AI SDK, then retain the metadata part of the span events
+ def skip_processor(self, to_wrap, wrapped, instance, span, args, kwargs) -> list[str]:
+ if self.is_framework_span_in_progess() and self.get_workflow_name_in_progress() == "workflow.teams_ai":
+ return ["attributes", "events.data.input", "events.data.output"]
+ else:
+ return super().skip_processor(to_wrap, wrapped, instance, span, args, kwargs)
@@ -1,71 +1,219 @@
+ import logging
+ import random
+ import time
+ from types import SimpleNamespace
  from monocle_apptrace.instrumentation.metamodel.openai import (
  _helper,
  )
- from monocle_apptrace.instrumentation.common.utils import resolve_from_alias
+ from monocle_apptrace.instrumentation.common.utils import (
+ patch_instance_method,
+ resolve_from_alias,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ def process_stream(to_wrap, response, span_processor):
+ waiting_for_first_token = True
+ stream_start_time = time.time_ns()
+ first_token_time = stream_start_time
+ stream_closed_time = None
+ accumulated_response = ""
+ token_usage = None
+ accumulated_temp_list = []
+
+ if to_wrap and hasattr(response, "__iter__"):
+ original_iter = response.__iter__
+
+ def new_iter(self):
+ nonlocal waiting_for_first_token, first_token_time, stream_closed_time, accumulated_response, token_usage
+
+ for item in original_iter():
+ try:
+ if (
+ item.choices
+ and item.choices[0].delta
+ and item.choices[0].delta.content
+ ):
+ if waiting_for_first_token:
+ waiting_for_first_token = False
+ first_token_time = time.time_ns()
+
+ accumulated_response += item.choices[0].delta.content
+ # token_usage = item.usage
+ elif item.object == "chat.completion.chunk" and item.usage:
+ # Handle the case where the response is a chunk
+ token_usage = item.usage
+ stream_closed_time = time.time_ns()
+
+ except Exception as e:
+ logger.warning(
+ "Warning: Error occurred while processing item in new_iter: %s",
+ str(e),
+ )
+ finally:
+ accumulated_temp_list.append(item)
+ yield item
+
+ if span_processor:
+ ret_val = SimpleNamespace(
+ type="stream",
+ timestamps={
+ "data.input": int(stream_start_time),
+ "data.output": int(first_token_time),
+ "metadata": int(stream_closed_time or time.time_ns()),
+ },
+ output_text=accumulated_response,
+ usage=token_usage,
+ )
+ span_processor(ret_val)
+
+ patch_instance_method(response, "__iter__", new_iter)
+
+ if to_wrap and hasattr(response, "__aiter__"):
+ original_iter = response.__aiter__
+
+ async def new_aiter(self):
+ nonlocal waiting_for_first_token, first_token_time, stream_closed_time, accumulated_response, token_usage
+
+ async for item in original_iter():
+ try:
+ if (
+ item.choices
+ and item.choices[0].delta
+ and item.choices[0].delta.content
+ ):
+ if waiting_for_first_token:
+ waiting_for_first_token = False
+ first_token_time = time.time_ns()
+
+ accumulated_response += item.choices[0].delta.content
+ # token_usage = item.usage
+ elif item.object == "chat.completion.chunk" and item.usage:
+ # Handle the case where the response is a chunk
+ token_usage = item.usage
+ stream_closed_time = time.time_ns()
+
+ except Exception as e:
+ logger.warning(
+ "Warning: Error occurred while processing item in new_aiter: %s",
+ str(e),
+ )
+ finally:
+ accumulated_temp_list.append(item)
+ yield item
+
+ if span_processor:
+ ret_val = SimpleNamespace(
+ type="stream",
+ timestamps={
+ "data.input": int(stream_start_time),
+ "data.output": int(first_token_time),
+ "metadata": int(stream_closed_time or time.time_ns()),
+ },
+ output_text=accumulated_response,
+ usage=token_usage,
+ )
+ span_processor(ret_val)
+
+ patch_instance_method(response, "__aiter__", new_aiter)
+

  INFERENCE = {
  "type": "inference",
+ "is_auto_close": lambda kwargs: kwargs.get("stream", False) is False,
+ "response_processor": process_stream,
  "attributes": [
  [
  {
  "_comment": "provider type ,name , deployment , inference_endpoint",
  "attribute": "type",
- "accessor": lambda arguments: 'inference.' + (_helper.get_inference_type(arguments['instance'])) or 'openai'
+ "accessor": lambda arguments: "inference."
+ + (_helper.get_inference_type(arguments["instance"]))
+ or "openai",
  },
  {
  "attribute": "provider_name",
- "accessor": lambda arguments: _helper.extract_provider_name(arguments['instance'])
+ "accessor": lambda arguments: _helper.extract_provider_name(
+ arguments["instance"]
+ ),
  },
  {
  "attribute": "deployment",
- "accessor": lambda arguments: resolve_from_alias(arguments['instance'].__dict__, ['engine', 'azure_deployment', 'deployment_name', 'deployment_id', 'deployment'])
+ "accessor": lambda arguments: resolve_from_alias(
+ arguments["instance"].__dict__,
+ [
+ "engine",
+ "azure_deployment",
+ "deployment_name",
+ "deployment_id",
+ "deployment",
+ ],
+ ),
  },
  {
  "attribute": "inference_endpoint",
- "accessor": lambda arguments: resolve_from_alias(arguments['instance'].__dict__, ['azure_endpoint', 'api_base', 'endpoint']) or _helper.extract_inference_endpoint(arguments['instance'])
- }
+ "accessor": lambda arguments: resolve_from_alias(
+ arguments["instance"].__dict__,
+ ["azure_endpoint", "api_base", "endpoint"],
+ )
+ or _helper.extract_inference_endpoint(arguments["instance"]),
+ },
  ],
  [
  {
  "_comment": "LLM Model",
  "attribute": "name",
- "accessor": lambda arguments: resolve_from_alias(arguments['kwargs'], ['model', 'model_name', 'endpoint_name', 'deployment_name'])
+ "accessor": lambda arguments: resolve_from_alias(
+ arguments["kwargs"],
+ ["model", "model_name", "endpoint_name", "deployment_name"],
+ ),
  },
  {
  "attribute": "type",
- "accessor": lambda arguments: 'model.llm.' + resolve_from_alias(arguments['kwargs'], ['model', 'model_name', 'endpoint_name', 'deployment_name'])
- }
- ]
+ "accessor": lambda arguments: "model.llm."
+ + resolve_from_alias(
+ arguments["kwargs"],
+ ["model", "model_name", "endpoint_name", "deployment_name"],
+ ),
+ },
+ ],
  ],
  "events": [
- {"name": "data.input",
- "attributes": [
-
- {
- "_comment": "this is instruction and user query to LLM",
- "attribute": "input",
- "accessor": lambda arguments: _helper.extract_messages(arguments['kwargs'])
- }
- ]
- },
+ {
+ "name": "data.input",
+ "attributes": [
+ {
+ "_comment": "this is instruction and user query to LLM",
+ "attribute": "input",
+ "accessor": lambda arguments: _helper.extract_messages(
+ arguments["kwargs"]
+ ),
+ }
+ ],
+ },
  {
  "name": "data.output",
  "attributes": [
  {
  "_comment": "this is result from LLM",
  "attribute": "response",
- "accessor": lambda arguments: _helper.extract_assistant_message(arguments['result'])
+ "accessor": lambda arguments: _helper.extract_assistant_message(
+ arguments["result"]
+ ),
  }
- ]
+ ],
  },
  {
  "name": "metadata",
  "attributes": [
  {
  "_comment": "this is metadata usage from LLM",
- "accessor": lambda arguments: _helper.update_span_from_llm_response(arguments['result'])
+ "accessor": lambda arguments: _helper.update_span_from_llm_response(
+ arguments["result"]
+ ),
  }
- ]
- }
- ]
+ ],
+ },
+ ],
  }
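For context on the large OpenAI hunk above: non-streaming calls still auto-close ("is_auto_close"), while streaming responses are handed to process_stream, which patches __iter__/__aiter__ on the response so the span can record time to first token, the accumulated text, and the final usage chunk without consuming the stream on the caller's behalf. A self-contained, hedged sketch of that pattern, using a plain wrapping generator instead of patch_instance_method and made-up chunk objects in place of OpenAI's chat.completion.chunk:

    # Illustration only - not the package's implementation.
    import time
    from types import SimpleNamespace

    def observe_stream(chunks, span_processor):
        stream_start = time.time_ns()
        first_token_time = None
        accumulated = []
        for chunk in chunks:
            if chunk.content:
                if first_token_time is None:
                    first_token_time = time.time_ns()    # time to first token
                accumulated.append(chunk.content)
            yield chunk                                   # caller still receives every chunk
        span_processor(SimpleNamespace(
            type="stream",
            output_text="".join(accumulated),
            timestamps={"data.input": stream_start,
                        "data.output": first_token_time or stream_start,
                        "metadata": time.time_ns()},
        ))

    chunks = [SimpleNamespace(content="Hel"), SimpleNamespace(content="lo")]
    for _ in observe_stream(chunks, lambda result: print(result.output_text)):
        pass   # prints "Hello" once the stream is exhausted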