langtrace-python-sdk 3.3.4__py3-none-any.whl → 3.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,106 @@
1
+ from pymilvus import MilvusClient, model
2
+ from typing import List
3
+ from langtrace_python_sdk import langtrace, with_langtrace_root_span
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+ langtrace.init()
8
+
9
+ client = MilvusClient("milvus_demo.db")
10
+
11
+ COLLECTION_NAME = "demo_collection"
12
+ embedding_fn = model.DefaultEmbeddingFunction()
13
+
14
+
15
def create_collection(collection_name: str = COLLECTION_NAME):
    """Create a fresh collection, replacing any existing one of the same name.

    Args:
        collection_name: Name of the collection to (re)create.
    """
    already_exists = client.has_collection(collection_name=collection_name)
    if already_exists:
        # Start from a clean slate so repeated runs of the demo do not pile up data.
        client.drop_collection(collection_name=collection_name)

    # The vectors used in this demo have 768 dimensions.
    client.create_collection(collection_name=collection_name, dimension=768)
23
+
24
+
25
def create_embedding(docs: List[str] = [], subject: str = "history"):
    """Embed ``docs`` and package them as entities ready for insertion.

    Args:
        docs: Raw texts to embed.
        subject: Label stored on every entity under the ``subject`` field.

    Returns:
        A list of dicts with the keys ``id``, ``vector``, ``text`` and
        ``subject``; ``id`` is the position of the document within ``docs``.
    """
    vectors = embedding_fn.encode_documents(docs)

    entities = []
    for position, vector in enumerate(vectors):
        entities.append(
            {
                "id": position,
                "vector": vector,
                "text": docs[position],
                "subject": subject,
            }
        )
    return entities
40
+
41
+
42
def insert_data(collection_name: str = COLLECTION_NAME, data: List[dict] = []):
    """Insert the given entities into a collection.

    Args:
        collection_name: Target collection.
        data: Entities to insert, passed to ``client.insert`` unchanged.
    """
    insert_arguments = {"collection_name": collection_name, "data": data}
    client.insert(**insert_arguments)
47
+
48
+
49
def vector_search(collection_name: str = COLLECTION_NAME, queries: List[str] = []):
    """Embed ``queries`` and run a vector search against ``collection_name``.

    Args:
        collection_name: Collection to search. This used to be ignored in
            favour of a hard-coded ``"demo_collection"``.
        queries: Natural-language queries to embed and search for.

    Returns:
        Whatever ``client.search`` returns for the embedded queries.
    """
    query_vectors = embedding_fn.encode_queries(queries)
    # If you don't have the embedding function you can use a fake vector to finish the demo:
    # query_vectors = [ [ random.uniform(-1, 1) for _ in range(768) ] ]

    return client.search(
        collection_name=collection_name,  # target collection
        data=query_vectors,  # query vectors
        limit=2,  # number of returned entities
        output_fields=["text", "subject"],  # specifies fields to be returned
        timeout=10,
        # NOTE(review): "history" is used elsewhere in this file as a value of
        # the `subject` field; confirm a partition with this name exists.
        partition_names=["history"],
        anns_field="vector",
        search_params={"nprobe": 10},
    )
64
+
65
+
66
def query(collection_name: str = COLLECTION_NAME, query: str = ""):
    """Run a filter query against a collection.

    Args:
        collection_name: Collection to query.
        query: Filter expression passed to ``client.query`` as ``filter``,
            e.g. ``"subject == 'history'"``.

    Returns:
        Whatever ``client.query`` returns. The result used to be bound to an
        unused local and dropped.
    """
    return client.query(
        collection_name=collection_name,
        filter=query,
        # output_fields=["text", "subject"],
    )
74
+
75
+
76
@with_langtrace_root_span("milvus_example")
def main():
    """Run the demo: recreate the collection, insert two batches, then search and query."""
    history_docs = [
        "Artificial intelligence was founded as an academic discipline in 1956.",
        "Alan Turing was the first person to conduct substantial research in AI.",
        "Born in Maida Vale, London, Turing was raised in southern England.",
    ]
    biology_docs = [
        "Machine learning has been used for drug design.",
        "Computational synthesis with AI algorithms predicts molecular properties.",
        "DDR1 is involved in cancers and fibrosis.",
    ]

    create_collection()

    # Alan Turing's history (uses create_embedding's default subject).
    insert_data(data=create_embedding(docs=history_docs))

    # AI drug discovery.
    insert_data(data=create_embedding(docs=biology_docs, subject="biology"))

    vector_search(queries=["Who is Alan Turing?"])
    for subject_filter in ("subject == 'history'", "subject == 'biology'"):
        query(query=subject_filter)


if __name__ == "__main__":
    main()
@@ -37,6 +37,7 @@ SERVICE_PROVIDERS = {
37
37
  "MONGODB": "MongoDB",
38
38
  "AWS_BEDROCK": "AWS Bedrock",
39
39
  "CEREBRAS": "Cerebras",
40
+ "MILVUS": "Milvus",
40
41
  }
41
42
 
42
43
  LANGTRACE_ADDITIONAL_SPAN_ATTRIBUTES_KEY = "langtrace_additional_attributes"
@@ -0,0 +1,38 @@
1
# Operation names of the ``pymilvus.MilvusClient`` methods described by
# ``APIS``, in the order their entries appear in the mapping.
_MILVUS_CLIENT_OPERATIONS = (
    "insert",
    "query",
    "search",
    "delete",
    "create_collection",
    "upsert",
)

# Maps an upper-case operation key to the module, dotted method path,
# operation name and span name for that ``MilvusClient`` method.
APIS = {
    operation.upper(): {
        "MODULE": "pymilvus",
        "METHOD": f"MilvusClient.{operation}",
        "OPERATION": operation,
        "SPAN_NAME": f"Milvus {operation.replace('_', ' ').title()}",
    }
    for operation in _MILVUS_CLIENT_OPERATIONS
}
@@ -23,6 +23,7 @@ from .embedchain import EmbedchainInstrumentation
23
23
  from .litellm import LiteLLMInstrumentation
24
24
  from .pymongo import PyMongoInstrumentation
25
25
  from .cerebras import CerebrasInstrumentation
26
+ from .milvus import MilvusInstrumentation
26
27
 
27
28
  __all__ = [
28
29
  "AnthropicInstrumentation",
@@ -50,4 +51,5 @@ __all__ = [
50
51
  "PyMongoInstrumentation",
51
52
  "AWSBedrockInstrumentation",
52
53
  "CerebrasInstrumentation",
54
+ "MilvusInstrumentation",
53
55
  ]
@@ -71,16 +71,26 @@ def generic_patch(
71
71
  result = wrapped(*args, **kwargs)
72
72
  if trace_output:
73
73
  span.set_attribute("langchain.outputs", to_json_string(result))
74
-
75
- prompt_tokens = instance.get_num_tokens(args[0])
76
- completion_tokens = instance.get_num_tokens(result)
77
- if hasattr(result, 'usage'):
74
+ prompt_tokens = (
75
+ instance.get_num_tokens(args[0])
76
+ if hasattr(instance, "get_num_tokens")
77
+ else None
78
+ )
79
+ completion_tokens = (
80
+ instance.get_num_tokens(result)
81
+ if hasattr(instance, "get_num_tokens")
82
+ else None
83
+ )
84
+ if hasattr(result, "usage"):
78
85
  prompt_tokens = result.usage.prompt_tokens
79
86
  completion_tokens = result.usage.completion_tokens
80
87
 
81
- span.set_attribute(SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, prompt_tokens)
82
- span.set_attribute(SpanAttributes.LLM_USAGE_PROMPT_TOKENS, completion_tokens)
83
-
88
+ span.set_attribute(
89
+ SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, prompt_tokens
90
+ )
91
+ span.set_attribute(
92
+ SpanAttributes.LLM_USAGE_PROMPT_TOKENS, completion_tokens
93
+ )
84
94
 
85
95
  span.set_status(StatusCode.OK)
86
96
  return result
@@ -102,9 +112,17 @@ def clean_empty(d):
102
112
  if not isinstance(d, (dict, list, tuple)):
103
113
  return d
104
114
  if isinstance(d, tuple):
105
- return tuple(val for val in (clean_empty(val) for val in d) if val != () and val is not None)
115
+ return tuple(
116
+ val
117
+ for val in (clean_empty(val) for val in d)
118
+ if val != () and val is not None
119
+ )
106
120
  if isinstance(d, list):
107
- return [val for val in (clean_empty(val) for val in d) if val != [] and val is not None]
121
+ return [
122
+ val
123
+ for val in (clean_empty(val) for val in d)
124
+ if val != [] and val is not None
125
+ ]
108
126
  result = {}
109
127
  for k, val in d.items():
110
128
  if isinstance(val, dict):
@@ -120,7 +138,7 @@ def clean_empty(d):
120
138
  result[k] = val.strip()
121
139
  elif isinstance(val, object):
122
140
  # some langchain objects have a text attribute
123
- val = getattr(val, 'text', None)
141
+ val = getattr(val, "text", None)
124
142
  if val is not None and val.strip() != "":
125
143
  result[k] = val.strip()
126
144
  return result
@@ -0,0 +1,3 @@
1
+ from .instrumentation import MilvusInstrumentation
2
+
3
+ __all__ = ["MilvusInstrumentation"]
@@ -0,0 +1,29 @@
1
+ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
2
+ from opentelemetry.trace import get_tracer
3
+
4
+ from typing import Collection
5
+ from importlib_metadata import version as v
6
+ from wrapt import wrap_function_wrapper as _W
7
+
8
+ from langtrace_python_sdk.constants.instrumentation.milvus import APIS
9
+ from .patch import generic_patch
10
+
11
+
12
class MilvusInstrumentation(BaseInstrumentor):
    """Instrumentor that wraps the ``pymilvus`` client methods listed in ``APIS``."""

    def instrumentation_dependencies(self) -> Collection[str]:
        """Return the package requirement this instrumentor applies to."""
        return ["pymilvus >= 2.4.1"]

    def _instrument(self, **kwargs):
        """Wrap every method in ``APIS`` with a tracing wrapper.

        Args:
            **kwargs: May carry ``tracer_provider``, which is forwarded to
                ``get_tracer``.
        """
        tracer_provider = kwargs.get("tracer_provider")
        tracer = get_tracer(__name__, "", tracer_provider)
        version = v("pymilvus")
        for api in APIS.values():
            _W(
                module=api["MODULE"],
                name=api["METHOD"],
                wrapper=generic_patch(api, version, tracer),
            )

    def _uninstrument(self, **kwargs):
        """Remove the wrappers installed by ``_instrument``.

        This used to be a no-op, which left every method wrapped after
        ``uninstrument()`` was called.
        """
        # Imported locally so this method brings its own dependencies.
        from importlib import import_module

        from opentelemetry.instrumentation.utils import unwrap

        for api in APIS.values():
            # "MilvusClient.insert" -> owner path "MilvusClient", attribute "insert".
            owner_path, _, method_name = api["METHOD"].rpartition(".")
            owner = import_module(api["MODULE"])
            for attribute in filter(None, owner_path.split(".")):
                owner = getattr(owner, attribute)
            unwrap(owner, method_name)
@@ -0,0 +1,132 @@
1
+ from langtrace_python_sdk.constants.instrumentation.common import SERVICE_PROVIDERS
2
+ from langtrace_python_sdk.utils.silently_fail import silently_fail
3
+ from opentelemetry.trace import Tracer
4
+ from opentelemetry.trace import SpanKind
5
+ from langtrace_python_sdk.utils import handle_span_error, set_span_attribute
6
+ from langtrace_python_sdk.utils.llm import (
7
+ get_langtrace_attributes,
8
+ get_extra_attributes,
9
+ set_span_attributes,
10
+ )
11
+ import json
12
+
13
+
14
def _named_call_arguments(wrapped, args, kwargs):
    """Map parameter names to the values supplied in this call (best effort).

    Positional arguments are resolved against ``wrapped``'s signature so that
    ``client.insert("coll", rows)`` and
    ``client.insert(collection_name="coll", data=rows)`` yield the same
    mapping. Falls back to ``kwargs`` when the signature cannot be inspected
    or the arguments do not bind.
    """
    import inspect

    try:
        signature = inspect.signature(wrapped)
        bound = signature.bind_partial(*args, **kwargs)
    except (TypeError, ValueError):
        # Nothing to resolve against; the wrapped call itself will surface
        # any genuine argument error.
        return kwargs

    named = {}
    for name, value in bound.arguments.items():
        kind = signature.parameters[name].kind
        if kind is inspect.Parameter.VAR_KEYWORD:
            # Flatten **kwargs so extra keyword arguments stay addressable by name.
            named.update(value)
        elif kind is not inspect.Parameter.VAR_POSITIONAL:
            named[name] = value
    return named


def generic_patch(api, version: str, tracer: Tracer):
    """Build a wrapper that traces one Milvus client method.

    Args:
        api: An ``APIS`` entry; ``SPAN_NAME`` and ``OPERATION`` are read from it.
        version: Version string reported in the span attributes.
        tracer: Tracer used to start the span.

    Returns:
        A ``traced_method(wrapped, instance, args, kwargs)`` callable. It
        starts a CLIENT span, records request attributes, invokes ``wrapped``
        and, for ``query`` and ``search``, records response details. Any
        exception is passed to ``handle_span_error`` and re-raised.
    """

    def traced_method(wrapped, instance, args, kwargs):
        span_name = api["SPAN_NAME"]
        operation = api["OPERATION"]
        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
            try:
                # Resolve positional arguments by name: reading only `kwargs`
                # missed every argument the caller passed positionally.
                call_arguments = _named_call_arguments(wrapped, args, kwargs)

                span_attributes = {
                    **get_langtrace_attributes(
                        service_provider=SERVICE_PROVIDERS["MILVUS"],
                        version=version,
                        vendor_type="Vector Database",
                    ),
                    "db.system": "milvus",
                    "db.operation": operation,
                    "db.name": call_arguments.get("collection_name", None),
                    **get_extra_attributes(),
                }

                if operation == "create_collection":
                    set_create_collection_attributes(span_attributes, call_arguments)

                elif operation == "insert" or operation == "upsert":
                    set_insert_or_upsert_attributes(span_attributes, call_arguments)

                elif operation == "search":
                    set_search_attributes(span_attributes, call_arguments)

                elif operation == "query":
                    set_query_attributes(span_attributes, call_arguments)

                set_span_attributes(span, span_attributes)
                # The call is forwarded exactly as the caller made it.
                result = wrapped(*args, **kwargs)

                if operation == "query":
                    set_query_response_attributes(span, result)

                if operation == "search":
                    set_search_response_attributes(span, result)
                return result
            except Exception as err:
                handle_span_error(span, err)
                raise

    return traced_method
58
+
59
+
60
+ @silently_fail
61
def set_create_collection_attributes(span_attributes, kwargs):
    """Record the requested vector dimension of a ``create_collection`` call.

    Args:
        span_attributes: Attribute dict to update in place.
        kwargs: Arguments of the call; ``dimension`` is read from it and
            stored as ``db.dimension`` (``None`` when absent).
    """
    dimension = kwargs.get("dimension")
    span_attributes["db.dimension"] = dimension
63
+
64
+
65
+ @silently_fail
66
def set_insert_or_upsert_attributes(span_attributes, kwargs):
    """Record request details of an ``insert`` or ``upsert`` call.

    Args:
        span_attributes: Attribute dict to update in place.
        kwargs: Arguments of the call; ``data``, ``timeout`` and
            ``partition_name`` are read from it.

    Sets ``db.num_entities``, ``db.timeout`` and ``db.partition_name``.
    ``db.num_entities`` is ``None`` when ``data`` is missing or empty.
    """
    data = kwargs.get("data")

    if not data:
        num_entities = None
    elif isinstance(data, dict):
        # A single entity passed as one dict: len() would count its fields,
        # not entities.
        num_entities = 1
    else:
        num_entities = len(data)

    span_attributes["db.num_entities"] = num_entities
    span_attributes["db.timeout"] = kwargs.get("timeout")
    span_attributes["db.partition_name"] = kwargs.get("partition_name")
74
+
75
+
76
+ @silently_fail
77
def set_search_attributes(span_attributes, kwargs):
    """Record request details of a ``search`` call.

    Args:
        span_attributes: Attribute dict to update in place.
        kwargs: Arguments of the call.

    ``output_fields``, ``search_params`` and ``partition_names`` are stored as
    JSON strings. An omitted argument is stored as ``None``, like every other
    attribute here, rather than as the literal string ``"null"`` that
    ``json.dumps(None)`` would produce.
    """

    def _json_or_none(value):
        # Keep "not provided" distinguishable from a real JSON payload.
        return None if value is None else json.dumps(value)

    data = kwargs.get("data")

    span_attributes["db.num_queries"] = len(data) if data else None
    span_attributes["db.filter"] = kwargs.get("filter")
    span_attributes["db.limit"] = kwargs.get("limit")
    span_attributes["db.output_fields"] = _json_or_none(kwargs.get("output_fields"))
    span_attributes["db.search_params"] = _json_or_none(kwargs.get("search_params"))
    span_attributes["db.partition_names"] = _json_or_none(
        kwargs.get("partition_names")
    )
    span_attributes["db.anns_field"] = kwargs.get("anns_field")
    span_attributes["db.timeout"] = kwargs.get("timeout")
94
+
95
+
96
+ @silently_fail
97
def set_query_attributes(span_attributes, kwargs):
    """Record request details of a ``query`` call.

    Args:
        span_attributes: Attribute dict to update in place.
        kwargs: Arguments of the call. Each argument listed below is copied
            unchanged (``None`` when absent) under its ``db.*`` key.
    """
    attribute_sources = (
        ("db.filter", "filter"),
        ("db.output_fields", "output_fields"),
        ("db.timeout", "timeout"),
        ("db.partition_names", "partition_names"),
        ("db.ids", "ids"),
    )
    for attribute_name, argument_name in attribute_sources:
        span_attributes[attribute_name] = kwargs.get(argument_name)
109
+
110
+
111
+ @silently_fail
112
def set_query_response_attributes(span, result):
    """Record the outcome of a ``query`` call on the span.

    Args:
        span: Span to annotate.
        result: Query result; its length is stored as ``db.num_matches`` and
            each element is attached as the attributes of a
            ``db.query.match`` event.
    """
    match_count = len(result)
    set_span_attribute(span, name="db.num_matches", value=match_count)

    for row in result:
        # NOTE(review): each row is passed as event attributes unchanged;
        # presumably a flat mapping of primitive values - confirm.
        span.add_event("db.query.match", attributes=row)
119
+
120
+
121
+ @silently_fail
122
def set_search_response_attributes(span, result):
    """Emit one ``db.search.match`` span event per hit of a ``search`` call.

    Args:
        span: Span to annotate.
        result: Iterable of per-query hit collections. Each hit must provide
            ``id``, ``distance`` and ``entity``; ``distance`` is stringified
            and ``entity`` is JSON-encoded.
    """
    all_hits = (hit for hits_for_query in result for hit in hits_for_query)
    for hit in all_hits:
        event_attributes = {
            "id": hit["id"],
            "distance": str(hit["distance"]),
            "entity": json.dumps(hit["entity"]),
        }
        span.add_event("db.search.match", attributes=event_attributes)
@@ -38,7 +38,6 @@ def generic_patch(name, version, tracer):
38
38
 
39
39
  try:
40
40
  result = wrapped(*args, **kwargs)
41
- print(result)
42
41
  for doc in result:
43
42
  if span.is_recording():
44
43
  span.add_event(
@@ -66,6 +66,7 @@ from langtrace_python_sdk.instrumentation import (
66
66
  WeaviateInstrumentation,
67
67
  PyMongoInstrumentation,
68
68
  CerebrasInstrumentation,
69
+ MilvusInstrumentation,
69
70
  )
70
71
  from opentelemetry.util.re import parse_env_headers
71
72
 
@@ -284,6 +285,7 @@ def init(
284
285
  "autogen": AutogenInstrumentation(),
285
286
  "pymongo": PyMongoInstrumentation(),
286
287
  "cerebras-cloud-sdk": CerebrasInstrumentation(),
288
+ "pymilvus": MilvusInstrumentation(),
287
289
  }
288
290
 
289
291
  init_instrumentations(config.disable_instrumentations, all_instrumentations)
@@ -1 +1 @@
1
- __version__ = "3.3.4"
1
+ __version__ = "3.3.7"