langtrace-python-sdk 3.3.4__py3-none-any.whl → 3.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/milvus_example/main.py +106 -0
- langtrace_python_sdk/constants/instrumentation/common.py +1 -0
- langtrace_python_sdk/constants/instrumentation/milvus.py +38 -0
- langtrace_python_sdk/instrumentation/__init__.py +2 -0
- langtrace_python_sdk/instrumentation/langchain_community/patch.py +28 -10
- langtrace_python_sdk/instrumentation/milvus/__init__.py +3 -0
- langtrace_python_sdk/instrumentation/milvus/instrumentation.py +29 -0
- langtrace_python_sdk/instrumentation/milvus/patch.py +132 -0
- langtrace_python_sdk/instrumentation/pymongo/patch.py +0 -1
- langtrace_python_sdk/langtrace.py +2 -0
- langtrace_python_sdk/version.py +1 -1
- langtrace_python_sdk-3.3.7.dist-info/METADATA +493 -0
- {langtrace_python_sdk-3.3.4.dist-info → langtrace_python_sdk-3.3.7.dist-info}/RECORD +16 -11
- langtrace_python_sdk-3.3.4.dist-info/METADATA +0 -371
- {langtrace_python_sdk-3.3.4.dist-info → langtrace_python_sdk-3.3.7.dist-info}/WHEEL +0 -0
- {langtrace_python_sdk-3.3.4.dist-info → langtrace_python_sdk-3.3.7.dist-info}/entry_points.txt +0 -0
- {langtrace_python_sdk-3.3.4.dist-info → langtrace_python_sdk-3.3.7.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,106 @@
|
|
1
|
+
from pymilvus import MilvusClient, model
|
2
|
+
from typing import List
|
3
|
+
from langtrace_python_sdk import langtrace, with_langtrace_root_span
|
4
|
+
from dotenv import load_dotenv
|
5
|
+
|
6
|
+
load_dotenv()
|
7
|
+
langtrace.init()
|
8
|
+
|
9
|
+
client = MilvusClient("milvus_demo.db")
|
10
|
+
|
11
|
+
COLLECTION_NAME = "demo_collection"
|
12
|
+
embedding_fn = model.DefaultEmbeddingFunction()
|
13
|
+
|
14
|
+
|
15
|
+
def create_collection(collection_name: str = COLLECTION_NAME):
    """Recreate ``collection_name`` from scratch.

    If the collection already exists it is dropped first, so every run of
    the demo starts with an empty collection.
    """
    already_exists = client.has_collection(collection_name=collection_name)
    if already_exists:
        client.drop_collection(collection_name=collection_name)

    # The vectors used in this demo have 768 dimensions.
    client.create_collection(collection_name=collection_name, dimension=768)
|
23
|
+
|
24
|
+
|
25
|
+
def create_embedding(docs: List[str] = [], subject: str = "history"):
    """Embed ``docs`` and return one entity dict per embedding vector.

    Each entity carries an ``id`` (its position in the batch), the ``vector``,
    the raw ``text`` and a ``subject`` label; the label is what the demo
    later filters on.
    """
    vectors = embedding_fn.encode_documents(docs)
    return [
        {"id": position, "vector": vector, "text": docs[position], "subject": subject}
        for position, vector in enumerate(vectors)
    ]
|
40
|
+
|
41
|
+
|
42
|
+
def insert_data(collection_name: str = COLLECTION_NAME, data: List[dict] = []):
    """Insert the entity dicts in ``data`` into ``collection_name``."""
    client.insert(collection_name=collection_name, data=data)
|
47
|
+
|
48
|
+
|
49
|
+
def vector_search(collection_name: str = COLLECTION_NAME, queries: List[str] = []):
    """Embed ``queries`` and run a vector search against ``collection_name``.

    The search asks for the two nearest entities per query and returns their
    ``text`` and ``subject`` fields. The result is not returned; the call
    exists to exercise the traced ``MilvusClient.search`` path.
    """
    query_vectors = embedding_fn.encode_queries(queries)
    # If you don't have the embedding function you can use a fake vector to
    # finish the demo:
    # query_vectors = [[random.uniform(-1, 1) for _ in range(768)]]

    client.search(
        # Honour the caller's collection; this used to be hard-coded to
        # "demo_collection", which silently ignored the parameter.
        collection_name=collection_name,
        data=query_vectors,  # query vectors
        limit=2,  # number of returned entities
        output_fields=["text", "subject"],  # fields to be returned
        timeout=10,
        # NOTE(review): "history" is used as a subject label elsewhere in this
        # demo; confirm a partition with this name is actually intended.
        partition_names=["history"],
        anns_field="vector",
        search_params={"nprobe": 10},
    )
|
64
|
+
|
65
|
+
|
66
|
+
def query(collection_name: str = COLLECTION_NAME, query: str = ""):
    """Run a scalar-filter query against ``collection_name``.

    ``query`` is passed to the client as the ``filter`` expression. The
    result is discarded; the call exists to exercise the traced
    ``MilvusClient.query`` path.
    """
    client.query(collection_name=collection_name, filter=query)
|
74
|
+
|
75
|
+
|
76
|
+
@with_langtrace_root_span("milvus_example")
def main():
    """Run the demo: rebuild the collection, load two batches, search, query."""
    create_collection()

    # Alan Turing's history, stored under the default "history" subject.
    history_docs = [
        "Artificial intelligence was founded as an academic discipline in 1956.",
        "Alan Turing was the first person to conduct substantial research in AI.",
        "Born in Maida Vale, London, Turing was raised in southern England.",
    ]
    insert_data(data=create_embedding(docs=history_docs))

    # AI drug discovery, stored under the "biology" subject.
    biology_docs = [
        "Machine learning has been used for drug design.",
        "Computational synthesis with AI algorithms predicts molecular properties.",
        "DDR1 is involved in cancers and fibrosis.",
    ]
    insert_data(data=create_embedding(docs=biology_docs, subject="biology"))

    vector_search(queries=["Who is Alan Turing?"])
    for subject_filter in ("subject == 'history'", "subject == 'biology'"):
        query(query=subject_filter)


if __name__ == "__main__":
    main()
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# MilvusClient methods that get a tracing wrapper, in registration order.
_MILVUS_OPERATIONS = (
    "insert",
    "query",
    "search",
    "delete",
    "create_collection",
    "upsert",
)

# Upper-cased operation name -> patch spec: the module and method to wrap,
# the operation label, and the human-readable span name.
APIS = {
    operation.upper(): {
        "MODULE": "pymilvus",
        "METHOD": f"MilvusClient.{operation}",
        "OPERATION": operation,
        "SPAN_NAME": f"Milvus {operation.replace('_', ' ').title()}",
    }
    for operation in _MILVUS_OPERATIONS
}
|
@@ -23,6 +23,7 @@ from .embedchain import EmbedchainInstrumentation
|
|
23
23
|
from .litellm import LiteLLMInstrumentation
|
24
24
|
from .pymongo import PyMongoInstrumentation
|
25
25
|
from .cerebras import CerebrasInstrumentation
|
26
|
+
from .milvus import MilvusInstrumentation
|
26
27
|
|
27
28
|
__all__ = [
|
28
29
|
"AnthropicInstrumentation",
|
@@ -50,4 +51,5 @@ __all__ = [
|
|
50
51
|
"PyMongoInstrumentation",
|
51
52
|
"AWSBedrockInstrumentation",
|
52
53
|
"CerebrasInstrumentation",
|
54
|
+
"MilvusInstrumentation",
|
53
55
|
]
|
@@ -71,16 +71,26 @@ def generic_patch(
|
|
71
71
|
result = wrapped(*args, **kwargs)
|
72
72
|
if trace_output:
|
73
73
|
span.set_attribute("langchain.outputs", to_json_string(result))
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
74
|
+
prompt_tokens = (
|
75
|
+
instance.get_num_tokens(args[0])
|
76
|
+
if hasattr(instance, "get_num_tokens")
|
77
|
+
else None
|
78
|
+
)
|
79
|
+
completion_tokens = (
|
80
|
+
instance.get_num_tokens(result)
|
81
|
+
if hasattr(instance, "get_num_tokens")
|
82
|
+
else None
|
83
|
+
)
|
84
|
+
if hasattr(result, "usage"):
|
78
85
|
prompt_tokens = result.usage.prompt_tokens
|
79
86
|
completion_tokens = result.usage.completion_tokens
|
80
87
|
|
81
|
-
span.set_attribute(
|
82
|
-
|
83
|
-
|
88
|
+
span.set_attribute(
|
89
|
+
SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, prompt_tokens
|
90
|
+
)
|
91
|
+
span.set_attribute(
|
92
|
+
SpanAttributes.LLM_USAGE_PROMPT_TOKENS, completion_tokens
|
93
|
+
)
|
84
94
|
|
85
95
|
span.set_status(StatusCode.OK)
|
86
96
|
return result
|
@@ -102,9 +112,17 @@ def clean_empty(d):
|
|
102
112
|
if not isinstance(d, (dict, list, tuple)):
|
103
113
|
return d
|
104
114
|
if isinstance(d, tuple):
|
105
|
-
return tuple(
|
115
|
+
return tuple(
|
116
|
+
val
|
117
|
+
for val in (clean_empty(val) for val in d)
|
118
|
+
if val != () and val is not None
|
119
|
+
)
|
106
120
|
if isinstance(d, list):
|
107
|
-
return [
|
121
|
+
return [
|
122
|
+
val
|
123
|
+
for val in (clean_empty(val) for val in d)
|
124
|
+
if val != [] and val is not None
|
125
|
+
]
|
108
126
|
result = {}
|
109
127
|
for k, val in d.items():
|
110
128
|
if isinstance(val, dict):
|
@@ -120,7 +138,7 @@ def clean_empty(d):
|
|
120
138
|
result[k] = val.strip()
|
121
139
|
elif isinstance(val, object):
|
122
140
|
# some langchain objects have a text attribute
|
123
|
-
val = getattr(val,
|
141
|
+
val = getattr(val, "text", None)
|
124
142
|
if val is not None and val.strip() != "":
|
125
143
|
result[k] = val.strip()
|
126
144
|
return result
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
|
2
|
+
from opentelemetry.trace import get_tracer
|
3
|
+
|
4
|
+
from typing import Collection
|
5
|
+
from importlib_metadata import version as v
|
6
|
+
from wrapt import wrap_function_wrapper as _W
|
7
|
+
|
8
|
+
from langtrace_python_sdk.constants.instrumentation.milvus import APIS
|
9
|
+
from .patch import generic_patch
|
10
|
+
|
11
|
+
|
12
|
+
class MilvusInstrumentation(BaseInstrumentor):
    """Instrumentor that wraps the ``pymilvus`` methods listed in ``APIS``."""

    def instrumentation_dependencies(self) -> Collection[str]:
        """Return the package requirement this instrumentation targets."""
        return ["pymilvus >= 2.4.1"]

    def _instrument(self, **kwargs):
        """Wrap every method described in ``APIS`` with a tracing wrapper.

        Reads an optional ``tracer_provider`` from ``kwargs`` and passes the
        installed ``pymilvus`` version to each wrapper.
        """
        provider = kwargs.get("tracer_provider")
        tracer = get_tracer(__name__, "", provider)
        pymilvus_version = v("pymilvus")

        for spec in APIS.values():
            wrapper = generic_patch(spec, pymilvus_version, tracer)
            _W(module=spec["MODULE"], name=spec["METHOD"], wrapper=wrapper)

    def _uninstrument(self, **kwargs):
        """No-op: the wrappers installed by ``_instrument`` are left in place."""
        pass
|
@@ -0,0 +1,132 @@
|
|
1
|
+
from langtrace_python_sdk.constants.instrumentation.common import SERVICE_PROVIDERS
|
2
|
+
from langtrace_python_sdk.utils.silently_fail import silently_fail
|
3
|
+
from opentelemetry.trace import Tracer
|
4
|
+
from opentelemetry.trace import SpanKind
|
5
|
+
from langtrace_python_sdk.utils import handle_span_error, set_span_attribute
|
6
|
+
from langtrace_python_sdk.utils.llm import (
|
7
|
+
get_langtrace_attributes,
|
8
|
+
get_extra_attributes,
|
9
|
+
set_span_attributes,
|
10
|
+
)
|
11
|
+
import json
|
12
|
+
|
13
|
+
|
14
|
+
def generic_patch(api, version: str, tracer: Tracer):
    """Build a tracing wrapper for one Milvus client method.

    Args:
        api: Patch spec; ``api["SPAN_NAME"]`` and ``api["OPERATION"]`` are read
            on every call of the returned wrapper.
        version: Version string recorded in the langtrace attributes.
        tracer: Tracer used to open one CLIENT span per wrapped call.

    Returns:
        A ``(wrapped, instance, args, kwargs)`` wrapper that records request
        attributes, invokes ``wrapped`` and, for ``query``/``search``, records
        response details. Exceptions are reported on the span via
        ``handle_span_error`` and re-raised unchanged.
    """
    # Operation -> setter that copies request details into the attribute dict.
    request_setters = {
        "create_collection": set_create_collection_attributes,
        "insert": set_insert_or_upsert_attributes,
        "upsert": set_insert_or_upsert_attributes,
        "search": set_search_attributes,
        "query": set_query_attributes,
    }
    # Operation -> setter that records details of the returned result.
    response_setters = {
        "query": set_query_response_attributes,
        "search": set_search_response_attributes,
    }

    def traced_method(wrapped, instance, args, kwargs):
        span_name = api["SPAN_NAME"]
        operation = api["OPERATION"]
        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
            try:
                # The collection name is commonly passed positionally; without
                # this fallback "db.name" was missing for such calls.
                # Assumes the wrapped method takes collection_name as its first
                # positional parameter - verify against the pymilvus signature
                # before adding new entries to APIS.
                collection_name = kwargs.get(
                    "collection_name", args[0] if args else None
                )
                span_attributes = {
                    **get_langtrace_attributes(
                        service_provider=SERVICE_PROVIDERS["MILVUS"],
                        version=version,
                        vendor_type="Vector Database",
                    ),
                    "db.system": "milvus",
                    "db.operation": operation,
                    "db.name": collection_name,
                    **get_extra_attributes(),
                }

                set_request = request_setters.get(operation)
                if set_request is not None:
                    set_request(span_attributes, kwargs)

                set_span_attributes(span, span_attributes)
                result = wrapped(*args, **kwargs)

                set_response = response_setters.get(operation)
                if set_response is not None:
                    set_response(span, result)
                return result
            except Exception as err:
                handle_span_error(span, err)
                raise

    return traced_method
|
58
|
+
|
59
|
+
|
60
|
+
@silently_fail
def set_create_collection_attributes(span_attributes, kwargs):
    """Copy the requested ``dimension`` (None if absent) into ``span_attributes``."""
    dimension = kwargs.get("dimension")
    span_attributes["db.dimension"] = dimension
|
63
|
+
|
64
|
+
|
65
|
+
@silently_fail
def set_insert_or_upsert_attributes(span_attributes, kwargs):
    """Record entity count, timeout and partition for an insert/upsert call.

    Only keyword arguments are inspected. Values that were not supplied are
    stored as None.
    """
    data = kwargs.get("data")

    if not data:
        num_entities = None
    elif isinstance(data, dict):
        # A single entity may be passed as one dict; len() of it would count
        # its fields rather than entities.
        num_entities = 1
    else:
        num_entities = len(data)

    span_attributes["db.num_entities"] = num_entities
    span_attributes["db.timeout"] = kwargs.get("timeout")
    span_attributes["db.partition_name"] = kwargs.get("partition_name")
|
74
|
+
|
75
|
+
|
76
|
+
@silently_fail
def set_search_attributes(span_attributes, kwargs):
    """Record the request parameters of a search call in ``span_attributes``.

    Only keyword arguments are inspected. ``output_fields``, ``search_params``
    and ``partition_names`` are stored as JSON strings; anything the caller
    did not supply is stored as None.
    """
    data = kwargs.get("data")
    # Use len() instead of truthiness so array-like query batches (whose
    # truth value may be ambiguous) do not raise; an empty batch still maps
    # to None.
    has_queries = data is not None and len(data) > 0
    span_attributes["db.num_queries"] = len(data) if has_queries else None
    span_attributes["db.filter"] = kwargs.get("filter")
    span_attributes["db.limit"] = kwargs.get("limit")

    for option in ("output_fields", "search_params", "partition_names"):
        value = kwargs.get(option)
        # json.dumps(None) would record the literal string "null" for an
        # option the caller never passed; keep it None like the other fields.
        span_attributes[f"db.{option}"] = (
            json.dumps(value) if value is not None else None
        )

    span_attributes["db.anns_field"] = kwargs.get("anns_field")
    span_attributes["db.timeout"] = kwargs.get("timeout")
|
94
|
+
|
95
|
+
|
96
|
+
@silently_fail
def set_query_attributes(span_attributes, kwargs):
    """Copy the query call's keyword arguments into ``span_attributes``.

    Each of ``filter``, ``output_fields``, ``timeout``, ``partition_names``
    and ``ids`` is stored unmodified under a ``db.``-prefixed key, or as None
    when it was not supplied.
    """
    for option in ("filter", "output_fields", "timeout", "partition_names", "ids"):
        span_attributes[f"db.{option}"] = kwargs.get(option)
|
109
|
+
|
110
|
+
|
111
|
+
@silently_fail
def set_query_response_attributes(span, result):
    """Record the match count on ``span`` and add one event per returned row."""
    match_count = len(result)
    set_span_attribute(span, name="db.num_matches", value=match_count)

    for row in result:
        # Each row is attached as the event's attributes as-is.
        span.add_event("db.query.match", attributes=row)
|
119
|
+
|
120
|
+
|
121
|
+
@silently_fail
def set_search_response_attributes(span, result):
    """Add one ``db.search.match`` event to ``span`` per hit in ``result``.

    ``result`` is iterated as a sequence of hit groups. Each hit contributes
    its ``id``, its ``distance`` rendered as a string, and its ``entity``
    serialised to JSON.
    """
    hits = (hit for group in result for hit in group)
    for hit in hits:
        event_attributes = {
            "id": hit["id"],
            "distance": str(hit["distance"]),
            "entity": json.dumps(hit["entity"]),
        }
        span.add_event("db.search.match", attributes=event_attributes)
|
@@ -66,6 +66,7 @@ from langtrace_python_sdk.instrumentation import (
|
|
66
66
|
WeaviateInstrumentation,
|
67
67
|
PyMongoInstrumentation,
|
68
68
|
CerebrasInstrumentation,
|
69
|
+
MilvusInstrumentation,
|
69
70
|
)
|
70
71
|
from opentelemetry.util.re import parse_env_headers
|
71
72
|
|
@@ -284,6 +285,7 @@ def init(
|
|
284
285
|
"autogen": AutogenInstrumentation(),
|
285
286
|
"pymongo": PyMongoInstrumentation(),
|
286
287
|
"cerebras-cloud-sdk": CerebrasInstrumentation(),
|
288
|
+
"pymilvus": MilvusInstrumentation(),
|
287
289
|
}
|
288
290
|
|
289
291
|
init_instrumentations(config.disable_instrumentations, all_instrumentations)
|
langtrace_python_sdk/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "3.3.
|
1
|
+
__version__ = "3.3.7"
|