sf-vector-sdk 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sf_vector_sdk-0.2.3.dist-info → sf_vector_sdk-0.2.4.dist-info}/METADATA +6 -5
- {sf_vector_sdk-0.2.3.dist-info → sf_vector_sdk-0.2.4.dist-info}/RECORD +7 -7
- vector_sdk/__init__.py +1 -1
- vector_sdk/hash/types.py +2 -0
- vector_sdk/structured/structured_embeddings.py +30 -40
- vector_sdk/structured/tool_config.py +4 -4
- {sf_vector_sdk-0.2.3.dist-info → sf_vector_sdk-0.2.4.dist-info}/WHEEL +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sf-vector-sdk
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Python SDK for the Vector Gateway service (embeddings and vector search)
|
|
5
5
|
Requires-Python: >=3.11
|
|
6
6
|
Requires-Dist: redis>=5.0.0
|
|
@@ -266,8 +266,9 @@ result = client.structured_embeddings.embed_test_question_and_wait(
|
|
|
266
266
|
)
|
|
267
267
|
|
|
268
268
|
# Embed a topic - uses TopicMetadata (all fields optional)
|
|
269
|
+
# Note: Topic data requires an "id" field which becomes the TurboPuffer document ID
|
|
269
270
|
result = client.structured_embeddings.embed_topic_and_wait(
|
|
270
|
-
data={"topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
|
|
271
|
+
data={"id": "topic-123", "topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
|
|
271
272
|
metadata=TopicMetadata(user_id="user123", topic_id="topic456"), # No tool_id needed
|
|
272
273
|
)
|
|
273
274
|
|
|
@@ -276,9 +277,9 @@ from vector_sdk import TopicBatchItem
|
|
|
276
277
|
|
|
277
278
|
batch_result = client.structured_embeddings.embed_topic_batch_and_wait(
|
|
278
279
|
items=[
|
|
279
|
-
TopicBatchItem(data={"topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
|
|
280
|
-
TopicBatchItem(data={"topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
|
|
281
|
-
TopicBatchItem(data={"topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()), # All optional
|
|
280
|
+
TopicBatchItem(data={"id": "topic-1", "topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
|
|
281
|
+
TopicBatchItem(data={"id": "topic-2", "topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
|
|
282
|
+
TopicBatchItem(data={"id": "topic-3", "topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()), # All optional
|
|
282
283
|
],
|
|
283
284
|
)
|
|
284
285
|
```
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
vector_sdk/__init__.py,sha256=
|
|
1
|
+
vector_sdk/__init__.py,sha256=VoljCrab1syIU3NWthWI9ks2s2QDIroixzFGkYamJSY,6979
|
|
2
2
|
vector_sdk/client.py,sha256=NQFGHyR1aM0UToRFy6e9Xm_v6mk0opqzKN8UlHu97n0,17186
|
|
3
3
|
vector_sdk/content_types.py,sha256=krvFOR58iUZPfYlEVsk0sXD6_ANAFbxEBQGNpt1YPDU,7381
|
|
4
4
|
vector_sdk/types.py,sha256=rQgA2z3ls21vY-DRPZgfmm8gYFkWJk1dQaJI-nbc0no,25514
|
|
@@ -12,7 +12,7 @@ vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py,sha256=cf4PCZK-Otf
|
|
|
12
12
|
vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.pyi,sha256=WKj_iRAuhXMNH3a2tf5j-ERYE5HLKamJTcQXm88JjDo,2451
|
|
13
13
|
vector_sdk/hash/__init__.py,sha256=if-8tGOPyGUZy0_joGH66moE0e5zzwSzfUeMqP_8QsU,723
|
|
14
14
|
vector_sdk/hash/hasher.py,sha256=k5VSQB-T0TtBM5ipaVE_TQu_uiaiWNjOWSbByxjriwQ,8618
|
|
15
|
-
vector_sdk/hash/types.py,sha256=
|
|
15
|
+
vector_sdk/hash/types.py,sha256=clBRk_D5SrXWU19K3Jg8COecz9--WZh9Ws4f70T3BXg,2044
|
|
16
16
|
vector_sdk/namespaces/__init__.py,sha256=S9dJfB39s2zjYOpFn9Fvf8bk7mLKcXk5aPatKOA-xO0,374
|
|
17
17
|
vector_sdk/namespaces/base.py,sha256=lioZBcd43mijnN0JwTMMEpQ6whiAjaueTDAAIZS1JM0,1156
|
|
18
18
|
vector_sdk/namespaces/db.py,sha256=a5sEHrfy1xAjRjyM9qfZxr3IznZVA8BnY5W1Hq5jr4I,7230
|
|
@@ -20,8 +20,8 @@ vector_sdk/namespaces/embeddings.py,sha256=r0cbCZjj0jZ9oyBpm8lA2BjUYzi8bmunWwFsY
|
|
|
20
20
|
vector_sdk/namespaces/search.py,sha256=8ruX0xp5vXD9tS8oXAu1vmF4aC25fNg4gDOtiR8aQ_0,7874
|
|
21
21
|
vector_sdk/structured/__init__.py,sha256=ZUhrH_l7bX5vA78DSKqDucWhfhYmkDX-W_MPzo5J9JU,1758
|
|
22
22
|
vector_sdk/structured/router.py,sha256=F3O1TYtbVFCPqVWCCYCt5QcRffX5WPlPQ7K3KlayooQ,5792
|
|
23
|
-
vector_sdk/structured/structured_embeddings.py,sha256=
|
|
24
|
-
vector_sdk/structured/tool_config.py,sha256=
|
|
25
|
-
sf_vector_sdk-0.2.
|
|
26
|
-
sf_vector_sdk-0.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
27
|
-
sf_vector_sdk-0.2.3.dist-info/RECORD,,
|
|
23
|
+
vector_sdk/structured/structured_embeddings.py,sha256=e-EOYgpx7SXOo1xQV6-5ZgB6W3ZH1HS2Tx1m7O_1VNE,36869
|
|
24
|
+
vector_sdk/structured/tool_config.py,sha256=qMwP8UWQTt8mkTYFVgvNXd9Dh_WztJSsqcgAjvQ_YoY,8212
|
|
25
|
+
sf_vector_sdk-0.2.4.dist-info/METADATA,sha256=kvP3u9ZJ3RUsLMcKz5yMRfkUworAcqJ-pZoLtXaYVoc,16069
|
|
26
|
+
sf_vector_sdk-0.2.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
27
|
+
sf_vector_sdk-0.2.4.dist-info/RECORD,,
|
vector_sdk/__init__.py
CHANGED
vector_sdk/hash/types.py
CHANGED
|
@@ -665,16 +665,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
665
665
|
) -> str:
|
|
666
666
|
"""Internal method to embed a topic with TopicMetadata."""
|
|
667
667
|
# 1. Extract text using the spec
|
|
668
|
-
text = extract_tool_text(
|
|
668
|
+
text = extract_tool_text(tool_collection, data)
|
|
669
669
|
if not text:
|
|
670
670
|
raise ValueError(
|
|
671
671
|
f"Failed to extract text from {tool_collection} - empty content"
|
|
672
672
|
)
|
|
673
673
|
|
|
674
674
|
# 2. Compute content hash
|
|
675
|
-
content_hash = compute_content_hash(
|
|
676
|
-
{"toolCollection": tool_collection, "data": data}
|
|
677
|
-
)
|
|
675
|
+
content_hash = compute_content_hash(tool_collection, data)
|
|
678
676
|
if not content_hash:
|
|
679
677
|
raise ValueError(
|
|
680
678
|
f"Failed to compute content hash for {tool_collection} - empty content"
|
|
@@ -686,6 +684,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
686
684
|
# 4. Build document with metadata (TopicMetadata doesn't have toolId)
|
|
687
685
|
document = {
|
|
688
686
|
**metadata.to_dict(),
|
|
687
|
+
"id": data["id"],
|
|
689
688
|
"toolCollection": tool_collection,
|
|
690
689
|
"contentHash": content_hash,
|
|
691
690
|
}
|
|
@@ -698,9 +697,9 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
698
697
|
document_fields=document,
|
|
699
698
|
)
|
|
700
699
|
|
|
701
|
-
# 6. Build text input
|
|
700
|
+
# 6. Build text input - use data["id"] as the TurboPuffer document ID
|
|
702
701
|
text_input = {
|
|
703
|
-
"id":
|
|
702
|
+
"id": data["id"],
|
|
704
703
|
"text": text,
|
|
705
704
|
"document": document,
|
|
706
705
|
}
|
|
@@ -729,16 +728,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
729
728
|
) -> EmbeddingResult:
|
|
730
729
|
"""Internal method to embed a topic and wait for result."""
|
|
731
730
|
# 1. Extract text using the spec
|
|
732
|
-
text = extract_tool_text(
|
|
731
|
+
text = extract_tool_text(tool_collection, data)
|
|
733
732
|
if not text:
|
|
734
733
|
raise ValueError(
|
|
735
734
|
f"Failed to extract text from {tool_collection} - empty content"
|
|
736
735
|
)
|
|
737
736
|
|
|
738
737
|
# 2. Compute content hash
|
|
739
|
-
content_hash = compute_content_hash(
|
|
740
|
-
{"toolCollection": tool_collection, "data": data}
|
|
741
|
-
)
|
|
738
|
+
content_hash = compute_content_hash(tool_collection, data)
|
|
742
739
|
if not content_hash:
|
|
743
740
|
raise ValueError(
|
|
744
741
|
f"Failed to compute content hash for {tool_collection} - empty content"
|
|
@@ -750,6 +747,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
750
747
|
# 4. Build document with metadata
|
|
751
748
|
document = {
|
|
752
749
|
**metadata.to_dict(),
|
|
750
|
+
"id": data["id"],
|
|
753
751
|
"toolCollection": tool_collection,
|
|
754
752
|
"contentHash": content_hash,
|
|
755
753
|
}
|
|
@@ -762,9 +760,9 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
762
760
|
document_fields=document,
|
|
763
761
|
)
|
|
764
762
|
|
|
765
|
-
# 6. Build text input
|
|
763
|
+
# 6. Build text input - use data["id"] as the TurboPuffer document ID
|
|
766
764
|
text_input = {
|
|
767
|
-
"id":
|
|
765
|
+
"id": data["id"],
|
|
768
766
|
"text": text,
|
|
769
767
|
"document": document,
|
|
770
768
|
}
|
|
@@ -802,16 +800,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
802
800
|
metadata = item.metadata
|
|
803
801
|
|
|
804
802
|
# Extract text
|
|
805
|
-
text = extract_tool_text(
|
|
803
|
+
text = extract_tool_text(tool_collection, data)
|
|
806
804
|
if not text:
|
|
807
805
|
raise ValueError(
|
|
808
806
|
f"Failed to extract text from {tool_collection} - empty content"
|
|
809
807
|
)
|
|
810
808
|
|
|
811
809
|
# Compute content hash
|
|
812
|
-
content_hash = compute_content_hash(
|
|
813
|
-
{"toolCollection": tool_collection, "data": data}
|
|
814
|
-
)
|
|
810
|
+
content_hash = compute_content_hash(tool_collection, data)
|
|
815
811
|
if not content_hash:
|
|
816
812
|
raise ValueError(
|
|
817
813
|
f"Failed to compute content hash for {tool_collection} - empty content"
|
|
@@ -820,12 +816,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
820
816
|
# Build document with metadata (TopicMetadata doesn't have toolId)
|
|
821
817
|
document = {
|
|
822
818
|
**metadata.to_dict(),
|
|
819
|
+
"id": data["id"],
|
|
823
820
|
"toolCollection": tool_collection,
|
|
824
821
|
"contentHash": content_hash,
|
|
825
822
|
}
|
|
826
823
|
|
|
824
|
+
# Use data["id"] as the TurboPuffer document ID
|
|
827
825
|
text_inputs.append({
|
|
828
|
-
"id":
|
|
826
|
+
"id": data["id"],
|
|
829
827
|
"text": text,
|
|
830
828
|
"document": document,
|
|
831
829
|
})
|
|
@@ -834,7 +832,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
834
832
|
storage_config = build_storage_config(
|
|
835
833
|
tool_collection=tool_collection,
|
|
836
834
|
sub_type=None,
|
|
837
|
-
content_hash=text_inputs[0]["
|
|
835
|
+
content_hash=text_inputs[0]["document"]["contentHash"],
|
|
838
836
|
document_fields=text_inputs[0]["document"],
|
|
839
837
|
)
|
|
840
838
|
|
|
@@ -871,16 +869,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
871
869
|
metadata = item.metadata
|
|
872
870
|
|
|
873
871
|
# Extract text
|
|
874
|
-
text = extract_tool_text(
|
|
872
|
+
text = extract_tool_text(tool_collection, data)
|
|
875
873
|
if not text:
|
|
876
874
|
raise ValueError(
|
|
877
875
|
f"Failed to extract text from {tool_collection} - empty content"
|
|
878
876
|
)
|
|
879
877
|
|
|
880
878
|
# Compute content hash
|
|
881
|
-
content_hash = compute_content_hash(
|
|
882
|
-
{"toolCollection": tool_collection, "data": data}
|
|
883
|
-
)
|
|
879
|
+
content_hash = compute_content_hash(tool_collection, data)
|
|
884
880
|
if not content_hash:
|
|
885
881
|
raise ValueError(
|
|
886
882
|
f"Failed to compute content hash for {tool_collection} - empty content"
|
|
@@ -889,12 +885,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
889
885
|
# Build document with metadata
|
|
890
886
|
document = {
|
|
891
887
|
**metadata.to_dict(),
|
|
888
|
+
"id": data["id"],
|
|
892
889
|
"toolCollection": tool_collection,
|
|
893
890
|
"contentHash": content_hash,
|
|
894
891
|
}
|
|
895
892
|
|
|
893
|
+
# Use data["id"] as the TurboPuffer document ID
|
|
896
894
|
text_inputs.append({
|
|
897
|
-
"id":
|
|
895
|
+
"id": data["id"],
|
|
898
896
|
"text": text,
|
|
899
897
|
"document": document,
|
|
900
898
|
})
|
|
@@ -903,7 +901,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
903
901
|
storage_config = build_storage_config(
|
|
904
902
|
tool_collection=tool_collection,
|
|
905
903
|
sub_type=None,
|
|
906
|
-
content_hash=text_inputs[0]["
|
|
904
|
+
content_hash=text_inputs[0]["document"]["contentHash"],
|
|
907
905
|
document_fields=text_inputs[0]["document"],
|
|
908
906
|
)
|
|
909
907
|
|
|
@@ -935,16 +933,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
935
933
|
) -> str:
|
|
936
934
|
"""Internal method to embed any tool type."""
|
|
937
935
|
# 1. Extract text using the spec
|
|
938
|
-
text = extract_tool_text(
|
|
936
|
+
text = extract_tool_text(tool_collection, data)
|
|
939
937
|
if not text:
|
|
940
938
|
raise ValueError(
|
|
941
939
|
f"Failed to extract text from {tool_collection} - empty content"
|
|
942
940
|
)
|
|
943
941
|
|
|
944
942
|
# 2. Compute content hash
|
|
945
|
-
content_hash = compute_content_hash(
|
|
946
|
-
{"toolCollection": tool_collection, "data": data}
|
|
947
|
-
)
|
|
943
|
+
content_hash = compute_content_hash(tool_collection, data)
|
|
948
944
|
if not content_hash:
|
|
949
945
|
raise ValueError(
|
|
950
946
|
f"Failed to compute content hash for {tool_collection} - empty content"
|
|
@@ -999,16 +995,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
999
995
|
) -> EmbeddingResult:
|
|
1000
996
|
"""Internal method to embed any tool type and wait for result."""
|
|
1001
997
|
# 1. Extract text using the spec
|
|
1002
|
-
text = extract_tool_text(
|
|
998
|
+
text = extract_tool_text(tool_collection, data)
|
|
1003
999
|
if not text:
|
|
1004
1000
|
raise ValueError(
|
|
1005
1001
|
f"Failed to extract text from {tool_collection} - empty content"
|
|
1006
1002
|
)
|
|
1007
1003
|
|
|
1008
1004
|
# 2. Compute content hash
|
|
1009
|
-
content_hash = compute_content_hash(
|
|
1010
|
-
{"toolCollection": tool_collection, "data": data}
|
|
1011
|
-
)
|
|
1005
|
+
content_hash = compute_content_hash(tool_collection, data)
|
|
1012
1006
|
if not content_hash:
|
|
1013
1007
|
raise ValueError(
|
|
1014
1008
|
f"Failed to compute content hash for {tool_collection} - empty content"
|
|
@@ -1082,16 +1076,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
1082
1076
|
metadata = item["metadata"]
|
|
1083
1077
|
|
|
1084
1078
|
# Extract text
|
|
1085
|
-
text = extract_tool_text(
|
|
1079
|
+
text = extract_tool_text(tool_collection, data)
|
|
1086
1080
|
if not text:
|
|
1087
1081
|
raise ValueError(
|
|
1088
1082
|
f"Failed to extract text from {tool_collection} - empty content"
|
|
1089
1083
|
)
|
|
1090
1084
|
|
|
1091
1085
|
# Compute content hash
|
|
1092
|
-
content_hash = compute_content_hash(
|
|
1093
|
-
{"toolCollection": tool_collection, "data": data}
|
|
1094
|
-
)
|
|
1086
|
+
content_hash = compute_content_hash(tool_collection, data)
|
|
1095
1087
|
if not content_hash:
|
|
1096
1088
|
raise ValueError(
|
|
1097
1089
|
f"Failed to compute content hash for {tool_collection} - empty content"
|
|
@@ -1163,16 +1155,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
1163
1155
|
metadata = item["metadata"]
|
|
1164
1156
|
|
|
1165
1157
|
# Extract text
|
|
1166
|
-
text = extract_tool_text(
|
|
1158
|
+
text = extract_tool_text(tool_collection, data)
|
|
1167
1159
|
if not text:
|
|
1168
1160
|
raise ValueError(
|
|
1169
1161
|
f"Failed to extract text from {tool_collection} - empty content"
|
|
1170
1162
|
)
|
|
1171
1163
|
|
|
1172
1164
|
# Compute content hash
|
|
1173
|
-
content_hash = compute_content_hash(
|
|
1174
|
-
{"toolCollection": tool_collection, "data": data}
|
|
1175
|
-
)
|
|
1165
|
+
content_hash = compute_content_hash(tool_collection, data)
|
|
1176
1166
|
if not content_hash:
|
|
1177
1167
|
raise ValueError(
|
|
1178
1168
|
f"Failed to compute content hash for {tool_collection} - empty content"
|
|
@@ -151,15 +151,15 @@ TOOL_CONFIGS: dict[ToolCollection, ToolConfig] = {
|
|
|
151
151
|
default_priority=PRIORITY_NORMAL,
|
|
152
152
|
turbopuffer=TurboPufferToolConfig(
|
|
153
153
|
enabled=True,
|
|
154
|
-
id_field="
|
|
155
|
-
metadata_fields=
|
|
154
|
+
id_field="id",
|
|
155
|
+
metadata_fields=("toolId", "toolCollection", "topicId", "userId", "contentHash", "id"),
|
|
156
156
|
namespace_pattern="topic_vectors",
|
|
157
157
|
),
|
|
158
158
|
pinecone=PineconeToolConfig(
|
|
159
159
|
enabled=False,
|
|
160
160
|
index_name="tool-vectors",
|
|
161
|
-
id_field="
|
|
162
|
-
metadata_fields=
|
|
161
|
+
id_field="id",
|
|
162
|
+
metadata_fields=("toolId", "toolCollection", "topicId", "userId", "contentHash", "id"),
|
|
163
163
|
namespace_pattern="topic_vectors",
|
|
164
164
|
),
|
|
165
165
|
),
|
|
File without changes
|