sf-vector-sdk 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sf-vector-sdk
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Summary: Python SDK for the Vector Gateway service (embeddings and vector search)
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: redis>=5.0.0
@@ -266,8 +266,9 @@ result = client.structured_embeddings.embed_test_question_and_wait(
266
266
  )
267
267
 
268
268
  # Embed a topic - uses TopicMetadata (all fields optional)
269
+ # Note: Topic data requires an "id" field which becomes the TurboPuffer document ID
269
270
  result = client.structured_embeddings.embed_topic_and_wait(
270
- data={"topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
271
+ data={"id": "topic-123", "topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
271
272
  metadata=TopicMetadata(user_id="user123", topic_id="topic456"), # No tool_id needed
272
273
  )
273
274
 
@@ -276,9 +277,9 @@ from vector_sdk import TopicBatchItem
276
277
 
277
278
  batch_result = client.structured_embeddings.embed_topic_batch_and_wait(
278
279
  items=[
279
- TopicBatchItem(data={"topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
280
- TopicBatchItem(data={"topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
281
- TopicBatchItem(data={"topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()), # All optional
280
+ TopicBatchItem(data={"id": "topic-1", "topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
281
+ TopicBatchItem(data={"id": "topic-2", "topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
282
+ TopicBatchItem(data={"id": "topic-3", "topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()), # All optional
282
283
  ],
283
284
  )
284
285
  ```
@@ -1,4 +1,4 @@
1
- vector_sdk/__init__.py,sha256=JBhZJLh_1w0L0mo3pYcXxx2OZdwD4siglLH5a8TxcCs,6979
1
+ vector_sdk/__init__.py,sha256=9UjWp-4N-KTnrsLpNLyRb-YgwEiBgwKLpg9uyO2bzj8,6979
2
2
  vector_sdk/client.py,sha256=NQFGHyR1aM0UToRFy6e9Xm_v6mk0opqzKN8UlHu97n0,17186
3
3
  vector_sdk/content_types.py,sha256=krvFOR58iUZPfYlEVsk0sXD6_ANAFbxEBQGNpt1YPDU,7381
4
4
  vector_sdk/types.py,sha256=rQgA2z3ls21vY-DRPZgfmm8gYFkWJk1dQaJI-nbc0no,25514
@@ -12,7 +12,7 @@ vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py,sha256=cf4PCZK-Otf
12
12
  vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.pyi,sha256=WKj_iRAuhXMNH3a2tf5j-ERYE5HLKamJTcQXm88JjDo,2451
13
13
  vector_sdk/hash/__init__.py,sha256=if-8tGOPyGUZy0_joGH66moE0e5zzwSzfUeMqP_8QsU,723
14
14
  vector_sdk/hash/hasher.py,sha256=k5VSQB-T0TtBM5ipaVE_TQu_uiaiWNjOWSbByxjriwQ,8618
15
- vector_sdk/hash/types.py,sha256=RHDM-ob9cOHPGMI7tXqiN_ZRowTPSc3GYHf8terrd8U,1983
15
+ vector_sdk/hash/types.py,sha256=clBRk_D5SrXWU19K3Jg8COecz9--WZh9Ws4f70T3BXg,2044
16
16
  vector_sdk/namespaces/__init__.py,sha256=S9dJfB39s2zjYOpFn9Fvf8bk7mLKcXk5aPatKOA-xO0,374
17
17
  vector_sdk/namespaces/base.py,sha256=lioZBcd43mijnN0JwTMMEpQ6whiAjaueTDAAIZS1JM0,1156
18
18
  vector_sdk/namespaces/db.py,sha256=a5sEHrfy1xAjRjyM9qfZxr3IznZVA8BnY5W1Hq5jr4I,7230
@@ -20,8 +20,8 @@ vector_sdk/namespaces/embeddings.py,sha256=r0cbCZjj0jZ9oyBpm8lA2BjUYzi8bmunWwFsY
20
20
  vector_sdk/namespaces/search.py,sha256=8ruX0xp5vXD9tS8oXAu1vmF4aC25fNg4gDOtiR8aQ_0,7874
21
21
  vector_sdk/structured/__init__.py,sha256=ZUhrH_l7bX5vA78DSKqDucWhfhYmkDX-W_MPzo5J9JU,1758
22
22
  vector_sdk/structured/router.py,sha256=F3O1TYtbVFCPqVWCCYCt5QcRffX5WPlPQ7K3KlayooQ,5792
23
- vector_sdk/structured/structured_embeddings.py,sha256=Z0enOHx4vdhxAs0sbk9B6XHtRjZSfeYbNbtbq9f8Hh8,37147
24
- vector_sdk/structured/tool_config.py,sha256=YJp-S2_mwoODHWaWJHnGJRaKXuuqbbm2dYHTum2BuG4,8138
25
- sf_vector_sdk-0.2.3.dist-info/METADATA,sha256=MjgxnlU-zYwoIh6qyg68VBcT03aKBP73KJ-NA3hrVbs,15915
26
- sf_vector_sdk-0.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
27
- sf_vector_sdk-0.2.3.dist-info/RECORD,,
23
+ vector_sdk/structured/structured_embeddings.py,sha256=GiIrdAUi8ImsakASTS2Vtda7MZQYwyyhr3alJB-fJnM,36889
24
+ vector_sdk/structured/tool_config.py,sha256=qMwP8UWQTt8mkTYFVgvNXd9Dh_WztJSsqcgAjvQ_YoY,8212
25
+ sf_vector_sdk-0.2.5.dist-info/METADATA,sha256=LGgjur6DpGE4ypd3xOJjqq-vEK74QAXLGwfFf6aC4T4,16069
26
+ sf_vector_sdk-0.2.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
27
+ sf_vector_sdk-0.2.5.dist-info/RECORD,,
vector_sdk/__init__.py CHANGED
@@ -166,7 +166,7 @@ from vector_sdk.types import (
166
166
  validate_model,
167
167
  )
168
168
 
169
- __version__ = "0.2.3"
169
+ __version__ = "0.2.5"
170
170
 
171
171
  __all__ = [
172
172
  # Clients (New API)
vector_sdk/hash/types.py CHANGED
@@ -72,5 +72,7 @@ class TopicData(BaseModel):
72
72
 
73
73
  model_config = ConfigDict(extra="allow")
74
74
 
75
+ # Required - becomes TurboPuffer document ID
76
+ id: str
75
77
  topic: Optional[str] = None
76
78
  description: Optional[str] = None
@@ -665,16 +665,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
665
665
  ) -> str:
666
666
  """Internal method to embed a topic with TopicMetadata."""
667
667
  # 1. Extract text using the spec
668
- text = extract_tool_text({"toolCollection": tool_collection, "data": data})
668
+ text = extract_tool_text(tool_collection, data)
669
669
  if not text:
670
670
  raise ValueError(
671
671
  f"Failed to extract text from {tool_collection} - empty content"
672
672
  )
673
673
 
674
674
  # 2. Compute content hash
675
- content_hash = compute_content_hash(
676
- {"toolCollection": tool_collection, "data": data}
677
- )
675
+ content_hash = compute_content_hash(tool_collection, data)
678
676
  if not content_hash:
679
677
  raise ValueError(
680
678
  f"Failed to compute content hash for {tool_collection} - empty content"
@@ -686,6 +684,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
686
684
  # 4. Build document with metadata (TopicMetadata doesn't have toolId)
687
685
  document = {
688
686
  **metadata.to_dict(),
687
+ "id": data["id"],
689
688
  "toolCollection": tool_collection,
690
689
  "contentHash": content_hash,
691
690
  }
@@ -698,9 +697,9 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
698
697
  document_fields=document,
699
698
  )
700
699
 
701
- # 6. Build text input
700
+ # 6. Build text input - use data["id"] as the TurboPuffer document ID
702
701
  text_input = {
703
- "id": content_hash,
702
+ "id": data["id"],
704
703
  "text": text,
705
704
  "document": document,
706
705
  }
@@ -729,16 +728,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
729
728
  ) -> EmbeddingResult:
730
729
  """Internal method to embed a topic and wait for result."""
731
730
  # 1. Extract text using the spec
732
- text = extract_tool_text({"toolCollection": tool_collection, "data": data})
731
+ text = extract_tool_text(tool_collection, data)
733
732
  if not text:
734
733
  raise ValueError(
735
734
  f"Failed to extract text from {tool_collection} - empty content"
736
735
  )
737
736
 
738
737
  # 2. Compute content hash
739
- content_hash = compute_content_hash(
740
- {"toolCollection": tool_collection, "data": data}
741
- )
738
+ content_hash = compute_content_hash(tool_collection, data)
742
739
  if not content_hash:
743
740
  raise ValueError(
744
741
  f"Failed to compute content hash for {tool_collection} - empty content"
@@ -750,6 +747,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
750
747
  # 4. Build document with metadata
751
748
  document = {
752
749
  **metadata.to_dict(),
750
+ "id": data["id"],
753
751
  "toolCollection": tool_collection,
754
752
  "contentHash": content_hash,
755
753
  }
@@ -762,9 +760,9 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
762
760
  document_fields=document,
763
761
  )
764
762
 
765
- # 6. Build text input
763
+ # 6. Build text input - use data["id"] as the TurboPuffer document ID
766
764
  text_input = {
767
- "id": content_hash,
765
+ "id": data["id"],
768
766
  "text": text,
769
767
  "document": document,
770
768
  }
@@ -802,16 +800,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
802
800
  metadata = item.metadata
803
801
 
804
802
  # Extract text
805
- text = extract_tool_text({"toolCollection": tool_collection, "data": data})
803
+ text = extract_tool_text(tool_collection, data)
806
804
  if not text:
807
805
  raise ValueError(
808
806
  f"Failed to extract text from {tool_collection} - empty content"
809
807
  )
810
808
 
811
809
  # Compute content hash
812
- content_hash = compute_content_hash(
813
- {"toolCollection": tool_collection, "data": data}
814
- )
810
+ content_hash = compute_content_hash(tool_collection, data)
815
811
  if not content_hash:
816
812
  raise ValueError(
817
813
  f"Failed to compute content hash for {tool_collection} - empty content"
@@ -820,12 +816,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
820
816
  # Build document with metadata (TopicMetadata doesn't have toolId)
821
817
  document = {
822
818
  **metadata.to_dict(),
819
+ "id": data["id"],
823
820
  "toolCollection": tool_collection,
824
821
  "contentHash": content_hash,
825
822
  }
826
823
 
824
+ # Use data["id"] as the TurboPuffer document ID
827
825
  text_inputs.append({
828
- "id": content_hash,
826
+ "id": data["id"],
829
827
  "text": text,
830
828
  "document": document,
831
829
  })
@@ -834,7 +832,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
834
832
  storage_config = build_storage_config(
835
833
  tool_collection=tool_collection,
836
834
  sub_type=None,
837
- content_hash=text_inputs[0]["id"],
835
+ content_hash=text_inputs[0]["document"]["contentHash"],
838
836
  document_fields=text_inputs[0]["document"],
839
837
  )
840
838
 
@@ -846,7 +844,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
846
844
  storage=storage_config,
847
845
  metadata={
848
846
  "toolCollection": tool_collection,
849
- "batchSize": len(items),
847
+ "batchSize": str(len(items)),
850
848
  },
851
849
  embedding_model=tool_config.model,
852
850
  embedding_dimensions=tool_config.dimensions,
@@ -871,16 +869,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
871
869
  metadata = item.metadata
872
870
 
873
871
  # Extract text
874
- text = extract_tool_text({"toolCollection": tool_collection, "data": data})
872
+ text = extract_tool_text(tool_collection, data)
875
873
  if not text:
876
874
  raise ValueError(
877
875
  f"Failed to extract text from {tool_collection} - empty content"
878
876
  )
879
877
 
880
878
  # Compute content hash
881
- content_hash = compute_content_hash(
882
- {"toolCollection": tool_collection, "data": data}
883
- )
879
+ content_hash = compute_content_hash(tool_collection, data)
884
880
  if not content_hash:
885
881
  raise ValueError(
886
882
  f"Failed to compute content hash for {tool_collection} - empty content"
@@ -889,12 +885,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
889
885
  # Build document with metadata
890
886
  document = {
891
887
  **metadata.to_dict(),
888
+ "id": data["id"],
892
889
  "toolCollection": tool_collection,
893
890
  "contentHash": content_hash,
894
891
  }
895
892
 
893
+ # Use data["id"] as the TurboPuffer document ID
896
894
  text_inputs.append({
897
- "id": content_hash,
895
+ "id": data["id"],
898
896
  "text": text,
899
897
  "document": document,
900
898
  })
@@ -903,7 +901,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
903
901
  storage_config = build_storage_config(
904
902
  tool_collection=tool_collection,
905
903
  sub_type=None,
906
- content_hash=text_inputs[0]["id"],
904
+ content_hash=text_inputs[0]["document"]["contentHash"],
907
905
  document_fields=text_inputs[0]["document"],
908
906
  )
909
907
 
@@ -915,7 +913,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
915
913
  storage=storage_config,
916
914
  metadata={
917
915
  "toolCollection": tool_collection,
918
- "batchSize": len(items),
916
+ "batchSize": str(len(items)),
919
917
  },
920
918
  embedding_model=tool_config.model,
921
919
  embedding_dimensions=tool_config.dimensions,
@@ -935,16 +933,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
935
933
  ) -> str:
936
934
  """Internal method to embed any tool type."""
937
935
  # 1. Extract text using the spec
938
- text = extract_tool_text({"toolCollection": tool_collection, "data": data})
936
+ text = extract_tool_text(tool_collection, data)
939
937
  if not text:
940
938
  raise ValueError(
941
939
  f"Failed to extract text from {tool_collection} - empty content"
942
940
  )
943
941
 
944
942
  # 2. Compute content hash
945
- content_hash = compute_content_hash(
946
- {"toolCollection": tool_collection, "data": data}
947
- )
943
+ content_hash = compute_content_hash(tool_collection, data)
948
944
  if not content_hash:
949
945
  raise ValueError(
950
946
  f"Failed to compute content hash for {tool_collection} - empty content"
@@ -999,16 +995,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
999
995
  ) -> EmbeddingResult:
1000
996
  """Internal method to embed any tool type and wait for result."""
1001
997
  # 1. Extract text using the spec
1002
- text = extract_tool_text({"toolCollection": tool_collection, "data": data})
998
+ text = extract_tool_text(tool_collection, data)
1003
999
  if not text:
1004
1000
  raise ValueError(
1005
1001
  f"Failed to extract text from {tool_collection} - empty content"
1006
1002
  )
1007
1003
 
1008
1004
  # 2. Compute content hash
1009
- content_hash = compute_content_hash(
1010
- {"toolCollection": tool_collection, "data": data}
1011
- )
1005
+ content_hash = compute_content_hash(tool_collection, data)
1012
1006
  if not content_hash:
1013
1007
  raise ValueError(
1014
1008
  f"Failed to compute content hash for {tool_collection} - empty content"
@@ -1082,16 +1076,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
1082
1076
  metadata = item["metadata"]
1083
1077
 
1084
1078
  # Extract text
1085
- text = extract_tool_text({"toolCollection": tool_collection, "data": data})
1079
+ text = extract_tool_text(tool_collection, data)
1086
1080
  if not text:
1087
1081
  raise ValueError(
1088
1082
  f"Failed to extract text from {tool_collection} - empty content"
1089
1083
  )
1090
1084
 
1091
1085
  # Compute content hash
1092
- content_hash = compute_content_hash(
1093
- {"toolCollection": tool_collection, "data": data}
1094
- )
1086
+ content_hash = compute_content_hash(tool_collection, data)
1095
1087
  if not content_hash:
1096
1088
  raise ValueError(
1097
1089
  f"Failed to compute content hash for {tool_collection} - empty content"
@@ -1127,7 +1119,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
1127
1119
  storage=storage_config,
1128
1120
  metadata={
1129
1121
  "toolCollection": tool_collection,
1130
- "batchSize": len(items),
1122
+ "batchSize": str(len(items)),
1131
1123
  },
1132
1124
  embedding_model=tool_config.model,
1133
1125
  embedding_dimensions=tool_config.dimensions,
@@ -1163,16 +1155,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
1163
1155
  metadata = item["metadata"]
1164
1156
 
1165
1157
  # Extract text
1166
- text = extract_tool_text({"toolCollection": tool_collection, "data": data})
1158
+ text = extract_tool_text(tool_collection, data)
1167
1159
  if not text:
1168
1160
  raise ValueError(
1169
1161
  f"Failed to extract text from {tool_collection} - empty content"
1170
1162
  )
1171
1163
 
1172
1164
  # Compute content hash
1173
- content_hash = compute_content_hash(
1174
- {"toolCollection": tool_collection, "data": data}
1175
- )
1165
+ content_hash = compute_content_hash(tool_collection, data)
1176
1166
  if not content_hash:
1177
1167
  raise ValueError(
1178
1168
  f"Failed to compute content hash for {tool_collection} - empty content"
@@ -1208,7 +1198,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
1208
1198
  storage=storage_config,
1209
1199
  metadata={
1210
1200
  "toolCollection": tool_collection,
1211
- "batchSize": len(items),
1201
+ "batchSize": str(len(items)),
1212
1202
  },
1213
1203
  embedding_model=tool_config.model,
1214
1204
  embedding_dimensions=tool_config.dimensions,
@@ -151,15 +151,15 @@ TOOL_CONFIGS: dict[ToolCollection, ToolConfig] = {
151
151
  default_priority=PRIORITY_NORMAL,
152
152
  turbopuffer=TurboPufferToolConfig(
153
153
  enabled=True,
154
- id_field="contentHash",
155
- metadata_fields=_DEFAULT_METADATA_FIELDS,
154
+ id_field="id",
155
+ metadata_fields=("toolId", "toolCollection", "topicId", "userId", "contentHash", "id"),
156
156
  namespace_pattern="topic_vectors",
157
157
  ),
158
158
  pinecone=PineconeToolConfig(
159
159
  enabled=False,
160
160
  index_name="tool-vectors",
161
- id_field="contentHash",
162
- metadata_fields=_DEFAULT_METADATA_FIELDS,
161
+ id_field="id",
162
+ metadata_fields=("toolId", "toolCollection", "topicId", "userId", "contentHash", "id"),
163
163
  namespace_pattern="topic_vectors",
164
164
  ),
165
165
  ),