PyPI - sf-vector-sdk - Versions diffs - 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl - Mend

sf-vector-sdk 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

{sf_vector_sdk-0.2.3.dist-info → sf_vector_sdk-0.2.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sf-vector-sdk
-Version: 0.2.3
+Version: 0.2.5
 Summary: Python SDK for the Vector Gateway service (embeddings and vector search)
 Requires-Python: >=3.11
 Requires-Dist: redis>=5.0.0
@@ -266,8 +266,9 @@ result = client.structured_embeddings.embed_test_question_and_wait(
 )
 # Embed a topic - uses TopicMetadata (all fields optional)
+# Note: Topic data requires an "id" field which becomes the TurboPuffer document ID
 result = client.structured_embeddings.embed_topic_and_wait(
-    data={"topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
+    data={"id": "topic-123", "topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
     metadata=TopicMetadata(user_id="user123", topic_id="topic456"),  # No tool_id needed
 )
@@ -276,9 +277,9 @@ from vector_sdk import TopicBatchItem
 batch_result = client.structured_embeddings.embed_topic_batch_and_wait(
     items=[
-        TopicBatchItem(data={"topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
-        TopicBatchItem(data={"topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
-        TopicBatchItem(data={"topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()),  # All optional
+        TopicBatchItem(data={"id": "topic-1", "topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
+        TopicBatchItem(data={"id": "topic-2", "topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
+        TopicBatchItem(data={"id": "topic-3", "topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()),  # All optional
     ],
 )
 ```

{sf_vector_sdk-0.2.3.dist-info → sf_vector_sdk-0.2.5.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-vector_sdk/__init__.py,sha256=JBhZJLh_1w0L0mo3pYcXxx2OZdwD4siglLH5a8TxcCs,6979
+vector_sdk/__init__.py,sha256=9UjWp-4N-KTnrsLpNLyRb-YgwEiBgwKLpg9uyO2bzj8,6979
 vector_sdk/client.py,sha256=NQFGHyR1aM0UToRFy6e9Xm_v6mk0opqzKN8UlHu97n0,17186
 vector_sdk/content_types.py,sha256=krvFOR58iUZPfYlEVsk0sXD6_ANAFbxEBQGNpt1YPDU,7381
 vector_sdk/types.py,sha256=rQgA2z3ls21vY-DRPZgfmm8gYFkWJk1dQaJI-nbc0no,25514
@@ -12,7 +12,7 @@ vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py,sha256=cf4PCZK-Otf
 vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.pyi,sha256=WKj_iRAuhXMNH3a2tf5j-ERYE5HLKamJTcQXm88JjDo,2451
 vector_sdk/hash/__init__.py,sha256=if-8tGOPyGUZy0_joGH66moE0e5zzwSzfUeMqP_8QsU,723
 vector_sdk/hash/hasher.py,sha256=k5VSQB-T0TtBM5ipaVE_TQu_uiaiWNjOWSbByxjriwQ,8618
-vector_sdk/hash/types.py,sha256=RHDM-ob9cOHPGMI7tXqiN_ZRowTPSc3GYHf8terrd8U,1983
+vector_sdk/hash/types.py,sha256=clBRk_D5SrXWU19K3Jg8COecz9--WZh9Ws4f70T3BXg,2044
 vector_sdk/namespaces/__init__.py,sha256=S9dJfB39s2zjYOpFn9Fvf8bk7mLKcXk5aPatKOA-xO0,374
 vector_sdk/namespaces/base.py,sha256=lioZBcd43mijnN0JwTMMEpQ6whiAjaueTDAAIZS1JM0,1156
 vector_sdk/namespaces/db.py,sha256=a5sEHrfy1xAjRjyM9qfZxr3IznZVA8BnY5W1Hq5jr4I,7230
@@ -20,8 +20,8 @@ vector_sdk/namespaces/embeddings.py,sha256=r0cbCZjj0jZ9oyBpm8lA2BjUYzi8bmunWwFsY
 vector_sdk/namespaces/search.py,sha256=8ruX0xp5vXD9tS8oXAu1vmF4aC25fNg4gDOtiR8aQ_0,7874
 vector_sdk/structured/__init__.py,sha256=ZUhrH_l7bX5vA78DSKqDucWhfhYmkDX-W_MPzo5J9JU,1758
 vector_sdk/structured/router.py,sha256=F3O1TYtbVFCPqVWCCYCt5QcRffX5WPlPQ7K3KlayooQ,5792
-vector_sdk/structured/structured_embeddings.py,sha256=Z0enOHx4vdhxAs0sbk9B6XHtRjZSfeYbNbtbq9f8Hh8,37147
-vector_sdk/structured/tool_config.py,sha256=YJp-S2_mwoODHWaWJHnGJRaKXuuqbbm2dYHTum2BuG4,8138
-sf_vector_sdk-0.2.3.dist-info/METADATA,sha256=MjgxnlU-zYwoIh6qyg68VBcT03aKBP73KJ-NA3hrVbs,15915
-sf_vector_sdk-0.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sf_vector_sdk-0.2.3.dist-info/RECORD,,
+vector_sdk/structured/structured_embeddings.py,sha256=GiIrdAUi8ImsakASTS2Vtda7MZQYwyyhr3alJB-fJnM,36889
+vector_sdk/structured/tool_config.py,sha256=qMwP8UWQTt8mkTYFVgvNXd9Dh_WztJSsqcgAjvQ_YoY,8212
+sf_vector_sdk-0.2.5.dist-info/METADATA,sha256=LGgjur6DpGE4ypd3xOJjqq-vEK74QAXLGwfFf6aC4T4,16069
+sf_vector_sdk-0.2.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sf_vector_sdk-0.2.5.dist-info/RECORD,,

vector_sdk/__init__.py CHANGED Viewed

@@ -166,7 +166,7 @@ from vector_sdk.types import (
     validate_model,
 )
-__version__ = "0.2.3"
+__version__ = "0.2.5"
 __all__ = [
     # Clients (New API)

vector_sdk/hash/types.py CHANGED Viewed

@@ -72,5 +72,7 @@ class TopicData(BaseModel):
     model_config = ConfigDict(extra="allow")
+    # Required - becomes TurboPuffer document ID
+    id: str
     topic: Optional[str] = None
     description: Optional[str] = None

vector_sdk/structured/structured_embeddings.py CHANGED Viewed

@@ -665,16 +665,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
     ) -> str:
         """Internal method to embed a topic with TopicMetadata."""
         # 1. Extract text using the spec
-        text = extract_tool_text({"toolCollection": tool_collection, "data": data})
+        text = extract_tool_text(tool_collection, data)
         if not text:
             raise ValueError(
                 f"Failed to extract text from {tool_collection} - empty content"
             )
         # 2. Compute content hash
-        content_hash = compute_content_hash(
-            {"toolCollection": tool_collection, "data": data}
-        )
+        content_hash = compute_content_hash(tool_collection, data)
         if not content_hash:
             raise ValueError(
                 f"Failed to compute content hash for {tool_collection} - empty content"
@@ -686,6 +684,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
         # 4. Build document with metadata (TopicMetadata doesn't have toolId)
         document = {
             **metadata.to_dict(),
+            "id": data["id"],
             "toolCollection": tool_collection,
             "contentHash": content_hash,
         }
@@ -698,9 +697,9 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             document_fields=document,
         )
-        # 6. Build text input
+        # 6. Build text input - use data["id"] as the TurboPuffer document ID
         text_input = {
-            "id": content_hash,
+            "id": data["id"],
             "text": text,
             "document": document,
         }
@@ -729,16 +728,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
     ) -> EmbeddingResult:
         """Internal method to embed a topic and wait for result."""
         # 1. Extract text using the spec
-        text = extract_tool_text({"toolCollection": tool_collection, "data": data})
+        text = extract_tool_text(tool_collection, data)
         if not text:
             raise ValueError(
                 f"Failed to extract text from {tool_collection} - empty content"
             )
         # 2. Compute content hash
-        content_hash = compute_content_hash(
-            {"toolCollection": tool_collection, "data": data}
-        )
+        content_hash = compute_content_hash(tool_collection, data)
         if not content_hash:
             raise ValueError(
                 f"Failed to compute content hash for {tool_collection} - empty content"
@@ -750,6 +747,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
         # 4. Build document with metadata
         document = {
             **metadata.to_dict(),
+            "id": data["id"],
             "toolCollection": tool_collection,
             "contentHash": content_hash,
         }
@@ -762,9 +760,9 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             document_fields=document,
         )
-        # 6. Build text input
+        # 6. Build text input - use data["id"] as the TurboPuffer document ID
         text_input = {
-            "id": content_hash,
+            "id": data["id"],
             "text": text,
             "document": document,
         }
@@ -802,16 +800,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             metadata = item.metadata
             # Extract text
-            text = extract_tool_text({"toolCollection": tool_collection, "data": data})
+            text = extract_tool_text(tool_collection, data)
             if not text:
                 raise ValueError(
                     f"Failed to extract text from {tool_collection} - empty content"
                 )
             # Compute content hash
-            content_hash = compute_content_hash(
-                {"toolCollection": tool_collection, "data": data}
-            )
+            content_hash = compute_content_hash(tool_collection, data)
             if not content_hash:
                 raise ValueError(
                     f"Failed to compute content hash for {tool_collection} - empty content"
@@ -820,12 +816,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             # Build document with metadata (TopicMetadata doesn't have toolId)
             document = {
                 **metadata.to_dict(),
+                "id": data["id"],
                 "toolCollection": tool_collection,
                 "contentHash": content_hash,
             }
+            # Use data["id"] as the TurboPuffer document ID
             text_inputs.append({
-                "id": content_hash,
+                "id": data["id"],
                 "text": text,
                 "document": document,
             })
@@ -834,7 +832,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
         storage_config = build_storage_config(
             tool_collection=tool_collection,
             sub_type=None,
-            content_hash=text_inputs[0]["id"],
+            content_hash=text_inputs[0]["document"]["contentHash"],
             document_fields=text_inputs[0]["document"],
         )
@@ -846,7 +844,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
@@ -871,16 +869,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             metadata = item.metadata
             # Extract text
-            text = extract_tool_text({"toolCollection": tool_collection, "data": data})
+            text = extract_tool_text(tool_collection, data)
             if not text:
                 raise ValueError(
                     f"Failed to extract text from {tool_collection} - empty content"
                 )
             # Compute content hash
-            content_hash = compute_content_hash(
-                {"toolCollection": tool_collection, "data": data}
-            )
+            content_hash = compute_content_hash(tool_collection, data)
             if not content_hash:
                 raise ValueError(
                     f"Failed to compute content hash for {tool_collection} - empty content"
@@ -889,12 +885,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             # Build document with metadata
             document = {
                 **metadata.to_dict(),
+                "id": data["id"],
                 "toolCollection": tool_collection,
                 "contentHash": content_hash,
             }
+            # Use data["id"] as the TurboPuffer document ID
             text_inputs.append({
-                "id": content_hash,
+                "id": data["id"],
                 "text": text,
                 "document": document,
             })
@@ -903,7 +901,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
         storage_config = build_storage_config(
             tool_collection=tool_collection,
             sub_type=None,
-            content_hash=text_inputs[0]["id"],
+            content_hash=text_inputs[0]["document"]["contentHash"],
             document_fields=text_inputs[0]["document"],
         )
@@ -915,7 +913,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
@@ -935,16 +933,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
     ) -> str:
         """Internal method to embed any tool type."""
         # 1. Extract text using the spec
-        text = extract_tool_text({"toolCollection": tool_collection, "data": data})
+        text = extract_tool_text(tool_collection, data)
         if not text:
             raise ValueError(
                 f"Failed to extract text from {tool_collection} - empty content"
             )
         # 2. Compute content hash
-        content_hash = compute_content_hash(
-            {"toolCollection": tool_collection, "data": data}
-        )
+        content_hash = compute_content_hash(tool_collection, data)
         if not content_hash:
             raise ValueError(
                 f"Failed to compute content hash for {tool_collection} - empty content"
@@ -999,16 +995,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
     ) -> EmbeddingResult:
         """Internal method to embed any tool type and wait for result."""
         # 1. Extract text using the spec
-        text = extract_tool_text({"toolCollection": tool_collection, "data": data})
+        text = extract_tool_text(tool_collection, data)
         if not text:
             raise ValueError(
                 f"Failed to extract text from {tool_collection} - empty content"
             )
         # 2. Compute content hash
-        content_hash = compute_content_hash(
-            {"toolCollection": tool_collection, "data": data}
-        )
+        content_hash = compute_content_hash(tool_collection, data)
         if not content_hash:
             raise ValueError(
                 f"Failed to compute content hash for {tool_collection} - empty content"
@@ -1082,16 +1076,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             metadata = item["metadata"]
             # Extract text
-            text = extract_tool_text({"toolCollection": tool_collection, "data": data})
+            text = extract_tool_text(tool_collection, data)
             if not text:
                 raise ValueError(
                     f"Failed to extract text from {tool_collection} - empty content"
                 )
             # Compute content hash
-            content_hash = compute_content_hash(
-                {"toolCollection": tool_collection, "data": data}
-            )
+            content_hash = compute_content_hash(tool_collection, data)
             if not content_hash:
                 raise ValueError(
                     f"Failed to compute content hash for {tool_collection} - empty content"
@@ -1127,7 +1119,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
@@ -1163,16 +1155,14 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             metadata = item["metadata"]
             # Extract text
-            text = extract_tool_text({"toolCollection": tool_collection, "data": data})
+            text = extract_tool_text(tool_collection, data)
             if not text:
                 raise ValueError(
                     f"Failed to extract text from {tool_collection} - empty content"
                 )
             # Compute content hash
-            content_hash = compute_content_hash(
-                {"toolCollection": tool_collection, "data": data}
-            )
+            content_hash = compute_content_hash(tool_collection, data)
             if not content_hash:
                 raise ValueError(
                     f"Failed to compute content hash for {tool_collection} - empty content"
@@ -1208,7 +1198,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,

vector_sdk/structured/tool_config.py CHANGED Viewed

@@ -151,15 +151,15 @@ TOOL_CONFIGS: dict[ToolCollection, ToolConfig] = {
         default_priority=PRIORITY_NORMAL,
         turbopuffer=TurboPufferToolConfig(
             enabled=True,
-            id_field="contentHash",
-            metadata_fields=_DEFAULT_METADATA_FIELDS,
+            id_field="id",
+            metadata_fields=("toolId", "toolCollection", "topicId", "userId", "contentHash", "id"),
             namespace_pattern="topic_vectors",
         ),
         pinecone=PineconeToolConfig(
             enabled=False,
             index_name="tool-vectors",
-            id_field="contentHash",
-            metadata_fields=_DEFAULT_METADATA_FIELDS,
+            id_field="id",
+            metadata_fields=("toolId", "toolCollection", "topicId", "userId", "contentHash", "id"),
             namespace_pattern="topic_vectors",
         ),
     ),

{sf_vector_sdk-0.2.3.dist-info → sf_vector_sdk-0.2.5.dist-info}/WHEEL RENAMED Viewed

File without changes

sf-vector-sdk 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

sf-vector-sdk 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl