PyPI - sf-vector-sdk - Versions diffs - 0.2.0__tar.gz → 0.2.3__tar.gz - Mend

sf-vector-sdk 0.2.0__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sf-vector-sdk
-Version: 0.2.0
+Version: 0.2.3
 Summary: Python SDK for the Vector Gateway service (embeddings and vector search)
 Requires-Python: >=3.11
 Requires-Dist: redis>=5.0.0
@@ -220,25 +220,42 @@ Type-safe embedding for known tool types (FlashCard, TestQuestion, etc.) with au
 |--------|-------------|
 | `embed_flashcard(data, metadata)` | Embed a flashcard, return request ID |
 | `embed_flashcard_and_wait(data, metadata, timeout)` | Embed and wait for result |
+| `embed_flashcard_batch(items)` | Embed batch of flashcards, return request ID |
+| `embed_flashcard_batch_and_wait(items, timeout)` | Embed batch and wait for result |
 | `embed_test_question(data, metadata)` | Embed a test question, return request ID |
 | `embed_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
+| `embed_test_question_batch(items)` | Embed batch of test questions, return request ID |
+| `embed_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
 | `embed_spaced_test_question(data, metadata)` | Embed a spaced test question, return request ID |
 | `embed_spaced_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
+| `embed_spaced_test_question_batch(items)` | Embed batch of spaced test questions, return request ID |
+| `embed_spaced_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
 | `embed_audio_recap(data, metadata)` | Embed an audio recap section, return request ID |
 | `embed_audio_recap_and_wait(data, metadata, timeout)` | Embed and wait for result |
+| `embed_audio_recap_batch(items)` | Embed batch of audio recaps, return request ID |
+| `embed_audio_recap_batch_and_wait(items, timeout)` | Embed batch and wait for result |
+| `embed_topic(data, metadata)` | Embed a topic (uses `TopicMetadata`), return request ID |
+| `embed_topic_and_wait(data, metadata, timeout)` | Embed and wait for result (uses `TopicMetadata`) |
+| `embed_topic_batch(items)` | Embed batch of topics (uses `TopicMetadata`), return request ID |
+| `embed_topic_batch_and_wait(items, timeout)` | Embed batch and wait for result (uses `TopicMetadata`) |
+**Metadata Types:**
+- `ToolMetadata` - For tools (FlashCard, TestQuestion, etc.) - requires `tool_id`
+- `TopicMetadata` - For topics only - all fields optional (`user_id`, `topic_id`)
 ```python
-from vector_sdk import VectorClient, ToolMetadata, TestQuestionInput
+from vector_sdk import VectorClient, ToolMetadata, TopicMetadata, TestQuestionInput
 client = VectorClient(redis_url="redis://localhost:6379")
-# Embed a flashcard - SDK handles text extraction, hashing, and routing
+# Embed a flashcard - uses ToolMetadata (tool_id required)
 result = client.structured_embeddings.embed_flashcard_and_wait(
     data={"type": "BASIC", "term": "Mitochondria", "definition": "The powerhouse of the cell"},
     metadata=ToolMetadata(tool_id="tool123", user_id="user456", topic_id="topic789"),
 )
-# Embed a test question
+# Embed a test question - uses ToolMetadata (tool_id required)
 result = client.structured_embeddings.embed_test_question_and_wait(
     data=TestQuestionInput(
         question="What is the capital?",
@@ -247,6 +264,23 @@ result = client.structured_embeddings.embed_test_question_and_wait(
     ),
     metadata=ToolMetadata(tool_id="tool456"),
 )
+# Embed a topic - uses TopicMetadata (all fields optional)
+result = client.structured_embeddings.embed_topic_and_wait(
+    data={"topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
+    metadata=TopicMetadata(user_id="user123", topic_id="topic456"),  # No tool_id needed
+)
+# Batch embedding - embed multiple topics in a single request
+from vector_sdk import TopicBatchItem
+batch_result = client.structured_embeddings.embed_topic_batch_and_wait(
+    items=[
+        TopicBatchItem(data={"topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
+        TopicBatchItem(data={"topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
+        TopicBatchItem(data={"topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()),  # All optional
+    ],
+)
 ```
 **Database Routing:**

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/README.md RENAMED Viewed

@@ -208,25 +208,42 @@ Type-safe embedding for known tool types (FlashCard, TestQuestion, etc.) with au
 |--------|-------------|
 | `embed_flashcard(data, metadata)` | Embed a flashcard, return request ID |
 | `embed_flashcard_and_wait(data, metadata, timeout)` | Embed and wait for result |
+| `embed_flashcard_batch(items)` | Embed batch of flashcards, return request ID |
+| `embed_flashcard_batch_and_wait(items, timeout)` | Embed batch and wait for result |
 | `embed_test_question(data, metadata)` | Embed a test question, return request ID |
 | `embed_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
+| `embed_test_question_batch(items)` | Embed batch of test questions, return request ID |
+| `embed_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
 | `embed_spaced_test_question(data, metadata)` | Embed a spaced test question, return request ID |
 | `embed_spaced_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
+| `embed_spaced_test_question_batch(items)` | Embed batch of spaced test questions, return request ID |
+| `embed_spaced_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
 | `embed_audio_recap(data, metadata)` | Embed an audio recap section, return request ID |
 | `embed_audio_recap_and_wait(data, metadata, timeout)` | Embed and wait for result |
+| `embed_audio_recap_batch(items)` | Embed batch of audio recaps, return request ID |
+| `embed_audio_recap_batch_and_wait(items, timeout)` | Embed batch and wait for result |
+| `embed_topic(data, metadata)` | Embed a topic (uses `TopicMetadata`), return request ID |
+| `embed_topic_and_wait(data, metadata, timeout)` | Embed and wait for result (uses `TopicMetadata`) |
+| `embed_topic_batch(items)` | Embed batch of topics (uses `TopicMetadata`), return request ID |
+| `embed_topic_batch_and_wait(items, timeout)` | Embed batch and wait for result (uses `TopicMetadata`) |
+**Metadata Types:**
+- `ToolMetadata` - For tools (FlashCard, TestQuestion, etc.) - requires `tool_id`
+- `TopicMetadata` - For topics only - all fields optional (`user_id`, `topic_id`)
 ```python
-from vector_sdk import VectorClient, ToolMetadata, TestQuestionInput
+from vector_sdk import VectorClient, ToolMetadata, TopicMetadata, TestQuestionInput
 client = VectorClient(redis_url="redis://localhost:6379")
-# Embed a flashcard - SDK handles text extraction, hashing, and routing
+# Embed a flashcard - uses ToolMetadata (tool_id required)
 result = client.structured_embeddings.embed_flashcard_and_wait(
     data={"type": "BASIC", "term": "Mitochondria", "definition": "The powerhouse of the cell"},
     metadata=ToolMetadata(tool_id="tool123", user_id="user456", topic_id="topic789"),
 )
-# Embed a test question
+# Embed a test question - uses ToolMetadata (tool_id required)
 result = client.structured_embeddings.embed_test_question_and_wait(
     data=TestQuestionInput(
         question="What is the capital?",
@@ -235,6 +252,23 @@ result = client.structured_embeddings.embed_test_question_and_wait(
     ),
     metadata=ToolMetadata(tool_id="tool456"),
 )
+# Embed a topic - uses TopicMetadata (all fields optional)
+result = client.structured_embeddings.embed_topic_and_wait(
+    data={"topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
+    metadata=TopicMetadata(user_id="user123", topic_id="topic456"),  # No tool_id needed
+)
+# Batch embedding - embed multiple topics in a single request
+from vector_sdk import TopicBatchItem
+batch_result = client.structured_embeddings.embed_topic_batch_and_wait(
+    items=[
+        TopicBatchItem(data={"topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
+        TopicBatchItem(data={"topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
+        TopicBatchItem(data={"topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()),  # All optional
+    ],
+)
 ```
 **Database Routing:**

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/__init__.py RENAMED Viewed

@@ -70,6 +70,7 @@ from vector_sdk.hash import (
     MultipleChoiceOption,
     QuestionData,
     ToolCollection,
+    TopicData,
     compute_content_hash,
     extract_tool_text,
 )
@@ -89,15 +90,21 @@ from vector_sdk.namespaces import (
 # ============================================================================
 from vector_sdk.structured import (
     TOOL_CONFIGS,
+    AudioRecapBatchItem,
+    BatchItem,
     DatabaseRoutingError,
     DatabaseRoutingMode,
+    FlashCardBatchItem,
     PineconeToolConfig,
     QuestionType,
     StructuredEmbeddingsNamespace,
+    TestQuestionBatchItem,
     TestQuestionInput,
     ToolConfig,
     ToolDatabaseConfig,
     ToolMetadata,
+    TopicBatchItem,
+    TopicMetadata,
     TurboPufferToolConfig,
     build_storage_config,
     get_content_type,
@@ -159,7 +166,7 @@ from vector_sdk.types import (
     validate_model,
 )
-__version__ = "0.2.0"
+__version__ = "0.2.3"
 __all__ = [
     # Clients (New API)
@@ -236,12 +243,21 @@ __all__ = [
     "FlashCardData",
     "QuestionData",
     "AudioRecapSectionData",
+    "TopicData",
     "MultipleChoiceOption",
     "AnswerObject",
     # Structured Embeddings
     "StructuredEmbeddingsNamespace",
     "ToolMetadata",
+    "TopicMetadata",
     "TestQuestionInput",
+    # Batch types
+    "BatchItem",
+    "FlashCardBatchItem",
+    "TestQuestionBatchItem",
+    "AudioRecapBatchItem",
+    "TopicBatchItem",
+    # Tool configuration
     "ToolConfig",
     "ToolDatabaseConfig",
     "TurboPufferToolConfig",

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.py RENAMED Viewed

@@ -2,7 +2,7 @@
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: embedding_pipeline/content_types/v1/content_types.proto
-# Protobuf Python Version: 6.33.4
+# Protobuf Python Version: 6.33.5
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
     _runtime_version.Domain.PUBLIC,
     6,
     33,
-    4,
+    5,
     '',
     'embedding_pipeline/content_types/v1/content_types.proto'
 )

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.py RENAMED Viewed

@@ -2,7 +2,7 @@
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: embedding_pipeline/db/vectors/v1/vectors.proto
-# Protobuf Python Version: 6.33.4
+# Protobuf Python Version: 6.33.5
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
     _runtime_version.Domain.PUBLIC,
     6,
     33,
-    4,
+    5,
     '',
     'embedding_pipeline/db/vectors/v1/vectors.proto'
 )

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/query/v1/query_pb2.py RENAMED Viewed

@@ -2,7 +2,7 @@
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: embedding_pipeline/query/v1/query.proto
-# Protobuf Python Version: 6.33.4
+# Protobuf Python Version: 6.33.5
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
     _runtime_version.Domain.PUBLIC,
     6,
     33,
-    4,
+    5,
     '',
     'embedding_pipeline/query/v1/query.proto'
 )

sf_vector_sdk-0.2.3/vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py ADDED Viewed

@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# NO CHECKED-IN PROTOBUF GENCODE
+# source: embedding_pipeline/tools/v1/tools.proto
+# Protobuf Python Version: 6.33.5
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import runtime_version as _runtime_version
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+_runtime_version.ValidateProtobufRuntimeVersion(
+    _runtime_version.Domain.PUBLIC,
+    6,
+    33,
+    5,
+    '',
+    'embedding_pipeline/tools/v1/tools.proto'
+)
+# @@protoc_insertion_point(imports)
+_sym_db = _symbol_database.Default()
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\'embedding_pipeline/tools/v1/tools.proto\x12\x1b\x65mbedding_pipeline.tools.v1\"\xed\x01\n\rTopicMetadata\x12\x1c\n\x07user_id\x18\x01 \x01(\tH\x00R\x06userId\x88\x01\x01\x12\x1e\n\x08topic_id\x18\x02 \x01(\tH\x01R\x07topicId\x88\x01\x01\x12K\n\x05\x65xtra\x18\x03 \x03(\x0b\x32\x35.embedding_pipeline.tools.v1.TopicMetadata.ExtraEntryR\x05\x65xtra\x1a\x38\n\nExtraEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\n\n\x08_user_idB\x0b\n\t_topic_id*\xe4\x01\n\x0eToolCollection\x12\x1f\n\x1bTOOL_COLLECTION_UNSPECIFIED\x10\x00\x12\x1d\n\x19TOOL_COLLECTION_FLASHCARD\x10\x01\x12!\n\x1dTOOL_COLLECTION_TEST_QUESTION\x10\x02\x12(\n$TOOL_COLLECTION_SPACED_TEST_QUESTION\x10\x03\x12*\n&TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION\x10\x04\x12\x19\n\x15TOOL_COLLECTION_TOPIC\x10\x05*\xb2\x01\n\rFlashCardType\x12\x1f\n\x1b\x46LASH_CARD_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15\x46LASH_CARD_TYPE_BASIC\x10\x01\x12\x19\n\x15\x46LASH_CARD_TYPE_CLOZE\x10\x02\x12%\n!FLASH_CARD_TYPE_FILL_IN_THE_BLANK\x10\x03\x12#\n\x1f\x46LASH_CARD_TYPE_MULTIPLE_CHOICE\x10\x04\x42gZegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/tools/v1b\x06proto3')
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'embedding_pipeline.tools.v1.tools_pb2', _globals)
+if not _descriptor._USE_C_DESCRIPTORS:
+  _globals['DESCRIPTOR']._loaded_options = None
+  _globals['DESCRIPTOR']._serialized_options = b'Zegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/tools/v1'
+  _globals['_TOPICMETADATA_EXTRAENTRY']._loaded_options = None
+  _globals['_TOPICMETADATA_EXTRAENTRY']._serialized_options = b'8\001'
+  _globals['_TOOLCOLLECTION']._serialized_start=313
+  _globals['_TOOLCOLLECTION']._serialized_end=541
+  _globals['_FLASHCARDTYPE']._serialized_start=544
+  _globals['_FLASHCARDTYPE']._serialized_end=722
+  _globals['_TOPICMETADATA']._serialized_start=73
+  _globals['_TOPICMETADATA']._serialized_end=310
+  _globals['_TOPICMETADATA_EXTRAENTRY']._serialized_start=229
+  _globals['_TOPICMETADATA_EXTRAENTRY']._serialized_end=285
+# @@protoc_insertion_point(module_scope)

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.pyi RENAMED Viewed

@@ -1,6 +1,9 @@
+from google.protobuf.internal import containers as _containers
 from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
 from google.protobuf import descriptor as _descriptor
-from typing import ClassVar as _ClassVar
+from google.protobuf import message as _message
+from collections.abc import Mapping as _Mapping
+from typing import ClassVar as _ClassVar, Optional as _Optional
 DESCRIPTOR: _descriptor.FileDescriptor
@@ -11,6 +14,7 @@ class ToolCollection(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
     TOOL_COLLECTION_TEST_QUESTION: _ClassVar[ToolCollection]
     TOOL_COLLECTION_SPACED_TEST_QUESTION: _ClassVar[ToolCollection]
     TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION: _ClassVar[ToolCollection]
+    TOOL_COLLECTION_TOPIC: _ClassVar[ToolCollection]
 class FlashCardType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
     __slots__ = ()
@@ -24,8 +28,26 @@ TOOL_COLLECTION_FLASHCARD: ToolCollection
 TOOL_COLLECTION_TEST_QUESTION: ToolCollection
 TOOL_COLLECTION_SPACED_TEST_QUESTION: ToolCollection
 TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION: ToolCollection
+TOOL_COLLECTION_TOPIC: ToolCollection
 FLASH_CARD_TYPE_UNSPECIFIED: FlashCardType
 FLASH_CARD_TYPE_BASIC: FlashCardType
 FLASH_CARD_TYPE_CLOZE: FlashCardType
 FLASH_CARD_TYPE_FILL_IN_THE_BLANK: FlashCardType
 FLASH_CARD_TYPE_MULTIPLE_CHOICE: FlashCardType
+class TopicMetadata(_message.Message):
+    __slots__ = ("user_id", "topic_id", "extra")
+    class ExtraEntry(_message.Message):
+        __slots__ = ("key", "value")
+        KEY_FIELD_NUMBER: _ClassVar[int]
+        VALUE_FIELD_NUMBER: _ClassVar[int]
+        key: str
+        value: str
+        def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
+    USER_ID_FIELD_NUMBER: _ClassVar[int]
+    TOPIC_ID_FIELD_NUMBER: _ClassVar[int]
+    EXTRA_FIELD_NUMBER: _ClassVar[int]
+    user_id: str
+    topic_id: str
+    extra: _containers.ScalarMap[str, str]
+    def __init__(self, user_id: _Optional[str] = ..., topic_id: _Optional[str] = ..., extra: _Optional[_Mapping[str, str]] = ...) -> None: ...

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/hash/__init__.py RENAMED Viewed

@@ -16,6 +16,7 @@ from .types import (
     MultipleChoiceOption,
     QuestionData,
     ToolCollection,
+    TopicData,
 )
 __all__ = [
@@ -26,6 +27,7 @@ __all__ = [
     "FlashCardData",
     "QuestionData",
     "AudioRecapSectionData",
+    "TopicData",
     "MultipleChoiceOption",
     "AnswerObject",
 ]

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/hash/hasher.py RENAMED Viewed

@@ -15,6 +15,7 @@ from .types import (
     MultipleChoiceOption,
     QuestionData,
     ToolCollection,
+    TopicData,
 )
 # Hash length in hex characters (128 bits = 32 hex chars)
@@ -23,7 +24,7 @@ HASH_LENGTH = 32
 def compute_content_hash(
     tool_collection: ToolCollection,
-    data: Union[FlashCardData, QuestionData, AudioRecapSectionData, dict],
+    data: Union[FlashCardData, QuestionData, AudioRecapSectionData, TopicData, dict],
 ) -> str:
     """
     Compute a deterministic content hash for a learning tool.
@@ -52,7 +53,7 @@ def compute_content_hash(
 def extract_tool_text(
     tool_collection: ToolCollection,
-    data: Union[FlashCardData, QuestionData, AudioRecapSectionData, dict],
+    data: Union[FlashCardData, QuestionData, AudioRecapSectionData, TopicData, dict],
 ) -> str:
     """
     Extract the text content from a learning tool for embedding.
@@ -94,6 +95,8 @@ def extract_tool_text(
         return _extract_question_text(data_dict)
     elif tool_collection == "AudioRecapV2Section":
         return _extract_audio_recap_text(data_dict)
+    elif tool_collection == "Topic":
+        return _extract_topic_text(data_dict)
     else:
         return ""
@@ -183,6 +186,29 @@ def _extract_audio_recap_text(data: dict) -> str:
     return ""
+def _extract_topic_text(data: dict) -> str:
+    """
+    Extract text from Topic.
+    Format: "Topic: {topic}. Description: {description}."
+    """
+    parts: list[str] = []
+    topic = data.get("topic")
+    if topic:
+        trimmed = topic.strip()
+        if trimmed:
+            parts.append(f"Topic: {trimmed}.")
+    description = data.get("description")
+    if description:
+        trimmed = description.strip()
+        if trimmed:
+            parts.append(f"Description: {trimmed}.")
+    return " ".join(parts)
 def _strip_flashcard_syntax(text: str) -> str:
     """
     Strip {{...}} markers from cloze/fill-in-blank text.

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/hash/types.py RENAMED Viewed

@@ -9,7 +9,7 @@ from typing import Literal, Optional, Union
 from pydantic import BaseModel, ConfigDict, Field
 # Tool collection types
-ToolCollection = Literal["FlashCard", "TestQuestion", "SpacedTestQuestion", "AudioRecapV2Section"]
+ToolCollection = Literal["FlashCard", "TestQuestion", "SpacedTestQuestion", "AudioRecapV2Section", "Topic"]
 # FlashCard type variants
 FlashCardType = Literal["BASIC", "CLOZE", "FILL_IN_THE_BLANK", "MULTIPLE_CHOICE"]
@@ -65,3 +65,12 @@ class AudioRecapSectionData(BaseModel):
     model_config = ConfigDict(extra="allow")
     script: Optional[str] = None
+class TopicData(BaseModel):
+    """Topic data for content hashing."""
+    model_config = ConfigDict(extra="allow")
+    topic: Optional[str] = None
+    description: Optional[str] = None

{sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/namespaces/embeddings.py RENAMED Viewed

@@ -154,26 +154,21 @@ class EmbeddingsNamespace(BaseNamespace):
         Raises:
             TimeoutError: If no result is received within the timeout
         """
-        channel = f"embedding:result:{request_id}"
-        pubsub = self._redis.pubsub()
-        pubsub.subscribe(channel)
-        try:
-            start_time = datetime.utcnow()
-            while True:
-                message = pubsub.get_message(timeout=1.0)
-                if message and message["type"] == "message":
-                    data = json.loads(message["data"])
-                    return EmbeddingResult.from_dict(data)
-                elapsed = (datetime.utcnow() - start_time).total_seconds()
-                if elapsed >= timeout:
-                    raise TimeoutError(
-                        f"No result received for {request_id} within {timeout}s"
-                    )
-        finally:
-            pubsub.unsubscribe(channel)
-            pubsub.close()
+        list_key = f"embedding:response:{request_id}"
+        # BRPOP blocks until result is available or timeout
+        result = self._redis.brpop(list_key, timeout=timeout)
+        if result is None:
+            raise TimeoutError(
+                f"No result received for {request_id} within {timeout}s"
+            )
+        # result = (key, value)
+        data = json.loads(result[1])
+        # Cleanup the response list
+        self._redis.delete(list_key)
+        return EmbeddingResult.from_dict(data)
     def create_and_wait(
         self,
@@ -189,8 +184,8 @@ class EmbeddingsNamespace(BaseNamespace):
         """
         Create embeddings and wait for the result.
-        This method subscribes to the result channel BEFORE submitting the request,
-        ensuring no race condition where the result is published before we're listening.
+        Uses BRPOP for efficient blocking wait - no race condition since the result
+        is pushed to a list that persists until consumed.
         Args:
             texts: List of text inputs
@@ -205,43 +200,22 @@ class EmbeddingsNamespace(BaseNamespace):
         Returns:
             The embedding result
         """
-        # Generate request ID upfront so we can subscribe before submitting
         request_id = str(uuid.uuid4())
-        channel = f"embedding:result:{request_id}"
-        # Subscribe BEFORE submitting to avoid race condition
-        pubsub = self._redis.pubsub()
-        pubsub.subscribe(channel)
-        try:
-            # Now submit the request (subscription is already active)
-            self.create(
-                texts=texts,
-                content_type=content_type,
-                priority=priority,
-                storage=storage,
-                metadata=metadata,
-                request_id=request_id,
-                embedding_model=embedding_model,
-                embedding_dimensions=embedding_dimensions,
-            )
-            # Wait for message with timeout
-            start_time = datetime.utcnow()
-            while True:
-                message = pubsub.get_message(timeout=1.0)
-                if message and message["type"] == "message":
-                    data = json.loads(message["data"])
-                    return EmbeddingResult.from_dict(data)
-                elapsed = (datetime.utcnow() - start_time).total_seconds()
-                if elapsed >= timeout:
-                    raise TimeoutError(
-                        f"No result received for {request_id} within {timeout}s"
-                    )
-        finally:
-            pubsub.unsubscribe(channel)
-            pubsub.close()
+        # Submit the request first
+        self.create(
+            texts=texts,
+            content_type=content_type,
+            priority=priority,
+            storage=storage,
+            metadata=metadata,
+            request_id=request_id,
+            embedding_model=embedding_model,
+            embedding_dimensions=embedding_dimensions,
+        )
+        # Wait for result via BRPOP
+        return self.wait_for(request_id, timeout)
     def get_queue_depth(self) -> dict[str, int]:
         """

sf-vector-sdk 0.2.0__tar.gz → 0.2.3__tar.gz

sf-vector-sdk 0.2.0__tar.gz → 0.2.3__tar.gz