sf-vector-sdk 0.2.0__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/PKG-INFO +38 -4
  2. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/README.md +37 -3
  3. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/__init__.py +17 -1
  4. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.py +2 -2
  5. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.py +2 -2
  6. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/query/v1/query_pb2.py +2 -2
  7. sf_vector_sdk-0.2.3/vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py +45 -0
  8. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.pyi +23 -1
  9. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/hash/__init__.py +2 -0
  10. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/hash/hasher.py +28 -2
  11. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/hash/types.py +10 -1
  12. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/namespaces/embeddings.py +31 -57
  13. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/namespaces/search.py +38 -60
  14. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/structured/__init__.py +13 -0
  15. sf_vector_sdk-0.2.3/vector_sdk/structured/structured_embeddings.py +1216 -0
  16. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/structured/tool_config.py +23 -4
  17. sf_vector_sdk-0.2.0/vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py +0 -39
  18. sf_vector_sdk-0.2.0/vector_sdk/structured/structured_embeddings.py +0 -431
  19. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/.gitignore +0 -0
  20. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/pyproject.toml +0 -0
  21. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/tests/__init__.py +0 -0
  22. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/tests/test_content_hash.py +0 -0
  23. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/client.py +0 -0
  24. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/content_types.py +0 -0
  25. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.pyi +0 -0
  26. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.pyi +0 -0
  27. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/generated/embedding_pipeline/query/v1/query_pb2.pyi +0 -0
  28. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/namespaces/__init__.py +0 -0
  29. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/namespaces/base.py +0 -0
  30. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/namespaces/db.py +0 -0
  31. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/structured/router.py +0 -0
  32. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.3}/vector_sdk/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sf-vector-sdk
3
- Version: 0.2.0
3
+ Version: 0.2.3
4
4
  Summary: Python SDK for the Vector Gateway service (embeddings and vector search)
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: redis>=5.0.0
@@ -220,25 +220,42 @@ Type-safe embedding for known tool types (FlashCard, TestQuestion, etc.) with au
220
220
  |--------|-------------|
221
221
  | `embed_flashcard(data, metadata)` | Embed a flashcard, return request ID |
222
222
  | `embed_flashcard_and_wait(data, metadata, timeout)` | Embed and wait for result |
223
+ | `embed_flashcard_batch(items)` | Embed batch of flashcards, return request ID |
224
+ | `embed_flashcard_batch_and_wait(items, timeout)` | Embed batch and wait for result |
223
225
  | `embed_test_question(data, metadata)` | Embed a test question, return request ID |
224
226
  | `embed_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
227
+ | `embed_test_question_batch(items)` | Embed batch of test questions, return request ID |
228
+ | `embed_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
225
229
  | `embed_spaced_test_question(data, metadata)` | Embed a spaced test question, return request ID |
226
230
  | `embed_spaced_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
231
+ | `embed_spaced_test_question_batch(items)` | Embed batch of spaced test questions, return request ID |
232
+ | `embed_spaced_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
227
233
  | `embed_audio_recap(data, metadata)` | Embed an audio recap section, return request ID |
228
234
  | `embed_audio_recap_and_wait(data, metadata, timeout)` | Embed and wait for result |
235
+ | `embed_audio_recap_batch(items)` | Embed batch of audio recaps, return request ID |
236
+ | `embed_audio_recap_batch_and_wait(items, timeout)` | Embed batch and wait for result |
237
+ | `embed_topic(data, metadata)` | Embed a topic (uses `TopicMetadata`), return request ID |
238
+ | `embed_topic_and_wait(data, metadata, timeout)` | Embed and wait for result (uses `TopicMetadata`) |
239
+ | `embed_topic_batch(items)` | Embed batch of topics (uses `TopicMetadata`), return request ID |
240
+ | `embed_topic_batch_and_wait(items, timeout)` | Embed batch and wait for result (uses `TopicMetadata`) |
241
+
242
+ **Metadata Types:**
243
+
244
+ - `ToolMetadata` - For tools (FlashCard, TestQuestion, etc.) - requires `tool_id`
245
+ - `TopicMetadata` - For topics only - all fields optional (`user_id`, `topic_id`)
229
246
 
230
247
  ```python
231
- from vector_sdk import VectorClient, ToolMetadata, TestQuestionInput
248
+ from vector_sdk import VectorClient, ToolMetadata, TopicMetadata, TestQuestionInput
232
249
 
233
250
  client = VectorClient(redis_url="redis://localhost:6379")
234
251
 
235
- # Embed a flashcard - SDK handles text extraction, hashing, and routing
252
+ # Embed a flashcard - uses ToolMetadata (tool_id required)
236
253
  result = client.structured_embeddings.embed_flashcard_and_wait(
237
254
  data={"type": "BASIC", "term": "Mitochondria", "definition": "The powerhouse of the cell"},
238
255
  metadata=ToolMetadata(tool_id="tool123", user_id="user456", topic_id="topic789"),
239
256
  )
240
257
 
241
- # Embed a test question
258
+ # Embed a test question - uses ToolMetadata (tool_id required)
242
259
  result = client.structured_embeddings.embed_test_question_and_wait(
243
260
  data=TestQuestionInput(
244
261
  question="What is the capital?",
@@ -247,6 +264,23 @@ result = client.structured_embeddings.embed_test_question_and_wait(
247
264
  ),
248
265
  metadata=ToolMetadata(tool_id="tool456"),
249
266
  )
267
+
268
+ # Embed a topic - uses TopicMetadata (all fields optional)
269
+ result = client.structured_embeddings.embed_topic_and_wait(
270
+ data={"topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
271
+ metadata=TopicMetadata(user_id="user123", topic_id="topic456"), # No tool_id needed
272
+ )
273
+
274
+ # Batch embedding - embed multiple topics in a single request
275
+ from vector_sdk import TopicBatchItem
276
+
277
+ batch_result = client.structured_embeddings.embed_topic_batch_and_wait(
278
+ items=[
279
+ TopicBatchItem(data={"topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
280
+ TopicBatchItem(data={"topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
281
+ TopicBatchItem(data={"topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()), # All optional
282
+ ],
283
+ )
250
284
  ```
251
285
 
252
286
  **Database Routing:**
@@ -208,25 +208,42 @@ Type-safe embedding for known tool types (FlashCard, TestQuestion, etc.) with au
208
208
  |--------|-------------|
209
209
  | `embed_flashcard(data, metadata)` | Embed a flashcard, return request ID |
210
210
  | `embed_flashcard_and_wait(data, metadata, timeout)` | Embed and wait for result |
211
+ | `embed_flashcard_batch(items)` | Embed batch of flashcards, return request ID |
212
+ | `embed_flashcard_batch_and_wait(items, timeout)` | Embed batch and wait for result |
211
213
  | `embed_test_question(data, metadata)` | Embed a test question, return request ID |
212
214
  | `embed_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
215
+ | `embed_test_question_batch(items)` | Embed batch of test questions, return request ID |
216
+ | `embed_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
213
217
  | `embed_spaced_test_question(data, metadata)` | Embed a spaced test question, return request ID |
214
218
  | `embed_spaced_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
219
+ | `embed_spaced_test_question_batch(items)` | Embed batch of spaced test questions, return request ID |
220
+ | `embed_spaced_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
215
221
  | `embed_audio_recap(data, metadata)` | Embed an audio recap section, return request ID |
216
222
  | `embed_audio_recap_and_wait(data, metadata, timeout)` | Embed and wait for result |
223
+ | `embed_audio_recap_batch(items)` | Embed batch of audio recaps, return request ID |
224
+ | `embed_audio_recap_batch_and_wait(items, timeout)` | Embed batch and wait for result |
225
+ | `embed_topic(data, metadata)` | Embed a topic (uses `TopicMetadata`), return request ID |
226
+ | `embed_topic_and_wait(data, metadata, timeout)` | Embed and wait for result (uses `TopicMetadata`) |
227
+ | `embed_topic_batch(items)` | Embed batch of topics (uses `TopicMetadata`), return request ID |
228
+ | `embed_topic_batch_and_wait(items, timeout)` | Embed batch and wait for result (uses `TopicMetadata`) |
229
+
230
+ **Metadata Types:**
231
+
232
+ - `ToolMetadata` - For tools (FlashCard, TestQuestion, etc.) - requires `tool_id`
233
+ - `TopicMetadata` - For topics only - all fields optional (`user_id`, `topic_id`)
217
234
 
218
235
  ```python
219
- from vector_sdk import VectorClient, ToolMetadata, TestQuestionInput
236
+ from vector_sdk import VectorClient, ToolMetadata, TopicMetadata, TestQuestionInput
220
237
 
221
238
  client = VectorClient(redis_url="redis://localhost:6379")
222
239
 
223
- # Embed a flashcard - SDK handles text extraction, hashing, and routing
240
+ # Embed a flashcard - uses ToolMetadata (tool_id required)
224
241
  result = client.structured_embeddings.embed_flashcard_and_wait(
225
242
  data={"type": "BASIC", "term": "Mitochondria", "definition": "The powerhouse of the cell"},
226
243
  metadata=ToolMetadata(tool_id="tool123", user_id="user456", topic_id="topic789"),
227
244
  )
228
245
 
229
- # Embed a test question
246
+ # Embed a test question - uses ToolMetadata (tool_id required)
230
247
  result = client.structured_embeddings.embed_test_question_and_wait(
231
248
  data=TestQuestionInput(
232
249
  question="What is the capital?",
@@ -235,6 +252,23 @@ result = client.structured_embeddings.embed_test_question_and_wait(
235
252
  ),
236
253
  metadata=ToolMetadata(tool_id="tool456"),
237
254
  )
255
+
256
+ # Embed a topic - uses TopicMetadata (all fields optional)
257
+ result = client.structured_embeddings.embed_topic_and_wait(
258
+ data={"topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
259
+ metadata=TopicMetadata(user_id="user123", topic_id="topic456"), # No tool_id needed
260
+ )
261
+
262
+ # Batch embedding - embed multiple topics in a single request
263
+ from vector_sdk import TopicBatchItem
264
+
265
+ batch_result = client.structured_embeddings.embed_topic_batch_and_wait(
266
+ items=[
267
+ TopicBatchItem(data={"topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
268
+ TopicBatchItem(data={"topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
269
+ TopicBatchItem(data={"topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()), # All optional
270
+ ],
271
+ )
238
272
  ```
239
273
 
240
274
  **Database Routing:**
@@ -70,6 +70,7 @@ from vector_sdk.hash import (
70
70
  MultipleChoiceOption,
71
71
  QuestionData,
72
72
  ToolCollection,
73
+ TopicData,
73
74
  compute_content_hash,
74
75
  extract_tool_text,
75
76
  )
@@ -89,15 +90,21 @@ from vector_sdk.namespaces import (
89
90
  # ============================================================================
90
91
  from vector_sdk.structured import (
91
92
  TOOL_CONFIGS,
93
+ AudioRecapBatchItem,
94
+ BatchItem,
92
95
  DatabaseRoutingError,
93
96
  DatabaseRoutingMode,
97
+ FlashCardBatchItem,
94
98
  PineconeToolConfig,
95
99
  QuestionType,
96
100
  StructuredEmbeddingsNamespace,
101
+ TestQuestionBatchItem,
97
102
  TestQuestionInput,
98
103
  ToolConfig,
99
104
  ToolDatabaseConfig,
100
105
  ToolMetadata,
106
+ TopicBatchItem,
107
+ TopicMetadata,
101
108
  TurboPufferToolConfig,
102
109
  build_storage_config,
103
110
  get_content_type,
@@ -159,7 +166,7 @@ from vector_sdk.types import (
159
166
  validate_model,
160
167
  )
161
168
 
162
- __version__ = "0.2.0"
169
+ __version__ = "0.2.3"
163
170
 
164
171
  __all__ = [
165
172
  # Clients (New API)
@@ -236,12 +243,21 @@ __all__ = [
236
243
  "FlashCardData",
237
244
  "QuestionData",
238
245
  "AudioRecapSectionData",
246
+ "TopicData",
239
247
  "MultipleChoiceOption",
240
248
  "AnswerObject",
241
249
  # Structured Embeddings
242
250
  "StructuredEmbeddingsNamespace",
243
251
  "ToolMetadata",
252
+ "TopicMetadata",
244
253
  "TestQuestionInput",
254
+ # Batch types
255
+ "BatchItem",
256
+ "FlashCardBatchItem",
257
+ "TestQuestionBatchItem",
258
+ "AudioRecapBatchItem",
259
+ "TopicBatchItem",
260
+ # Tool configuration
245
261
  "ToolConfig",
246
262
  "ToolDatabaseConfig",
247
263
  "TurboPufferToolConfig",
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: embedding_pipeline/content_types/v1/content_types.proto
5
- # Protobuf Python Version: 6.33.4
5
+ # Protobuf Python Version: 6.33.5
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
15
  33,
16
- 4,
16
+ 5,
17
17
  '',
18
18
  'embedding_pipeline/content_types/v1/content_types.proto'
19
19
  )
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: embedding_pipeline/db/vectors/v1/vectors.proto
5
- # Protobuf Python Version: 6.33.4
5
+ # Protobuf Python Version: 6.33.5
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
15
  33,
16
- 4,
16
+ 5,
17
17
  '',
18
18
  'embedding_pipeline/db/vectors/v1/vectors.proto'
19
19
  )
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: embedding_pipeline/query/v1/query.proto
5
- # Protobuf Python Version: 6.33.4
5
+ # Protobuf Python Version: 6.33.5
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
15
  33,
16
- 4,
16
+ 5,
17
17
  '',
18
18
  'embedding_pipeline/query/v1/query.proto'
19
19
  )
@@ -0,0 +1,45 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # NO CHECKED-IN PROTOBUF GENCODE
4
+ # source: embedding_pipeline/tools/v1/tools.proto
5
+ # Protobuf Python Version: 6.33.5
6
+ """Generated protocol buffer code."""
7
+ from google.protobuf import descriptor as _descriptor
8
+ from google.protobuf import descriptor_pool as _descriptor_pool
9
+ from google.protobuf import runtime_version as _runtime_version
10
+ from google.protobuf import symbol_database as _symbol_database
11
+ from google.protobuf.internal import builder as _builder
12
+ _runtime_version.ValidateProtobufRuntimeVersion(
13
+ _runtime_version.Domain.PUBLIC,
14
+ 6,
15
+ 33,
16
+ 5,
17
+ '',
18
+ 'embedding_pipeline/tools/v1/tools.proto'
19
+ )
20
+ # @@protoc_insertion_point(imports)
21
+
22
+ _sym_db = _symbol_database.Default()
23
+
24
+
25
+
26
+
27
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\'embedding_pipeline/tools/v1/tools.proto\x12\x1b\x65mbedding_pipeline.tools.v1\"\xed\x01\n\rTopicMetadata\x12\x1c\n\x07user_id\x18\x01 \x01(\tH\x00R\x06userId\x88\x01\x01\x12\x1e\n\x08topic_id\x18\x02 \x01(\tH\x01R\x07topicId\x88\x01\x01\x12K\n\x05\x65xtra\x18\x03 \x03(\x0b\x32\x35.embedding_pipeline.tools.v1.TopicMetadata.ExtraEntryR\x05\x65xtra\x1a\x38\n\nExtraEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\n\n\x08_user_idB\x0b\n\t_topic_id*\xe4\x01\n\x0eToolCollection\x12\x1f\n\x1bTOOL_COLLECTION_UNSPECIFIED\x10\x00\x12\x1d\n\x19TOOL_COLLECTION_FLASHCARD\x10\x01\x12!\n\x1dTOOL_COLLECTION_TEST_QUESTION\x10\x02\x12(\n$TOOL_COLLECTION_SPACED_TEST_QUESTION\x10\x03\x12*\n&TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION\x10\x04\x12\x19\n\x15TOOL_COLLECTION_TOPIC\x10\x05*\xb2\x01\n\rFlashCardType\x12\x1f\n\x1b\x46LASH_CARD_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15\x46LASH_CARD_TYPE_BASIC\x10\x01\x12\x19\n\x15\x46LASH_CARD_TYPE_CLOZE\x10\x02\x12%\n!FLASH_CARD_TYPE_FILL_IN_THE_BLANK\x10\x03\x12#\n\x1f\x46LASH_CARD_TYPE_MULTIPLE_CHOICE\x10\x04\x42gZegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/tools/v1b\x06proto3')
28
+
29
+ _globals = globals()
30
+ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
31
+ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'embedding_pipeline.tools.v1.tools_pb2', _globals)
32
+ if not _descriptor._USE_C_DESCRIPTORS:
33
+ _globals['DESCRIPTOR']._loaded_options = None
34
+ _globals['DESCRIPTOR']._serialized_options = b'Zegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/tools/v1'
35
+ _globals['_TOPICMETADATA_EXTRAENTRY']._loaded_options = None
36
+ _globals['_TOPICMETADATA_EXTRAENTRY']._serialized_options = b'8\001'
37
+ _globals['_TOOLCOLLECTION']._serialized_start=313
38
+ _globals['_TOOLCOLLECTION']._serialized_end=541
39
+ _globals['_FLASHCARDTYPE']._serialized_start=544
40
+ _globals['_FLASHCARDTYPE']._serialized_end=722
41
+ _globals['_TOPICMETADATA']._serialized_start=73
42
+ _globals['_TOPICMETADATA']._serialized_end=310
43
+ _globals['_TOPICMETADATA_EXTRAENTRY']._serialized_start=229
44
+ _globals['_TOPICMETADATA_EXTRAENTRY']._serialized_end=285
45
+ # @@protoc_insertion_point(module_scope)
@@ -1,6 +1,9 @@
1
+ from google.protobuf.internal import containers as _containers
1
2
  from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
2
3
  from google.protobuf import descriptor as _descriptor
3
- from typing import ClassVar as _ClassVar
4
+ from google.protobuf import message as _message
5
+ from collections.abc import Mapping as _Mapping
6
+ from typing import ClassVar as _ClassVar, Optional as _Optional
4
7
 
5
8
  DESCRIPTOR: _descriptor.FileDescriptor
6
9
 
@@ -11,6 +14,7 @@ class ToolCollection(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
11
14
  TOOL_COLLECTION_TEST_QUESTION: _ClassVar[ToolCollection]
12
15
  TOOL_COLLECTION_SPACED_TEST_QUESTION: _ClassVar[ToolCollection]
13
16
  TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION: _ClassVar[ToolCollection]
17
+ TOOL_COLLECTION_TOPIC: _ClassVar[ToolCollection]
14
18
 
15
19
  class FlashCardType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
16
20
  __slots__ = ()
@@ -24,8 +28,26 @@ TOOL_COLLECTION_FLASHCARD: ToolCollection
24
28
  TOOL_COLLECTION_TEST_QUESTION: ToolCollection
25
29
  TOOL_COLLECTION_SPACED_TEST_QUESTION: ToolCollection
26
30
  TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION: ToolCollection
31
+ TOOL_COLLECTION_TOPIC: ToolCollection
27
32
  FLASH_CARD_TYPE_UNSPECIFIED: FlashCardType
28
33
  FLASH_CARD_TYPE_BASIC: FlashCardType
29
34
  FLASH_CARD_TYPE_CLOZE: FlashCardType
30
35
  FLASH_CARD_TYPE_FILL_IN_THE_BLANK: FlashCardType
31
36
  FLASH_CARD_TYPE_MULTIPLE_CHOICE: FlashCardType
37
+
38
+ class TopicMetadata(_message.Message):
39
+ __slots__ = ("user_id", "topic_id", "extra")
40
+ class ExtraEntry(_message.Message):
41
+ __slots__ = ("key", "value")
42
+ KEY_FIELD_NUMBER: _ClassVar[int]
43
+ VALUE_FIELD_NUMBER: _ClassVar[int]
44
+ key: str
45
+ value: str
46
+ def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
47
+ USER_ID_FIELD_NUMBER: _ClassVar[int]
48
+ TOPIC_ID_FIELD_NUMBER: _ClassVar[int]
49
+ EXTRA_FIELD_NUMBER: _ClassVar[int]
50
+ user_id: str
51
+ topic_id: str
52
+ extra: _containers.ScalarMap[str, str]
53
+ def __init__(self, user_id: _Optional[str] = ..., topic_id: _Optional[str] = ..., extra: _Optional[_Mapping[str, str]] = ...) -> None: ...
@@ -16,6 +16,7 @@ from .types import (
16
16
  MultipleChoiceOption,
17
17
  QuestionData,
18
18
  ToolCollection,
19
+ TopicData,
19
20
  )
20
21
 
21
22
  __all__ = [
@@ -26,6 +27,7 @@ __all__ = [
26
27
  "FlashCardData",
27
28
  "QuestionData",
28
29
  "AudioRecapSectionData",
30
+ "TopicData",
29
31
  "MultipleChoiceOption",
30
32
  "AnswerObject",
31
33
  ]
@@ -15,6 +15,7 @@ from .types import (
15
15
  MultipleChoiceOption,
16
16
  QuestionData,
17
17
  ToolCollection,
18
+ TopicData,
18
19
  )
19
20
 
20
21
  # Hash length in hex characters (128 bits = 32 hex chars)
@@ -23,7 +24,7 @@ HASH_LENGTH = 32
23
24
 
24
25
  def compute_content_hash(
25
26
  tool_collection: ToolCollection,
26
- data: Union[FlashCardData, QuestionData, AudioRecapSectionData, dict],
27
+ data: Union[FlashCardData, QuestionData, AudioRecapSectionData, TopicData, dict],
27
28
  ) -> str:
28
29
  """
29
30
  Compute a deterministic content hash for a learning tool.
@@ -52,7 +53,7 @@ def compute_content_hash(
52
53
 
53
54
  def extract_tool_text(
54
55
  tool_collection: ToolCollection,
55
- data: Union[FlashCardData, QuestionData, AudioRecapSectionData, dict],
56
+ data: Union[FlashCardData, QuestionData, AudioRecapSectionData, TopicData, dict],
56
57
  ) -> str:
57
58
  """
58
59
  Extract the text content from a learning tool for embedding.
@@ -94,6 +95,8 @@ def extract_tool_text(
94
95
  return _extract_question_text(data_dict)
95
96
  elif tool_collection == "AudioRecapV2Section":
96
97
  return _extract_audio_recap_text(data_dict)
98
+ elif tool_collection == "Topic":
99
+ return _extract_topic_text(data_dict)
97
100
  else:
98
101
  return ""
99
102
 
@@ -183,6 +186,29 @@ def _extract_audio_recap_text(data: dict) -> str:
183
186
  return ""
184
187
 
185
188
 
189
+ def _extract_topic_text(data: dict) -> str:
190
+ """
191
+ Extract text from Topic.
192
+
193
+ Format: "Topic: {topic}. Description: {description}."
194
+ """
195
+ parts: list[str] = []
196
+
197
+ topic = data.get("topic")
198
+ if topic:
199
+ trimmed = topic.strip()
200
+ if trimmed:
201
+ parts.append(f"Topic: {trimmed}.")
202
+
203
+ description = data.get("description")
204
+ if description:
205
+ trimmed = description.strip()
206
+ if trimmed:
207
+ parts.append(f"Description: {trimmed}.")
208
+
209
+ return " ".join(parts)
210
+
211
+
186
212
  def _strip_flashcard_syntax(text: str) -> str:
187
213
  """
188
214
  Strip {{...}} markers from cloze/fill-in-blank text.
@@ -9,7 +9,7 @@ from typing import Literal, Optional, Union
9
9
  from pydantic import BaseModel, ConfigDict, Field
10
10
 
11
11
  # Tool collection types
12
- ToolCollection = Literal["FlashCard", "TestQuestion", "SpacedTestQuestion", "AudioRecapV2Section"]
12
+ ToolCollection = Literal["FlashCard", "TestQuestion", "SpacedTestQuestion", "AudioRecapV2Section", "Topic"]
13
13
 
14
14
  # FlashCard type variants
15
15
  FlashCardType = Literal["BASIC", "CLOZE", "FILL_IN_THE_BLANK", "MULTIPLE_CHOICE"]
@@ -65,3 +65,12 @@ class AudioRecapSectionData(BaseModel):
65
65
  model_config = ConfigDict(extra="allow")
66
66
 
67
67
  script: Optional[str] = None
68
+
69
+
70
+ class TopicData(BaseModel):
71
+ """Topic data for content hashing."""
72
+
73
+ model_config = ConfigDict(extra="allow")
74
+
75
+ topic: Optional[str] = None
76
+ description: Optional[str] = None
@@ -154,26 +154,21 @@ class EmbeddingsNamespace(BaseNamespace):
154
154
  Raises:
155
155
  TimeoutError: If no result is received within the timeout
156
156
  """
157
- channel = f"embedding:result:{request_id}"
158
- pubsub = self._redis.pubsub()
159
- pubsub.subscribe(channel)
160
-
161
- try:
162
- start_time = datetime.utcnow()
163
- while True:
164
- message = pubsub.get_message(timeout=1.0)
165
- if message and message["type"] == "message":
166
- data = json.loads(message["data"])
167
- return EmbeddingResult.from_dict(data)
168
-
169
- elapsed = (datetime.utcnow() - start_time).total_seconds()
170
- if elapsed >= timeout:
171
- raise TimeoutError(
172
- f"No result received for {request_id} within {timeout}s"
173
- )
174
- finally:
175
- pubsub.unsubscribe(channel)
176
- pubsub.close()
157
+ list_key = f"embedding:response:{request_id}"
158
+
159
+ # BRPOP blocks until result is available or timeout
160
+ result = self._redis.brpop(list_key, timeout=timeout)
161
+
162
+ if result is None:
163
+ raise TimeoutError(
164
+ f"No result received for {request_id} within {timeout}s"
165
+ )
166
+
167
+ # result = (key, value)
168
+ data = json.loads(result[1])
169
+ # Cleanup the response list
170
+ self._redis.delete(list_key)
171
+ return EmbeddingResult.from_dict(data)
177
172
 
178
173
  def create_and_wait(
179
174
  self,
@@ -189,8 +184,8 @@ class EmbeddingsNamespace(BaseNamespace):
189
184
  """
190
185
  Create embeddings and wait for the result.
191
186
 
192
- This method subscribes to the result channel BEFORE submitting the request,
193
- ensuring no race condition where the result is published before we're listening.
187
+ Uses BRPOP for efficient blocking wait - no race condition since the result
188
+ is pushed to a list that persists until consumed.
194
189
 
195
190
  Args:
196
191
  texts: List of text inputs
@@ -205,43 +200,22 @@ class EmbeddingsNamespace(BaseNamespace):
205
200
  Returns:
206
201
  The embedding result
207
202
  """
208
- # Generate request ID upfront so we can subscribe before submitting
209
203
  request_id = str(uuid.uuid4())
210
- channel = f"embedding:result:{request_id}"
211
-
212
- # Subscribe BEFORE submitting to avoid race condition
213
- pubsub = self._redis.pubsub()
214
- pubsub.subscribe(channel)
215
-
216
- try:
217
- # Now submit the request (subscription is already active)
218
- self.create(
219
- texts=texts,
220
- content_type=content_type,
221
- priority=priority,
222
- storage=storage,
223
- metadata=metadata,
224
- request_id=request_id,
225
- embedding_model=embedding_model,
226
- embedding_dimensions=embedding_dimensions,
227
- )
228
204
 
229
- # Wait for message with timeout
230
- start_time = datetime.utcnow()
231
- while True:
232
- message = pubsub.get_message(timeout=1.0)
233
- if message and message["type"] == "message":
234
- data = json.loads(message["data"])
235
- return EmbeddingResult.from_dict(data)
236
-
237
- elapsed = (datetime.utcnow() - start_time).total_seconds()
238
- if elapsed >= timeout:
239
- raise TimeoutError(
240
- f"No result received for {request_id} within {timeout}s"
241
- )
242
- finally:
243
- pubsub.unsubscribe(channel)
244
- pubsub.close()
205
+ # Submit the request first
206
+ self.create(
207
+ texts=texts,
208
+ content_type=content_type,
209
+ priority=priority,
210
+ storage=storage,
211
+ metadata=metadata,
212
+ request_id=request_id,
213
+ embedding_model=embedding_model,
214
+ embedding_dimensions=embedding_dimensions,
215
+ )
216
+
217
+ # Wait for result via BRPOP
218
+ return self.wait_for(request_id, timeout)
245
219
 
246
220
  def get_queue_depth(self) -> dict[str, int]:
247
221
  """