sf-vector-sdk 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/PKG-INFO +38 -4
  2. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/README.md +37 -3
  3. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/__init__.py +17 -1
  4. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.py +2 -2
  5. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.py +2 -2
  6. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/generated/embedding_pipeline/query/v1/query_pb2.py +2 -2
  7. sf_vector_sdk-0.2.2/vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py +45 -0
  8. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.pyi +23 -1
  9. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/hash/__init__.py +2 -0
  10. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/hash/hasher.py +28 -2
  11. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/hash/types.py +10 -1
  12. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/structured/__init__.py +13 -0
  13. sf_vector_sdk-0.2.2/vector_sdk/structured/structured_embeddings.py +1216 -0
  14. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/structured/tool_config.py +23 -4
  15. sf_vector_sdk-0.2.0/vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py +0 -39
  16. sf_vector_sdk-0.2.0/vector_sdk/structured/structured_embeddings.py +0 -431
  17. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/.gitignore +0 -0
  18. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/pyproject.toml +0 -0
  19. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/tests/__init__.py +0 -0
  20. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/tests/test_content_hash.py +0 -0
  21. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/client.py +0 -0
  22. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/content_types.py +0 -0
  23. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.pyi +0 -0
  24. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.pyi +0 -0
  25. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/generated/embedding_pipeline/query/v1/query_pb2.pyi +0 -0
  26. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/namespaces/__init__.py +0 -0
  27. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/namespaces/base.py +0 -0
  28. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/namespaces/db.py +0 -0
  29. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/namespaces/embeddings.py +0 -0
  30. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/namespaces/search.py +0 -0
  31. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/structured/router.py +0 -0
  32. {sf_vector_sdk-0.2.0 → sf_vector_sdk-0.2.2}/vector_sdk/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sf-vector-sdk
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Python SDK for the Vector Gateway service (embeddings and vector search)
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: redis>=5.0.0
@@ -220,25 +220,42 @@ Type-safe embedding for known tool types (FlashCard, TestQuestion, etc.) with au
220
220
  |--------|-------------|
221
221
  | `embed_flashcard(data, metadata)` | Embed a flashcard, return request ID |
222
222
  | `embed_flashcard_and_wait(data, metadata, timeout)` | Embed and wait for result |
223
+ | `embed_flashcard_batch(items)` | Embed batch of flashcards, return request ID |
224
+ | `embed_flashcard_batch_and_wait(items, timeout)` | Embed batch and wait for result |
223
225
  | `embed_test_question(data, metadata)` | Embed a test question, return request ID |
224
226
  | `embed_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
227
+ | `embed_test_question_batch(items)` | Embed batch of test questions, return request ID |
228
+ | `embed_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
225
229
  | `embed_spaced_test_question(data, metadata)` | Embed a spaced test question, return request ID |
226
230
  | `embed_spaced_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
231
+ | `embed_spaced_test_question_batch(items)` | Embed batch of spaced test questions, return request ID |
232
+ | `embed_spaced_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
227
233
  | `embed_audio_recap(data, metadata)` | Embed an audio recap section, return request ID |
228
234
  | `embed_audio_recap_and_wait(data, metadata, timeout)` | Embed and wait for result |
235
+ | `embed_audio_recap_batch(items)` | Embed batch of audio recaps, return request ID |
236
+ | `embed_audio_recap_batch_and_wait(items, timeout)` | Embed batch and wait for result |
237
+ | `embed_topic(data, metadata)` | Embed a topic (uses `TopicMetadata`), return request ID |
238
+ | `embed_topic_and_wait(data, metadata, timeout)` | Embed and wait for result (uses `TopicMetadata`) |
239
+ | `embed_topic_batch(items)` | Embed batch of topics (uses `TopicMetadata`), return request ID |
240
+ | `embed_topic_batch_and_wait(items, timeout)` | Embed batch and wait for result (uses `TopicMetadata`) |
241
+
242
+ **Metadata Types:**
243
+
244
+ - `ToolMetadata` - For tools (FlashCard, TestQuestion, etc.) - requires `tool_id`
245
+ - `TopicMetadata` - For topics only - all fields optional (`user_id`, `topic_id`)
229
246
 
230
247
  ```python
231
- from vector_sdk import VectorClient, ToolMetadata, TestQuestionInput
248
+ from vector_sdk import VectorClient, ToolMetadata, TopicMetadata, TestQuestionInput
232
249
 
233
250
  client = VectorClient(redis_url="redis://localhost:6379")
234
251
 
235
- # Embed a flashcard - SDK handles text extraction, hashing, and routing
252
+ # Embed a flashcard - uses ToolMetadata (tool_id required)
236
253
  result = client.structured_embeddings.embed_flashcard_and_wait(
237
254
  data={"type": "BASIC", "term": "Mitochondria", "definition": "The powerhouse of the cell"},
238
255
  metadata=ToolMetadata(tool_id="tool123", user_id="user456", topic_id="topic789"),
239
256
  )
240
257
 
241
- # Embed a test question
258
+ # Embed a test question - uses ToolMetadata (tool_id required)
242
259
  result = client.structured_embeddings.embed_test_question_and_wait(
243
260
  data=TestQuestionInput(
244
261
  question="What is the capital?",
@@ -247,6 +264,23 @@ result = client.structured_embeddings.embed_test_question_and_wait(
247
264
  ),
248
265
  metadata=ToolMetadata(tool_id="tool456"),
249
266
  )
267
+
268
+ # Embed a topic - uses TopicMetadata (all fields optional)
269
+ result = client.structured_embeddings.embed_topic_and_wait(
270
+ data={"topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
271
+ metadata=TopicMetadata(user_id="user123", topic_id="topic456"), # No tool_id needed
272
+ )
273
+
274
+ # Batch embedding - embed multiple topics in a single request
275
+ from vector_sdk import TopicBatchItem
276
+
277
+ batch_result = client.structured_embeddings.embed_topic_batch_and_wait(
278
+ items=[
279
+ TopicBatchItem(data={"topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
280
+ TopicBatchItem(data={"topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
281
+ TopicBatchItem(data={"topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()), # All optional
282
+ ],
283
+ )
250
284
  ```
251
285
 
252
286
  **Database Routing:**
@@ -208,25 +208,42 @@ Type-safe embedding for known tool types (FlashCard, TestQuestion, etc.) with au
208
208
  |--------|-------------|
209
209
  | `embed_flashcard(data, metadata)` | Embed a flashcard, return request ID |
210
210
  | `embed_flashcard_and_wait(data, metadata, timeout)` | Embed and wait for result |
211
+ | `embed_flashcard_batch(items)` | Embed batch of flashcards, return request ID |
212
+ | `embed_flashcard_batch_and_wait(items, timeout)` | Embed batch and wait for result |
211
213
  | `embed_test_question(data, metadata)` | Embed a test question, return request ID |
212
214
  | `embed_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
215
+ | `embed_test_question_batch(items)` | Embed batch of test questions, return request ID |
216
+ | `embed_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
213
217
  | `embed_spaced_test_question(data, metadata)` | Embed a spaced test question, return request ID |
214
218
  | `embed_spaced_test_question_and_wait(data, metadata, timeout)` | Embed and wait for result |
219
+ | `embed_spaced_test_question_batch(items)` | Embed batch of spaced test questions, return request ID |
220
+ | `embed_spaced_test_question_batch_and_wait(items, timeout)` | Embed batch and wait for result |
215
221
  | `embed_audio_recap(data, metadata)` | Embed an audio recap section, return request ID |
216
222
  | `embed_audio_recap_and_wait(data, metadata, timeout)` | Embed and wait for result |
223
+ | `embed_audio_recap_batch(items)` | Embed batch of audio recaps, return request ID |
224
+ | `embed_audio_recap_batch_and_wait(items, timeout)` | Embed batch and wait for result |
225
+ | `embed_topic(data, metadata)` | Embed a topic (uses `TopicMetadata`), return request ID |
226
+ | `embed_topic_and_wait(data, metadata, timeout)` | Embed and wait for result (uses `TopicMetadata`) |
227
+ | `embed_topic_batch(items)` | Embed batch of topics (uses `TopicMetadata`), return request ID |
228
+ | `embed_topic_batch_and_wait(items, timeout)` | Embed batch and wait for result (uses `TopicMetadata`) |
229
+
230
+ **Metadata Types:**
231
+
232
+ - `ToolMetadata` - For tools (FlashCard, TestQuestion, etc.) - requires `tool_id`
233
+ - `TopicMetadata` - For topics only - all fields optional (`user_id`, `topic_id`)
217
234
 
218
235
  ```python
219
- from vector_sdk import VectorClient, ToolMetadata, TestQuestionInput
236
+ from vector_sdk import VectorClient, ToolMetadata, TopicMetadata, TestQuestionInput
220
237
 
221
238
  client = VectorClient(redis_url="redis://localhost:6379")
222
239
 
223
- # Embed a flashcard - SDK handles text extraction, hashing, and routing
240
+ # Embed a flashcard - uses ToolMetadata (tool_id required)
224
241
  result = client.structured_embeddings.embed_flashcard_and_wait(
225
242
  data={"type": "BASIC", "term": "Mitochondria", "definition": "The powerhouse of the cell"},
226
243
  metadata=ToolMetadata(tool_id="tool123", user_id="user456", topic_id="topic789"),
227
244
  )
228
245
 
229
- # Embed a test question
246
+ # Embed a test question - uses ToolMetadata (tool_id required)
230
247
  result = client.structured_embeddings.embed_test_question_and_wait(
231
248
  data=TestQuestionInput(
232
249
  question="What is the capital?",
@@ -235,6 +252,23 @@ result = client.structured_embeddings.embed_test_question_and_wait(
235
252
  ),
236
253
  metadata=ToolMetadata(tool_id="tool456"),
237
254
  )
255
+
256
+ # Embed a topic - uses TopicMetadata (all fields optional)
257
+ result = client.structured_embeddings.embed_topic_and_wait(
258
+ data={"topic": "Photosynthesis", "description": "The process by which plants convert sunlight to energy"},
259
+ metadata=TopicMetadata(user_id="user123", topic_id="topic456"), # No tool_id needed
260
+ )
261
+
262
+ # Batch embedding - embed multiple topics in a single request
263
+ from vector_sdk import TopicBatchItem
264
+
265
+ batch_result = client.structured_embeddings.embed_topic_batch_and_wait(
266
+ items=[
267
+ TopicBatchItem(data={"topic": "Topic 1", "description": "Description 1"}, metadata=TopicMetadata(user_id="user1")),
268
+ TopicBatchItem(data={"topic": "Topic 2", "description": "Description 2"}, metadata=TopicMetadata(topic_id="topic2")),
269
+ TopicBatchItem(data={"topic": "Topic 3", "description": "Description 3"}, metadata=TopicMetadata()), # All optional
270
+ ],
271
+ )
238
272
  ```
239
273
 
240
274
  **Database Routing:**
@@ -70,6 +70,7 @@ from vector_sdk.hash import (
70
70
  MultipleChoiceOption,
71
71
  QuestionData,
72
72
  ToolCollection,
73
+ TopicData,
73
74
  compute_content_hash,
74
75
  extract_tool_text,
75
76
  )
@@ -89,15 +90,21 @@ from vector_sdk.namespaces import (
89
90
  # ============================================================================
90
91
  from vector_sdk.structured import (
91
92
  TOOL_CONFIGS,
93
+ AudioRecapBatchItem,
94
+ BatchItem,
92
95
  DatabaseRoutingError,
93
96
  DatabaseRoutingMode,
97
+ FlashCardBatchItem,
94
98
  PineconeToolConfig,
95
99
  QuestionType,
96
100
  StructuredEmbeddingsNamespace,
101
+ TestQuestionBatchItem,
97
102
  TestQuestionInput,
98
103
  ToolConfig,
99
104
  ToolDatabaseConfig,
100
105
  ToolMetadata,
106
+ TopicBatchItem,
107
+ TopicMetadata,
101
108
  TurboPufferToolConfig,
102
109
  build_storage_config,
103
110
  get_content_type,
@@ -159,7 +166,7 @@ from vector_sdk.types import (
159
166
  validate_model,
160
167
  )
161
168
 
162
- __version__ = "0.2.0"
169
+ __version__ = "0.2.2"
163
170
 
164
171
  __all__ = [
165
172
  # Clients (New API)
@@ -236,12 +243,21 @@ __all__ = [
236
243
  "FlashCardData",
237
244
  "QuestionData",
238
245
  "AudioRecapSectionData",
246
+ "TopicData",
239
247
  "MultipleChoiceOption",
240
248
  "AnswerObject",
241
249
  # Structured Embeddings
242
250
  "StructuredEmbeddingsNamespace",
243
251
  "ToolMetadata",
252
+ "TopicMetadata",
244
253
  "TestQuestionInput",
254
+ # Batch types
255
+ "BatchItem",
256
+ "FlashCardBatchItem",
257
+ "TestQuestionBatchItem",
258
+ "AudioRecapBatchItem",
259
+ "TopicBatchItem",
260
+ # Tool configuration
245
261
  "ToolConfig",
246
262
  "ToolDatabaseConfig",
247
263
  "TurboPufferToolConfig",
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: embedding_pipeline/content_types/v1/content_types.proto
5
- # Protobuf Python Version: 6.33.4
5
+ # Protobuf Python Version: 6.33.5
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
15
  33,
16
- 4,
16
+ 5,
17
17
  '',
18
18
  'embedding_pipeline/content_types/v1/content_types.proto'
19
19
  )
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: embedding_pipeline/db/vectors/v1/vectors.proto
5
- # Protobuf Python Version: 6.33.4
5
+ # Protobuf Python Version: 6.33.5
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
15
  33,
16
- 4,
16
+ 5,
17
17
  '',
18
18
  'embedding_pipeline/db/vectors/v1/vectors.proto'
19
19
  )
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: embedding_pipeline/query/v1/query.proto
5
- # Protobuf Python Version: 6.33.4
5
+ # Protobuf Python Version: 6.33.5
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
15
  33,
16
- 4,
16
+ 5,
17
17
  '',
18
18
  'embedding_pipeline/query/v1/query.proto'
19
19
  )
@@ -0,0 +1,45 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # NO CHECKED-IN PROTOBUF GENCODE
4
+ # source: embedding_pipeline/tools/v1/tools.proto
5
+ # Protobuf Python Version: 6.33.5
6
+ """Generated protocol buffer code."""
7
+ from google.protobuf import descriptor as _descriptor
8
+ from google.protobuf import descriptor_pool as _descriptor_pool
9
+ from google.protobuf import runtime_version as _runtime_version
10
+ from google.protobuf import symbol_database as _symbol_database
11
+ from google.protobuf.internal import builder as _builder
12
+ _runtime_version.ValidateProtobufRuntimeVersion(
13
+ _runtime_version.Domain.PUBLIC,
14
+ 6,
15
+ 33,
16
+ 5,
17
+ '',
18
+ 'embedding_pipeline/tools/v1/tools.proto'
19
+ )
20
+ # @@protoc_insertion_point(imports)
21
+
22
+ _sym_db = _symbol_database.Default()
23
+
24
+
25
+
26
+
27
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\'embedding_pipeline/tools/v1/tools.proto\x12\x1b\x65mbedding_pipeline.tools.v1\"\xed\x01\n\rTopicMetadata\x12\x1c\n\x07user_id\x18\x01 \x01(\tH\x00R\x06userId\x88\x01\x01\x12\x1e\n\x08topic_id\x18\x02 \x01(\tH\x01R\x07topicId\x88\x01\x01\x12K\n\x05\x65xtra\x18\x03 \x03(\x0b\x32\x35.embedding_pipeline.tools.v1.TopicMetadata.ExtraEntryR\x05\x65xtra\x1a\x38\n\nExtraEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\n\n\x08_user_idB\x0b\n\t_topic_id*\xe4\x01\n\x0eToolCollection\x12\x1f\n\x1bTOOL_COLLECTION_UNSPECIFIED\x10\x00\x12\x1d\n\x19TOOL_COLLECTION_FLASHCARD\x10\x01\x12!\n\x1dTOOL_COLLECTION_TEST_QUESTION\x10\x02\x12(\n$TOOL_COLLECTION_SPACED_TEST_QUESTION\x10\x03\x12*\n&TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION\x10\x04\x12\x19\n\x15TOOL_COLLECTION_TOPIC\x10\x05*\xb2\x01\n\rFlashCardType\x12\x1f\n\x1b\x46LASH_CARD_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15\x46LASH_CARD_TYPE_BASIC\x10\x01\x12\x19\n\x15\x46LASH_CARD_TYPE_CLOZE\x10\x02\x12%\n!FLASH_CARD_TYPE_FILL_IN_THE_BLANK\x10\x03\x12#\n\x1f\x46LASH_CARD_TYPE_MULTIPLE_CHOICE\x10\x04\x42gZegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/tools/v1b\x06proto3')
28
+
29
+ _globals = globals()
30
+ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
31
+ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'embedding_pipeline.tools.v1.tools_pb2', _globals)
32
+ if not _descriptor._USE_C_DESCRIPTORS:
33
+ _globals['DESCRIPTOR']._loaded_options = None
34
+ _globals['DESCRIPTOR']._serialized_options = b'Zegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/tools/v1'
35
+ _globals['_TOPICMETADATA_EXTRAENTRY']._loaded_options = None
36
+ _globals['_TOPICMETADATA_EXTRAENTRY']._serialized_options = b'8\001'
37
+ _globals['_TOOLCOLLECTION']._serialized_start=313
38
+ _globals['_TOOLCOLLECTION']._serialized_end=541
39
+ _globals['_FLASHCARDTYPE']._serialized_start=544
40
+ _globals['_FLASHCARDTYPE']._serialized_end=722
41
+ _globals['_TOPICMETADATA']._serialized_start=73
42
+ _globals['_TOPICMETADATA']._serialized_end=310
43
+ _globals['_TOPICMETADATA_EXTRAENTRY']._serialized_start=229
44
+ _globals['_TOPICMETADATA_EXTRAENTRY']._serialized_end=285
45
+ # @@protoc_insertion_point(module_scope)
@@ -1,6 +1,9 @@
1
+ from google.protobuf.internal import containers as _containers
1
2
  from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
2
3
  from google.protobuf import descriptor as _descriptor
3
- from typing import ClassVar as _ClassVar
4
+ from google.protobuf import message as _message
5
+ from collections.abc import Mapping as _Mapping
6
+ from typing import ClassVar as _ClassVar, Optional as _Optional
4
7
 
5
8
  DESCRIPTOR: _descriptor.FileDescriptor
6
9
 
@@ -11,6 +14,7 @@ class ToolCollection(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
11
14
  TOOL_COLLECTION_TEST_QUESTION: _ClassVar[ToolCollection]
12
15
  TOOL_COLLECTION_SPACED_TEST_QUESTION: _ClassVar[ToolCollection]
13
16
  TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION: _ClassVar[ToolCollection]
17
+ TOOL_COLLECTION_TOPIC: _ClassVar[ToolCollection]
14
18
 
15
19
  class FlashCardType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
16
20
  __slots__ = ()
@@ -24,8 +28,26 @@ TOOL_COLLECTION_FLASHCARD: ToolCollection
24
28
  TOOL_COLLECTION_TEST_QUESTION: ToolCollection
25
29
  TOOL_COLLECTION_SPACED_TEST_QUESTION: ToolCollection
26
30
  TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION: ToolCollection
31
+ TOOL_COLLECTION_TOPIC: ToolCollection
27
32
  FLASH_CARD_TYPE_UNSPECIFIED: FlashCardType
28
33
  FLASH_CARD_TYPE_BASIC: FlashCardType
29
34
  FLASH_CARD_TYPE_CLOZE: FlashCardType
30
35
  FLASH_CARD_TYPE_FILL_IN_THE_BLANK: FlashCardType
31
36
  FLASH_CARD_TYPE_MULTIPLE_CHOICE: FlashCardType
37
+
38
+ class TopicMetadata(_message.Message):
39
+ __slots__ = ("user_id", "topic_id", "extra")
40
+ class ExtraEntry(_message.Message):
41
+ __slots__ = ("key", "value")
42
+ KEY_FIELD_NUMBER: _ClassVar[int]
43
+ VALUE_FIELD_NUMBER: _ClassVar[int]
44
+ key: str
45
+ value: str
46
+ def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
47
+ USER_ID_FIELD_NUMBER: _ClassVar[int]
48
+ TOPIC_ID_FIELD_NUMBER: _ClassVar[int]
49
+ EXTRA_FIELD_NUMBER: _ClassVar[int]
50
+ user_id: str
51
+ topic_id: str
52
+ extra: _containers.ScalarMap[str, str]
53
+ def __init__(self, user_id: _Optional[str] = ..., topic_id: _Optional[str] = ..., extra: _Optional[_Mapping[str, str]] = ...) -> None: ...
@@ -16,6 +16,7 @@ from .types import (
16
16
  MultipleChoiceOption,
17
17
  QuestionData,
18
18
  ToolCollection,
19
+ TopicData,
19
20
  )
20
21
 
21
22
  __all__ = [
@@ -26,6 +27,7 @@ __all__ = [
26
27
  "FlashCardData",
27
28
  "QuestionData",
28
29
  "AudioRecapSectionData",
30
+ "TopicData",
29
31
  "MultipleChoiceOption",
30
32
  "AnswerObject",
31
33
  ]
@@ -15,6 +15,7 @@ from .types import (
15
15
  MultipleChoiceOption,
16
16
  QuestionData,
17
17
  ToolCollection,
18
+ TopicData,
18
19
  )
19
20
 
20
21
  # Hash length in hex characters (128 bits = 32 hex chars)
@@ -23,7 +24,7 @@ HASH_LENGTH = 32
23
24
 
24
25
  def compute_content_hash(
25
26
  tool_collection: ToolCollection,
26
- data: Union[FlashCardData, QuestionData, AudioRecapSectionData, dict],
27
+ data: Union[FlashCardData, QuestionData, AudioRecapSectionData, TopicData, dict],
27
28
  ) -> str:
28
29
  """
29
30
  Compute a deterministic content hash for a learning tool.
@@ -52,7 +53,7 @@ def compute_content_hash(
52
53
 
53
54
  def extract_tool_text(
54
55
  tool_collection: ToolCollection,
55
- data: Union[FlashCardData, QuestionData, AudioRecapSectionData, dict],
56
+ data: Union[FlashCardData, QuestionData, AudioRecapSectionData, TopicData, dict],
56
57
  ) -> str:
57
58
  """
58
59
  Extract the text content from a learning tool for embedding.
@@ -94,6 +95,8 @@ def extract_tool_text(
94
95
  return _extract_question_text(data_dict)
95
96
  elif tool_collection == "AudioRecapV2Section":
96
97
  return _extract_audio_recap_text(data_dict)
98
+ elif tool_collection == "Topic":
99
+ return _extract_topic_text(data_dict)
97
100
  else:
98
101
  return ""
99
102
 
@@ -183,6 +186,29 @@ def _extract_audio_recap_text(data: dict) -> str:
183
186
  return ""
184
187
 
185
188
 
189
+ def _extract_topic_text(data: dict) -> str:
190
+ """
191
+ Extract text from Topic.
192
+
193
+ Format: "Topic: {topic}. Description: {description}."
194
+ """
195
+ parts: list[str] = []
196
+
197
+ topic = data.get("topic")
198
+ if topic:
199
+ trimmed = topic.strip()
200
+ if trimmed:
201
+ parts.append(f"Topic: {trimmed}.")
202
+
203
+ description = data.get("description")
204
+ if description:
205
+ trimmed = description.strip()
206
+ if trimmed:
207
+ parts.append(f"Description: {trimmed}.")
208
+
209
+ return " ".join(parts)
210
+
211
+
186
212
  def _strip_flashcard_syntax(text: str) -> str:
187
213
  """
188
214
  Strip {{...}} markers from cloze/fill-in-blank text.
@@ -9,7 +9,7 @@ from typing import Literal, Optional, Union
9
9
  from pydantic import BaseModel, ConfigDict, Field
10
10
 
11
11
  # Tool collection types
12
- ToolCollection = Literal["FlashCard", "TestQuestion", "SpacedTestQuestion", "AudioRecapV2Section"]
12
+ ToolCollection = Literal["FlashCard", "TestQuestion", "SpacedTestQuestion", "AudioRecapV2Section", "Topic"]
13
13
 
14
14
  # FlashCard type variants
15
15
  FlashCardType = Literal["BASIC", "CLOZE", "FILL_IN_THE_BLANK", "MULTIPLE_CHOICE"]
@@ -65,3 +65,12 @@ class AudioRecapSectionData(BaseModel):
65
65
  model_config = ConfigDict(extra="allow")
66
66
 
67
67
  script: Optional[str] = None
68
+
69
+
70
+ class TopicData(BaseModel):
71
+ """Topic data for content hashing."""
72
+
73
+ model_config = ConfigDict(extra="allow")
74
+
75
+ topic: Optional[str] = None
76
+ description: Optional[str] = None
@@ -14,9 +14,15 @@ from .router import (
14
14
  validate_database_routing,
15
15
  )
16
16
  from .structured_embeddings import (
17
+ AudioRecapBatchItem,
18
+ BatchItem,
19
+ FlashCardBatchItem,
17
20
  StructuredEmbeddingsNamespace,
21
+ TestQuestionBatchItem,
18
22
  TestQuestionInput,
19
23
  ToolMetadata,
24
+ TopicBatchItem,
25
+ TopicMetadata,
20
26
  )
21
27
  from .tool_config import (
22
28
  TOOL_CONFIGS,
@@ -37,7 +43,14 @@ __all__ = [
37
43
  "StructuredEmbeddingsNamespace",
38
44
  # Types
39
45
  "ToolMetadata",
46
+ "TopicMetadata",
40
47
  "TestQuestionInput",
48
+ # Batch types
49
+ "BatchItem",
50
+ "FlashCardBatchItem",
51
+ "TestQuestionBatchItem",
52
+ "AudioRecapBatchItem",
53
+ "TopicBatchItem",
41
54
  # Tool configuration
42
55
  "ToolConfig",
43
56
  "ToolDatabaseConfig",