sf-vector-sdk 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
+ # -*- coding: utf-8 -*-
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
+ # NO CHECKED-IN PROTOBUF GENCODE
+ # source: embedding_pipeline/query/v1/query.proto
+ # Protobuf Python Version: 6.33.4
+ """Generated protocol buffer code."""
+ from google.protobuf import descriptor as _descriptor
+ from google.protobuf import descriptor_pool as _descriptor_pool
+ from google.protobuf import runtime_version as _runtime_version
+ from google.protobuf import symbol_database as _symbol_database
+ from google.protobuf.internal import builder as _builder
+ _runtime_version.ValidateProtobufRuntimeVersion(
+     _runtime_version.Domain.PUBLIC,
+     6,
+     33,
+     4,
+     '',
+     'embedding_pipeline/query/v1/query.proto'
+ )
+ # @@protoc_insertion_point(imports)
+
+ _sym_db = _symbol_database.Default()
+
+
+ from embedding_pipeline.content_types.v1 import content_types_pb2 as embedding__pipeline_dot_content__types_dot_v1_dot_content__types__pb2
+
+
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\'embedding_pipeline/query/v1/query.proto\x12\x1b\x65mbedding_pipeline.query.v1\x1a\x37\x65mbedding_pipeline/content_types/v1/content_types.proto\"\xc7\x04\n\x0cQueryRequest\x12\x1d\n\nrequest_id\x18\x01 \x01(\tR\trequestId\x12\x1d\n\nquery_text\x18\x02 \x01(\tR\tqueryText\x12O\n\x08\x64\x61tabase\x18\x03 \x01(\x0e\x32\x33.embedding_pipeline.content_types.v1.VectorDatabaseR\x08\x64\x61tabase\x12_\n\x10\x65mbedding_config\x18\x04 \x01(\x0b\x32\x34.embedding_pipeline.content_types.v1.EmbeddingConfigR\x0f\x65mbeddingConfig\x12K\n\x0cquery_config\x18\x05 \x01(\x0b\x32(.embedding_pipeline.query.v1.QueryConfigR\x0bqueryConfig\x12I\n\x08priority\x18\x06 \x01(\x0e\x32-.embedding_pipeline.content_types.v1.PriorityR\x08priority\x12S\n\x08metadata\x18\x07 \x03(\x0b\x32\x37.embedding_pipeline.query.v1.QueryRequest.MetadataEntryR\x08metadata\x12\x1d\n\ncreated_at\x18\x08 \x01(\tR\tcreatedAt\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\xfa\x02\n\x0bQueryConfig\x12\x13\n\x05top_k\x18\x01 \x01(\x05R\x04topK\x12\x1b\n\tmin_score\x18\x02 \x01(\x02R\x08minScore\x12O\n\x07\x66ilters\x18\x03 \x03(\x0b\x32\x35.embedding_pipeline.query.v1.QueryConfig.FiltersEntryR\x07\x66ilters\x12\x1c\n\tnamespace\x18\x04 \x01(\tR\tnamespace\x12\x1e\n\ncollection\x18\x05 \x01(\tR\ncollection\x12\x1a\n\x08\x64\x61tabase\x18\x06 \x01(\tR\x08\x64\x61tabase\x12\'\n\x0finclude_vectors\x18\x07 \x01(\x08R\x0eincludeVectors\x12)\n\x10include_metadata\x18\x08 \x01(\x08R\x0fincludeMetadata\x1a:\n\x0c\x46iltersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x83\x02\n\x0bQueryResult\x12\x1d\n\nrequest_id\x18\x01 \x01(\tR\trequestId\x12\x16\n\x06status\x18\x02 \x01(\tR\x06status\x12\x42\n\x07matches\x18\x03 \x03(\x0b\x32(.embedding_pipeline.query.v1.VectorMatchR\x07matches\x12\x14\n\x05\x65rror\x18\x04 \x01(\tR\x05\x65rror\x12@\n\x06timing\x18\x05 \x01(\x0b\x32(.embedding_pipeline.query.v1.QueryTimingR\x06timing\x12!\n\x0c\x63ompleted_at\x18\x06 \x01(\tR\x0b\x63ompletedAt\"\xdc\x01\n\x0bVectorMatch\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x14\n\x05score\x18\x02 \x01(\x02R\x05score\x12R\n\x08metadata\x18\x03 \x03(\x0b\x32\x36.embedding_pipeline.query.v1.VectorMatch.MetadataEntryR\x08metadata\x12\x16\n\x06vector\x18\x04 \x03(\x02R\x06vector\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x8c\x01\n\x0bQueryTiming\x12\"\n\rqueue_wait_ms\x18\x01 \x01(\x03R\x0bqueueWaitMs\x12!\n\x0c\x65mbedding_ms\x18\x02 \x01(\x03R\x0b\x65mbeddingMs\x12\x1b\n\tsearch_ms\x18\x03 \x01(\x03R\x08searchMs\x12\x19\n\x08total_ms\x18\x04 \x01(\x03R\x07totalMsBgZegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/query/v1b\x06proto3')
+
+ _globals = globals()
+ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'embedding_pipeline.query.v1.query_pb2', _globals)
+ if not _descriptor._USE_C_DESCRIPTORS:
+   _globals['DESCRIPTOR']._loaded_options = None
+   _globals['DESCRIPTOR']._serialized_options = b'Zegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/query/v1'
+   _globals['_QUERYREQUEST_METADATAENTRY']._loaded_options = None
+   _globals['_QUERYREQUEST_METADATAENTRY']._serialized_options = b'8\001'
+   _globals['_QUERYCONFIG_FILTERSENTRY']._loaded_options = None
+   _globals['_QUERYCONFIG_FILTERSENTRY']._serialized_options = b'8\001'
+   _globals['_VECTORMATCH_METADATAENTRY']._loaded_options = None
+   _globals['_VECTORMATCH_METADATAENTRY']._serialized_options = b'8\001'
+   _globals['_QUERYREQUEST']._serialized_start=130
+   _globals['_QUERYREQUEST']._serialized_end=713
+   _globals['_QUERYREQUEST_METADATAENTRY']._serialized_start=654
+   _globals['_QUERYREQUEST_METADATAENTRY']._serialized_end=713
+   _globals['_QUERYCONFIG']._serialized_start=716
+   _globals['_QUERYCONFIG']._serialized_end=1094
+   _globals['_QUERYCONFIG_FILTERSENTRY']._serialized_start=1036
+   _globals['_QUERYCONFIG_FILTERSENTRY']._serialized_end=1094
+   _globals['_QUERYRESULT']._serialized_start=1097
+   _globals['_QUERYRESULT']._serialized_end=1356
+   _globals['_VECTORMATCH']._serialized_start=1359
+   _globals['_VECTORMATCH']._serialized_end=1579
+   _globals['_VECTORMATCH_METADATAENTRY']._serialized_start=654
+   _globals['_VECTORMATCH_METADATAENTRY']._serialized_end=713
+   _globals['_QUERYTIMING']._serialized_start=1582
+   _globals['_QUERYTIMING']._serialized_end=1722
+ # @@protoc_insertion_point(module_scope)
@@ -0,0 +1,109 @@
+ from embedding_pipeline.content_types.v1 import content_types_pb2 as _content_types_pb2
+ from google.protobuf.internal import containers as _containers
+ from google.protobuf import descriptor as _descriptor
+ from google.protobuf import message as _message
+ from collections.abc import Iterable as _Iterable, Mapping as _Mapping
+ from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
+
+ DESCRIPTOR: _descriptor.FileDescriptor
+
+ class QueryRequest(_message.Message):
+     __slots__ = ("request_id", "query_text", "database", "embedding_config", "query_config", "priority", "metadata", "created_at")
+     class MetadataEntry(_message.Message):
+         __slots__ = ("key", "value")
+         KEY_FIELD_NUMBER: _ClassVar[int]
+         VALUE_FIELD_NUMBER: _ClassVar[int]
+         key: str
+         value: str
+         def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
+     REQUEST_ID_FIELD_NUMBER: _ClassVar[int]
+     QUERY_TEXT_FIELD_NUMBER: _ClassVar[int]
+     DATABASE_FIELD_NUMBER: _ClassVar[int]
+     EMBEDDING_CONFIG_FIELD_NUMBER: _ClassVar[int]
+     QUERY_CONFIG_FIELD_NUMBER: _ClassVar[int]
+     PRIORITY_FIELD_NUMBER: _ClassVar[int]
+     METADATA_FIELD_NUMBER: _ClassVar[int]
+     CREATED_AT_FIELD_NUMBER: _ClassVar[int]
+     request_id: str
+     query_text: str
+     database: _content_types_pb2.VectorDatabase
+     embedding_config: _content_types_pb2.EmbeddingConfig
+     query_config: QueryConfig
+     priority: _content_types_pb2.Priority
+     metadata: _containers.ScalarMap[str, str]
+     created_at: str
+     def __init__(self, request_id: _Optional[str] = ..., query_text: _Optional[str] = ..., database: _Optional[_Union[_content_types_pb2.VectorDatabase, str]] = ..., embedding_config: _Optional[_Union[_content_types_pb2.EmbeddingConfig, _Mapping]] = ..., query_config: _Optional[_Union[QueryConfig, _Mapping]] = ..., priority: _Optional[_Union[_content_types_pb2.Priority, str]] = ..., metadata: _Optional[_Mapping[str, str]] = ..., created_at: _Optional[str] = ...) -> None: ...
+
+ class QueryConfig(_message.Message):
+     __slots__ = ("top_k", "min_score", "filters", "namespace", "collection", "database", "include_vectors", "include_metadata")
+     class FiltersEntry(_message.Message):
+         __slots__ = ("key", "value")
+         KEY_FIELD_NUMBER: _ClassVar[int]
+         VALUE_FIELD_NUMBER: _ClassVar[int]
+         key: str
+         value: str
+         def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
+     TOP_K_FIELD_NUMBER: _ClassVar[int]
+     MIN_SCORE_FIELD_NUMBER: _ClassVar[int]
+     FILTERS_FIELD_NUMBER: _ClassVar[int]
+     NAMESPACE_FIELD_NUMBER: _ClassVar[int]
+     COLLECTION_FIELD_NUMBER: _ClassVar[int]
+     DATABASE_FIELD_NUMBER: _ClassVar[int]
+     INCLUDE_VECTORS_FIELD_NUMBER: _ClassVar[int]
+     INCLUDE_METADATA_FIELD_NUMBER: _ClassVar[int]
+     top_k: int
+     min_score: float
+     filters: _containers.ScalarMap[str, str]
+     namespace: str
+     collection: str
+     database: str
+     include_vectors: bool
+     include_metadata: bool
+     def __init__(self, top_k: _Optional[int] = ..., min_score: _Optional[float] = ..., filters: _Optional[_Mapping[str, str]] = ..., namespace: _Optional[str] = ..., collection: _Optional[str] = ..., database: _Optional[str] = ..., include_vectors: _Optional[bool] = ..., include_metadata: _Optional[bool] = ...) -> None: ...
+
+ class QueryResult(_message.Message):
+     __slots__ = ("request_id", "status", "matches", "error", "timing", "completed_at")
+     REQUEST_ID_FIELD_NUMBER: _ClassVar[int]
+     STATUS_FIELD_NUMBER: _ClassVar[int]
+     MATCHES_FIELD_NUMBER: _ClassVar[int]
+     ERROR_FIELD_NUMBER: _ClassVar[int]
+     TIMING_FIELD_NUMBER: _ClassVar[int]
+     COMPLETED_AT_FIELD_NUMBER: _ClassVar[int]
+     request_id: str
+     status: str
+     matches: _containers.RepeatedCompositeFieldContainer[VectorMatch]
+     error: str
+     timing: QueryTiming
+     completed_at: str
+     def __init__(self, request_id: _Optional[str] = ..., status: _Optional[str] = ..., matches: _Optional[_Iterable[_Union[VectorMatch, _Mapping]]] = ..., error: _Optional[str] = ..., timing: _Optional[_Union[QueryTiming, _Mapping]] = ..., completed_at: _Optional[str] = ...) -> None: ...
+
+ class VectorMatch(_message.Message):
+     __slots__ = ("id", "score", "metadata", "vector")
+     class MetadataEntry(_message.Message):
+         __slots__ = ("key", "value")
+         KEY_FIELD_NUMBER: _ClassVar[int]
+         VALUE_FIELD_NUMBER: _ClassVar[int]
+         key: str
+         value: str
+         def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
+     ID_FIELD_NUMBER: _ClassVar[int]
+     SCORE_FIELD_NUMBER: _ClassVar[int]
+     METADATA_FIELD_NUMBER: _ClassVar[int]
+     VECTOR_FIELD_NUMBER: _ClassVar[int]
+     id: str
+     score: float
+     metadata: _containers.ScalarMap[str, str]
+     vector: _containers.RepeatedScalarFieldContainer[float]
+     def __init__(self, id: _Optional[str] = ..., score: _Optional[float] = ..., metadata: _Optional[_Mapping[str, str]] = ..., vector: _Optional[_Iterable[float]] = ...) -> None: ...
+
+ class QueryTiming(_message.Message):
+     __slots__ = ("queue_wait_ms", "embedding_ms", "search_ms", "total_ms")
+     QUEUE_WAIT_MS_FIELD_NUMBER: _ClassVar[int]
+     EMBEDDING_MS_FIELD_NUMBER: _ClassVar[int]
+     SEARCH_MS_FIELD_NUMBER: _ClassVar[int]
+     TOTAL_MS_FIELD_NUMBER: _ClassVar[int]
+     queue_wait_ms: int
+     embedding_ms: int
+     search_ms: int
+     total_ms: int
+     def __init__(self, queue_wait_ms: _Optional[int] = ..., embedding_ms: _Optional[int] = ..., search_ms: _Optional[int] = ..., total_ms: _Optional[int] = ...) -> None: ...
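The two files above are the protoc output for embedding_pipeline/query/v1/query.proto: the _pb2 module registers the descriptors, and the .pyi stub documents the message shapes. As a rough sketch only (field names come from the stub; the literal values and the request id are invented for illustration), a query message can be built and round-tripped like any other protobuf message:

# Sketch: constructing and serializing a QueryRequest with the generated classes.
from embedding_pipeline.query.v1 import query_pb2

request = query_pb2.QueryRequest(
    request_id="req-123",                     # illustrative id
    query_text="mitochondria function",
    query_config=query_pb2.QueryConfig(
        top_k=5,
        min_score=0.7,
        namespace="example-namespace",
        include_metadata=True,
    ),
)
payload = request.SerializeToString()         # standard protobuf wire format
echo = query_pb2.QueryRequest.FromString(payload)
assert echo.query_config.top_k == 5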
@@ -0,0 +1,39 @@
+ # -*- coding: utf-8 -*-
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
+ # NO CHECKED-IN PROTOBUF GENCODE
+ # source: embedding_pipeline/tools/v1/tools.proto
+ # Protobuf Python Version: 6.33.4
+ """Generated protocol buffer code."""
+ from google.protobuf import descriptor as _descriptor
+ from google.protobuf import descriptor_pool as _descriptor_pool
+ from google.protobuf import runtime_version as _runtime_version
+ from google.protobuf import symbol_database as _symbol_database
+ from google.protobuf.internal import builder as _builder
+ _runtime_version.ValidateProtobufRuntimeVersion(
+     _runtime_version.Domain.PUBLIC,
+     6,
+     33,
+     4,
+     '',
+     'embedding_pipeline/tools/v1/tools.proto'
+ )
+ # @@protoc_insertion_point(imports)
+
+ _sym_db = _symbol_database.Default()
+
+
+
+
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\'embedding_pipeline/tools/v1/tools.proto\x12\x1b\x65mbedding_pipeline.tools.v1*\xc9\x01\n\x0eToolCollection\x12\x1f\n\x1bTOOL_COLLECTION_UNSPECIFIED\x10\x00\x12\x1d\n\x19TOOL_COLLECTION_FLASHCARD\x10\x01\x12!\n\x1dTOOL_COLLECTION_TEST_QUESTION\x10\x02\x12(\n$TOOL_COLLECTION_SPACED_TEST_QUESTION\x10\x03\x12*\n&TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION\x10\x04*\xb2\x01\n\rFlashCardType\x12\x1f\n\x1b\x46LASH_CARD_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15\x46LASH_CARD_TYPE_BASIC\x10\x01\x12\x19\n\x15\x46LASH_CARD_TYPE_CLOZE\x10\x02\x12%\n!FLASH_CARD_TYPE_FILL_IN_THE_BLANK\x10\x03\x12#\n\x1f\x46LASH_CARD_TYPE_MULTIPLE_CHOICE\x10\x04\x42gZegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/tools/v1b\x06proto3')
+
+ _globals = globals()
+ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'embedding_pipeline.tools.v1.tools_pb2', _globals)
+ if not _descriptor._USE_C_DESCRIPTORS:
+   _globals['DESCRIPTOR']._loaded_options = None
+   _globals['DESCRIPTOR']._serialized_options = b'Zegithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/tools/v1'
+   _globals['_TOOLCOLLECTION']._serialized_start=73
+   _globals['_TOOLCOLLECTION']._serialized_end=274
+   _globals['_FLASHCARDTYPE']._serialized_start=277
+   _globals['_FLASHCARDTYPE']._serialized_end=455
+ # @@protoc_insertion_point(module_scope)
@@ -0,0 +1,31 @@
+ from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
+ from google.protobuf import descriptor as _descriptor
+ from typing import ClassVar as _ClassVar
+
+ DESCRIPTOR: _descriptor.FileDescriptor
+
+ class ToolCollection(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+     __slots__ = ()
+     TOOL_COLLECTION_UNSPECIFIED: _ClassVar[ToolCollection]
+     TOOL_COLLECTION_FLASHCARD: _ClassVar[ToolCollection]
+     TOOL_COLLECTION_TEST_QUESTION: _ClassVar[ToolCollection]
+     TOOL_COLLECTION_SPACED_TEST_QUESTION: _ClassVar[ToolCollection]
+     TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION: _ClassVar[ToolCollection]
+
+ class FlashCardType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+     __slots__ = ()
+     FLASH_CARD_TYPE_UNSPECIFIED: _ClassVar[FlashCardType]
+     FLASH_CARD_TYPE_BASIC: _ClassVar[FlashCardType]
+     FLASH_CARD_TYPE_CLOZE: _ClassVar[FlashCardType]
+     FLASH_CARD_TYPE_FILL_IN_THE_BLANK: _ClassVar[FlashCardType]
+     FLASH_CARD_TYPE_MULTIPLE_CHOICE: _ClassVar[FlashCardType]
+ TOOL_COLLECTION_UNSPECIFIED: ToolCollection
+ TOOL_COLLECTION_FLASHCARD: ToolCollection
+ TOOL_COLLECTION_TEST_QUESTION: ToolCollection
+ TOOL_COLLECTION_SPACED_TEST_QUESTION: ToolCollection
+ TOOL_COLLECTION_AUDIO_RECAP_V2_SECTION: ToolCollection
+ FLASH_CARD_TYPE_UNSPECIFIED: FlashCardType
+ FLASH_CARD_TYPE_BASIC: FlashCardType
+ FLASH_CARD_TYPE_CLOZE: FlashCardType
+ FLASH_CARD_TYPE_FILL_IN_THE_BLANK: FlashCardType
+ FLASH_CARD_TYPE_MULTIPLE_CHOICE: FlashCardType
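The tools proto only declares two enums. A small sketch of how the generated enum helpers behave (the enum names come from the stub above; Name and Value are the standard protobuf EnumTypeWrapper helpers, and the numeric values follow the serialized descriptor):

from embedding_pipeline.tools.v1 import tools_pb2

collection = tools_pb2.TOOL_COLLECTION_FLASHCARD
print(tools_pb2.ToolCollection.Name(collection))               # "TOOL_COLLECTION_FLASHCARD"
print(tools_pb2.FlashCardType.Value("FLASH_CARD_TYPE_CLOZE"))  # 2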
@@ -0,0 +1,31 @@
+ """
+ Content Hash Module.
+
+ Generates deterministic content hashes for learning tools.
+ Mirrors the TypeScript implementation in packages/ts/vector-sdk/src/common/hash.ts
+
+ See docs/CONTENT-HASH-SPEC.md for the full specification.
+ """
+
+ from .hasher import compute_content_hash, extract_tool_text
+ from .types import (
+     AnswerObject,
+     AudioRecapSectionData,
+     FlashCardData,
+     FlashCardType,
+     MultipleChoiceOption,
+     QuestionData,
+     ToolCollection,
+ )
+
+ __all__ = [
+     "compute_content_hash",
+     "extract_tool_text",
+     "ToolCollection",
+     "FlashCardType",
+     "FlashCardData",
+     "QuestionData",
+     "AudioRecapSectionData",
+     "MultipleChoiceOption",
+     "AnswerObject",
+ ]
@@ -0,0 +1,259 @@
+ """
+ Content hash computation.
+
+ Generates deterministic content hashes for learning tools.
+ Mirrors the TypeScript implementation in packages/ts/vector-sdk/src/common/hash.ts
+ """
+
+ import hashlib
+ import re
+ from typing import Union
+
+ from .types import (
+     AudioRecapSectionData,
+     FlashCardData,
+     MultipleChoiceOption,
+     QuestionData,
+     ToolCollection,
+ )
+
+ # Hash length in hex characters (128 bits = 32 hex chars)
+ HASH_LENGTH = 32
+
+
+ def compute_content_hash(
+     tool_collection: ToolCollection,
+     data: Union[FlashCardData, QuestionData, AudioRecapSectionData, dict],
+ ) -> str:
+     """
+     Compute a deterministic content hash for a learning tool.
+
+     The hash is computed by:
+     1. Extracting text content based on the tool collection type
+     2. Computing SHA-256 hash of the text
+     3. Truncating to first 32 hex characters (128 bits)
+
+     For FlashCard, if no type is provided, defaults to "BASIC".
+     Other tool collections (TestQuestion, SpacedTestQuestion, AudioRecapV2Section)
+     do not have type variants.
+
+     Args:
+         tool_collection: The tool type (FlashCard, TestQuestion, etc.)
+         data: Tool-specific data (can be Pydantic model or dict)
+
+     Returns:
+         The content hash (32 hex chars) or empty string if no content
+     """
+     text = extract_tool_text(tool_collection, data)
+     if not text:
+         return ""
+     return _compute_hash(text)
+
+
+ def extract_tool_text(
+     tool_collection: ToolCollection,
+     data: Union[FlashCardData, QuestionData, AudioRecapSectionData, dict],
+ ) -> str:
+     """
+     Extract the text content from a learning tool for embedding.
+
+     This function extracts the text that will be used for vector embedding
+     based on the tool collection type. Use this when you need the raw text
+     for embedding rather than the hash.
+
+     Text extraction rules:
+     - FlashCard (BASIC): "Term: {term} Definition: {definition}"
+     - FlashCard (CLOZE/FILL_IN_THE_BLANK): Same as BASIC, with {{...}} syntax stripped
+     - FlashCard (MULTIPLE_CHOICE): "Term: {term} Options: {opt1, opt2, ...}"
+     - TestQuestion/SpacedTestQuestion: "Question: {question} Answers: {a1, a2, ...} Explanation: {explanation}"
+     - AudioRecapV2Section: "Script: {script}"
+
+     Args:
+         tool_collection: The tool type (FlashCard, TestQuestion, etc.)
+         data: Tool-specific data (can be Pydantic model or dict)
+
+     Returns:
+         The extracted text string or empty string if no content
+
+     Example:
+         >>> text = extract_tool_text(
+         ...     "FlashCard",
+         ...     {"type": "BASIC", "term": "Hello", "definition": "World"}
+         ... )
+         >>> # Returns: "Term: Hello Definition: World"
+     """
+     # Normalize the input to a plain dict (Pydantic models are dumped with their aliases)
+     if isinstance(data, dict):
+         data_dict = data
+     else:
+         data_dict = data.model_dump(by_alias=True) if hasattr(data, "model_dump") else dict(data)
+
+     if tool_collection == "FlashCard":
+         return _extract_flashcard_text(data_dict)
+     elif tool_collection in ("TestQuestion", "SpacedTestQuestion"):
+         return _extract_question_text(data_dict)
+     elif tool_collection == "AudioRecapV2Section":
+         return _extract_audio_recap_text(data_dict)
+     else:
+         return ""
+
+
+ def _compute_hash(text: str) -> str:
+     """Compute SHA-256 hash and truncate to HASH_LENGTH characters."""
+     if not text:
+         return ""
+     hash_bytes = hashlib.sha256(text.encode("utf-8")).hexdigest()
+     return hash_bytes[:HASH_LENGTH]
+
+
+ def _extract_flashcard_text(data: dict) -> str:
+     """
+     Extract text from a FlashCard document.
+
+     Handles all variants: BASIC, CLOZE, FILL_IN_THE_BLANK, MULTIPLE_CHOICE
+     """
+     parts: list[str] = []
+     card_type = data.get("type") or "BASIC"
+
+     # Extract term (all types have this)
+     term = data.get("term")
+     if term:
+         clean_term = _strip_flashcard_syntax(term.strip())
+         if clean_term:
+             parts.append(f"Term: {clean_term}")
+
+     # Handle definition/options based on type
+     if card_type == "MULTIPLE_CHOICE":
+         # Check both camelCase and snake_case keys
+         options = data.get("multipleChoiceOptions") or data.get("multiple_choice_options")
+         option_texts = _get_multiple_choice_options(options)
+         if option_texts:
+             parts.append(f"Options: {', '.join(option_texts)}")
+     else:
+         # BASIC, CLOZE, FILL_IN_THE_BLANK use definition
+         definition = data.get("definition")
+         if definition:
+             clean_def = _strip_flashcard_syntax(definition.strip())
+             if clean_def:
+                 parts.append(f"Definition: {clean_def}")
+
+     return " ".join(parts)
+
+
+ def _extract_question_text(data: dict) -> str:
+     """
+     Extract text from TestQuestion or SpacedTestQuestion.
+
+     Format: "Question: {question} Answers: {a1, a2, ...} Explanation: {explanation}"
+     """
+     parts: list[str] = []
+
+     question = data.get("question")
+     if question:
+         trimmed = question.strip()
+         if trimmed:
+             parts.append(f"Question: {trimmed}")
+
+     # Extract answers array
+     answers = data.get("answers")
+     answer_texts = _get_string_array(answers)
+     if answer_texts:
+         parts.append(f"Answers: {', '.join(answer_texts)}")
+
+     explanation = data.get("explanation")
+     if explanation:
+         trimmed = explanation.strip()
+         if trimmed:
+             parts.append(f"Explanation: {trimmed}")
+
+     return " ".join(parts)
+
+
+ def _extract_audio_recap_text(data: dict) -> str:
+     """
+     Extract text from AudioRecapV2Section.
+
+     Format: "Script: {script}"
+     """
+     script = data.get("script")
+     if script:
+         trimmed = script.strip()
+         if trimmed:
+             return f"Script: {trimmed}"
+     return ""
+
+
+ def _strip_flashcard_syntax(text: str) -> str:
+     """
+     Strip {{...}} markers from cloze/fill-in-blank text.
+
+     Example: "The {{mitochondria}} is the powerhouse" -> "The mitochondria is the powerhouse"
+     """
+     if not text:
+         return ""
+     # Replace {{word}} with just word
+     return re.sub(r"\{\{([^}]+)\}\}", r"\1", text)
+
+
+ def _get_multiple_choice_options(options: list | None) -> list[str]:
+     """
+     Extract option text from multiple choice options array.
+
+     Options can be strings or objects with "text" or "option" fields.
+     """
+     if not options or not isinstance(options, list):
+         return []
+
+     result: list[str] = []
+     for opt in options:
+         text = _extract_option_text(opt)
+         if text:
+             result.append(text)
+     return result
+
+
+ def _extract_option_text(item: str | dict | MultipleChoiceOption) -> str:
+     """Extract text from a single option (string or object)."""
+     if isinstance(item, str):
+         return item.strip()
+
+     if isinstance(item, dict):
+         # Try "text" field first, then "option"
+         text = item.get("text")
+         if text and isinstance(text, str):
+             return text.strip()
+         option = item.get("option")
+         if option and isinstance(option, str):
+             return option.strip()
+
+     if hasattr(item, "text") and item.text:
+         return item.text.strip()
+     if hasattr(item, "option") and item.option:
+         return item.option.strip()
+
+     return ""
+
+
+ def _get_string_array(answers: list | None) -> list[str]:
+     """Extract string array from answers (handles both string[] and AnswerObject[])."""
+     if not answers or not isinstance(answers, list):
+         return []
+
+     result: list[str] = []
+     for ans in answers:
+         if isinstance(ans, str):
+             trimmed = ans.strip()
+             if trimmed:
+                 result.append(trimmed)
+         elif isinstance(ans, dict):
+             text = ans.get("text")
+             if text and isinstance(text, str):
+                 trimmed = text.strip()
+                 if trimmed:
+                     result.append(trimmed)
+         elif hasattr(ans, "text") and ans.text:
+             trimmed = ans.text.strip()
+             if trimmed:
+                 result.append(trimmed)
+
+     return result
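Together with the package __init__ above, the hashing API is just two functions. A minimal usage sketch follows; the module path vector_sdk.hash is an assumption (the diff does not show file paths), while the function names, extraction format, and 32-character truncation come from the code above:

# Minimal sketch; "vector_sdk.hash" is an assumed import path, not shown in the diff.
from vector_sdk.hash import compute_content_hash, extract_tool_text

card = {"type": "BASIC", "term": "Hello", "definition": "World"}
text = extract_tool_text("FlashCard", card)        # "Term: Hello Definition: World"
digest = compute_content_hash("FlashCard", card)   # first 32 hex chars of sha256(text)
assert len(digest) == 32
assert compute_content_hash("FlashCard", {}) == ""  # no content -> empty string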
@@ -0,0 +1,67 @@
+ """
+ Type definitions for content hashing.
+
+ These types mirror the TypeScript SDK types and are derived from the proto definitions.
+ """
+
+ from typing import Literal, Optional, Union
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+ # Tool collection types
+ ToolCollection = Literal["FlashCard", "TestQuestion", "SpacedTestQuestion", "AudioRecapV2Section"]
+
+ # FlashCard type variants
+ FlashCardType = Literal["BASIC", "CLOZE", "FILL_IN_THE_BLANK", "MULTIPLE_CHOICE"]
+
+
+ class MultipleChoiceOption(BaseModel):
+     """Multiple choice option structure."""
+
+     model_config = ConfigDict(extra="allow")
+
+     text: Optional[str] = None
+     option: Optional[str] = None
+
+
+ class AnswerObject(BaseModel):
+     """Answer object structure (can be string or object with text field)."""
+
+     model_config = ConfigDict(extra="allow")
+
+     text: Optional[str] = None
+
+
+ class FlashCardData(BaseModel):
+     """
+     FlashCard data for content hashing.
+
+     If type is not provided, defaults to "BASIC".
+     """
+
+     model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+     type: Optional[FlashCardType] = "BASIC"
+     term: Optional[str] = None
+     definition: Optional[str] = None
+     multiple_choice_options: Optional[list[Union[str, MultipleChoiceOption, dict]]] = Field(
+         default=None, alias="multipleChoiceOptions"
+     )
+
+
+ class QuestionData(BaseModel):
+     """Question data for TestQuestion and SpacedTestQuestion."""
+
+     model_config = ConfigDict(extra="allow")
+
+     question: Optional[str] = None
+     answers: Optional[list[Union[str, AnswerObject, dict]]] = None
+     explanation: Optional[str] = None
+
+
+ class AudioRecapSectionData(BaseModel):
+     """AudioRecapV2Section data for content hashing."""
+
+     model_config = ConfigDict(extra="allow")
+
+     script: Optional[str] = None
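Because FlashCardData sets populate_by_name=True and aliases multiple_choice_options to multipleChoiceOptions, either key validates. A hedged sketch of the models above (the vector_sdk.hash import path is assumed, as before; the values are illustrative):

# Sketch of the Pydantic models above; the module path is an assumption.
from vector_sdk.hash import FlashCardData, QuestionData

card = FlashCardData.model_validate({
    "type": "MULTIPLE_CHOICE",
    "term": "2 + 2 = ?",
    "multipleChoiceOptions": [{"text": "4"}, {"text": "5"}],  # camelCase alias accepted
})
question = QuestionData(question="What is 2 + 2?", answers=["4"], explanation="Basic arithmetic.")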
@@ -0,0 +1,13 @@
+ """Namespace exports for the Vector SDK."""
+
+ from vector_sdk.namespaces.base import BaseNamespace
+ from vector_sdk.namespaces.db import DBNamespace
+ from vector_sdk.namespaces.embeddings import EmbeddingsNamespace
+ from vector_sdk.namespaces.search import SearchNamespace
+
+ __all__ = [
+     "BaseNamespace",
+     "EmbeddingsNamespace",
+     "SearchNamespace",
+     "DBNamespace",
+ ]
@@ -0,0 +1,45 @@
+ """
+ Base namespace class providing shared context for all namespace implementations.
+ """
+
+ from typing import Optional
+
+ from redis import Redis
+
+
+ class BaseNamespace:
+     """
+     Base class for all namespace implementations.
+     Provides access to shared Redis and HTTP clients.
+     """
+
+     def __init__(self, redis: Redis, http_url: Optional[str] = None):
+         """
+         Initialize the base namespace.
+
+         Args:
+             redis: Redis client instance
+             http_url: Optional HTTP URL for query-gateway API
+         """
+         self._redis = redis
+         self._http_url = http_url
+
+     def _require_http_url(self, method_name: str) -> str:
+         """
+         Helper to require http_url for HTTP-based operations.
+
+         Args:
+             method_name: Name of the method requiring http_url
+
+         Returns:
+             The http_url
+
+         Raises:
+             ValueError: If http_url is not configured
+         """
+         if not self._http_url:
+             raise ValueError(
+                 f"http_url is required for {method_name}. "
+                 "Set it in VectorClient constructor."
+             )
+         return self._http_url
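BaseNamespace itself does no I/O; concrete namespaces (EmbeddingsNamespace, SearchNamespace, DBNamespace) are expected to call _require_http_url before talking to the query-gateway. A purely illustrative subclass, not part of the published SDK:

# Illustrative only: how a concrete namespace might lean on BaseNamespace.
from redis import Redis
from vector_sdk.namespaces.base import BaseNamespace

class ExampleNamespace(BaseNamespace):
    """Hypothetical namespace used only for this sketch."""

    def gateway_health_url(self) -> str:
        # Raises ValueError("http_url is required for ...") when http_url was not set.
        base = self._require_http_url("gateway_health_url")
        return f"{base}/healthz"  # assumed endpoint, for illustration only

ns = ExampleNamespace(redis=Redis(), http_url="http://localhost:8080")
print(ns.gateway_health_url())  # http://localhost:8080/healthz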