sf-vector-sdk 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sf_vector_sdk-0.2.0.dist-info/METADATA +476 -0
- sf_vector_sdk-0.2.0.dist-info/RECORD +27 -0
- sf_vector_sdk-0.2.0.dist-info/WHEEL +4 -0
- vector_sdk/__init__.py +262 -0
- vector_sdk/client.py +538 -0
- vector_sdk/content_types.py +233 -0
- vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.py +57 -0
- vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.pyi +141 -0
- vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.py +58 -0
- vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.pyi +145 -0
- vector_sdk/generated/embedding_pipeline/query/v1/query_pb2.py +58 -0
- vector_sdk/generated/embedding_pipeline/query/v1/query_pb2.pyi +109 -0
- vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py +39 -0
- vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.pyi +31 -0
- vector_sdk/hash/__init__.py +31 -0
- vector_sdk/hash/hasher.py +259 -0
- vector_sdk/hash/types.py +67 -0
- vector_sdk/namespaces/__init__.py +13 -0
- vector_sdk/namespaces/base.py +45 -0
- vector_sdk/namespaces/db.py +230 -0
- vector_sdk/namespaces/embeddings.py +268 -0
- vector_sdk/namespaces/search.py +258 -0
- vector_sdk/structured/__init__.py +60 -0
- vector_sdk/structured/router.py +190 -0
- vector_sdk/structured/structured_embeddings.py +431 -0
- vector_sdk/structured/tool_config.py +254 -0
- vector_sdk/types.py +864 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Content Type Registry
|
|
3
|
+
|
|
4
|
+
Defines the pre-configured embedding settings for each content type.
|
|
5
|
+
This ensures consistent model selection, dimensions, and storage across all SDK consumers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from enum import IntEnum
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ContentType(IntEnum):
    """Kinds of content the SDK can generate embeddings for.

    Members are integer-valued; ``UNSPECIFIED`` (0) is the placeholder
    member and is the only one excluded from the supported-types list.
    """

    # Placeholder / "not set" value.
    UNSPECIFIED = 0

    # Embeddable content kinds.
    TOPIC = 1
    FLASHCARD = 2
    TEST_QUESTION = 3
    SPACED_TEST_QUESTION = 4
    AUDIO_RECAP = 5
    DOCUMENT = 6
    NOTE = 7
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class EmbeddingModel(IntEnum):
    """Identifiers for the Vertex AI embedding models known to the SDK."""

    # Placeholder / "not set" value.
    UNSPECIFIED = 0

    # gemini-embedding-001 (3072 dimensions)
    GEMINI_001 = 1

    # text-embedding-004 (768 dimensions)
    TEXT_004 = 2

    # text-multilingual-embedding-002
    MULTILINGUAL_002 = 3
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Priority(IntEnum):
    """Queue priority levels; a lower number means a more urgent level."""

    # Placeholder / "not set" value.
    UNSPECIFIED = 0

    # Reserved quota, immediate processing.
    CRITICAL = 1

    # Processed before normal.
    HIGH = 2

    # Standard processing.
    NORMAL = 3

    # Processed when capacity is available.
    LOW = 4
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class MongoDBStorageConfig:
    """Where and how embeddings for a content type are stored in MongoDB."""

    # Name of the MongoDB database.
    database: str
    # Name of the collection inside that database.
    collection: str
    # Document field that holds the embedding.
    embedding_field: str
    # Document field named as the upsert key.
    upsert_key: str
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class TurboPufferStorageConfig:
    """Where and how embeddings for a content type are stored in TurboPuffer."""

    # TurboPuffer namespace name.
    namespace: str
    # Name of the field used as the record id.
    id_field: str
    # Names of the metadata fields associated with each record.
    metadata_fields: list[str]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class ContentTypeConfig:
    """Bundle of embedding and storage settings for one content type."""

    # Content type these settings apply to.
    content_type: ContentType
    # Embedding model used for this content type.
    model: EmbeddingModel
    # Embedding dimension count configured for this content type.
    dimensions: int
    # Queue priority used by default for this content type.
    default_priority: Priority
    # MongoDB storage settings.
    mongodb: MongoDBStorageConfig
    # TurboPuffer storage settings.
    turbopuffer: TurboPufferStorageConfig
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _registry_entry(
    content_type: ContentType,
    priority: Priority,
    store: str,
    embedding_field: str,
    upsert_key: str,
    metadata_fields: list[str],
) -> ContentTypeConfig:
    """Build one entry of the built-in content type registry.

    Every built-in entry uses GEMINI_001 at 3072 dimensions, the
    "events_new" MongoDB database and "_id" as the TurboPuffer id field.
    ``store`` is used both as the MongoDB collection name and as the
    TurboPuffer namespace.
    """
    return ContentTypeConfig(
        content_type=content_type,
        model=EmbeddingModel.GEMINI_001,
        dimensions=3072,
        default_priority=priority,
        mongodb=MongoDBStorageConfig(
            database="events_new",
            collection=store,
            embedding_field=embedding_field,
            upsert_key=upsert_key,
        ),
        turbopuffer=TurboPufferStorageConfig(
            namespace=store,
            id_field="_id",
            metadata_fields=metadata_fields,
        ),
    )


# Pre-configured settings for all supported content types, keyed by type.
# Each entry gets its own storage config objects and metadata list.
CONTENT_TYPE_CONFIGS: dict[ContentType, ContentTypeConfig] = {
    entry.content_type: entry
    for entry in (
        _registry_entry(
            ContentType.TOPIC,
            Priority.NORMAL,
            "topic_vectors",
            "topicDescriptionEmbedding",
            "combinationId",
            ["topicId", "userId", "contentHash"],
        ),
        _registry_entry(
            ContentType.FLASHCARD,
            Priority.HIGH,
            "tool_vectors",
            "toolEmbedding",
            "contentHash",
            ["toolId", "toolCollection", "topicId", "userId"],
        ),
        _registry_entry(
            ContentType.TEST_QUESTION,
            Priority.HIGH,
            "tool_vectors",
            "toolEmbedding",
            "contentHash",
            ["toolId", "toolCollection", "topicId", "userId"],
        ),
        _registry_entry(
            ContentType.SPACED_TEST_QUESTION,
            Priority.NORMAL,
            "tool_vectors",
            "toolEmbedding",
            "contentHash",
            ["toolId", "toolCollection", "topicId", "userId"],
        ),
        _registry_entry(
            ContentType.AUDIO_RECAP,
            Priority.NORMAL,
            "tool_vectors",
            "toolEmbedding",
            "contentHash",
            ["toolId", "toolCollection", "topicId", "userId"],
        ),
        _registry_entry(
            ContentType.DOCUMENT,
            Priority.NORMAL,
            "document_vectors",
            "documentEmbedding",
            "documentId",
            ["documentId", "userId", "sourceType"],
        ),
        _registry_entry(
            ContentType.NOTE,
            Priority.LOW,
            "note_vectors",
            "noteEmbedding",
            "noteId",
            ["noteId", "userId", "topicId"],
        ),
    )
}
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def get_content_type_config(content_type: ContentType) -> ContentTypeConfig:
    """
    Look up the pre-configured embedding settings for a content type.

    Args:
        content_type: The content type to get config for

    Returns:
        The ``ContentTypeConfig`` registered for ``content_type``

    Raises:
        ValueError: If the content type has no entry in
            ``CONTENT_TYPE_CONFIGS`` (this includes ``UNSPECIFIED``)
    """
    # Single lookup: registry values are always config objects, so None
    # can only mean the key is absent.
    config = CONTENT_TYPE_CONFIGS.get(content_type)
    if config is None:
        # !r keeps the enum member name in the message; plain formatting
        # of an IntEnum member renders only its integer value.
        raise ValueError(f"Unsupported content type: {content_type!r}")
    return config
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def get_model_name(model: EmbeddingModel) -> str:
    """Translate an ``EmbeddingModel`` member into its Vertex AI model name.

    Any value without an explicit mapping (``UNSPECIFIED`` included) yields
    "gemini-embedding-001".
    """
    vertex_names = {
        EmbeddingModel.GEMINI_001: "gemini-embedding-001",
        EmbeddingModel.TEXT_004: "text-embedding-004",
        EmbeddingModel.MULTILINGUAL_002: "text-multilingual-embedding-002",
    }
    try:
        return vertex_names[model]
    except KeyError:
        # Unmapped values fall back to the Gemini model name.
        return "gemini-embedding-001"
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def get_priority_string(priority: Priority) -> str:
    """Translate a ``Priority`` member into the string used for Redis streams.

    Any value without an explicit mapping (``UNSPECIFIED`` included) yields
    "normal".
    """
    labels = {
        Priority.CRITICAL: "critical",
        Priority.HIGH: "high",
        Priority.NORMAL: "normal",
        Priority.LOW: "low",
    }
    try:
        return labels[priority]
    except KeyError:
        # Unmapped values are treated as normal priority.
        return "normal"
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# Every ContentType member except the UNSPECIFIED placeholder, in
# declaration order.
SUPPORTED_CONTENT_TYPES = [
    member for member in ContentType if member is not ContentType.UNSPECIFIED
]
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# NO CHECKED-IN PROTOBUF GENCODE
|
|
4
|
+
# source: embedding_pipeline/content_types/v1/content_types.proto
|
|
5
|
+
# Protobuf Python Version: 6.33.4
|
|
6
|
+
"""Generated protocol buffer code."""
|
|
7
|
+
from google.protobuf import descriptor as _descriptor
|
|
8
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
9
|
+
from google.protobuf import runtime_version as _runtime_version
|
|
10
|
+
from google.protobuf import symbol_database as _symbol_database
|
|
11
|
+
from google.protobuf.internal import builder as _builder
|
|
12
|
+
_runtime_version.ValidateProtobufRuntimeVersion(
|
|
13
|
+
_runtime_version.Domain.PUBLIC,
|
|
14
|
+
6,
|
|
15
|
+
33,
|
|
16
|
+
4,
|
|
17
|
+
'',
|
|
18
|
+
'embedding_pipeline/content_types/v1/content_types.proto'
|
|
19
|
+
)
|
|
20
|
+
# @@protoc_insertion_point(imports)
|
|
21
|
+
|
|
22
|
+
_sym_db = _symbol_database.Default()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n7embedding_pipeline/content_types/v1/content_types.proto\x12#embedding_pipeline.content_types.v1\"\xbb\x04\n\x11\x43ontentTypeConfig\x12S\n\x0c\x63ontent_type\x18\x01 \x01(\x0e\x32\x30.embedding_pipeline.content_types.v1.ContentTypeR\x0b\x63ontentType\x12I\n\x05model\x18\x02 \x01(\x0e\x32\x33.embedding_pipeline.content_types.v1.EmbeddingModelR\x05model\x12\x1e\n\ndimensions\x18\x03 \x01(\x05R\ndimensions\x12X\n\x10\x64\x65\x66\x61ult_priority\x18\x04 \x01(\x0e\x32-.embedding_pipeline.content_types.v1.PriorityR\x0f\x64\x65\x66\x61ultPriority\x12S\n\x07mongodb\x18\x05 \x01(\x0b\x32\x39.embedding_pipeline.content_types.v1.MongoDBStorageConfigR\x07mongodb\x12_\n\x0bturbopuffer\x18\x06 \x01(\x0b\x32=.embedding_pipeline.content_types.v1.TurboPufferStorageConfigR\x0bturbopuffer\x12V\n\x08pinecone\x18\x07 \x01(\x0b\x32:.embedding_pipeline.content_types.v1.PineconeStorageConfigR\x08pinecone\"\x9a\x01\n\x14MongoDBStorageConfig\x12\x1a\n\x08\x64\x61tabase\x18\x01 \x01(\tR\x08\x64\x61tabase\x12\x1e\n\ncollection\x18\x02 \x01(\tR\ncollection\x12\'\n\x0f\x65mbedding_field\x18\x03 \x01(\tR\x0e\x65mbeddingField\x12\x1d\n\nupsert_key\x18\x04 \x01(\tR\tupsertKey\"|\n\x18TurboPufferStorageConfig\x12\x1c\n\tnamespace\x18\x01 \x01(\tR\tnamespace\x12\x19\n\x08id_field\x18\x02 \x01(\tR\x07idField\x12\'\n\x0fmetadata_fields\x18\x03 \x03(\tR\x0emetadataFields\"\x98\x01\n\x15PineconeStorageConfig\x12\x1d\n\nindex_name\x18\x01 \x01(\tR\tindexName\x12\x1c\n\tnamespace\x18\x02 \x01(\tR\tnamespace\x12\x19\n\x08id_field\x18\x03 \x01(\tR\x07idField\x12\'\n\x0fmetadata_fields\x18\x04 \x03(\tR\x0emetadataFields\"|\n\x0f\x45mbeddingConfig\x12I\n\x05model\x18\x01 \x01(\x0e\x32\x33.embedding_pipeline.content_types.v1.EmbeddingModelR\x05model\x12\x1e\n\ndimensions\x18\x02 \x01(\x05R\ndimensions\"g\n\x13\x43ontentTypeRegistry\x12P\n\x07\x63onfigs\x18\x01 
\x03(\x0b\x32\x36.embedding_pipeline.content_types.v1.ContentTypeConfigR\x07\x63onfigs*\xf6\x01\n\x0b\x43ontentType\x12\x1c\n\x18\x43ONTENT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x43ONTENT_TYPE_TOPIC\x10\x01\x12\x1a\n\x16\x43ONTENT_TYPE_FLASHCARD\x10\x02\x12\x1e\n\x1a\x43ONTENT_TYPE_TEST_QUESTION\x10\x03\x12%\n!CONTENT_TYPE_SPACED_TEST_QUESTION\x10\x04\x12\x1c\n\x18\x43ONTENT_TYPE_AUDIO_RECAP\x10\x05\x12\x19\n\x15\x43ONTENT_TYPE_DOCUMENT\x10\x06\x12\x15\n\x11\x43ONTENT_TYPE_NOTE\x10\x07*u\n\x11\x45mbeddingProvider\x12\"\n\x1e\x45MBEDDING_PROVIDER_UNSPECIFIED\x10\x00\x12\x1d\n\x19\x45MBEDDING_PROVIDER_GOOGLE\x10\x01\x12\x1d\n\x19\x45MBEDDING_PROVIDER_OPENAI\x10\x02*\xdd\x01\n\x0e\x45mbeddingModel\x12\x1f\n\x1b\x45MBEDDING_MODEL_UNSPECIFIED\x10\x00\x12\x1e\n\x1a\x45MBEDDING_MODEL_GEMINI_001\x10\x01\x12\x1c\n\x18\x45MBEDDING_MODEL_TEXT_004\x10\x02\x12$\n EMBEDDING_MODEL_MULTILINGUAL_002\x10\x03\x12\"\n\x1e\x45MBEDDING_MODEL_OPENAI_3_SMALL\x10\n\x12\"\n\x1e\x45MBEDDING_MODEL_OPENAI_3_LARGE\x10\x0b*u\n\x08Priority\x12\x18\n\x14PRIORITY_UNSPECIFIED\x10\x00\x12\x15\n\x11PRIORITY_CRITICAL\x10\x01\x12\x11\n\rPRIORITY_HIGH\x10\x02\x12\x13\n\x0fPRIORITY_NORMAL\x10\x03\x12\x10\n\x0cPRIORITY_LOW\x10\x04*\x8d\x01\n\x0eVectorDatabase\x12\x1f\n\x1bVECTOR_DATABASE_UNSPECIFIED\x10\x00\x12\x1b\n\x17VECTOR_DATABASE_MONGODB\x10\x01\x12\x1f\n\x1bVECTOR_DATABASE_TURBOPUFFER\x10\x02\x12\x1c\n\x18VECTOR_DATABASE_PINECONE\x10\x03\x42oZmgithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/content_types/v1b\x06proto3')
|
|
28
|
+
|
|
29
|
+
_globals = globals()
|
|
30
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
|
31
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'embedding_pipeline.content_types.v1.content_types_pb2', _globals)
|
|
32
|
+
if not _descriptor._USE_C_DESCRIPTORS:
|
|
33
|
+
_globals['DESCRIPTOR']._loaded_options = None
|
|
34
|
+
_globals['DESCRIPTOR']._serialized_options = b'Zmgithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/content_types/v1'
|
|
35
|
+
_globals['_CONTENTTYPE']._serialized_start=1340
|
|
36
|
+
_globals['_CONTENTTYPE']._serialized_end=1586
|
|
37
|
+
_globals['_EMBEDDINGPROVIDER']._serialized_start=1588
|
|
38
|
+
_globals['_EMBEDDINGPROVIDER']._serialized_end=1705
|
|
39
|
+
_globals['_EMBEDDINGMODEL']._serialized_start=1708
|
|
40
|
+
_globals['_EMBEDDINGMODEL']._serialized_end=1929
|
|
41
|
+
_globals['_PRIORITY']._serialized_start=1931
|
|
42
|
+
_globals['_PRIORITY']._serialized_end=2048
|
|
43
|
+
_globals['_VECTORDATABASE']._serialized_start=2051
|
|
44
|
+
_globals['_VECTORDATABASE']._serialized_end=2192
|
|
45
|
+
_globals['_CONTENTTYPECONFIG']._serialized_start=97
|
|
46
|
+
_globals['_CONTENTTYPECONFIG']._serialized_end=668
|
|
47
|
+
_globals['_MONGODBSTORAGECONFIG']._serialized_start=671
|
|
48
|
+
_globals['_MONGODBSTORAGECONFIG']._serialized_end=825
|
|
49
|
+
_globals['_TURBOPUFFERSTORAGECONFIG']._serialized_start=827
|
|
50
|
+
_globals['_TURBOPUFFERSTORAGECONFIG']._serialized_end=951
|
|
51
|
+
_globals['_PINECONESTORAGECONFIG']._serialized_start=954
|
|
52
|
+
_globals['_PINECONESTORAGECONFIG']._serialized_end=1106
|
|
53
|
+
_globals['_EMBEDDINGCONFIG']._serialized_start=1108
|
|
54
|
+
_globals['_EMBEDDINGCONFIG']._serialized_end=1232
|
|
55
|
+
_globals['_CONTENTTYPEREGISTRY']._serialized_start=1234
|
|
56
|
+
_globals['_CONTENTTYPEREGISTRY']._serialized_end=1337
|
|
57
|
+
# @@protoc_insertion_point(module_scope)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
from google.protobuf.internal import containers as _containers
|
|
2
|
+
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
|
|
3
|
+
from google.protobuf import descriptor as _descriptor
|
|
4
|
+
from google.protobuf import message as _message
|
|
5
|
+
from collections.abc import Iterable as _Iterable, Mapping as _Mapping
|
|
6
|
+
from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
|
|
7
|
+
|
|
8
|
+
DESCRIPTOR: _descriptor.FileDescriptor
|
|
9
|
+
|
|
10
|
+
class ContentType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
|
11
|
+
__slots__ = ()
|
|
12
|
+
CONTENT_TYPE_UNSPECIFIED: _ClassVar[ContentType]
|
|
13
|
+
CONTENT_TYPE_TOPIC: _ClassVar[ContentType]
|
|
14
|
+
CONTENT_TYPE_FLASHCARD: _ClassVar[ContentType]
|
|
15
|
+
CONTENT_TYPE_TEST_QUESTION: _ClassVar[ContentType]
|
|
16
|
+
CONTENT_TYPE_SPACED_TEST_QUESTION: _ClassVar[ContentType]
|
|
17
|
+
CONTENT_TYPE_AUDIO_RECAP: _ClassVar[ContentType]
|
|
18
|
+
CONTENT_TYPE_DOCUMENT: _ClassVar[ContentType]
|
|
19
|
+
CONTENT_TYPE_NOTE: _ClassVar[ContentType]
|
|
20
|
+
|
|
21
|
+
class EmbeddingProvider(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
|
22
|
+
__slots__ = ()
|
|
23
|
+
EMBEDDING_PROVIDER_UNSPECIFIED: _ClassVar[EmbeddingProvider]
|
|
24
|
+
EMBEDDING_PROVIDER_GOOGLE: _ClassVar[EmbeddingProvider]
|
|
25
|
+
EMBEDDING_PROVIDER_OPENAI: _ClassVar[EmbeddingProvider]
|
|
26
|
+
|
|
27
|
+
class EmbeddingModel(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
|
28
|
+
__slots__ = ()
|
|
29
|
+
EMBEDDING_MODEL_UNSPECIFIED: _ClassVar[EmbeddingModel]
|
|
30
|
+
EMBEDDING_MODEL_GEMINI_001: _ClassVar[EmbeddingModel]
|
|
31
|
+
EMBEDDING_MODEL_TEXT_004: _ClassVar[EmbeddingModel]
|
|
32
|
+
EMBEDDING_MODEL_MULTILINGUAL_002: _ClassVar[EmbeddingModel]
|
|
33
|
+
EMBEDDING_MODEL_OPENAI_3_SMALL: _ClassVar[EmbeddingModel]
|
|
34
|
+
EMBEDDING_MODEL_OPENAI_3_LARGE: _ClassVar[EmbeddingModel]
|
|
35
|
+
|
|
36
|
+
class Priority(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
|
37
|
+
__slots__ = ()
|
|
38
|
+
PRIORITY_UNSPECIFIED: _ClassVar[Priority]
|
|
39
|
+
PRIORITY_CRITICAL: _ClassVar[Priority]
|
|
40
|
+
PRIORITY_HIGH: _ClassVar[Priority]
|
|
41
|
+
PRIORITY_NORMAL: _ClassVar[Priority]
|
|
42
|
+
PRIORITY_LOW: _ClassVar[Priority]
|
|
43
|
+
|
|
44
|
+
class VectorDatabase(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
|
45
|
+
__slots__ = ()
|
|
46
|
+
VECTOR_DATABASE_UNSPECIFIED: _ClassVar[VectorDatabase]
|
|
47
|
+
VECTOR_DATABASE_MONGODB: _ClassVar[VectorDatabase]
|
|
48
|
+
VECTOR_DATABASE_TURBOPUFFER: _ClassVar[VectorDatabase]
|
|
49
|
+
VECTOR_DATABASE_PINECONE: _ClassVar[VectorDatabase]
|
|
50
|
+
CONTENT_TYPE_UNSPECIFIED: ContentType
|
|
51
|
+
CONTENT_TYPE_TOPIC: ContentType
|
|
52
|
+
CONTENT_TYPE_FLASHCARD: ContentType
|
|
53
|
+
CONTENT_TYPE_TEST_QUESTION: ContentType
|
|
54
|
+
CONTENT_TYPE_SPACED_TEST_QUESTION: ContentType
|
|
55
|
+
CONTENT_TYPE_AUDIO_RECAP: ContentType
|
|
56
|
+
CONTENT_TYPE_DOCUMENT: ContentType
|
|
57
|
+
CONTENT_TYPE_NOTE: ContentType
|
|
58
|
+
EMBEDDING_PROVIDER_UNSPECIFIED: EmbeddingProvider
|
|
59
|
+
EMBEDDING_PROVIDER_GOOGLE: EmbeddingProvider
|
|
60
|
+
EMBEDDING_PROVIDER_OPENAI: EmbeddingProvider
|
|
61
|
+
EMBEDDING_MODEL_UNSPECIFIED: EmbeddingModel
|
|
62
|
+
EMBEDDING_MODEL_GEMINI_001: EmbeddingModel
|
|
63
|
+
EMBEDDING_MODEL_TEXT_004: EmbeddingModel
|
|
64
|
+
EMBEDDING_MODEL_MULTILINGUAL_002: EmbeddingModel
|
|
65
|
+
EMBEDDING_MODEL_OPENAI_3_SMALL: EmbeddingModel
|
|
66
|
+
EMBEDDING_MODEL_OPENAI_3_LARGE: EmbeddingModel
|
|
67
|
+
PRIORITY_UNSPECIFIED: Priority
|
|
68
|
+
PRIORITY_CRITICAL: Priority
|
|
69
|
+
PRIORITY_HIGH: Priority
|
|
70
|
+
PRIORITY_NORMAL: Priority
|
|
71
|
+
PRIORITY_LOW: Priority
|
|
72
|
+
VECTOR_DATABASE_UNSPECIFIED: VectorDatabase
|
|
73
|
+
VECTOR_DATABASE_MONGODB: VectorDatabase
|
|
74
|
+
VECTOR_DATABASE_TURBOPUFFER: VectorDatabase
|
|
75
|
+
VECTOR_DATABASE_PINECONE: VectorDatabase
|
|
76
|
+
|
|
77
|
+
class ContentTypeConfig(_message.Message):
|
|
78
|
+
__slots__ = ("content_type", "model", "dimensions", "default_priority", "mongodb", "turbopuffer", "pinecone")
|
|
79
|
+
CONTENT_TYPE_FIELD_NUMBER: _ClassVar[int]
|
|
80
|
+
MODEL_FIELD_NUMBER: _ClassVar[int]
|
|
81
|
+
DIMENSIONS_FIELD_NUMBER: _ClassVar[int]
|
|
82
|
+
DEFAULT_PRIORITY_FIELD_NUMBER: _ClassVar[int]
|
|
83
|
+
MONGODB_FIELD_NUMBER: _ClassVar[int]
|
|
84
|
+
TURBOPUFFER_FIELD_NUMBER: _ClassVar[int]
|
|
85
|
+
PINECONE_FIELD_NUMBER: _ClassVar[int]
|
|
86
|
+
content_type: ContentType
|
|
87
|
+
model: EmbeddingModel
|
|
88
|
+
dimensions: int
|
|
89
|
+
default_priority: Priority
|
|
90
|
+
mongodb: MongoDBStorageConfig
|
|
91
|
+
turbopuffer: TurboPufferStorageConfig
|
|
92
|
+
pinecone: PineconeStorageConfig
|
|
93
|
+
def __init__(self, content_type: _Optional[_Union[ContentType, str]] = ..., model: _Optional[_Union[EmbeddingModel, str]] = ..., dimensions: _Optional[int] = ..., default_priority: _Optional[_Union[Priority, str]] = ..., mongodb: _Optional[_Union[MongoDBStorageConfig, _Mapping]] = ..., turbopuffer: _Optional[_Union[TurboPufferStorageConfig, _Mapping]] = ..., pinecone: _Optional[_Union[PineconeStorageConfig, _Mapping]] = ...) -> None: ...
|
|
94
|
+
|
|
95
|
+
class MongoDBStorageConfig(_message.Message):
|
|
96
|
+
__slots__ = ("database", "collection", "embedding_field", "upsert_key")
|
|
97
|
+
DATABASE_FIELD_NUMBER: _ClassVar[int]
|
|
98
|
+
COLLECTION_FIELD_NUMBER: _ClassVar[int]
|
|
99
|
+
EMBEDDING_FIELD_FIELD_NUMBER: _ClassVar[int]
|
|
100
|
+
UPSERT_KEY_FIELD_NUMBER: _ClassVar[int]
|
|
101
|
+
database: str
|
|
102
|
+
collection: str
|
|
103
|
+
embedding_field: str
|
|
104
|
+
upsert_key: str
|
|
105
|
+
def __init__(self, database: _Optional[str] = ..., collection: _Optional[str] = ..., embedding_field: _Optional[str] = ..., upsert_key: _Optional[str] = ...) -> None: ...
|
|
106
|
+
|
|
107
|
+
class TurboPufferStorageConfig(_message.Message):
|
|
108
|
+
__slots__ = ("namespace", "id_field", "metadata_fields")
|
|
109
|
+
NAMESPACE_FIELD_NUMBER: _ClassVar[int]
|
|
110
|
+
ID_FIELD_FIELD_NUMBER: _ClassVar[int]
|
|
111
|
+
METADATA_FIELDS_FIELD_NUMBER: _ClassVar[int]
|
|
112
|
+
namespace: str
|
|
113
|
+
id_field: str
|
|
114
|
+
metadata_fields: _containers.RepeatedScalarFieldContainer[str]
|
|
115
|
+
def __init__(self, namespace: _Optional[str] = ..., id_field: _Optional[str] = ..., metadata_fields: _Optional[_Iterable[str]] = ...) -> None: ...
|
|
116
|
+
|
|
117
|
+
class PineconeStorageConfig(_message.Message):
|
|
118
|
+
__slots__ = ("index_name", "namespace", "id_field", "metadata_fields")
|
|
119
|
+
INDEX_NAME_FIELD_NUMBER: _ClassVar[int]
|
|
120
|
+
NAMESPACE_FIELD_NUMBER: _ClassVar[int]
|
|
121
|
+
ID_FIELD_FIELD_NUMBER: _ClassVar[int]
|
|
122
|
+
METADATA_FIELDS_FIELD_NUMBER: _ClassVar[int]
|
|
123
|
+
index_name: str
|
|
124
|
+
namespace: str
|
|
125
|
+
id_field: str
|
|
126
|
+
metadata_fields: _containers.RepeatedScalarFieldContainer[str]
|
|
127
|
+
def __init__(self, index_name: _Optional[str] = ..., namespace: _Optional[str] = ..., id_field: _Optional[str] = ..., metadata_fields: _Optional[_Iterable[str]] = ...) -> None: ...
|
|
128
|
+
|
|
129
|
+
class EmbeddingConfig(_message.Message):
|
|
130
|
+
__slots__ = ("model", "dimensions")
|
|
131
|
+
MODEL_FIELD_NUMBER: _ClassVar[int]
|
|
132
|
+
DIMENSIONS_FIELD_NUMBER: _ClassVar[int]
|
|
133
|
+
model: EmbeddingModel
|
|
134
|
+
dimensions: int
|
|
135
|
+
def __init__(self, model: _Optional[_Union[EmbeddingModel, str]] = ..., dimensions: _Optional[int] = ...) -> None: ...
|
|
136
|
+
|
|
137
|
+
class ContentTypeRegistry(_message.Message):
|
|
138
|
+
__slots__ = ("configs",)
|
|
139
|
+
CONFIGS_FIELD_NUMBER: _ClassVar[int]
|
|
140
|
+
configs: _containers.RepeatedCompositeFieldContainer[ContentTypeConfig]
|
|
141
|
+
def __init__(self, configs: _Optional[_Iterable[_Union[ContentTypeConfig, _Mapping]]] = ...) -> None: ...
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# NO CHECKED-IN PROTOBUF GENCODE
|
|
4
|
+
# source: embedding_pipeline/db/vectors/v1/vectors.proto
|
|
5
|
+
# Protobuf Python Version: 6.33.4
|
|
6
|
+
"""Generated protocol buffer code."""
|
|
7
|
+
from google.protobuf import descriptor as _descriptor
|
|
8
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
9
|
+
from google.protobuf import runtime_version as _runtime_version
|
|
10
|
+
from google.protobuf import symbol_database as _symbol_database
|
|
11
|
+
from google.protobuf.internal import builder as _builder
|
|
12
|
+
_runtime_version.ValidateProtobufRuntimeVersion(
|
|
13
|
+
_runtime_version.Domain.PUBLIC,
|
|
14
|
+
6,
|
|
15
|
+
33,
|
|
16
|
+
4,
|
|
17
|
+
'',
|
|
18
|
+
'embedding_pipeline/db/vectors/v1/vectors.proto'
|
|
19
|
+
)
|
|
20
|
+
# @@protoc_insertion_point(imports)
|
|
21
|
+
|
|
22
|
+
_sym_db = _symbol_database.Default()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n.embedding_pipeline/db/vectors/v1/vectors.proto\x12 embedding_pipeline.db.vectors.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"\xfc\x02\n\x0e\x43lusterVersion\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x1c\n\talgorithm\x18\x02 \x01(\tR\talgorithm\x12\x18\n\x07version\x18\x03 \x01(\tR\x07version\x12`\n\nparameters\x18\x04 \x03(\x0b\x32@.embedding_pipeline.db.vectors.v1.ClusterVersion.ParametersEntryR\nparameters\x12%\n\x0etotal_clusters\x18\x05 \x01(\x05R\rtotalClusters\x12\x1f\n\x0btotal_items\x18\x06 \x01(\x05R\ntotalItems\x12\x39\n\ncreated_at\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\x1a=\n\x0fParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x8d\x04\n\x0bTopicVector\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x19\n\x08topic_id\x18\x02 \x01(\tR\x07topicId\x12\x17\n\x07user_id\x18\x03 \x01(\tR\x06userId\x12\x1c\n\tembedding\x18\x04 \x03(\x02R\tembedding\x12W\n\x08metadata\x18\x05 \x03(\x0b\x32;.embedding_pipeline.db.vectors.v1.TopicVector.MetadataEntryR\x08metadata\x12*\n\x0e\x63ombination_id\x18\x06 \x01(\tH\x00R\rcombinationId\x88\x01\x01\x12\x39\n\ncreated_at\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\x12\x39\n\nupdated_at\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tupdatedAt\x12\x37\n\x15last_complete_version\x18\t \x01(\x05H\x01R\x13lastCompleteVersion\x88\x01\x01\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x11\n\x0f_combination_idB\x18\n\x16_last_complete_version\"\xe9\x03\n\nToolVector\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x17\n\x07tool_id\x18\x02 \x01(\tR\x06toolId\x12\x19\n\x08topic_id\x18\x03 \x01(\tR\x07topicId\x12\'\n\x0ftool_collection\x18\x04 \x01(\tR\x0etoolCollection\x12\x1c\n\tembedding\x18\x05 \x03(\x02R\tembedding\x12V\n\x08metadata\x18\x06 
\x03(\x0b\x32:.embedding_pipeline.db.vectors.v1.ToolVector.MetadataEntryR\x08metadata\x12#\n\x0btool_set_id\x18\x07 \x01(\tH\x00R\ttoolSetId\x88\x01\x01\x12\x33\n\x13tool_set_collection\x18\x08 \x01(\tH\x01R\x11toolSetCollection\x88\x01\x01\x12\x39\n\ncreated_at\x18\t \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_tool_set_idB\x16\n\x14_tool_set_collection\"\x80\x05\n\x12TopicGroupEnhanced\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x14\n\x05topic\x18\x02 \x01(\tR\x05topic\x12 \n\x0b\x64\x65scription\x18\x03 \x01(\tR\x0b\x64\x65scription\x12*\n\x0e\x63\x65ntroid_index\x18\x04 \x01(\x05H\x00R\rcentroidIndex\x88\x01\x01\x12&\n\x0csubject_type\x18\x05 \x01(\tH\x01R\x0bsubjectType\x88\x01\x01\x12+\n\x0ftopics_in_group\x18\x06 \x01(\x05H\x02R\rtopicsInGroup\x88\x01\x01\x12\x1d\n\x07version\x18\x07 \x01(\x05H\x03R\x07version\x88\x01\x01\x12\x46\n\x0enext_recluster\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.TimestampH\x04R\rnextRecluster\x88\x01\x01\x12\"\n\ncluster_id\x18\t \x01(\tH\x05R\tclusterId\x88\x01\x01\x12(\n\rcluster_level\x18\n \x01(\x05H\x06R\x0c\x63lusterLevel\x88\x01\x01\x12\x39\n\ncreated_at\x18\x0b \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\x12\x39\n\nupdated_at\x18\x0c \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tupdatedAtB\x11\n\x0f_centroid_indexB\x0f\n\r_subject_typeB\x12\n\x10_topics_in_groupB\n\n\x08_versionB\x11\n\x0f_next_reclusterB\r\n\x0b_cluster_idB\x10\n\x0e_cluster_level\"\xaf\x03\n\x0b\x43lusterNode\x12\x1d\n\ncluster_id\x18\x01 \x01(\tR\tclusterId\x12\x14\n\x05level\x18\x02 \x01(\x05R\x05level\x12 \n\tparent_id\x18\x03 \x01(\tH\x00R\x08parentId\x88\x01\x01\x12\x1a\n\x08\x63hildren\x18\x04 \x03(\tR\x08\x63hildren\x12\x1d\n\nitem_count\x18\x05 \x01(\x05R\titemCount\x12\x1d\n\x07summary\x18\x06 \x01(\tH\x01R\x07summary\x88\x01\x01\x12\x1a\n\x08\x63\x65ntroid\x18\x07 
\x03(\x02R\x08\x63\x65ntroid\x12.\n\x10silhouette_score\x18\x08 \x01(\x01H\x02R\x0fsilhouetteScore\x88\x01\x01\x12\x39\n\ncreated_at\x18\t \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\x12\x39\n\nupdated_at\x18\n \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tupdatedAtB\x0c\n\n_parent_idB\n\n\x08_summaryB\x13\n\x11_silhouette_scoreBlZjgithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/db/vectors/v1b\x06proto3')
|
|
29
|
+
|
|
30
|
+
_globals = globals()
|
|
31
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
|
32
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'embedding_pipeline.db.vectors.v1.vectors_pb2', _globals)
|
|
33
|
+
if not _descriptor._USE_C_DESCRIPTORS:
|
|
34
|
+
_globals['DESCRIPTOR']._loaded_options = None
|
|
35
|
+
_globals['DESCRIPTOR']._serialized_options = b'Zjgithub.com/GoStudyFetchGo/vector-management-monorepo/packages/go/proto-go/embedding_pipeline/db/vectors/v1'
|
|
36
|
+
_globals['_CLUSTERVERSION_PARAMETERSENTRY']._loaded_options = None
|
|
37
|
+
_globals['_CLUSTERVERSION_PARAMETERSENTRY']._serialized_options = b'8\001'
|
|
38
|
+
_globals['_TOPICVECTOR_METADATAENTRY']._loaded_options = None
|
|
39
|
+
_globals['_TOPICVECTOR_METADATAENTRY']._serialized_options = b'8\001'
|
|
40
|
+
_globals['_TOOLVECTOR_METADATAENTRY']._loaded_options = None
|
|
41
|
+
_globals['_TOOLVECTOR_METADATAENTRY']._serialized_options = b'8\001'
|
|
42
|
+
_globals['_CLUSTERVERSION']._serialized_start=118
|
|
43
|
+
_globals['_CLUSTERVERSION']._serialized_end=498
|
|
44
|
+
_globals['_CLUSTERVERSION_PARAMETERSENTRY']._serialized_start=437
|
|
45
|
+
_globals['_CLUSTERVERSION_PARAMETERSENTRY']._serialized_end=498
|
|
46
|
+
_globals['_TOPICVECTOR']._serialized_start=501
|
|
47
|
+
_globals['_TOPICVECTOR']._serialized_end=1026
|
|
48
|
+
_globals['_TOPICVECTOR_METADATAENTRY']._serialized_start=922
|
|
49
|
+
_globals['_TOPICVECTOR_METADATAENTRY']._serialized_end=981
|
|
50
|
+
_globals['_TOOLVECTOR']._serialized_start=1029
|
|
51
|
+
_globals['_TOOLVECTOR']._serialized_end=1518
|
|
52
|
+
_globals['_TOOLVECTOR_METADATAENTRY']._serialized_start=922
|
|
53
|
+
_globals['_TOOLVECTOR_METADATAENTRY']._serialized_end=981
|
|
54
|
+
_globals['_TOPICGROUPENHANCED']._serialized_start=1521
|
|
55
|
+
_globals['_TOPICGROUPENHANCED']._serialized_end=2161
|
|
56
|
+
_globals['_CLUSTERNODE']._serialized_start=2164
|
|
57
|
+
_globals['_CLUSTERNODE']._serialized_end=2595
|
|
58
|
+
# @@protoc_insertion_point(module_scope)
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
|
|
3
|
+
from google.protobuf import timestamp_pb2 as _timestamp_pb2
|
|
4
|
+
from google.protobuf.internal import containers as _containers
|
|
5
|
+
from google.protobuf import descriptor as _descriptor
|
|
6
|
+
from google.protobuf import message as _message
|
|
7
|
+
from collections.abc import Iterable as _Iterable, Mapping as _Mapping
|
|
8
|
+
from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
|
|
9
|
+
|
|
10
|
+
DESCRIPTOR: _descriptor.FileDescriptor
|
|
11
|
+
|
|
12
|
+
class ClusterVersion(_message.Message):
    """Type stub for the ``ClusterVersion`` protobuf message.

    Declares seven fields, a ``*_FIELD_NUMBER`` class constant for each of
    them, and the keyword arguments accepted by the constructor.
    """

    __slots__ = (
        "id",
        "algorithm",
        "version",
        "parameters",
        "total_clusters",
        "total_items",
        "created_at",
    )

    class ParametersEntry(_message.Message):
        """Nested string key / string value entry.

        Presumably the entry type behind the ``parameters`` map -- confirm
        against the .proto definition.
        """

        __slots__ = ("key", "value")

        KEY_FIELD_NUMBER: _ClassVar[int]
        VALUE_FIELD_NUMBER: _ClassVar[int]

        key: str
        value: str

        def __init__(
            self,
            key: _Optional[str] = ...,
            value: _Optional[str] = ...,
        ) -> None: ...

    # Field-number constants, one per declared field.
    ID_FIELD_NUMBER: _ClassVar[int]
    ALGORITHM_FIELD_NUMBER: _ClassVar[int]
    VERSION_FIELD_NUMBER: _ClassVar[int]
    PARAMETERS_FIELD_NUMBER: _ClassVar[int]
    TOTAL_CLUSTERS_FIELD_NUMBER: _ClassVar[int]
    TOTAL_ITEMS_FIELD_NUMBER: _ClassVar[int]
    CREATED_AT_FIELD_NUMBER: _ClassVar[int]

    # Field attributes as seen by type checkers.
    id: str
    algorithm: str
    version: str
    parameters: _containers.ScalarMap[str, str]  # str -> str map container
    total_clusters: int
    total_items: int
    created_at: _timestamp_pb2.Timestamp

    # Every argument is optional; `created_at` may be given as a datetime,
    # a Timestamp message, or a mapping.
    def __init__(
        self,
        id: _Optional[str] = ...,
        algorithm: _Optional[str] = ...,
        version: _Optional[str] = ...,
        parameters: _Optional[_Mapping[str, str]] = ...,
        total_clusters: _Optional[int] = ...,
        total_items: _Optional[int] = ...,
        created_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...,
    ) -> None: ...
|
|
36
|
+
|
|
37
|
+
class TopicVector(_message.Message):
    """Type stub for the ``TopicVector`` protobuf message.

    Declares nine fields, a ``*_FIELD_NUMBER`` class constant for each of
    them, and the keyword arguments accepted by the constructor.
    """

    __slots__ = (
        "id",
        "topic_id",
        "user_id",
        "embedding",
        "metadata",
        "combination_id",
        "created_at",
        "updated_at",
        "last_complete_version",
    )

    class MetadataEntry(_message.Message):
        """Nested string key / string value entry.

        Presumably the entry type behind the ``metadata`` map -- confirm
        against the .proto definition.
        """

        __slots__ = ("key", "value")

        KEY_FIELD_NUMBER: _ClassVar[int]
        VALUE_FIELD_NUMBER: _ClassVar[int]

        key: str
        value: str

        def __init__(
            self,
            key: _Optional[str] = ...,
            value: _Optional[str] = ...,
        ) -> None: ...

    # Field-number constants, one per declared field.
    ID_FIELD_NUMBER: _ClassVar[int]
    TOPIC_ID_FIELD_NUMBER: _ClassVar[int]
    USER_ID_FIELD_NUMBER: _ClassVar[int]
    EMBEDDING_FIELD_NUMBER: _ClassVar[int]
    METADATA_FIELD_NUMBER: _ClassVar[int]
    COMBINATION_ID_FIELD_NUMBER: _ClassVar[int]
    CREATED_AT_FIELD_NUMBER: _ClassVar[int]
    UPDATED_AT_FIELD_NUMBER: _ClassVar[int]
    LAST_COMPLETE_VERSION_FIELD_NUMBER: _ClassVar[int]

    # Field attributes as seen by type checkers.
    id: str
    topic_id: str
    user_id: str
    embedding: _containers.RepeatedScalarFieldContainer[float]  # repeated floats
    metadata: _containers.ScalarMap[str, str]  # str -> str map container
    combination_id: str
    created_at: _timestamp_pb2.Timestamp
    updated_at: _timestamp_pb2.Timestamp
    last_complete_version: int

    # Every argument is optional; the two timestamp arguments may be given
    # as a datetime, a Timestamp message, or a mapping.
    def __init__(
        self,
        id: _Optional[str] = ...,
        topic_id: _Optional[str] = ...,
        user_id: _Optional[str] = ...,
        embedding: _Optional[_Iterable[float]] = ...,
        metadata: _Optional[_Mapping[str, str]] = ...,
        combination_id: _Optional[str] = ...,
        created_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...,
        updated_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...,
        last_complete_version: _Optional[int] = ...,
    ) -> None: ...
|
|
65
|
+
|
|
66
|
+
class ToolVector(_message.Message):
    """Type stub for the ``ToolVector`` protobuf message.

    Declares nine fields, a ``*_FIELD_NUMBER`` class constant for each of
    them, and the keyword arguments accepted by the constructor.
    """

    __slots__ = (
        "id",
        "tool_id",
        "topic_id",
        "tool_collection",
        "embedding",
        "metadata",
        "tool_set_id",
        "tool_set_collection",
        "created_at",
    )

    class MetadataEntry(_message.Message):
        """Nested string key / string value entry.

        Presumably the entry type behind the ``metadata`` map -- confirm
        against the .proto definition.
        """

        __slots__ = ("key", "value")

        KEY_FIELD_NUMBER: _ClassVar[int]
        VALUE_FIELD_NUMBER: _ClassVar[int]

        key: str
        value: str

        def __init__(
            self,
            key: _Optional[str] = ...,
            value: _Optional[str] = ...,
        ) -> None: ...

    # Field-number constants, one per declared field.
    ID_FIELD_NUMBER: _ClassVar[int]
    TOOL_ID_FIELD_NUMBER: _ClassVar[int]
    TOPIC_ID_FIELD_NUMBER: _ClassVar[int]
    TOOL_COLLECTION_FIELD_NUMBER: _ClassVar[int]
    EMBEDDING_FIELD_NUMBER: _ClassVar[int]
    METADATA_FIELD_NUMBER: _ClassVar[int]
    TOOL_SET_ID_FIELD_NUMBER: _ClassVar[int]
    TOOL_SET_COLLECTION_FIELD_NUMBER: _ClassVar[int]
    CREATED_AT_FIELD_NUMBER: _ClassVar[int]

    # Field attributes as seen by type checkers.
    id: str
    tool_id: str
    topic_id: str
    tool_collection: str
    embedding: _containers.RepeatedScalarFieldContainer[float]  # repeated floats
    metadata: _containers.ScalarMap[str, str]  # str -> str map container
    tool_set_id: str
    tool_set_collection: str
    created_at: _timestamp_pb2.Timestamp

    # Every argument is optional; `created_at` may be given as a datetime,
    # a Timestamp message, or a mapping.
    def __init__(
        self,
        id: _Optional[str] = ...,
        tool_id: _Optional[str] = ...,
        topic_id: _Optional[str] = ...,
        tool_collection: _Optional[str] = ...,
        embedding: _Optional[_Iterable[float]] = ...,
        metadata: _Optional[_Mapping[str, str]] = ...,
        tool_set_id: _Optional[str] = ...,
        tool_set_collection: _Optional[str] = ...,
        created_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...,
    ) -> None: ...
|
|
94
|
+
|
|
95
|
+
class TopicGroupEnhanced(_message.Message):
    """Type stub for the ``TopicGroupEnhanced`` protobuf message.

    Declares twelve fields (no nested message types), a ``*_FIELD_NUMBER``
    class constant for each of them, and the keyword arguments accepted by
    the constructor.
    """

    __slots__ = (
        "id",
        "topic",
        "description",
        "centroid_index",
        "subject_type",
        "topics_in_group",
        "version",
        "next_recluster",
        "cluster_id",
        "cluster_level",
        "created_at",
        "updated_at",
    )

    # Field-number constants, one per declared field.
    ID_FIELD_NUMBER: _ClassVar[int]
    TOPIC_FIELD_NUMBER: _ClassVar[int]
    DESCRIPTION_FIELD_NUMBER: _ClassVar[int]
    CENTROID_INDEX_FIELD_NUMBER: _ClassVar[int]
    SUBJECT_TYPE_FIELD_NUMBER: _ClassVar[int]
    TOPICS_IN_GROUP_FIELD_NUMBER: _ClassVar[int]
    VERSION_FIELD_NUMBER: _ClassVar[int]
    NEXT_RECLUSTER_FIELD_NUMBER: _ClassVar[int]
    CLUSTER_ID_FIELD_NUMBER: _ClassVar[int]
    CLUSTER_LEVEL_FIELD_NUMBER: _ClassVar[int]
    CREATED_AT_FIELD_NUMBER: _ClassVar[int]
    UPDATED_AT_FIELD_NUMBER: _ClassVar[int]

    # Field attributes as seen by type checkers.
    id: str
    topic: str
    description: str
    centroid_index: int
    subject_type: str
    topics_in_group: int
    version: int
    next_recluster: _timestamp_pb2.Timestamp
    cluster_id: str
    cluster_level: int
    created_at: _timestamp_pb2.Timestamp
    updated_at: _timestamp_pb2.Timestamp

    # Every argument is optional; the three timestamp arguments may be given
    # as a datetime, a Timestamp message, or a mapping.
    def __init__(
        self,
        id: _Optional[str] = ...,
        topic: _Optional[str] = ...,
        description: _Optional[str] = ...,
        centroid_index: _Optional[int] = ...,
        subject_type: _Optional[str] = ...,
        topics_in_group: _Optional[int] = ...,
        version: _Optional[int] = ...,
        next_recluster: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...,
        cluster_id: _Optional[str] = ...,
        cluster_level: _Optional[int] = ...,
        created_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...,
        updated_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...,
    ) -> None: ...
|
|
122
|
+
|
|
123
|
+
class ClusterNode(_message.Message):
    """Type stub for the ``ClusterNode`` protobuf message.

    Declares ten fields (no nested message types), a ``*_FIELD_NUMBER``
    class constant for each of them, and the keyword arguments accepted by
    the constructor.
    """

    __slots__ = (
        "cluster_id",
        "level",
        "parent_id",
        "children",
        "item_count",
        "summary",
        "centroid",
        "silhouette_score",
        "created_at",
        "updated_at",
    )

    # Field-number constants, one per declared field.
    CLUSTER_ID_FIELD_NUMBER: _ClassVar[int]
    LEVEL_FIELD_NUMBER: _ClassVar[int]
    PARENT_ID_FIELD_NUMBER: _ClassVar[int]
    CHILDREN_FIELD_NUMBER: _ClassVar[int]
    ITEM_COUNT_FIELD_NUMBER: _ClassVar[int]
    SUMMARY_FIELD_NUMBER: _ClassVar[int]
    CENTROID_FIELD_NUMBER: _ClassVar[int]
    SILHOUETTE_SCORE_FIELD_NUMBER: _ClassVar[int]
    CREATED_AT_FIELD_NUMBER: _ClassVar[int]
    UPDATED_AT_FIELD_NUMBER: _ClassVar[int]

    # Field attributes as seen by type checkers.
    cluster_id: str
    level: int
    parent_id: str
    children: _containers.RepeatedScalarFieldContainer[str]  # repeated strings
    item_count: int
    summary: str
    centroid: _containers.RepeatedScalarFieldContainer[float]  # repeated floats
    silhouette_score: float
    created_at: _timestamp_pb2.Timestamp
    updated_at: _timestamp_pb2.Timestamp

    # Every argument is optional; the two timestamp arguments may be given
    # as a datetime, a Timestamp message, or a mapping.
    def __init__(
        self,
        cluster_id: _Optional[str] = ...,
        level: _Optional[int] = ...,
        parent_id: _Optional[str] = ...,
        children: _Optional[_Iterable[str]] = ...,
        item_count: _Optional[int] = ...,
        summary: _Optional[str] = ...,
        centroid: _Optional[_Iterable[float]] = ...,
        silhouette_score: _Optional[float] = ...,
        created_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...,
        updated_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...,
    ) -> None: ...
|