gllm-inference-binary 0.5.55 (cp313-cp313-macosx_13_0_arm64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of gllm-inference-binary might be problematic.
- gllm_inference/__init__.pyi +0 -0
- gllm_inference/builder/__init__.pyi +6 -0
- gllm_inference/builder/_build_invoker.pyi +28 -0
- gllm_inference/builder/build_em_invoker.pyi +130 -0
- gllm_inference/builder/build_lm_invoker.pyi +213 -0
- gllm_inference/builder/build_lm_request_processor.pyi +88 -0
- gllm_inference/builder/build_output_parser.pyi +29 -0
- gllm_inference/catalog/__init__.pyi +4 -0
- gllm_inference/catalog/catalog.pyi +121 -0
- gllm_inference/catalog/lm_request_processor_catalog.pyi +112 -0
- gllm_inference/catalog/prompt_builder_catalog.pyi +82 -0
- gllm_inference/constants.pyi +12 -0
- gllm_inference/em_invoker/__init__.pyi +12 -0
- gllm_inference/em_invoker/azure_openai_em_invoker.pyi +88 -0
- gllm_inference/em_invoker/bedrock_em_invoker.pyi +118 -0
- gllm_inference/em_invoker/cohere_em_invoker.pyi +128 -0
- gllm_inference/em_invoker/em_invoker.pyi +90 -0
- gllm_inference/em_invoker/google_em_invoker.pyi +129 -0
- gllm_inference/em_invoker/jina_em_invoker.pyi +103 -0
- gllm_inference/em_invoker/langchain/__init__.pyi +3 -0
- gllm_inference/em_invoker/langchain/em_invoker_embeddings.pyi +84 -0
- gllm_inference/em_invoker/langchain_em_invoker.pyi +46 -0
- gllm_inference/em_invoker/openai_compatible_em_invoker.pyi +41 -0
- gllm_inference/em_invoker/openai_em_invoker.pyi +118 -0
- gllm_inference/em_invoker/schema/__init__.pyi +0 -0
- gllm_inference/em_invoker/schema/bedrock.pyi +29 -0
- gllm_inference/em_invoker/schema/cohere.pyi +20 -0
- gllm_inference/em_invoker/schema/google.pyi +9 -0
- gllm_inference/em_invoker/schema/jina.pyi +29 -0
- gllm_inference/em_invoker/schema/langchain.pyi +5 -0
- gllm_inference/em_invoker/schema/openai.pyi +7 -0
- gllm_inference/em_invoker/schema/openai_compatible.pyi +7 -0
- gllm_inference/em_invoker/schema/twelvelabs.pyi +17 -0
- gllm_inference/em_invoker/schema/voyage.pyi +15 -0
- gllm_inference/em_invoker/twelevelabs_em_invoker.pyi +101 -0
- gllm_inference/em_invoker/voyage_em_invoker.pyi +104 -0
- gllm_inference/exceptions/__init__.pyi +4 -0
- gllm_inference/exceptions/error_parser.pyi +41 -0
- gllm_inference/exceptions/exceptions.pyi +132 -0
- gllm_inference/exceptions/provider_error_map.pyi +24 -0
- gllm_inference/lm_invoker/__init__.pyi +14 -0
- gllm_inference/lm_invoker/anthropic_lm_invoker.pyi +318 -0
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +237 -0
- gllm_inference/lm_invoker/batch/__init__.pyi +3 -0
- gllm_inference/lm_invoker/batch/batch_operations.pyi +127 -0
- gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +212 -0
- gllm_inference/lm_invoker/datasaur_lm_invoker.pyi +157 -0
- gllm_inference/lm_invoker/google_lm_invoker.pyi +327 -0
- gllm_inference/lm_invoker/langchain_lm_invoker.pyi +239 -0
- gllm_inference/lm_invoker/litellm_lm_invoker.pyi +224 -0
- gllm_inference/lm_invoker/lm_invoker.pyi +165 -0
- gllm_inference/lm_invoker/openai_chat_completions_lm_invoker.pyi +253 -0
- gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi +52 -0
- gllm_inference/lm_invoker/openai_lm_invoker.pyi +404 -0
- gllm_inference/lm_invoker/portkey_lm_invoker.pyi +296 -0
- gllm_inference/lm_invoker/schema/__init__.pyi +0 -0
- gllm_inference/lm_invoker/schema/anthropic.pyi +56 -0
- gllm_inference/lm_invoker/schema/bedrock.pyi +53 -0
- gllm_inference/lm_invoker/schema/datasaur.pyi +14 -0
- gllm_inference/lm_invoker/schema/google.pyi +24 -0
- gllm_inference/lm_invoker/schema/langchain.pyi +23 -0
- gllm_inference/lm_invoker/schema/openai.pyi +106 -0
- gllm_inference/lm_invoker/schema/openai_chat_completions.pyi +62 -0
- gllm_inference/lm_invoker/schema/portkey.pyi +31 -0
- gllm_inference/lm_invoker/schema/xai.pyi +31 -0
- gllm_inference/lm_invoker/xai_lm_invoker.pyi +253 -0
- gllm_inference/model/__init__.pyi +12 -0
- gllm_inference/model/em/__init__.pyi +0 -0
- gllm_inference/model/em/cohere_em.pyi +17 -0
- gllm_inference/model/em/google_em.pyi +16 -0
- gllm_inference/model/em/jina_em.pyi +22 -0
- gllm_inference/model/em/openai_em.pyi +15 -0
- gllm_inference/model/em/twelvelabs_em.pyi +13 -0
- gllm_inference/model/em/voyage_em.pyi +20 -0
- gllm_inference/model/lm/__init__.pyi +0 -0
- gllm_inference/model/lm/anthropic_lm.pyi +22 -0
- gllm_inference/model/lm/google_lm.pyi +18 -0
- gllm_inference/model/lm/openai_lm.pyi +27 -0
- gllm_inference/model/lm/xai_lm.pyi +19 -0
- gllm_inference/output_parser/__init__.pyi +3 -0
- gllm_inference/output_parser/json_output_parser.pyi +60 -0
- gllm_inference/output_parser/output_parser.pyi +27 -0
- gllm_inference/prompt_builder/__init__.pyi +3 -0
- gllm_inference/prompt_builder/format_strategy/__init__.pyi +4 -0
- gllm_inference/prompt_builder/format_strategy/format_strategy.pyi +55 -0
- gllm_inference/prompt_builder/format_strategy/jinja_format_strategy.pyi +45 -0
- gllm_inference/prompt_builder/format_strategy/string_format_strategy.pyi +20 -0
- gllm_inference/prompt_builder/prompt_builder.pyi +69 -0
- gllm_inference/prompt_formatter/__init__.pyi +7 -0
- gllm_inference/prompt_formatter/agnostic_prompt_formatter.pyi +49 -0
- gllm_inference/prompt_formatter/huggingface_prompt_formatter.pyi +55 -0
- gllm_inference/prompt_formatter/llama_prompt_formatter.pyi +59 -0
- gllm_inference/prompt_formatter/mistral_prompt_formatter.pyi +53 -0
- gllm_inference/prompt_formatter/openai_prompt_formatter.pyi +35 -0
- gllm_inference/prompt_formatter/prompt_formatter.pyi +30 -0
- gllm_inference/realtime_chat/__init__.pyi +3 -0
- gllm_inference/realtime_chat/google_realtime_chat.pyi +205 -0
- gllm_inference/realtime_chat/input_streamer/__init__.pyi +4 -0
- gllm_inference/realtime_chat/input_streamer/input_streamer.pyi +36 -0
- gllm_inference/realtime_chat/input_streamer/keyboard_input_streamer.pyi +27 -0
- gllm_inference/realtime_chat/input_streamer/linux_mic_input_streamer.pyi +36 -0
- gllm_inference/realtime_chat/output_streamer/__init__.pyi +4 -0
- gllm_inference/realtime_chat/output_streamer/console_output_streamer.pyi +21 -0
- gllm_inference/realtime_chat/output_streamer/linux_speaker_output_streamer.pyi +42 -0
- gllm_inference/realtime_chat/output_streamer/output_streamer.pyi +33 -0
- gllm_inference/realtime_chat/realtime_chat.pyi +28 -0
- gllm_inference/request_processor/__init__.pyi +4 -0
- gllm_inference/request_processor/lm_request_processor.pyi +101 -0
- gllm_inference/request_processor/uses_lm_mixin.pyi +130 -0
- gllm_inference/schema/__init__.pyi +18 -0
- gllm_inference/schema/activity.pyi +64 -0
- gllm_inference/schema/attachment.pyi +88 -0
- gllm_inference/schema/code_exec_result.pyi +14 -0
- gllm_inference/schema/config.pyi +15 -0
- gllm_inference/schema/enums.pyi +80 -0
- gllm_inference/schema/events.pyi +105 -0
- gllm_inference/schema/lm_input.pyi +4 -0
- gllm_inference/schema/lm_output.pyi +188 -0
- gllm_inference/schema/mcp.pyi +31 -0
- gllm_inference/schema/message.pyi +52 -0
- gllm_inference/schema/model_id.pyi +176 -0
- gllm_inference/schema/reasoning.pyi +15 -0
- gllm_inference/schema/token_usage.pyi +75 -0
- gllm_inference/schema/tool_call.pyi +14 -0
- gllm_inference/schema/tool_result.pyi +11 -0
- gllm_inference/schema/type_alias.pyi +11 -0
- gllm_inference/utils/__init__.pyi +5 -0
- gllm_inference/utils/io_utils.pyi +26 -0
- gllm_inference/utils/langchain.pyi +30 -0
- gllm_inference/utils/validation.pyi +12 -0
- gllm_inference.build/.gitignore +1 -0
- gllm_inference.cpython-313-darwin.so +0 -0
- gllm_inference.pyi +154 -0
- gllm_inference_binary-0.5.55.dist-info/METADATA +138 -0
- gllm_inference_binary-0.5.55.dist-info/RECORD +137 -0
- gllm_inference_binary-0.5.55.dist-info/WHEEL +5 -0
- gllm_inference_binary-0.5.55.dist-info/top_level.txt +1 -0
gllm_inference/catalog/lm_request_processor_catalog.pyi
@@ -0,0 +1,112 @@
+from _typeshed import Incomplete
+from gllm_inference.builder import build_lm_request_processor as build_lm_request_processor
+from gllm_inference.catalog.catalog import BaseCatalog as BaseCatalog
+from gllm_inference.request_processor import LMRequestProcessor as LMRequestProcessor
+
+MODEL_ID_ENV_VAR_REGEX_PATTERN: str
+LM_REQUEST_PROCESSOR_REQUIRED_COLUMNS: Incomplete
+CONFIG_SCHEMA_MAP: Incomplete
+logger: Incomplete
+
+class LMRequestProcessorCatalog(BaseCatalog[LMRequestProcessor]):
+    '''Loads multiple LM request processors from certain sources.
+
+    Attributes:
+        components (dict[str, LMRequestProcessor]): Dictionary of the loaded LM request processors.
+
+    Initialization:
+        # Example 1: Load from Google Sheets using client email and private key
+        ```python
+        catalog = LMRequestProcessorCatalog.from_gsheets(
+            sheet_id="...",
+            worksheet_id="...",
+            client_email="...",
+            private_key="...",
+        )
+
+        lm_request_processor = catalog.name
+        ```
+
+        # Example 2: Load from Google Sheets using credential file
+        ```python
+        catalog = LMRequestProcessorCatalog.from_gsheets(
+            sheet_id="...",
+            worksheet_id="...",
+            credential_file_path="...",
+        )
+
+        lm_request_processor = catalog.name
+        ```
+
+        # Example 3: Load from CSV
+        ```python
+        catalog = LMRequestProcessorCatalog.from_csv(csv_path="...")
+
+        lm_request_processor = catalog.name
+        ```
+
+        # Example 4: Load from records
+        ```python
+        records = [
+            {
+                "name": "answer_question",
+                "system_template": (
+                    "You are a helpful assistant.\\n"
+                    "Answer the following question based on the provided context.\\n"
+                    "```{context}```"
+                ),
+                "user_template": "{query}",
+                "key_defaults": \'{"context": "<default context>"}\',
+                "model_id": "openai/gpt-5-nano",
+                "credentials": "OPENAI_API_KEY",
+                "config": "",
+                "output_parser_type": "none",
+            },
+        ]
+        catalog = LMRequestProcessorCatalog.from_records(records=records)
+        lm_request_processor = catalog.answer_question
+        ```
+
+    Template Format Example:
+        # Example 1: Google Sheets
+        For an example of how a Google Sheets file can be formatted to be loaded using LMRequestProcessorCatalog, see:
+        https://docs.google.com/spreadsheets/d/1CX9i45yEinv1UdB3s6uHNMj7mxr2-s1NFHfFDvMsq0E/edit?usp=drive_link
+
+        # Example 2: CSV
+        For an example of how a CSV file can be formatted to be loaded using LMRequestProcessorCatalog, see:
+        https://drive.google.com/file/d/1_2rSoxh3CR2KZxIyUmpowMrt0Lm0YqAb/view?usp=drive_link
+
+    Template Explanation:
+        The required columns are:
+        1. name (str): The name of the LM request processor.
+        2. system_template (str): The system template of the prompt builder.
+        3. user_template (str): The user template of the prompt builder.
+        4. key_defaults (json_str): The default values for the prompt template keys.
+        5. model_id (str): The model ID of the LM invoker.
+        6. credentials (str | json_str): The credentials of the LM invoker.
+        7. config (json_str): The additional configuration of the LM invoker.
+        8. output_parser_type (str): The type of the output parser.
+
+    Important Notes:
+        1. At least one of `system_template` or `user_template` must be filled.
+        2. `key_defaults` is optional. If filled, it must be a dictionary containing the default values for the
+            prompt template keys. These default values will be applied when the corresponding keys are not provided
+            in the runtime input. If it is empty, the prompt template keys will not have default values.
+        3. The `model_id`:
+            3.1. Must be filled with the model ID of the LM invoker, e.g. "openai/gpt-5-nano".
+            3.2. Can be partially loaded from environment variables using the "${ENV_VAR_KEY}" syntax,
+                e.g. "azure-openai/${AZURE_ENDPOINT}/${AZURE_DEPLOYMENT}".
+            3.3. For the available model ID formats, see: https://gdplabs.gitbook.io/sdk/resources/supported-models
+        4. `credentials` is optional. If it is filled, it can either be:
+            4.1. An environment variable name containing the API key (e.g. OPENAI_API_KEY).
+            4.2. An environment variable name containing the path to a credentials JSON file
+                (e.g. GOOGLE_CREDENTIALS_FILE_PATH). Currently only supported for Google Vertex AI.
+            4.3. A dictionary of credentials, with each value being an environment variable name corresponding to the
+                credential (e.g. {"api_key": "OPENAI_API_KEY"}). Currently supported for Bedrock and LangChain.
+            If it is empty, the LM invoker will use the default credentials loaded from the environment variables.
+        5. `config` is optional. If filled, it must be a dictionary containing the configuration for the LM invoker.
+            If it is empty, the LM invoker will use the default configuration.
+        6. `output_parser_type` can either be:
+            6.1. none: No output parser will be used.
+            6.2. json: The JSONOutputParser will be used.
+    '''
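The env-var substitution described in note 3.2 is worth a concrete illustration. Below is a minimal sketch, assuming the catalog package re-exports `LMRequestProcessorCatalog` (its `__init__.pyi` listed above suggests it does); the endpoint, deployment, and template values are placeholders, not real resources:

```python
# Hedged sketch: a from_records() catalog whose model_id is partially
# resolved from environment variables via the "${ENV_VAR_KEY}" syntax
# documented in Important Notes 3.2. All concrete values are placeholders.
import os

from gllm_inference.catalog import LMRequestProcessorCatalog  # assumed re-export

os.environ["AZURE_ENDPOINT"] = "https://my-resource.openai.azure.com"  # placeholder
os.environ["AZURE_DEPLOYMENT"] = "my-deployment"                       # placeholder

records = [
    {
        "name": "summarize",
        "system_template": "Summarize the following text.",
        "user_template": "{text}",
        "key_defaults": "",
        "model_id": "azure-openai/${AZURE_ENDPOINT}/${AZURE_DEPLOYMENT}",
        "credentials": "AZURE_OPENAI_API_KEY",  # env var name holding the key
        "config": "",
        "output_parser_type": "none",
    },
]
catalog = LMRequestProcessorCatalog.from_records(records=records)
lm_request_processor = catalog.summarize  # attribute access, as in Example 4
```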
gllm_inference/catalog/prompt_builder_catalog.pyi
@@ -0,0 +1,82 @@
+from _typeshed import Incomplete
+from gllm_inference.catalog.catalog import BaseCatalog as BaseCatalog
+from gllm_inference.prompt_builder.prompt_builder import PromptBuilder as PromptBuilder
+
+PROMPT_BUILDER_REQUIRED_COLUMNS: Incomplete
+logger: Incomplete
+
+class PromptBuilderCatalog(BaseCatalog[PromptBuilder]):
+    '''Loads multiple prompt builders from certain sources.
+
+    Attributes:
+        components (dict[str, PromptBuilder]): Dictionary of the loaded prompt builders.
+
+    Initialization:
+        # Example 1: Load from Google Sheets using client email and private key
+        ```python
+        catalog = PromptBuilderCatalog.from_gsheets(
+            sheet_id="...",
+            worksheet_id="...",
+            client_email="...",
+            private_key="...",
+        )
+        prompt_builder = catalog.name
+        ```
+
+        # Example 2: Load from Google Sheets using credential file
+        ```python
+        catalog = PromptBuilderCatalog.from_gsheets(
+            sheet_id="...",
+            worksheet_id="...",
+            credential_file_path="...",
+        )
+        prompt_builder = catalog.name
+        ```
+
+        # Example 3: Load from CSV
+        ```python
+        catalog = PromptBuilderCatalog.from_csv(csv_path="...")
+        prompt_builder = catalog.name
+        ```
+
+        # Example 4: Load from records
+        ```python
+        records = [
+            {
+                "name": "answer_question",
+                "system": (
+                    "You are a helpful assistant.\\n"
+                    "Answer the following question based on the provided context.\\n"
+                    "```{context}```"
+                ),
+                "user": "{query}",
+                "key_defaults": \'{"context": "<default context>"}\',
+            },
+        ]
+        catalog = PromptBuilderCatalog.from_records(records=records)
+        prompt_builder = catalog.answer_question
+        ```
+
+    Template Example:
+        # Example 1: Google Sheets
+        For an example of how a Google Sheets file can be formatted to be loaded using PromptBuilderCatalog, see:
+        https://docs.google.com/spreadsheets/d/12IwSKv8hMhyWXSQnLx9LgCj0cxaR1f9gOmbEDGleurE/edit?usp=drive_link
+
+        # Example 2: CSV
+        For an example of how a CSV file can be formatted to be loaded using PromptBuilderCatalog, see:
+        https://drive.google.com/file/d/1KQgddMdbcZBZmroQFtjSl-TKLohq84Fz/view?usp=drive_link
+
+
+    Template Explanation:
+        The required columns are:
+        1. name (str): The name of the prompt builder.
+        2. system (str): The system template of the prompt builder.
+        3. user (str): The user template of the prompt builder.
+        4. key_defaults (json_str): The default values for the prompt template keys.
+
+    Important Notes:
+        1. At least one of the `system` and `user` columns must be filled.
+        2. `key_defaults` is optional. If filled, it must be a dictionary containing the default values for the
+            prompt template keys. These default values will be applied when the corresponding keys are not provided
+            in the runtime input. If it is empty, the prompt template keys will not have default values.
+    '''
gllm_inference/constants.pyi
@@ -0,0 +1,12 @@
+from _typeshed import Incomplete
+
+AZURE_OPENAI_URL_SUFFIX: str
+DOCUMENT_MIME_TYPES: Incomplete
+EMBEDDING_ENDPOINT: str
+GOOGLE_SCOPES: Incomplete
+GRPC_ENABLE_RETRIES_KEY: str
+INVOKER_DEFAULT_TIMEOUT: float
+INVOKER_PROPAGATED_MAX_RETRIES: int
+JINA_DEFAULT_URL: str
+OPENAI_DEFAULT_URL: str
+SECONDS_TO_MILLISECONDS: int
gllm_inference/em_invoker/__init__.pyi
@@ -0,0 +1,12 @@
+from gllm_inference.em_invoker.azure_openai_em_invoker import AzureOpenAIEMInvoker as AzureOpenAIEMInvoker
+from gllm_inference.em_invoker.bedrock_em_invoker import BedrockEMInvoker as BedrockEMInvoker
+from gllm_inference.em_invoker.cohere_em_invoker import CohereEMInvoker as CohereEMInvoker
+from gllm_inference.em_invoker.google_em_invoker import GoogleEMInvoker as GoogleEMInvoker
+from gllm_inference.em_invoker.jina_em_invoker import JinaEMInvoker as JinaEMInvoker
+from gllm_inference.em_invoker.langchain_em_invoker import LangChainEMInvoker as LangChainEMInvoker
+from gllm_inference.em_invoker.openai_compatible_em_invoker import OpenAICompatibleEMInvoker as OpenAICompatibleEMInvoker
+from gllm_inference.em_invoker.openai_em_invoker import OpenAIEMInvoker as OpenAIEMInvoker
+from gllm_inference.em_invoker.twelevelabs_em_invoker import TwelveLabsEMInvoker as TwelveLabsEMInvoker
+from gllm_inference.em_invoker.voyage_em_invoker import VoyageEMInvoker as VoyageEMInvoker
+
+__all__ = ['AzureOpenAIEMInvoker', 'BedrockEMInvoker', 'CohereEMInvoker', 'GoogleEMInvoker', 'JinaEMInvoker', 'LangChainEMInvoker', 'OpenAIEMInvoker', 'OpenAICompatibleEMInvoker', 'TwelveLabsEMInvoker', 'VoyageEMInvoker']
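Because every invoker is re-exported here and listed in `__all__`, downstream code can import straight from the subpackage root rather than the concrete modules; for instance:

```python
# Import from the subpackage root, relying on the re-exports in __all__ above.
from gllm_inference.em_invoker import BedrockEMInvoker, CohereEMInvoker, OpenAIEMInvoker
```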
gllm_inference/em_invoker/azure_openai_em_invoker.pyi
@@ -0,0 +1,88 @@
+from _typeshed import Incomplete
+from gllm_core.utils.retry import RetryConfig as RetryConfig
+from gllm_inference.constants import AZURE_OPENAI_URL_SUFFIX as AZURE_OPENAI_URL_SUFFIX, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
+from gllm_inference.em_invoker.openai_em_invoker import OpenAIEMInvoker as OpenAIEMInvoker
+from gllm_inference.em_invoker.schema.openai import Key as Key
+from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig
+from typing import Any
+
+class AzureOpenAIEMInvoker(OpenAIEMInvoker):
+    '''An embedding model invoker to interact with Azure OpenAI embedding models.
+
+    Attributes:
+        model_id (str): The model ID of the embedding model.
+        model_provider (str): The provider of the embedding model.
+        model_name (str): The name of the Azure OpenAI embedding model deployment.
+        client_kwargs (dict[str, Any]): The keyword arguments for the Azure OpenAI client.
+        default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
+        retry_config (RetryConfig): The retry configuration for the embedding model.
+        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
+
+    Input types:
+        The `AzureOpenAIEMInvoker` only supports text inputs.
+
+    Output format:
+        The `AzureOpenAIEMInvoker` can embed either:
+        1. A single content.
+            1. A single content is a single text.
+            2. The output will be a `Vector`, representing the embedding of the content.
+
+            # Example 1: Embedding a text content.
+            ```python
+            text = "This is a text"
+            result = await em_invoker.invoke(text)
+            ```
+
+            The above example will return a `Vector` with a size of (embedding_size,).
+
+        2. A list of contents.
+            1. A list of contents is a list of texts.
+            2. The output will be a `list[Vector]`, where each element is a `Vector` representing the
+                embedding of each single content.
+
+            # Example: Embedding a list of contents.
+            ```python
+            text1 = "This is a text"
+            text2 = "This is another text"
+            text3 = "This is yet another text"
+            result = await em_invoker.invoke([text1, text2, text3])
+            ```
+
+            The above example will return a `list[Vector]` with a size of (3, embedding_size).
+
+    Retry and timeout:
+        The `AzureOpenAIEMInvoker` supports retry and timeout configuration.
+        By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
+        They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
+
+        Retry config examples:
+        ```python
+        retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
+        retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
+        retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
+        retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
+        ```
+
+        Usage example:
+        ```python
+        em_invoker = AzureOpenAIEMInvoker(..., retry_config=retry_config)
+        ```
+    '''
+    client_kwargs: Incomplete
+    def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
+        """Initializes a new instance of the AzureOpenAIEMInvoker class.
+
+        Args:
+            azure_endpoint (str): The endpoint of the Azure OpenAI service.
+            azure_deployment (str): The deployment name of the Azure OpenAI service.
+            api_key (str | None, optional): The API key for authenticating with Azure OpenAI. Defaults to None, in
+                which case the `AZURE_OPENAI_API_KEY` environment variable will be used.
+            api_version (str | None, optional): Deprecated parameter to be removed in v0.6. Defaults to None.
+            model_kwargs (dict[str, Any] | None, optional): Additional model parameters. Defaults to None.
+            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
+                Defaults to None.
+            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
+                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
+                Defaults to None, in which case no truncation is applied.
+        """
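Putting the constructor signature and the retry notes together, a minimal usage sketch could look like the following; the endpoint and deployment names are placeholders, and everything else comes from the stub above:

```python
# Hedged sketch: construct and invoke AzureOpenAIEMInvoker using only
# parameters from the __init__ signature above. Placeholder values only.
import asyncio

from gllm_core.utils.retry import RetryConfig
from gllm_inference.em_invoker import AzureOpenAIEMInvoker

async def main() -> None:
    em_invoker = AzureOpenAIEMInvoker(
        azure_endpoint="https://my-resource.openai.azure.com",  # placeholder
        azure_deployment="my-embedding-deployment",             # placeholder
        # api_key omitted: falls back to the AZURE_OPENAI_API_KEY env var
        retry_config=RetryConfig(max_retries=3, timeout=30.0),
    )
    vector = await em_invoker.invoke("This is a text")
    print(len(vector))  # embedding_size

asyncio.run(main())
```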
gllm_inference/em_invoker/bedrock_em_invoker.pyi
@@ -0,0 +1,118 @@
+from _typeshed import Incomplete
+from enum import StrEnum
+from gllm_core.utils.retry import RetryConfig as RetryConfig
+from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
+from gllm_inference.em_invoker.schema.bedrock import InputType as InputType, Key as Key, OutputType as OutputType
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_http_status_to_base_invoker_error as convert_http_status_to_base_invoker_error
+from gllm_inference.exceptions.provider_error_map import BEDROCK_ERROR_MAPPING as BEDROCK_ERROR_MAPPING
+from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EMContent as EMContent, ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig, Vector as Vector
+from typing import Any
+
+class ModelType(StrEnum):
+    """Defines the type of the Bedrock embedding model."""
+    COHERE = 'cohere'
+    MARENGO = 'marengo'
+    TITAN = 'titan'
+
+SUPPORTED_ATTACHMENTS: Incomplete
+
+class BedrockEMInvoker(BaseEMInvoker):
+    '''An embedding model invoker to interact with AWS Bedrock embedding models.
+
+    Attributes:
+        model_id (str): The model ID of the embedding model.
+        model_provider (str): The provider of the embedding model.
+        model_name (str): The name of the embedding model.
+        session (Session): The Bedrock client session.
+        client_kwargs (dict[str, Any]): The Bedrock client kwargs.
+        default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
+        retry_config (RetryConfig): The retry configuration for the embedding model.
+        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
+
+    Input types:
+        The `BedrockEMInvoker` supports:
+        1. Text inputs for Cohere, Titan, and Marengo models
+        2. Image inputs for Marengo models through Attachment objects
+
+    Output format:
+        The `BedrockEMInvoker` can embed either:
+        1. A single content.
+            1. A single content is a single text or single image (image only supported for Marengo).
+            2. The output will be a `Vector`, representing the embedding of the content.
+
+            # Example 1: Embedding a text content.
+            ```python
+            text = "This is a text"
+            result = await em_invoker.invoke(text)
+            ```
+
+            # Example 2: Embedding an image with Marengo.
+            ```python
+            em_invoker = BedrockEMInvoker(
+                model_name="us.twelvelabs.marengo-2.7"
+            )
+            image = Attachment.from_path("path/to/local/image.png")
+            result = await em_invoker.invoke(image)
+            ```
+
+            The above examples will return a `Vector` with a size of (embedding_size,).
+
+        2. A list of contents.
+            1. A list of contents is a list of texts or images (images only supported for Marengo).
+            2. The output will be a `list[Vector]`, where each element is a `Vector` representing the
+                embedding of each single content.
+
+            # Example: Embedding a list of contents.
+            ```python
+            text1 = "This is a text"
+            text2 = "This is another text"
+            text3 = "This is yet another text"
+            result = await em_invoker.invoke([text1, text2, text3])
+            ```
+
+            The above example will return a `list[Vector]` with a size of (3, embedding_size).
+
+    Retry and timeout:
+        The `BedrockEMInvoker` supports retry and timeout configuration.
+        By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
+        They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
+
+        Retry config examples:
+        ```python
+        retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
+        retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
+        retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
+        retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
+        ```
+
+        Usage example:
+        ```python
+        em_invoker = BedrockEMInvoker(..., retry_config=retry_config)
+        ```
+    '''
+    session: Incomplete
+    client_kwargs: Incomplete
+    def __init__(self, model_name: str, access_key_id: str | None = None, secret_access_key: str | None = None, region_name: str = 'us-east-1', model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
+        '''Initializes a new instance of the BedrockEMInvoker class.
+
+        Args:
+            model_name (str): The name of the Bedrock embedding model to be used.
+            access_key_id (str | None, optional): The AWS access key ID. Defaults to None, in which case
+                the `AWS_ACCESS_KEY_ID` environment variable will be used.
+            secret_access_key (str | None, optional): The AWS secret access key. Defaults to None, in which case
+                the `AWS_SECRET_ACCESS_KEY` environment variable will be used.
+            region_name (str, optional): The AWS region name. Defaults to "us-east-1".
+            model_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the Bedrock client.
+                Defaults to None.
+            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
+                Defaults to None.
+            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
+                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
+                Defaults to None, in which case no truncation is applied.
+
+        Raises:
+            ValueError: If the model name is not supported.
+            ValueError: If `access_key_id` or `secret_access_key` is neither provided nor set in the
+                `AWS_ACCESS_KEY_ID` or `AWS_SECRET_ACCESS_KEY` environment variables, respectively.
+        '''
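A hedged sketch of batch text embedding with explicit region and retry settings follows. The Titan model name is an assumption (a common Bedrock text-embedding model ID, not confirmed by this stub); the constructor parameters themselves come from the signature above:

```python
# Hedged sketch: batch embedding with BedrockEMInvoker. The model name is an
# assumed Bedrock Titan ID; everything else follows the stub's __init__.
import asyncio

from gllm_core.utils.retry import RetryConfig
from gllm_inference.em_invoker import BedrockEMInvoker

async def main() -> None:
    em_invoker = BedrockEMInvoker(
        model_name="amazon.titan-embed-text-v2:0",  # assumed model name
        region_name="us-east-1",
        # access_key_id / secret_access_key omitted: fall back to the
        # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables
        retry_config=RetryConfig(max_retries=5, timeout=10.0),
    )
    vectors = await em_invoker.invoke(["first text", "second text"])
    print(len(vectors))  # 2, one vector per input

asyncio.run(main())
```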
gllm_inference/em_invoker/cohere_em_invoker.pyi
@@ -0,0 +1,128 @@
+from _typeshed import Incomplete
+from gllm_core.utils.retry import RetryConfig as RetryConfig
+from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
+from gllm_inference.em_invoker.schema.cohere import CohereInputType as CohereInputType, Key as Key
+from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EMContent as EMContent, ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig, Vector as Vector
+from gllm_inference.utils import validate_string_enum as validate_string_enum
+from typing import Any
+
+SUPPORTED_ATTACHMENTS: Incomplete
+MULTIMODAL_MODEL_VERSION: Incomplete
+
+class CohereEMInvoker(BaseEMInvoker):
+    '''An embedding model invoker to interact with Cohere embedding models.
+
+    Attributes:
+        model_id (str): The model ID of the embedding model.
+        model_provider (str): The provider of the embedding model (Cohere).
+        model_name (str): The name of the Cohere embedding model.
+        client (AsyncClient): The asynchronous client for the Cohere API.
+        default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
+        retry_config (RetryConfig): The retry configuration for the embedding model.
+        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
+        input_type (CohereInputType): The input type for the embedding model. Supported values include:
+            1. `CohereInputType.SEARCH_DOCUMENT`,
+            2. `CohereInputType.SEARCH_QUERY`,
+            3. `CohereInputType.CLASSIFICATION`,
+            4. `CohereInputType.CLUSTERING`,
+            5. `CohereInputType.IMAGE`.
+
+    Initialization:
+        You can initialize the `CohereEMInvoker` as follows:
+        ```python
+        em_invoker = CohereEMInvoker(
+            model_name="embed-english-v4.0",
+            input_type="search_document"
+        )
+        ```
+
+        Note: The `input_type` parameter can be one of the following:
+        1. "search_document"
+        2. "search_query"
+        3. "classification"
+        4. "clustering"
+        5. "image"
+
+        This parameter is optional and defaults to "search_document". For more information about
+        input_type, please refer to https://docs.cohere.com/docs/embeddings#the-input_type-parameter.
+
+    Input types:
+        The `CohereEMInvoker` supports the following input types: text and image.
+        Non-text inputs must be passed as an `Attachment` object.
+
+    Output format:
+        The `CohereEMInvoker` can embed either:
+        1. A single content.
+            1. A single content is either a text or an image.
+            2. The output will be a `Vector`, representing the embedding of the content.
+
+            # Example 1: Embedding a text content.
+            ```python
+            text = "What animal is in this image?"
+            result = await em_invoker.invoke(text)
+            ```
+
+            # Example 2: Embedding an image content.
+            ```python
+            image = Attachment.from_path("path/to/local/image.png")
+            result = await em_invoker.invoke(image)
+            ```
+
+            The above examples will return a `Vector` with a size of (embedding_size,).
+
+        2. A list of contents.
+            1. A list of contents is a list that consists of any of the above single contents.
+            2. The output will be a `list[Vector]`, where each element is a `Vector` representing the
+                embedding of each single content.
+
+            # Example: Embedding a list of contents.
+            ```python
+            text = "What animal is in this image?"
+            image = Attachment.from_path("path/to/local/image.png")
+            result = await em_invoker.invoke([text, image])
+            ```
+
+            The above example will return a `list[Vector]` with a size of (2, embedding_size).
+
+    Retry and timeout:
+        The `CohereEMInvoker` supports retry and timeout configuration.
+        By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
+        They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
+
+        Retry config examples:
+        ```python
+        retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
+        retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
+        retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
+        retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
+        ```
+
+        Usage example:
+        ```python
+        em_invoker = CohereEMInvoker(..., retry_config=retry_config)
+        ```
+
+    '''
+    input_type: Incomplete
+    client: Incomplete
+    def __init__(self, model_name: str, api_key: str | None = None, base_url: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None, input_type: CohereInputType = ...) -> None:
+        '''Initializes a new instance of the CohereEMInvoker class.
+
+        Args:
+            model_name (str): The name of the Cohere embedding model to be used.
+            api_key (str | None, optional): The API key for authenticating with Cohere. Defaults to None, in which
+                case the `COHERE_API_KEY` environment variable will be used.
+            base_url (str | None, optional): The base URL for a custom Cohere-compatible endpoint.
+                Defaults to None, in which case Cohere\'s default URL will be used.
+            model_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the Cohere client.
+                Defaults to None.
+            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
+                Defaults to None.
+            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
+                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
+                Defaults to None, in which case no truncation is applied.
+            input_type (CohereInputType, optional): The input type for the embedding model.
+                Defaults to `CohereInputType.SEARCH_DOCUMENT`. Valid values are: "search_document", "search_query",
+                "classification", "clustering", and "image".
+        '''
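The `input_type` distinction above matters most in retrieval workloads, where documents and queries are embedded differently. A sketch, reusing the model name from the docstring example and the `CohereInputType` import path from the stub:

```python
# Sketch: one invoker embeds documents for indexing, another embeds
# search queries, per the input_type documentation above.
import asyncio

from gllm_inference.em_invoker import CohereEMInvoker
from gllm_inference.em_invoker.schema.cohere import CohereInputType

async def main() -> None:
    doc_invoker = CohereEMInvoker(
        model_name="embed-english-v4.0",
        input_type=CohereInputType.SEARCH_DOCUMENT,  # the default
    )
    query_invoker = CohereEMInvoker(
        model_name="embed-english-v4.0",
        input_type=CohereInputType.SEARCH_QUERY,
    )
    doc_vectors = await doc_invoker.invoke(["Doc one.", "Doc two."])
    query_vector = await query_invoker.invoke("Which doc mentions X?")
    print(len(doc_vectors), len(query_vector))

asyncio.run(main())
```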
gllm_inference/em_invoker/em_invoker.pyi
@@ -0,0 +1,90 @@
+import abc
+from _typeshed import Incomplete
+from abc import ABC
+from gllm_core.utils.retry import RetryConfig
+from gllm_inference.constants import DOCUMENT_MIME_TYPES as DOCUMENT_MIME_TYPES, INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_to_base_invoker_error as convert_to_base_invoker_error
+from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EMContent as EMContent, ModelId as ModelId, TruncateSide as TruncateSide, TruncationConfig as TruncationConfig, Vector as Vector
+from typing import Any
+
+class BaseEMInvoker(ABC, metaclass=abc.ABCMeta):
+    """A base class for embedding model invokers used in Gen AI applications.
+
+    The `BaseEMInvoker` class provides a framework for invoking embedding models.
+
+    Attributes:
+        model_id (str): The model ID of the embedding model.
+        model_provider (str): The provider of the embedding model.
+        model_name (str): The name of the embedding model.
+        default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the
+            embedding model. Defaults to None, in which case an empty dictionary is used.
+        retry_config (RetryConfig): The retry configuration for the embedding model.
+            Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
+            Defaults to None, in which case no truncation is applied.
+    """
+    default_hyperparameters: Incomplete
+    retry_config: Incomplete
+    truncation_config: Incomplete
+    def __init__(self, model_id: ModelId, default_hyperparameters: dict[str, Any] | None = None, supported_attachments: set[str] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
+        """Initializes a new instance of the BaseEMInvoker class.
+
+        Args:
+            model_id (ModelId): The model ID of the embedding model.
+            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the
+                embedding model. Defaults to None, in which case an empty dictionary is used.
+            supported_attachments (set[str] | None, optional): A set of supported attachment types. Defaults to None,
+                in which case an empty set is used (indicating that no attachments are supported).
+            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
+                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
+                Defaults to None, in which case no truncation is applied.
+        """
+    @property
+    def model_id(self) -> str:
+        """The model ID of the embedding model.
+
+        Returns:
+            str: The model ID of the embedding model.
+        """
+    @property
+    def model_provider(self) -> str:
+        """The provider of the embedding model.
+
+        Returns:
+            str: The provider of the embedding model.
+        """
+    @property
+    def model_name(self) -> str:
+        """The name of the embedding model.
+
+        Returns:
+            str: The name of the embedding model.
+        """
+    async def invoke(self, content: EMContent | list[EMContent], hyperparameters: dict[str, Any] | None = None) -> Vector | list[Vector]:
+        """Invokes the embedding model with the provided content or list of contents.
+
+        This method invokes the embedding model with the provided content or list of contents.
+        It includes retry logic with exponential backoff for transient failures.
+
+        Args:
+            content (EMContent | list[EMContent]): The input or list of inputs to be embedded using the embedding model.
+            hyperparameters (dict[str, Any] | None, optional): A dictionary of hyperparameters for the embedding model.
+                Defaults to None, in which case the default hyperparameters are used.
+
+        Returns:
+            Vector | list[Vector]: The vector representations of the input contents:
+                1. If the input is an `EMContent`, the output is a `Vector`.
+                2. If the input is a `list[EMContent]`, the output is a `list[Vector]`.
+
+        Raises:
+            CancelledError: If the invocation is cancelled.
+            ModelNotFoundError: If the model is not found.
+            ProviderAuthError: If the model authentication fails.
+            ProviderInternalError: If a model internal error occurs.
+            ProviderInvalidArgsError: If the model parameters are invalid.
+            ProviderOverloadedError: If the model is overloaded.
+            ProviderRateLimitError: If the model rate limit is exceeded.
+            TimeoutError: If the invocation times out.
+            ValueError: If the input content is invalid.
+        """
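The Raises section above suggests a common handling pattern for any `BaseEMInvoker` subclass. A hedged sketch: only `BaseInvokerError` and `Vector` are confirmed importable by the stubs above, and the assumption is that the Provider*/ModelNotFound errors subclass `BaseInvokerError`, so one handler covers them.

```python
# Hedged sketch of handling the documented failure modes of invoke().
# Assumption: the Provider*/ModelNotFound errors subclass BaseInvokerError.
import asyncio

from gllm_inference.em_invoker import CohereEMInvoker
from gllm_inference.exceptions import BaseInvokerError
from gllm_inference.schema import Vector

async def embed_safely(text: str) -> Vector | None:
    em_invoker = CohereEMInvoker(model_name="embed-english-v4.0")
    try:
        return await em_invoker.invoke(text)
    except TimeoutError:
        print("invocation timed out")  # raised per the docstring above
    except BaseInvokerError as error:
        print(f"provider-side failure: {error}")
    return None

asyncio.run(embed_safely("This is a text"))
```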