gllm-inference-binary 0.5.9b1 (cp311-cp311-macosx_10_9_universal2.macosx_13_0_x86_64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gllm-inference-binary might be problematic.
- gllm_inference/__init__.pyi +0 -0
- gllm_inference/builder/__init__.pyi +6 -0
- gllm_inference/builder/build_em_invoker.pyi +137 -0
- gllm_inference/builder/build_lm_invoker.pyi +161 -0
- gllm_inference/builder/build_lm_request_processor.pyi +93 -0
- gllm_inference/builder/build_output_parser.pyi +29 -0
- gllm_inference/catalog/__init__.pyi +4 -0
- gllm_inference/catalog/catalog.pyi +121 -0
- gllm_inference/catalog/lm_request_processor_catalog.pyi +112 -0
- gllm_inference/catalog/prompt_builder_catalog.pyi +82 -0
- gllm_inference/constants.pyi +10 -0
- gllm_inference/em_invoker/__init__.pyi +10 -0
- gllm_inference/em_invoker/azure_openai_em_invoker.pyi +88 -0
- gllm_inference/em_invoker/bedrock_em_invoker.pyi +106 -0
- gllm_inference/em_invoker/em_invoker.pyi +90 -0
- gllm_inference/em_invoker/google_em_invoker.pyi +129 -0
- gllm_inference/em_invoker/langchain/__init__.pyi +3 -0
- gllm_inference/em_invoker/langchain/em_invoker_embeddings.pyi +84 -0
- gllm_inference/em_invoker/langchain_em_invoker.pyi +46 -0
- gllm_inference/em_invoker/openai_compatible_em_invoker.pyi +96 -0
- gllm_inference/em_invoker/openai_em_invoker.pyi +90 -0
- gllm_inference/em_invoker/schema/__init__.pyi +0 -0
- gllm_inference/em_invoker/schema/bedrock.pyi +22 -0
- gllm_inference/em_invoker/schema/google.pyi +9 -0
- gllm_inference/em_invoker/schema/langchain.pyi +5 -0
- gllm_inference/em_invoker/schema/openai.pyi +7 -0
- gllm_inference/em_invoker/schema/openai_compatible.pyi +7 -0
- gllm_inference/em_invoker/schema/twelvelabs.pyi +17 -0
- gllm_inference/em_invoker/schema/voyage.pyi +15 -0
- gllm_inference/em_invoker/twelevelabs_em_invoker.pyi +101 -0
- gllm_inference/em_invoker/voyage_em_invoker.pyi +104 -0
- gllm_inference/exceptions/__init__.pyi +4 -0
- gllm_inference/exceptions/error_parser.pyi +41 -0
- gllm_inference/exceptions/exceptions.pyi +132 -0
- gllm_inference/exceptions/provider_error_map.pyi +23 -0
- gllm_inference/lm_invoker/__init__.pyi +12 -0
- gllm_inference/lm_invoker/anthropic_lm_invoker.pyi +275 -0
- gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +252 -0
- gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +234 -0
- gllm_inference/lm_invoker/datasaur_lm_invoker.pyi +166 -0
- gllm_inference/lm_invoker/google_lm_invoker.pyi +317 -0
- gllm_inference/lm_invoker/langchain_lm_invoker.pyi +260 -0
- gllm_inference/lm_invoker/litellm_lm_invoker.pyi +248 -0
- gllm_inference/lm_invoker/lm_invoker.pyi +152 -0
- gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi +265 -0
- gllm_inference/lm_invoker/openai_lm_invoker.pyi +362 -0
- gllm_inference/lm_invoker/schema/__init__.pyi +0 -0
- gllm_inference/lm_invoker/schema/anthropic.pyi +50 -0
- gllm_inference/lm_invoker/schema/bedrock.pyi +53 -0
- gllm_inference/lm_invoker/schema/datasaur.pyi +12 -0
- gllm_inference/lm_invoker/schema/google.pyi +24 -0
- gllm_inference/lm_invoker/schema/langchain.pyi +23 -0
- gllm_inference/lm_invoker/schema/openai.pyi +91 -0
- gllm_inference/lm_invoker/schema/openai_compatible.pyi +60 -0
- gllm_inference/lm_invoker/schema/xai.pyi +31 -0
- gllm_inference/lm_invoker/xai_lm_invoker.pyi +305 -0
- gllm_inference/model/__init__.pyi +9 -0
- gllm_inference/model/em/__init__.pyi +0 -0
- gllm_inference/model/em/google_em.pyi +16 -0
- gllm_inference/model/em/openai_em.pyi +15 -0
- gllm_inference/model/em/twelvelabs_em.pyi +13 -0
- gllm_inference/model/em/voyage_em.pyi +20 -0
- gllm_inference/model/lm/__init__.pyi +0 -0
- gllm_inference/model/lm/anthropic_lm.pyi +20 -0
- gllm_inference/model/lm/google_lm.pyi +17 -0
- gllm_inference/model/lm/openai_lm.pyi +27 -0
- gllm_inference/output_parser/__init__.pyi +3 -0
- gllm_inference/output_parser/json_output_parser.pyi +60 -0
- gllm_inference/output_parser/output_parser.pyi +27 -0
- gllm_inference/prompt_builder/__init__.pyi +3 -0
- gllm_inference/prompt_builder/prompt_builder.pyi +56 -0
- gllm_inference/prompt_formatter/__init__.pyi +7 -0
- gllm_inference/prompt_formatter/agnostic_prompt_formatter.pyi +49 -0
- gllm_inference/prompt_formatter/huggingface_prompt_formatter.pyi +55 -0
- gllm_inference/prompt_formatter/llama_prompt_formatter.pyi +59 -0
- gllm_inference/prompt_formatter/mistral_prompt_formatter.pyi +53 -0
- gllm_inference/prompt_formatter/openai_prompt_formatter.pyi +35 -0
- gllm_inference/prompt_formatter/prompt_formatter.pyi +30 -0
- gllm_inference/request_processor/__init__.pyi +4 -0
- gllm_inference/request_processor/lm_request_processor.pyi +101 -0
- gllm_inference/request_processor/uses_lm_mixin.pyi +130 -0
- gllm_inference/schema/__init__.pyi +14 -0
- gllm_inference/schema/attachment.pyi +88 -0
- gllm_inference/schema/code_exec_result.pyi +14 -0
- gllm_inference/schema/config.pyi +15 -0
- gllm_inference/schema/enums.pyi +29 -0
- gllm_inference/schema/lm_output.pyi +36 -0
- gllm_inference/schema/message.pyi +52 -0
- gllm_inference/schema/model_id.pyi +147 -0
- gllm_inference/schema/reasoning.pyi +15 -0
- gllm_inference/schema/token_usage.pyi +75 -0
- gllm_inference/schema/tool_call.pyi +14 -0
- gllm_inference/schema/tool_result.pyi +11 -0
- gllm_inference/schema/type_alias.pyi +11 -0
- gllm_inference/utils/__init__.pyi +5 -0
- gllm_inference/utils/io_utils.pyi +26 -0
- gllm_inference/utils/langchain.pyi +30 -0
- gllm_inference/utils/validation.pyi +12 -0
- gllm_inference.build/.gitignore +1 -0
- gllm_inference.cpython-311-darwin.so +0 -0
- gllm_inference.pyi +123 -0
- gllm_inference_binary-0.5.9b1.dist-info/METADATA +71 -0
- gllm_inference_binary-0.5.9b1.dist-info/RECORD +105 -0
- gllm_inference_binary-0.5.9b1.dist-info/WHEEL +6 -0
- gllm_inference_binary-0.5.9b1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,82 @@
+from _typeshed import Incomplete
+from gllm_inference.catalog.catalog import BaseCatalog as BaseCatalog
+from gllm_inference.prompt_builder.prompt_builder import PromptBuilder as PromptBuilder
+
+PROMPT_BUILDER_REQUIRED_COLUMNS: Incomplete
+logger: Incomplete
+
+class PromptBuilderCatalog(BaseCatalog[PromptBuilder]):
+    '''Loads multiple prompt builders from various sources.
+
+    Attributes:
+        components (dict[str, PromptBuilder]): Dictionary of the loaded prompt builders.
+
+    Initialization:
+        # Example 1: Load from Google Sheets using client email and private key
+        ```python
+        catalog = PromptBuilderCatalog.from_gsheets(
+            sheet_id="...",
+            worksheet_id="...",
+            client_email="...",
+            private_key="...",
+        )
+        prompt_builder = catalog.name
+        ```
+
+        # Example 2: Load from Google Sheets using a credential file
+        ```python
+        catalog = PromptBuilderCatalog.from_gsheets(
+            sheet_id="...",
+            worksheet_id="...",
+            credential_file_path="...",
+        )
+        prompt_builder = catalog.name
+        ```
+
+        # Example 3: Load from CSV
+        ```python
+        catalog = PromptBuilderCatalog.from_csv(csv_path="...")
+        prompt_builder = catalog.name
+        ```
+
+        # Example 4: Load from records
+        ```python
+        records = [
+            {
+                "name": "answer_question",
+                "system": (
+                    "You are a helpful assistant.\\n"
+                    "Answer the following question based on the provided context.\\n"
+                    "```{context}```"
+                ),
+                "user": "{query}",
+                "key_defaults": \'{"context": "<default context>"}\',
+            },
+        ]
+        catalog = PromptBuilderCatalog.from_records(records=records)
+        prompt_builder = catalog.answer_question
+        ```
+
+    Template Example:
+        # Example 1: Google Sheets
+        For an example of how a Google Sheets file can be formatted to be loaded using PromptBuilderCatalog, see:
+        https://docs.google.com/spreadsheets/d/12IwSKv8hMhyWXSQnLx9LgCj0cxaR1f9gOmbEDGleurE/edit?usp=drive_link
+
+        # Example 2: CSV
+        For an example of how a CSV file can be formatted to be loaded using PromptBuilderCatalog, see:
+        https://drive.google.com/file/d/1KQgddMdbcZBZmroQFtjSl-TKLohq84Fz/view?usp=drive_link
+
+
+    Template explanation:
+        The required columns are:
+        1. name (str): The name of the prompt builder.
+        2. system (str): The system template of the prompt builder.
+        3. user (str): The user template of the prompt builder.
+        4. key_defaults (json_str): The default values for the prompt template keys.
+
+    Important Notes:
+        1. At least one of the `system` and `user` columns must be filled.
+        2. `key_defaults` is optional. If filled, it must be a dictionary containing the default values for the
+           prompt template keys. These default values will be applied when the corresponding keys are not provided
+           in the runtime input. If it is empty, the prompt template keys will not have default values.
+    '''
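For orientation, here is a minimal end-to-end sketch of the catalog above, using only the `from_records` constructor and the attribute access shown in its docstring; the import path assumes the catalog package re-exports the class, and all field values are placeholders:

```python
from gllm_inference.catalog import PromptBuilderCatalog  # assumed re-export

# Records mirror the required columns: name, system, user, and key_defaults
# (a JSON string, per the template explanation above).
records = [
    {
        "name": "summarize",
        "system": "Summarize the following context.\n```{context}```",
        "user": "{query}",
        "key_defaults": '{"context": "<default context>"}',
    },
]

catalog = PromptBuilderCatalog.from_records(records=records)
prompt_builder = catalog.summarize  # loaded builders are exposed as attributes by name
```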
@@ -0,0 +1,10 @@
+from _typeshed import Incomplete
+
+AZURE_OPENAI_URL_SUFFIX: str
+DOCUMENT_MIME_TYPES: Incomplete
+GOOGLE_SCOPES: Incomplete
+GRPC_ENABLE_RETRIES_KEY: str
+INVOKER_PROPAGATED_MAX_RETRIES: int
+INVOKER_DEFAULT_TIMEOUT: float
+HEX_REPR_LENGTH: int
+SECONDS_TO_MILLISECONDS: int
@@ -0,0 +1,10 @@
+from gllm_inference.em_invoker.azure_openai_em_invoker import AzureOpenAIEMInvoker as AzureOpenAIEMInvoker
+from gllm_inference.em_invoker.bedrock_em_invoker import BedrockEMInvoker as BedrockEMInvoker
+from gllm_inference.em_invoker.google_em_invoker import GoogleEMInvoker as GoogleEMInvoker
+from gllm_inference.em_invoker.langchain_em_invoker import LangChainEMInvoker as LangChainEMInvoker
+from gllm_inference.em_invoker.openai_compatible_em_invoker import OpenAICompatibleEMInvoker as OpenAICompatibleEMInvoker
+from gllm_inference.em_invoker.openai_em_invoker import OpenAIEMInvoker as OpenAIEMInvoker
+from gllm_inference.em_invoker.twelevelabs_em_invoker import TwelveLabsEMInvoker as TwelveLabsEMInvoker
+from gllm_inference.em_invoker.voyage_em_invoker import VoyageEMInvoker as VoyageEMInvoker
+
+__all__ = ['AzureOpenAIEMInvoker', 'BedrockEMInvoker', 'GoogleEMInvoker', 'LangChainEMInvoker', 'OpenAIEMInvoker', 'OpenAICompatibleEMInvoker', 'TwelveLabsEMInvoker', 'VoyageEMInvoker']
@@ -0,0 +1,88 @@
+from _typeshed import Incomplete
+from gllm_core.utils.retry import RetryConfig as RetryConfig
+from gllm_inference.constants import AZURE_OPENAI_URL_SUFFIX as AZURE_OPENAI_URL_SUFFIX, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
+from gllm_inference.em_invoker.openai_em_invoker import OpenAIEMInvoker as OpenAIEMInvoker
+from gllm_inference.em_invoker.schema.openai import Key as Key
+from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig
+from typing import Any
+
+class AzureOpenAIEMInvoker(OpenAIEMInvoker):
+    '''An embedding model invoker to interact with Azure OpenAI embedding models.
+
+    Attributes:
+        model_id (str): The model ID of the embedding model.
+        model_provider (str): The provider of the embedding model.
+        model_name (str): The name of the Azure OpenAI embedding model deployment.
+        client (AsyncAzureOpenAI): The client for the Azure OpenAI API.
+        default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
+        retry_config (RetryConfig): The retry configuration for the embedding model.
+        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
+
+    Input types:
+        The `AzureOpenAIEMInvoker` only supports text inputs.
+
+    Output format:
+        The `AzureOpenAIEMInvoker` can embed either:
+        1. A single content.
+           1. A single content is a single text.
+           2. The output will be a `Vector`, representing the embedding of the content.
+
+        # Example 1: Embedding a single text content.
+        ```python
+        text = "This is a text"
+        result = await em_invoker.invoke(text)
+        ```
+
+        The above example will return a `Vector` with a size of (embedding_size,).
+
+        2. A list of contents.
+           1. A list of contents is a list of texts.
+           2. The output will be a `list[Vector]`, where each element is a `Vector` representing the
+              embedding of each single content.
+
+        # Example 2: Embedding a list of contents.
+        ```python
+        text1 = "This is a text"
+        text2 = "This is another text"
+        text3 = "This is yet another text"
+        result = await em_invoker.invoke([text1, text2, text3])
+        ```
+
+        The above example will return a `list[Vector]` with a size of (3, embedding_size).
+
+    Retry and timeout:
+        The `AzureOpenAIEMInvoker` supports retry and timeout configuration.
+        By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
+        They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
+
+    Retry config examples:
+        ```python
+        retry_config = RetryConfig(max_retries=0, timeout=0.0)  # No retry, no timeout
+        retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
+        retry_config = RetryConfig(max_retries=5, timeout=0.0)  # 5 max retries, no timeout
+        retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
+        ```
+
+    Usage example:
+        ```python
+        em_invoker = AzureOpenAIEMInvoker(..., retry_config=retry_config)
+        ```
+    '''
+    client: Incomplete
+    def __init__(self, azure_endpoint: str, azure_deployment: str, api_key: str | None = None, api_version: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
+        """Initializes a new instance of the AzureOpenAIEMInvoker class.
+
+        Args:
+            azure_endpoint (str): The endpoint of the Azure OpenAI service.
+            azure_deployment (str): The deployment name of the Azure OpenAI service.
+            api_key (str | None, optional): The API key for authenticating with Azure OpenAI. Defaults to None, in
+                which case the `AZURE_OPENAI_API_KEY` environment variable will be used.
+            api_version (str | None, optional): Deprecated parameter to be removed in v0.6. Defaults to None.
+            model_kwargs (dict[str, Any] | None, optional): Additional model parameters. Defaults to None.
+            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
+                Defaults to None.
+            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
+                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
+                Defaults to None, in which case no truncation is applied.
+        """
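To make the pieces above concrete, a sketch that combines the constructor signature with the docstring's retry examples; the endpoint and deployment names are placeholders:

```python
import asyncio

from gllm_core.utils.retry import RetryConfig
from gllm_inference.em_invoker import AzureOpenAIEMInvoker

async def main() -> None:
    em_invoker = AzureOpenAIEMInvoker(
        azure_endpoint="https://<resource>.openai.azure.com",  # placeholder
        azure_deployment="<embedding-deployment>",  # placeholder
        # api_key omitted: falls back to the AZURE_OPENAI_API_KEY environment variable.
        retry_config=RetryConfig(max_retries=5, timeout=10.0),
    )
    vector = await em_invoker.invoke("This is a text")  # -> Vector
    vectors = await em_invoker.invoke(["This is a text", "This is another text"])  # -> list[Vector]

asyncio.run(main())
```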
@@ -0,0 +1,106 @@
+from _typeshed import Incomplete
+from enum import StrEnum
+from gllm_core.utils.retry import RetryConfig as RetryConfig
+from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
+from gllm_inference.em_invoker.schema.bedrock import InputType as InputType, Key as Key, OutputType as OutputType
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_http_status_to_base_invoker_error as convert_http_status_to_base_invoker_error
+from gllm_inference.exceptions.provider_error_map import BEDROCK_ERROR_MAPPING as BEDROCK_ERROR_MAPPING
+from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig, Vector as Vector
+from typing import Any
+
+class ModelType(StrEnum):
+    """Defines the type of the Bedrock embedding model."""
+    COHERE = 'cohere'
+    TITAN = 'titan'
+
+SUPPORTED_ATTACHMENTS: Incomplete
+
+class BedrockEMInvoker(BaseEMInvoker):
+    '''An embedding model invoker to interact with AWS Bedrock embedding models.
+
+    Attributes:
+        model_id (str): The model ID of the embedding model.
+        model_provider (str): The provider of the embedding model.
+        model_name (str): The name of the embedding model.
+        session (Session): The Bedrock client session.
+        client_kwargs (dict[str, Any]): The Bedrock client kwargs.
+        default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
+        retry_config (RetryConfig): The retry configuration for the embedding model.
+        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
+
+    Input types:
+        The `BedrockEMInvoker` only supports text inputs.
+
+    Output format:
+        The `BedrockEMInvoker` can embed either:
+        1. A single content.
+           1. A single content is a single text.
+           2. The output will be a `Vector`, representing the embedding of the content.
+
+        # Example 1: Embedding a single text content.
+        ```python
+        text = "This is a text"
+        result = await em_invoker.invoke(text)
+        ```
+
+        The above example will return a `Vector` with a size of (embedding_size,).
+
+        2. A list of contents.
+           1. A list of contents is a list of texts.
+           2. The output will be a `list[Vector]`, where each element is a `Vector` representing the
+              embedding of each single content.
+
+        # Example 2: Embedding a list of contents.
+        ```python
+        text1 = "This is a text"
+        text2 = "This is another text"
+        text3 = "This is yet another text"
+        result = await em_invoker.invoke([text1, text2, text3])
+        ```
+
+        The above example will return a `list[Vector]` with a size of (3, embedding_size).
+
+    Retry and timeout:
+        The `BedrockEMInvoker` supports retry and timeout configuration.
+        By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
+        They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
+
+    Retry config examples:
+        ```python
+        retry_config = RetryConfig(max_retries=0, timeout=0.0)  # No retry, no timeout
+        retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
+        retry_config = RetryConfig(max_retries=5, timeout=0.0)  # 5 max retries, no timeout
+        retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
+        ```
+
+    Usage example:
+        ```python
+        em_invoker = BedrockEMInvoker(..., retry_config=retry_config)
+        ```
+    '''
+    session: Incomplete
+    client_kwargs: Incomplete
+    def __init__(self, model_name: str, access_key_id: str | None = None, secret_access_key: str | None = None, region_name: str = 'us-east-1', model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
+        '''Initializes a new instance of the BedrockEMInvoker class.
+
+        Args:
+            model_name (str): The name of the Bedrock embedding model to be used.
+            access_key_id (str | None, optional): The AWS access key ID. Defaults to None, in which case
+                the `AWS_ACCESS_KEY_ID` environment variable will be used.
+            secret_access_key (str | None, optional): The AWS secret access key. Defaults to None, in which case
+                the `AWS_SECRET_ACCESS_KEY` environment variable will be used.
+            region_name (str, optional): The AWS region name. Defaults to "us-east-1".
+            model_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the Bedrock client.
+                Defaults to None.
+            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
+                Defaults to None.
+            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
+                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
+                Defaults to None, in which case no truncation is applied.
+
+        Raises:
+            ValueError: If the model name is not supported.
+            ValueError: If `access_key_id` or `secret_access_key` is neither provided nor set in the
+                `AWS_ACCESS_KEY_ID` or `AWS_SECRET_ACCESS_KEY` environment variables, respectively.
+        '''
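The Bedrock variant follows the same pattern; a sketch based on the `__init__` signature above, where the model name is illustrative (the `ModelType` enum suggests Titan and Cohere embedding families):

```python
import asyncio

from gllm_inference.em_invoker import BedrockEMInvoker

async def main() -> None:
    em_invoker = BedrockEMInvoker(
        model_name="amazon.titan-embed-text-v2:0",  # illustrative Titan model ID
        # access_key_id and secret_access_key omitted: fall back to the
        # AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.
        region_name="us-east-1",
    )
    vectors = await em_invoker.invoke(["This is a text", "This is another text"])

asyncio.run(main())
```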
@@ -0,0 +1,90 @@
+import abc
+from _typeshed import Incomplete
+from abc import ABC
+from gllm_core.utils.retry import RetryConfig
+from gllm_inference.constants import DOCUMENT_MIME_TYPES as DOCUMENT_MIME_TYPES, INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_to_base_invoker_error as convert_to_base_invoker_error
+from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EMContent as EMContent, ModelId as ModelId, TruncateSide as TruncateSide, TruncationConfig as TruncationConfig, Vector as Vector
+from typing import Any
+
+class BaseEMInvoker(ABC, metaclass=abc.ABCMeta):
+    """A base class for embedding model invokers used in Gen AI applications.
+
+    The `BaseEMInvoker` class provides a framework for invoking embedding models.
+
+    Attributes:
+        model_id (str): The model ID of the embedding model.
+        model_provider (str): The provider of the embedding model.
+        model_name (str): The name of the embedding model.
+        default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the
+            embedding model. Defaults to None, in which case an empty dictionary is used.
+        retry_config (RetryConfig): The retry configuration for the embedding model.
+            Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
+            Defaults to None, in which case no truncation is applied.
+    """
+    default_hyperparameters: Incomplete
+    retry_config: Incomplete
+    truncation_config: Incomplete
+    def __init__(self, model_id: ModelId, default_hyperparameters: dict[str, Any] | None = None, supported_attachments: set[str] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
+        """Initializes a new instance of the BaseEMInvoker class.
+
+        Args:
+            model_id (ModelId): The model ID of the embedding model.
+            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the
+                embedding model. Defaults to None, in which case an empty dictionary is used.
+            supported_attachments (set[str] | None, optional): A set of supported attachment types. Defaults to None,
+                in which case an empty set is used (indicating that no attachments are supported).
+            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
+                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
+                Defaults to None, in which case no truncation is applied.
+        """
+    @property
+    def model_id(self) -> str:
+        """The model ID of the embedding model.
+
+        Returns:
+            str: The model ID of the embedding model.
+        """
+    @property
+    def model_provider(self) -> str:
+        """The provider of the embedding model.
+
+        Returns:
+            str: The provider of the embedding model.
+        """
+    @property
+    def model_name(self) -> str:
+        """The name of the embedding model.
+
+        Returns:
+            str: The name of the embedding model.
+        """
+    async def invoke(self, content: EMContent | list[EMContent], hyperparameters: dict[str, Any] | None = None) -> Vector | list[Vector]:
+        """Invokes the embedding model with the provided content or list of contents.
+
+        This method invokes the embedding model with the provided content or list of contents.
+        It includes retry logic with exponential backoff for transient failures.
+
+        Args:
+            content (EMContent | list[EMContent]): The input or list of inputs to be embedded using the embedding model.
+            hyperparameters (dict[str, Any] | None, optional): A dictionary of hyperparameters for the embedding model.
+                Defaults to None, in which case the default hyperparameters are used.
+
+        Returns:
+            Vector | list[Vector]: The vector representations of the input contents:
+                1. If the input is an `EMContent`, the output is a `Vector`.
+                2. If the input is a `list[EMContent]`, the output is a `list[Vector]`.
+
+        Raises:
+            CancelledError: If the invocation is cancelled.
+            ModelNotFoundError: If the model is not found.
+            ProviderAuthError: If the model authentication fails.
+            ProviderInternalError: If an internal provider error occurs.
+            ProviderInvalidArgsError: If the model parameters are invalid.
+            ProviderOverloadedError: If the model is overloaded.
+            ProviderRateLimitError: If the model rate limit is exceeded.
+            TimeoutError: If the invocation times out.
+            ValueError: If the input content is invalid.
+        """
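Since `invoke` returns a `Vector` for a single input and a `list[Vector]` for a list input, code that accepts either shape has to branch on the input type; a small sketch of that normalization (assuming `Vector` behaves as a sequence of floats):

```python
from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
from gllm_inference.schema import EMContent, Vector

async def embed_all(
    em_invoker: BaseEMInvoker, contents: EMContent | list[EMContent]
) -> list[Vector]:
    """Normalize the overloaded return of `invoke` to a list of vectors."""
    result = await em_invoker.invoke(contents)
    if isinstance(contents, list):
        return result  # invoke already returned a list[Vector]
    return [result]  # wrap the single Vector
```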
@@ -0,0 +1,129 @@
+from _typeshed import Incomplete
+from gllm_core.utils.retry import RetryConfig as RetryConfig
+from gllm_inference.constants import GOOGLE_SCOPES as GOOGLE_SCOPES, SECONDS_TO_MILLISECONDS as SECONDS_TO_MILLISECONDS
+from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
+from gllm_inference.em_invoker.schema.google import Key as Key
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_http_status_to_base_invoker_error as convert_http_status_to_base_invoker_error
+from gllm_inference.exceptions.provider_error_map import GOOGLE_ERROR_MAPPING as GOOGLE_ERROR_MAPPING
+from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig, Vector as Vector
+from typing import Any
+
+SUPPORTED_ATTACHMENTS: Incomplete
+
+class GoogleEMInvoker(BaseEMInvoker):
+    '''An embedding model invoker to interact with Google embedding models.
+
+    Attributes:
+        model_id (str): The model ID of the embedding model.
+        model_provider (str): The provider of the embedding model.
+        model_name (str): The name of the embedding model.
+        client_params (dict[str, Any]): The Google client instance init parameters.
+        default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
+        retry_config (RetryConfig): The retry configuration for the embedding model.
+        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
+
+    Initialization:
+        The `GoogleEMInvoker` can use either Google Gen AI or Google Vertex AI.
+
+        Google Gen AI is recommended for quick prototyping and development.
+        It requires a Gemini API key for authentication.
+
+        Usage example:
+        ```python
+        em_invoker = GoogleEMInvoker(
+            model_name="text-embedding-004",
+            api_key="your_api_key"
+        )
+        ```
+
+        Google Vertex AI is recommended to build production-ready applications.
+        It requires a service account JSON file for authentication.
+
+        Usage example:
+        ```python
+        em_invoker = GoogleEMInvoker(
+            model_name="text-embedding-004",
+            credentials_path="path/to/service_account.json"
+        )
+        ```
+
+        If neither `api_key` nor `credentials_path` is provided, Google Gen AI will be used by default.
+        The `GOOGLE_API_KEY` environment variable will be used for authentication.
+
+    Input types:
+        The `GoogleEMInvoker` only supports text inputs.
+
+    Output format:
+        The `GoogleEMInvoker` can embed either:
+        1. A single content.
+           1. A single content is a single text.
+           2. The output will be a `Vector`, representing the embedding of the content.
+
+        # Example 1: Embedding a single text content.
+        ```python
+        text = "This is a text"
+        result = await em_invoker.invoke(text)
+        ```
+
+        The above example will return a `Vector` with a size of (embedding_size,).
+
+        2. A list of contents.
+           1. A list of contents is a list of texts.
+           2. The output will be a `list[Vector]`, where each element is a `Vector` representing the
+              embedding of each single content.
+
+        # Example 2: Embedding a list of contents.
+        ```python
+        text1 = "This is a text"
+        text2 = "This is another text"
+        text3 = "This is yet another text"
+        result = await em_invoker.invoke([text1, text2, text3])
+        ```
+
+        The above example will return a `list[Vector]` with a size of (3, embedding_size).
+
+    Retry and timeout:
+        The `GoogleEMInvoker` supports retry and timeout configuration.
+        By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
+        They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
+
+    Retry config examples:
+        ```python
+        retry_config = RetryConfig(max_retries=0, timeout=0.0)  # No retry, no timeout
+        retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
+        retry_config = RetryConfig(max_retries=5, timeout=0.0)  # 5 max retries, no timeout
+        retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
+        ```
+
+    Usage example:
+        ```python
+        em_invoker = GoogleEMInvoker(..., retry_config=retry_config)
+        ```
+    '''
+    client_params: Incomplete
+    def __init__(self, model_name: str, api_key: str | None = None, credentials_path: str | None = None, project_id: str | None = None, location: str = 'us-central1', model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
+        '''Initializes a new instance of the GoogleEMInvoker class.
+
+        Args:
+            model_name (str): The name of the model to use.
+            api_key (str | None, optional): Required for Google Gen AI authentication. Cannot be used together
+                with `credentials_path`. Defaults to None.
+            credentials_path (str | None, optional): Required for Google Vertex AI authentication. Path to the service
+                account credentials JSON file. Cannot be used together with `api_key`. Defaults to None.
+            project_id (str | None, optional): The Google Cloud project ID for Vertex AI. Only used when authenticating
+                with `credentials_path`. Defaults to None, in which case it will be loaded from the credentials file.
+            location (str, optional): The location of the Google Cloud project for Vertex AI. Only used when
+                authenticating with `credentials_path`. Defaults to "us-central1".
+            model_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the Google client.
+                Defaults to None.
+            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
+                Defaults to None.
+            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
+                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
+                Defaults to None, in which case no truncation is applied.
+
+        Note:
+            If neither `api_key` nor `credentials_path` is provided, Google Gen AI will be used by default.
+            The `GOOGLE_API_KEY` environment variable will be used for authentication.
+        '''
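The per-call `hyperparameters` argument of `invoke` overrides the `default_hyperparameters` set at construction; a sketch using the Gen AI path from the docstring (the hyperparameter key is an assumption, since the accepted keys are provider-specific and not listed in this stub):

```python
import asyncio

from gllm_inference.em_invoker import GoogleEMInvoker

async def main() -> None:
    em_invoker = GoogleEMInvoker(
        model_name="text-embedding-004",
        api_key="your_api_key",  # placeholder; omit to fall back to GOOGLE_API_KEY
    )
    # Hypothetical per-call override; valid keys depend on the provider.
    vector = await em_invoker.invoke(
        "This is a text", hyperparameters={"output_dimensionality": 256}
    )

asyncio.run(main())
```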
@@ -0,0 +1,84 @@
+from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
+from gllm_inference.schema import Attachment as Attachment, Vector as Vector
+from gllm_inference.utils.io_utils import DEFAULT_BASE64_ALLOWED_MIMETYPES as DEFAULT_BASE64_ALLOWED_MIMETYPES, base64_to_bytes as base64_to_bytes
+from langchain_core.embeddings import Embeddings
+from pydantic import BaseModel
+from typing import Any
+
+class EMInvokerEmbeddings(BaseModel, Embeddings, arbitrary_types_allowed=True):
+    '''An adapter class that enables an `EMInvoker` to be used as a LangChain `Embeddings`.
+
+    Attributes:
+        em_invoker (BaseEMInvoker): The `EMInvoker` instance to interact with.
+        use_base64 (bool):
+            Whether to apply strict base64 encoding to the input.
+            1. If `True`, only inputs with specific MIME types (e.g. images,
+               audio, and video) will be converted into base64 strings before being sent.
+            2. If `False`, each input is treated as a raw string.
+
+            This ensures "strict" handling: base64 encoding is not applied
+            universally, but only when required for those MIME types.
+
+    Usage example:
+        ```python
+        from gllm_inference.em_invoker.langchain import EMInvokerEmbeddings
+        from gllm_inference.em_invoker import OpenAIEMInvoker
+
+        em_invoker = OpenAIEMInvoker(...)
+        embeddings = EMInvokerEmbeddings(em_invoker=em_invoker)
+        ```
+    '''
+    em_invoker: BaseEMInvoker
+    use_base64: bool
+    async def aembed_documents(self, texts: list[str], **kwargs: Any) -> list[Vector]:
+        """Asynchronously embed documents using the `EMInvoker`.
+
+        Args:
+            texts (list[str]): The list of texts to embed.
+            **kwargs (Any): Additional keyword arguments to pass to the EMInvoker's `invoke` method.
+
+        Returns:
+            list[Vector]: List of embeddings, one for each text.
+
+        Raises:
+            ValueError: If `texts` is not a list of strings.
+        """
+    async def aembed_query(self, text: str, **kwargs: Any) -> Vector:
+        """Asynchronously embed a query using the `EMInvoker`.
+
+        Args:
+            text (str): The text to embed.
+            **kwargs (Any): Additional keyword arguments to pass to the EMInvoker's `invoke` method.
+
+        Returns:
+            Vector: Embeddings for the text.
+
+        Raises:
+            ValueError: If `text` is not a string.
+        """
+    def embed_documents(self, texts: list[str], **kwargs: Any) -> list[Vector]:
+        """Embed documents using the `EMInvoker`.
+
+        Args:
+            texts (list[str]): The list of texts to embed.
+            **kwargs (Any): Additional keyword arguments to pass to the EMInvoker's `invoke` method.
+
+        Returns:
+            list[Vector]: List of embeddings, one for each text.
+
+        Raises:
+            ValueError: If `texts` is not a list of strings.
+        """
+    def embed_query(self, text: str, **kwargs: Any) -> Vector:
+        """Embed a query using the `EMInvoker`.
+
+        Args:
+            text (str): The text to embed.
+            **kwargs (Any): Additional keyword arguments to pass to the EMInvoker's `invoke` method.
+
+        Returns:
+            Vector: Embeddings for the text.
+
+        Raises:
+            ValueError: If `text` is not a string.
+        """
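The adapter above lets an `EMInvoker` drive anything that expects a LangChain `Embeddings`, for example a vector store; a sketch assuming a recent `langchain-core` that ships `InMemoryVectorStore` (the `OpenAIEMInvoker` constructor arguments are elided, as its stub is not shown here):

```python
from langchain_core.vectorstores import InMemoryVectorStore

from gllm_inference.em_invoker import OpenAIEMInvoker
from gllm_inference.em_invoker.langchain import EMInvokerEmbeddings

em_invoker = OpenAIEMInvoker(...)  # constructor arguments elided; see its stub
embeddings = EMInvokerEmbeddings(em_invoker=em_invoker, use_base64=False)

# The adapter satisfies the LangChain Embeddings interface, so it plugs
# straight into a vector store.
store = InMemoryVectorStore(embedding=embeddings)
store.add_texts(["This is a text", "This is another text"])
docs = store.similarity_search("a text", k=1)
```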
@@ -0,0 +1,46 @@
+from _typeshed import Incomplete
+from gllm_core.utils.retry import RetryConfig
+from gllm_inference.constants import INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
+from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
+from gllm_inference.em_invoker.schema.langchain import Key as Key
+from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, InvokerRuntimeError as InvokerRuntimeError, build_debug_info as build_debug_info
+from gllm_inference.exceptions.provider_error_map import ALL_PROVIDER_ERROR_MAPPINGS as ALL_PROVIDER_ERROR_MAPPINGS, LANGCHAIN_ERROR_CODE_MAPPING as LANGCHAIN_ERROR_CODE_MAPPING
+from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig, Vector as Vector
+from gllm_inference.utils import load_langchain_model as load_langchain_model, parse_model_data as parse_model_data
+from langchain_core.embeddings import Embeddings as Embeddings
+from typing import Any
+
+SUPPORTED_ATTACHMENTS: Incomplete
+
+class LangChainEMInvoker(BaseEMInvoker):
+    """An embedding model invoker to interact with LangChain's Embeddings.
+
+    Attributes:
+        model_id (str): The model ID of the embedding model.
+        model_provider (str): The provider of the embedding model.
+        model_name (str): The name of the embedding model.
+        em (Embeddings): The instance to interact with an embedding model defined using LangChain's Embeddings.
+        retry_config (RetryConfig): The retry configuration for the embedding model.
+        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
+    """
+    model: Incomplete
+    def __init__(self, model: Embeddings | None = None, model_class_path: str | None = None, model_name: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
+        '''Initializes a new instance of the LangChainEMInvoker class.
+
+        Args:
+            model (Embeddings | None, optional): The LangChain `Embeddings` instance. If provided, will take
+                precedence over the `model_class_path` parameter. Defaults to None.
+            model_class_path (str | None, optional): The LangChain `Embeddings` class path. Must be formatted as
+                "<package>.<class>" (e.g. "langchain_openai.OpenAIEmbeddings"). Ignored if `model` is provided.
+                Defaults to None.
+            model_name (str | None, optional): The model name. Only used if `model_class_path` is provided.
+                Defaults to None.
+            model_kwargs (dict[str, Any] | None, optional): The additional keyword arguments. Only used if
+                `model_class_path` is provided. Defaults to None.
+            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
+                Defaults to None.
+            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
+                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
+            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
+                Defaults to None, in which case no truncation is applied.
+        '''
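To close, a sketch of the two construction paths described in the `__init__` docstring above; the embedding model name is a placeholder:

```python
from langchain_openai import OpenAIEmbeddings

from gllm_inference.em_invoker import LangChainEMInvoker

# Path 1: wrap an existing LangChain Embeddings instance.
em_invoker = LangChainEMInvoker(model=OpenAIEmbeddings(model="text-embedding-3-small"))

# Path 2: lazy construction from a class path, as documented above;
# model_name and model_kwargs are only used on this path.
em_invoker = LangChainEMInvoker(
    model_class_path="langchain_openai.OpenAIEmbeddings",
    model_name="text-embedding-3-small",
)
```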