gllm-inference-binary 0.5.55__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. gllm_inference/__init__.pyi +0 -0
  2. gllm_inference/builder/__init__.pyi +6 -0
  3. gllm_inference/builder/_build_invoker.pyi +28 -0
  4. gllm_inference/builder/build_em_invoker.pyi +130 -0
  5. gllm_inference/builder/build_lm_invoker.pyi +213 -0
  6. gllm_inference/builder/build_lm_request_processor.pyi +88 -0
  7. gllm_inference/builder/build_output_parser.pyi +29 -0
  8. gllm_inference/catalog/__init__.pyi +4 -0
  9. gllm_inference/catalog/catalog.pyi +121 -0
  10. gllm_inference/catalog/lm_request_processor_catalog.pyi +112 -0
  11. gllm_inference/catalog/prompt_builder_catalog.pyi +82 -0
  12. gllm_inference/constants.pyi +12 -0
  13. gllm_inference/em_invoker/__init__.pyi +12 -0
  14. gllm_inference/em_invoker/azure_openai_em_invoker.pyi +88 -0
  15. gllm_inference/em_invoker/bedrock_em_invoker.pyi +118 -0
  16. gllm_inference/em_invoker/cohere_em_invoker.pyi +128 -0
  17. gllm_inference/em_invoker/em_invoker.pyi +90 -0
  18. gllm_inference/em_invoker/google_em_invoker.pyi +129 -0
  19. gllm_inference/em_invoker/jina_em_invoker.pyi +103 -0
  20. gllm_inference/em_invoker/langchain/__init__.pyi +3 -0
  21. gllm_inference/em_invoker/langchain/em_invoker_embeddings.pyi +84 -0
  22. gllm_inference/em_invoker/langchain_em_invoker.pyi +46 -0
  23. gllm_inference/em_invoker/openai_compatible_em_invoker.pyi +41 -0
  24. gllm_inference/em_invoker/openai_em_invoker.pyi +118 -0
  25. gllm_inference/em_invoker/schema/__init__.pyi +0 -0
  26. gllm_inference/em_invoker/schema/bedrock.pyi +29 -0
  27. gllm_inference/em_invoker/schema/cohere.pyi +20 -0
  28. gllm_inference/em_invoker/schema/google.pyi +9 -0
  29. gllm_inference/em_invoker/schema/jina.pyi +29 -0
  30. gllm_inference/em_invoker/schema/langchain.pyi +5 -0
  31. gllm_inference/em_invoker/schema/openai.pyi +7 -0
  32. gllm_inference/em_invoker/schema/openai_compatible.pyi +7 -0
  33. gllm_inference/em_invoker/schema/twelvelabs.pyi +17 -0
  34. gllm_inference/em_invoker/schema/voyage.pyi +15 -0
  35. gllm_inference/em_invoker/twelevelabs_em_invoker.pyi +101 -0
  36. gllm_inference/em_invoker/voyage_em_invoker.pyi +104 -0
  37. gllm_inference/exceptions/__init__.pyi +4 -0
  38. gllm_inference/exceptions/error_parser.pyi +41 -0
  39. gllm_inference/exceptions/exceptions.pyi +132 -0
  40. gllm_inference/exceptions/provider_error_map.pyi +24 -0
  41. gllm_inference/lm_invoker/__init__.pyi +14 -0
  42. gllm_inference/lm_invoker/anthropic_lm_invoker.pyi +318 -0
  43. gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +237 -0
  44. gllm_inference/lm_invoker/batch/__init__.pyi +3 -0
  45. gllm_inference/lm_invoker/batch/batch_operations.pyi +127 -0
  46. gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +212 -0
  47. gllm_inference/lm_invoker/datasaur_lm_invoker.pyi +157 -0
  48. gllm_inference/lm_invoker/google_lm_invoker.pyi +327 -0
  49. gllm_inference/lm_invoker/langchain_lm_invoker.pyi +239 -0
  50. gllm_inference/lm_invoker/litellm_lm_invoker.pyi +224 -0
  51. gllm_inference/lm_invoker/lm_invoker.pyi +165 -0
  52. gllm_inference/lm_invoker/openai_chat_completions_lm_invoker.pyi +253 -0
  53. gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi +52 -0
  54. gllm_inference/lm_invoker/openai_lm_invoker.pyi +404 -0
  55. gllm_inference/lm_invoker/portkey_lm_invoker.pyi +296 -0
  56. gllm_inference/lm_invoker/schema/__init__.pyi +0 -0
  57. gllm_inference/lm_invoker/schema/anthropic.pyi +56 -0
  58. gllm_inference/lm_invoker/schema/bedrock.pyi +53 -0
  59. gllm_inference/lm_invoker/schema/datasaur.pyi +14 -0
  60. gllm_inference/lm_invoker/schema/google.pyi +24 -0
  61. gllm_inference/lm_invoker/schema/langchain.pyi +23 -0
  62. gllm_inference/lm_invoker/schema/openai.pyi +106 -0
  63. gllm_inference/lm_invoker/schema/openai_chat_completions.pyi +62 -0
  64. gllm_inference/lm_invoker/schema/portkey.pyi +31 -0
  65. gllm_inference/lm_invoker/schema/xai.pyi +31 -0
  66. gllm_inference/lm_invoker/xai_lm_invoker.pyi +253 -0
  67. gllm_inference/model/__init__.pyi +12 -0
  68. gllm_inference/model/em/__init__.pyi +0 -0
  69. gllm_inference/model/em/cohere_em.pyi +17 -0
  70. gllm_inference/model/em/google_em.pyi +16 -0
  71. gllm_inference/model/em/jina_em.pyi +22 -0
  72. gllm_inference/model/em/openai_em.pyi +15 -0
  73. gllm_inference/model/em/twelvelabs_em.pyi +13 -0
  74. gllm_inference/model/em/voyage_em.pyi +20 -0
  75. gllm_inference/model/lm/__init__.pyi +0 -0
  76. gllm_inference/model/lm/anthropic_lm.pyi +22 -0
  77. gllm_inference/model/lm/google_lm.pyi +18 -0
  78. gllm_inference/model/lm/openai_lm.pyi +27 -0
  79. gllm_inference/model/lm/xai_lm.pyi +19 -0
  80. gllm_inference/output_parser/__init__.pyi +3 -0
  81. gllm_inference/output_parser/json_output_parser.pyi +60 -0
  82. gllm_inference/output_parser/output_parser.pyi +27 -0
  83. gllm_inference/prompt_builder/__init__.pyi +3 -0
  84. gllm_inference/prompt_builder/format_strategy/__init__.pyi +4 -0
  85. gllm_inference/prompt_builder/format_strategy/format_strategy.pyi +55 -0
  86. gllm_inference/prompt_builder/format_strategy/jinja_format_strategy.pyi +45 -0
  87. gllm_inference/prompt_builder/format_strategy/string_format_strategy.pyi +20 -0
  88. gllm_inference/prompt_builder/prompt_builder.pyi +69 -0
  89. gllm_inference/prompt_formatter/__init__.pyi +7 -0
  90. gllm_inference/prompt_formatter/agnostic_prompt_formatter.pyi +49 -0
  91. gllm_inference/prompt_formatter/huggingface_prompt_formatter.pyi +55 -0
  92. gllm_inference/prompt_formatter/llama_prompt_formatter.pyi +59 -0
  93. gllm_inference/prompt_formatter/mistral_prompt_formatter.pyi +53 -0
  94. gllm_inference/prompt_formatter/openai_prompt_formatter.pyi +35 -0
  95. gllm_inference/prompt_formatter/prompt_formatter.pyi +30 -0
  96. gllm_inference/realtime_chat/__init__.pyi +3 -0
  97. gllm_inference/realtime_chat/google_realtime_chat.pyi +205 -0
  98. gllm_inference/realtime_chat/input_streamer/__init__.pyi +4 -0
  99. gllm_inference/realtime_chat/input_streamer/input_streamer.pyi +36 -0
  100. gllm_inference/realtime_chat/input_streamer/keyboard_input_streamer.pyi +27 -0
  101. gllm_inference/realtime_chat/input_streamer/linux_mic_input_streamer.pyi +36 -0
  102. gllm_inference/realtime_chat/output_streamer/__init__.pyi +4 -0
  103. gllm_inference/realtime_chat/output_streamer/console_output_streamer.pyi +21 -0
  104. gllm_inference/realtime_chat/output_streamer/linux_speaker_output_streamer.pyi +42 -0
  105. gllm_inference/realtime_chat/output_streamer/output_streamer.pyi +33 -0
  106. gllm_inference/realtime_chat/realtime_chat.pyi +28 -0
  107. gllm_inference/request_processor/__init__.pyi +4 -0
  108. gllm_inference/request_processor/lm_request_processor.pyi +101 -0
  109. gllm_inference/request_processor/uses_lm_mixin.pyi +130 -0
  110. gllm_inference/schema/__init__.pyi +18 -0
  111. gllm_inference/schema/activity.pyi +64 -0
  112. gllm_inference/schema/attachment.pyi +88 -0
  113. gllm_inference/schema/code_exec_result.pyi +14 -0
  114. gllm_inference/schema/config.pyi +15 -0
  115. gllm_inference/schema/enums.pyi +80 -0
  116. gllm_inference/schema/events.pyi +105 -0
  117. gllm_inference/schema/lm_input.pyi +4 -0
  118. gllm_inference/schema/lm_output.pyi +188 -0
  119. gllm_inference/schema/mcp.pyi +31 -0
  120. gllm_inference/schema/message.pyi +52 -0
  121. gllm_inference/schema/model_id.pyi +176 -0
  122. gllm_inference/schema/reasoning.pyi +15 -0
  123. gllm_inference/schema/token_usage.pyi +75 -0
  124. gllm_inference/schema/tool_call.pyi +14 -0
  125. gllm_inference/schema/tool_result.pyi +11 -0
  126. gllm_inference/schema/type_alias.pyi +11 -0
  127. gllm_inference/utils/__init__.pyi +5 -0
  128. gllm_inference/utils/io_utils.pyi +26 -0
  129. gllm_inference/utils/langchain.pyi +30 -0
  130. gllm_inference/utils/validation.pyi +12 -0
  131. gllm_inference.build/.gitignore +1 -0
  132. gllm_inference.cpython-312-darwin.so +0 -0
  133. gllm_inference.pyi +153 -0
  134. gllm_inference_binary-0.5.55.dist-info/METADATA +138 -0
  135. gllm_inference_binary-0.5.55.dist-info/RECORD +137 -0
  136. gllm_inference_binary-0.5.55.dist-info/WHEEL +5 -0
  137. gllm_inference_binary-0.5.55.dist-info/top_level.txt +1 -0
@@ -0,0 +1,129 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_core.utils.retry import RetryConfig as RetryConfig
3
+ from gllm_inference.constants import GOOGLE_SCOPES as GOOGLE_SCOPES, SECONDS_TO_MILLISECONDS as SECONDS_TO_MILLISECONDS
4
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
5
+ from gllm_inference.em_invoker.schema.google import Key as Key
6
+ from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, convert_http_status_to_base_invoker_error as convert_http_status_to_base_invoker_error
7
+ from gllm_inference.exceptions.provider_error_map import GOOGLE_ERROR_MAPPING as GOOGLE_ERROR_MAPPING
8
+ from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig, Vector as Vector
9
+ from typing import Any
10
+
11
+ SUPPORTED_ATTACHMENTS: Incomplete
12
+
13
+ class GoogleEMInvoker(BaseEMInvoker):
14
+ '''An embedding model invoker to interact with Google embedding models.
15
+
16
+ Attributes:
17
+ model_id (str): The model ID of the embedding model.
18
+ model_provider (str): The provider of the embedding model.
19
+ model_name (str): The name of the embedding model.
20
+ client_params (dict[str, Any]): The Google client instance init parameters.
21
+ default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
22
+ retry_config (RetryConfig): The retry configuration for the embedding model.
23
+ truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
24
+
25
+ Initialization:
26
+ The `GoogleEMInvoker` can use either Google Gen AI or Google Vertex AI.
27
+
28
+ Google Gen AI is recommended for quick prototyping and development.
29
+ It requires a Gemini API key for authentication.
30
+
31
+ Usage example:
32
+ ```python
33
+ em_invoker = GoogleEMInvoker(
34
+ model_name="text-embedding-004",
35
+ api_key="your_api_key"
36
+ )
37
+ ```
38
+
39
+ Google Vertex AI is recommended to build production-ready applications.
40
+ It requires a service account JSON file for authentication.
41
+
42
+ Usage example:
43
+ ```python
44
+ em_invoker = GoogleEMInvoker(
45
+ model_name="text-embedding-004",
46
+ credentials_path="path/to/service_account.json"
47
+ )
48
+ ```
49
+
50
+ If neither `api_key` nor `credentials_path` is provided, Google Gen AI will be used by default.
51
+ The `GOOGLE_API_KEY` environment variable will be used for authentication.
52
+
53
+ Input types:
54
+ The `GoogleEMInvoker` only supports text inputs.
55
+
56
+ Output format:
57
+ The `GoogleEMInvoker` can embed either:
58
+ 1. A single content.
59
+ 1. A single content is a single text.
60
+ 2. The output will be a `Vector`, representing the embedding of the content.
61
+
62
+ # Example 1: Embedding a text content.
63
+ ```python
64
+ text = "This is a text"
65
+ result = await em_invoker.invoke(text)
66
+ ```
67
+
68
+ The above examples will return a `Vector` with a size of (embedding_size,).
69
+
70
+ 2. A list of contents.
71
+ 1. A list of contents is a list of texts.
72
+ 2. The output will be a `list[Vector]`, where each element is a `Vector` representing the
73
+ embedding of each single content.
74
+
75
+ # Example: Embedding a list of contents.
76
+ ```python
77
+ text1 = "This is a text"
78
+ text2 = "This is another text"
79
+ text3 = "This is yet another text"
80
+ result = await em_invoker.invoke([text1, text2, text3])
81
+ ```
82
+
83
+ The above examples will return a `list[Vector]` with a size of (3, embedding_size).
84
+
85
+ Retry and timeout:
86
+ The `GoogleEMInvoker` supports retry and timeout configuration.
87
+ By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
88
+ They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
89
+
90
+ Retry config examples:
91
+ ```python
92
+ retry_config = RetryConfig(max_retries=0, timeout=None) # No retry, no timeout
93
+ retry_config = RetryConfig(max_retries=0, timeout=10.0) # No retry, 10.0 seconds timeout
94
+ retry_config = RetryConfig(max_retries=5, timeout=None) # 5 max retries, no timeout
95
+ retry_config = RetryConfig(max_retries=5, timeout=10.0) # 5 max retries, 10.0 seconds timeout
96
+ ```
97
+
98
+ Usage example:
99
+ ```python
100
+ em_invoker = GoogleEMInvoker(..., retry_config=retry_config)
101
+ ```
102
+ '''
103
+ client_params: Incomplete
104
+ def __init__(self, model_name: str, api_key: str | None = None, credentials_path: str | None = None, project_id: str | None = None, location: str = 'us-central1', model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
105
+ '''Initializes a new instance of the GoogleEMInvoker class.
106
+
107
+ Args:
108
+ model_name (str): The name of the model to use.
109
+ api_key (str | None, optional): Required for Google Gen AI authentication. Cannot be used together
110
+ with `credentials_path`. Defaults to None.
111
+ credentials_path (str | None, optional): Required for Google Vertex AI authentication. Path to the service
112
+ account credentials JSON file. Cannot be used together with `api_key`. Defaults to None.
113
+ project_id (str | None, optional): The Google Cloud project ID for Vertex AI. Only used when authenticating
114
+ with `credentials_path`. Defaults to None, in which case it will be loaded from the credentials file.
115
+ location (str, optional): The location of the Google Cloud project for Vertex AI. Only used when
116
+ authenticating with `credentials_path`. Defaults to "us-central1".
117
+ model_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the Google client.
118
+ Defaults to None.
119
+ default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
120
+ Defaults to None.
121
+ retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
122
+ Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
123
+ truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
124
+ Defaults to None, in which case no truncation is applied.
125
+
126
+ Note:
127
+ If neither `api_key` nor `credentials_path` is provided, Google Gen AI will be used by default.
128
+ The `GOOGLE_API_KEY` environment variable will be used for authentication.
129
+ '''
@@ -0,0 +1,103 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_core.utils.retry import RetryConfig as RetryConfig
3
+ from gllm_inference.constants import EMBEDDING_ENDPOINT as EMBEDDING_ENDPOINT, JINA_DEFAULT_URL as JINA_DEFAULT_URL
4
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
5
+ from gllm_inference.em_invoker.schema.jina import InputType as InputType, Key as Key
6
+ from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, ProviderInternalError as ProviderInternalError
7
+ from gllm_inference.exceptions.error_parser import convert_http_status_to_base_invoker_error as convert_http_status_to_base_invoker_error
8
+ from gllm_inference.schema import Attachment as Attachment, AttachmentType as AttachmentType, EMContent as EMContent, ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig, Vector as Vector
9
+ from typing import Any
10
+
11
+ SUPPORTED_ATTACHMENTS: Incomplete
12
+ MULTIMODAL_MODELS: Incomplete
13
+
14
+ class JinaEMInvoker(BaseEMInvoker):
15
+ '''An embedding model invoker to interact with Jina AI embedding models.
16
+
17
+ Attributes:
18
+ model_id (str): The model ID of the embedding model.
19
+ model_provider (str): The provider of the embedding model.
20
+ model_name (str): The name of the embedding model.
21
+ client (AsyncClient): The client for the Jina AI API.
22
+ default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
23
+ retry_config (RetryConfig): The retry configuration for the embedding model.
24
+ truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
25
+
26
+ Input types:
27
+ The `JinaEMInvoker` supports the following input types: text and image.
28
+ Non-text inputs must be passed as a `Attachment` object.
29
+
30
+ Output format:
31
+ The `JinaEMInvoker` can embed either:
32
+ 1. A single content.
33
+ 1. A single content is either a text or an image.
34
+ 2. The output will be a `Vector`, representing the embedding of the content.
35
+
36
+ # Example 1: Embedding a text content.
37
+ ```python
38
+ text = "What animal is in this image?"
39
+ result = await em_invoker.invoke(text)
40
+ ```
41
+
42
+ # Example 2: Embedding an image content.
43
+ ```python
44
+ image = Attachment.from_path("path/to/local/image.png")
45
+ result = await em_invoker.invoke(image)
46
+ ```
47
+
48
+ The above examples will return a `Vector` with a size of (embedding_size,).
49
+
50
+ 2. A list of contents.
51
+ 1. A list of contents is a list that consists of any of the above single contents.
52
+ 2. The output will be a `list[Vector]`, where each element is a `Vector` representing the
53
+ embedding of each single content.
54
+
55
+ # Example: Embedding a list of contents.
56
+ ```python
57
+ text = "What animal is in this image?"
58
+ image = Attachment.from_path("path/to/local/image.png")
59
+ result = await em_invoker.invoke([text, image])
60
+ ```
61
+
62
+ The above examples will return a `list[Vector]` with a size of (2, embedding_size).
63
+
64
+ Retry and timeout:
65
+ The `JinaEMInvoker` supports retry and timeout configuration.
66
+ By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
67
+ They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.
68
+
69
+ Retry config examples:
70
+ ```python
71
+ retry_config = RetryConfig(max_retries=0, timeout=None) # No retry, no timeout
72
+ retry_config = RetryConfig(max_retries=0, timeout=10.0) # No retry, 10.0 seconds timeout
73
+ retry_config = RetryConfig(max_retries=5, timeout=None) # 5 max retries, no timeout
74
+ retry_config = RetryConfig(max_retries=5, timeout=10.0) # 5 max retries, 10.0 seconds timeout
75
+ ```
76
+
77
+ Usage example:
78
+ ```python
79
+ em_invoker = JinaEMInvoker(..., retry_config=retry_config)
80
+ ```
81
+ '''
82
+ client: Incomplete
83
+ model_kwargs: Incomplete
84
+ def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
85
+ '''Initializes a new instance of the JinaEMInvoker class.
86
+
87
+ Args:
88
+ model_name (str): The name of the Jina embedding model to be used.
89
+ api_key (str | None, optional): The API key for authenticating with Jina AI.
90
+ Defaults to None, in which case the `JINA_API_KEY` environment variable will be used.
91
+ base_url (str, optional): The base URL for the Jina AI API. Defaults to "https://api.jina.ai/v1".
92
+ model_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the HTTP client.
93
+ Defaults to None.
94
+ default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
95
+ Defaults to None.
96
+ retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
97
+ Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
98
+ truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
99
+ Defaults to None, in which case no truncation is applied.
100
+
101
+ Raises:
102
+ ValueError: If neither `api_key` nor `JINA_API_KEY` environment variable is provided.
103
+ '''
@@ -0,0 +1,3 @@
1
+ from gllm_inference.em_invoker.langchain.em_invoker_embeddings import EMInvokerEmbeddings as EMInvokerEmbeddings
2
+
3
+ __all__ = ['EMInvokerEmbeddings']
@@ -0,0 +1,84 @@
1
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
2
+ from gllm_inference.schema import Attachment as Attachment, Vector as Vector
3
+ from gllm_inference.utils.io_utils import DEFAULT_BASE64_ALLOWED_MIMETYPES as DEFAULT_BASE64_ALLOWED_MIMETYPES, base64_to_bytes as base64_to_bytes
4
+ from langchain_core.embeddings import Embeddings
5
+ from pydantic import BaseModel
6
+ from typing import Any
7
+
8
+ class EMInvokerEmbeddings(BaseModel, Embeddings, arbitrary_types_allowed=True):
9
+ '''An adapter class that enables an `EMInvoker` to be used as a LangChain `Embeddings`.
10
+
11
+ Attributes:
12
+ em_invoker (BaseEMInvoker): The `EMInvoker` instance to be interacted with.
13
+ use_base64 (bool):
14
+ Whether to apply strict base64 encoding to the input.
15
+ 1, If `True`, only inputs with specific MIME types (e.g. images,
16
+ audio, and video) will be converted into base64 strings before being sent.
17
+ 2. If `False`, each input is treated as a raw string.
18
+
19
+ This ensures "strict" handling: base64 encoding is not applied
20
+ universally, but only when required for those MIME types.
21
+
22
+ Usage example:
23
+ ```python
24
+ from gllm_inference.em_invoker.langchain import EMInvokerEmbeddings
25
+ from gllm_inference.em_invoker import OpenAIEMInvoker
26
+
27
+ em_invoker = OpenAIEMInvoker(...)
28
+ embeddings = EMInvokerEmbeddings(em_invoker=em_invoker)
29
+ ```
30
+ '''
31
+ em_invoker: BaseEMInvoker
32
+ use_base64: bool
33
+ async def aembed_documents(self, texts: list[str], **kwargs: Any) -> list[Vector]:
34
+ """Asynchronously embed documents using the `EMInvoker`.
35
+
36
+ Args:
37
+ texts (list[str]): The list of texts to embed.
38
+ **kwargs (Any): Additional keyword arguments to pass to the EMInvoker's `invoke` method.
39
+
40
+ Returns:
41
+ list[Vector]: List of embeddings, one for each text.
42
+
43
+ Raises:
44
+ ValueError: If `texts` is not a list of strings.
45
+ """
46
+ async def aembed_query(self, text: str, **kwargs: Any) -> Vector:
47
+ """Asynchronously embed query using the `EMInvoker`.
48
+
49
+ Args:
50
+ text (str): The text to embed.
51
+ **kwargs (Any): Additional keyword arguments to pass to the EMInvoker's `invoke` method.
52
+
53
+ Returns:
54
+ Vector: Embeddings for the text.
55
+
56
+ Raises:
57
+ ValueError: If `text` is not a string.
58
+ """
59
+ def embed_documents(self, texts: list[str], **kwargs: Any) -> list[Vector]:
60
+ """Embed documents using the `EMInvoker`.
61
+
62
+ Args:
63
+ texts (list[str]): The list of texts to embed.
64
+ **kwargs (Any): Additional keyword arguments to pass to the EMInvoker's `invoke` method.
65
+
66
+ Returns:
67
+ list[Vector]: List of embeddings, one for each text.
68
+
69
+ Raises:
70
+ ValueError: If `texts` is not a list of strings.
71
+ """
72
+ def embed_query(self, text: str, **kwargs: Any) -> Vector:
73
+ """Embed query using the `EMInvoker`.
74
+
75
+ Args:
76
+ text (str): The text to embed.
77
+ **kwargs (Any): Additional keyword arguments to pass to the EMInvoker's `invoke` method.
78
+
79
+ Returns:
80
+ Vector: Embeddings for the text.
81
+
82
+ Raises:
83
+ ValueError: If `text` is not a string.
84
+ """
@@ -0,0 +1,46 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_core.utils.retry import RetryConfig
3
+ from gllm_inference.constants import INVOKER_DEFAULT_TIMEOUT as INVOKER_DEFAULT_TIMEOUT, INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
4
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
5
+ from gllm_inference.em_invoker.schema.langchain import Key as Key
6
+ from gllm_inference.exceptions import BaseInvokerError as BaseInvokerError, InvokerRuntimeError as InvokerRuntimeError, build_debug_info as build_debug_info
7
+ from gllm_inference.exceptions.provider_error_map import ALL_PROVIDER_ERROR_MAPPINGS as ALL_PROVIDER_ERROR_MAPPINGS, LANGCHAIN_ERROR_CODE_MAPPING as LANGCHAIN_ERROR_CODE_MAPPING
8
+ from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig, Vector as Vector
9
+ from gllm_inference.utils import load_langchain_model as load_langchain_model, parse_model_data as parse_model_data
10
+ from langchain_core.embeddings import Embeddings as Embeddings
11
+ from typing import Any
12
+
13
+ SUPPORTED_ATTACHMENTS: Incomplete
14
+
15
+ class LangChainEMInvoker(BaseEMInvoker):
16
+ """A language model invoker to interact with LangChain's Embeddings.
17
+
18
+ Attributes:
19
+ model_id (str): The model ID of the embedding model.
20
+ model_provider (str): The provider of the embedding model.
21
+ model_name (str): The name of the embedding model.
22
+ em (Embeddings): The instance to interact with an embedding model defined using LangChain's Embeddings.
23
+ retry_config (RetryConfig): The retry configuration for the embedding model.
24
+ truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
25
+ """
26
+ model: Incomplete
27
+ def __init__(self, model: Embeddings | None = None, model_class_path: str | None = None, model_name: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
28
+ '''Initializes a new instance of the LangChainEMInvoker class.
29
+
30
+ Args:
31
+ model (Embeddings | None, optional): The LangChain\'s Embeddings instance. If provided, will take
32
+ precedence over the `model_class_path` parameter. Defaults to None.
33
+ model_class_path (str | None, optional): The LangChain\'s Embeddings class path. Must be formatted as
34
+ "<package>.<class>" (e.g. "langchain_openai.OpenAIEmbeddings"). Ignored if `model` is provided.
35
+ Defaults to None.
36
+ model_name (str | None, optional): The model name. Only used if `model_class_path` is provided.
37
+ Defaults to None.
38
+ model_kwargs (dict[str, Any] | None, optional): The additional keyword arguments. Only used if
39
+ `model_class_path` is provided. Defaults to None.
40
+ default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
41
+ Defaults to None.
42
+ retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
43
+ Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
44
+ truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
45
+ Defaults to None, in which case no truncation is applied.
46
+ '''
@@ -0,0 +1,41 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_core.utils.retry import RetryConfig as RetryConfig
3
+ from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES
4
+ from gllm_inference.em_invoker.openai_em_invoker import OpenAIEMInvoker as OpenAIEMInvoker
5
+ from gllm_inference.em_invoker.schema.openai_compatible import Key as Key
6
+ from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, TruncationConfig as TruncationConfig
7
+ from typing import Any
8
+
9
+ DEPRECATION_MESSAGE: str
10
+
11
+ class OpenAICompatibleEMInvoker(OpenAIEMInvoker):
12
+ """An embedding model invoker to interact with endpoints compatible with OpenAI's embedding API contract.
13
+
14
+ Attributes:
15
+ model_id (str): The model ID of the embedding model.
16
+ model_provider (str): The provider of the embedding model.
17
+ model_name (str): The name of the embedding model.
18
+ client_kwargs (dict[str, Any]): The keyword arguments for the OpenAI client.
19
+ default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
20
+ retry_config (RetryConfig): The retry configuration for the embedding model.
21
+ truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.
22
+
23
+ This class is deprecated and will be removed in v0.6. Please use the `OpenAIEMInvoker` class instead.
24
+ """
25
+ client_kwargs: Incomplete
26
+ def __init__(self, model_name: str, base_url: str, api_key: str | None = None, model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
27
+ """Initializes a new instance of the OpenAICompatibleEMInvoker class.
28
+
29
+ Args:
30
+ model_name (str): The name of the embedding model hosted on the OpenAI compatible endpoint.
31
+ base_url (str): The base URL for the OpenAI compatible endpoint.
32
+ api_key (str | None, optional): The API key for authenticating with the OpenAI compatible endpoint.
33
+ Defaults to None, in which case the `OPENAI_API_KEY` environment variable will be used.
34
+ model_kwargs (dict[str, Any] | None, optional): Additional model parameters. Defaults to None.
35
+ default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
36
+ Defaults to None.
37
+ retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
38
+ Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
39
+ truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
40
+ Defaults to None, in which case no truncation is applied.
41
+ """
@@ -0,0 +1,118 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_core.utils.retry import RetryConfig as RetryConfig
3
+ from gllm_inference.constants import INVOKER_PROPAGATED_MAX_RETRIES as INVOKER_PROPAGATED_MAX_RETRIES, OPENAI_DEFAULT_URL as OPENAI_DEFAULT_URL
4
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker as BaseEMInvoker
5
+ from gllm_inference.em_invoker.schema.openai import Key as Key
6
+ from gllm_inference.schema import ModelId as ModelId, ModelProvider as ModelProvider, Vector as Vector
7
+ from gllm_inference.schema.config import TruncationConfig as TruncationConfig
8
+ from typing import Any
9
+
10
+ SUPPORTED_ATTACHMENTS: Incomplete
11
+
12
class OpenAIEMInvoker(BaseEMInvoker):
    """An embedding model invoker to interact with OpenAI embedding models.

    Attributes:
        model_id (str): The model ID of the embedding model.
        model_provider (str): The provider of the embedding model.
        model_name (str): The name of the embedding model.
        client_kwargs (dict[str, Any]): The keyword arguments for the OpenAI client.
        default_hyperparameters (dict[str, Any]): Default hyperparameters for invoking the embedding model.
        retry_config (RetryConfig): The retry configuration for the embedding model.
        truncation_config (TruncationConfig | None): The truncation configuration for the embedding model.

    Basic usage:
        The `OpenAIEMInvoker` can be used as follows:
        ```python
        em_invoker = OpenAIEMInvoker(model_name="text-embedding-3-small")
        result = await em_invoker.invoke("Hi there!")
        ```

    OpenAI compatible endpoints:
        The `OpenAIEMInvoker` can also be used to interact with endpoints that are compatible with
        OpenAI's Embeddings API schema. This includes, but is not limited to:
        1. Text Embeddings Inference (https://github.com/huggingface/text-embeddings-inference)
        2. vLLM (https://vllm.ai/)
        Please note that the supported features and capabilities may vary between different endpoints and
        language models. Using features that are not supported by the endpoint will result in an error.

        This customization can be done by setting the `base_url` parameter to the base URL of the endpoint:
        ```python
        em_invoker = OpenAIEMInvoker(
            model_name="<model-name>",
            api_key="<your-api-key>",
            base_url="<https://base-url>",
        )
        result = await em_invoker.invoke("Hi there!")
        ```

    Input types:
        The `OpenAIEMInvoker` only supports text inputs.

    Output format:
        The `OpenAIEMInvoker` can embed either:
        1. A single content.
           1. A single content is a single text.
           2. The output will be a `Vector`, representing the embedding of the content.

           # Example 1: Embedding a text content.
           ```python
           text = "This is a text"
           result = await em_invoker.invoke(text)
           ```

           The above example will return a `Vector` with a size of (embedding_size,).

        2. A list of contents.
           1. A list of contents is a list of texts.
           2. The output will be a `list[Vector]`, where each element is a `Vector` representing the
              embedding of each single content.

           # Example: Embedding a list of contents.
           ```python
           text1 = "This is a text"
           text2 = "This is another text"
           text3 = "This is yet another text"
           result = await em_invoker.invoke([text1, text2, text3])
           ```

           The above example will return a `list[Vector]` with a size of (3, embedding_size).

    Retry and timeout:
        The `OpenAIEMInvoker` supports retry and timeout configuration.
        By default, the max retries is set to 0 and the timeout is set to 30.0 seconds.
        They can be customized by providing a custom `RetryConfig` object to the `retry_config` parameter.

        Retry config examples:
        ```python
        retry_config = RetryConfig(max_retries=0, timeout=None)  # No retry, no timeout
        retry_config = RetryConfig(max_retries=0, timeout=10.0)  # No retry, 10.0 seconds timeout
        retry_config = RetryConfig(max_retries=5, timeout=None)  # 5 max retries, no timeout
        retry_config = RetryConfig(max_retries=5, timeout=10.0)  # 5 max retries, 10.0 seconds timeout
        ```

        Usage example:
        ```python
        em_invoker = OpenAIEMInvoker(..., retry_config=retry_config)
        ```
    """
    # Keyword arguments forwarded to the underlying OpenAI client; the concrete
    # value is assigned in the implementation module (this stub only records the name).
    client_kwargs: Incomplete
    def __init__(self, model_name: str, api_key: str | None = None, base_url: str = ..., model_kwargs: dict[str, Any] | None = None, default_hyperparameters: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, truncation_config: TruncationConfig | None = None) -> None:
        """Initializes a new instance of the OpenAIEMInvoker class.

        Args:
            model_name (str): The name of the OpenAI embedding model to be used.
            api_key (str | None, optional): The API key for authenticating with OpenAI. Defaults to None, in which
                case the `OPENAI_API_KEY` environment variable will be used. If the endpoint does not require an
                API key, a dummy value can be passed (e.g. "<empty>").
            base_url (str, optional): The base URL of a custom endpoint that is compatible with OpenAI's
                Embeddings API schema. Defaults to OpenAI's default URL.
            model_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the OpenAI client.
                Defaults to None.
            default_hyperparameters (dict[str, Any] | None, optional): Default hyperparameters for invoking the model.
                Defaults to None.
            retry_config (RetryConfig | None, optional): The retry configuration for the embedding model.
                Defaults to None, in which case a default config with no retry and 30.0 seconds timeout will be used.
            truncation_config (TruncationConfig | None, optional): Configuration for text truncation behavior.
                Defaults to None, in which case no truncation is applied.
        """
File without changes
@@ -0,0 +1,29 @@
1
class Key:
    """Defines valid keys in Bedrock.

    String constants naming request/response payload keys used by the Bedrock
    EM invoker. The literal values are assigned in the implementation module;
    this stub only declares each constant as a str.
    """
    ACCEPT: str
    BASE64_STRING: str
    CONTENT_TYPE: str
    HTTP_STATUS_CODE: str
    INPUT_TEXT: str
    INPUT_TYPE: str
    # presumably a Marengo-model-specific variant of INPUT_TYPE — confirm in implementation
    INPUT_TYPE_MARENGO: str
    MEDIA_SOURCE: str
    MODEL_ID: str
    RESPONSE_METADATA: str
    TEXT_TRUNCATE: str
    TEXTS: str
15
+
16
class InputType:
    """Defines valid input types in Bedrock.

    The literal values are assigned in the implementation module.
    """
    # NOTE(review): APPLICATION_JSON reads like a content/MIME type rather than an
    # embedding input type — confirm its placement against the implementation.
    APPLICATION_JSON: str
    IMAGE: str
    SEARCH_DOCUMENT: str
    SEARCH_QUERY: str
    TEXT: str
23
+
24
class OutputType:
    """Defines valid output types in Bedrock.

    String constants naming keys under which embeddings appear in Bedrock
    responses; values are assigned in the implementation module.
    """
    BODY: str
    DATA: str
    EMBEDDING: str
    EMBEDDINGS: str
@@ -0,0 +1,20 @@
1
+ from enum import StrEnum
2
+
3
class Key(StrEnum):
    """Defines valid keys in Cohere.

    String-valued members used as Cohere client kwargs and request payload keys.
    As a StrEnum, each member compares equal to its literal string value.
    """
    BASE_URL = 'base_url'
    IMAGE_URL = 'image_url'
    INPUT_TYPE = 'input_type'
    MAX_RETRIES = 'max_retries'
    MODEL = 'model'
    TIMEOUT = 'timeout'
    TYPE = 'type'
    URL = 'url'
13
+
14
class CohereInputType(StrEnum):
    """Defines valid embedding input types for the Cohere embedding API.

    Values match the `input_type` strings accepted by Cohere's embed endpoint.
    """
    CLASSIFICATION = 'classification'
    CLUSTERING = 'clustering'
    IMAGE = 'image'
    SEARCH_DOCUMENT = 'search_document'
    SEARCH_QUERY = 'search_query'
@@ -0,0 +1,9 @@
1
class Key:
    """Defines valid keys in Google.

    String constants naming Google client configuration keys (e.g. Vertex AI
    project/location settings). Values are assigned in the implementation module.
    """
    API_KEY: str
    CREDENTIALS: str
    HTTP_OPTIONS: str
    LOCATION: str
    PROJECT: str
    TIMEOUT: str
    VERTEXAI: str
@@ -0,0 +1,29 @@
1
+ from enum import StrEnum
2
+
3
class InputType(StrEnum):
    """Defines the supported input types for the Jina AI embedding API."""
    IMAGE_URL = 'image_url'
    TEXT = 'text'
7
+
8
class Key(StrEnum):
    """Defines key constants used in the Jina AI API payloads.

    String-valued members covering both request fields (e.g. INPUT, MODEL, TASK)
    and response fields (e.g. DATA, EMBEDDINGS, ERROR, STATUS).
    """
    DATA = 'data'
    EMBEDDING = 'embedding'
    EMBEDDINGS = 'embeddings'
    ERROR = 'error'
    IMAGE_URL = 'image_url'
    INPUT = 'input'
    JSON = 'json'
    MESSAGE = 'message'
    MODEL = 'model'
    RESPONSE = 'response'
    STATUS = 'status'
    TASK = 'task'
    TEXT = 'text'
    TYPE = 'type'
    URL = 'url'
25
+
26
class OutputType(StrEnum):
    """Defines the expected output types returned by the Jina AI embedding API."""
    DATA = 'data'
    EMBEDDING = 'embedding'
@@ -0,0 +1,5 @@
1
class Key:
    """Defines valid keys in LangChain.

    String constants naming LangChain embeddings configuration/error keys.
    Values are assigned in the implementation module.
    """
    ERROR_CODE: str
    MAX_RETRIES: str
    TIMEOUT: str
@@ -0,0 +1,7 @@
1
class Key:
    """Defines valid keys in OpenAI.

    String constants naming OpenAI client constructor / request keys.
    Values are assigned in the implementation module.
    """
    API_KEY: str
    BASE_URL: str
    MAX_RETRIES: str
    MODEL: str
    TIMEOUT: str
@@ -0,0 +1,7 @@
1
class Key:
    """Defines valid keys in OpenAI Compatible.

    Mirrors the OpenAI schema's Key constants for OpenAI-compatible endpoints.
    Values are assigned in the implementation module.
    """
    API_KEY: str
    BASE_URL: str
    MAX_RETRIES: str
    MODEL: str
    TIMEOUT: str