gllm-inference-binary 0.5.55__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. gllm_inference/__init__.pyi +0 -0
  2. gllm_inference/builder/__init__.pyi +6 -0
  3. gllm_inference/builder/_build_invoker.pyi +28 -0
  4. gllm_inference/builder/build_em_invoker.pyi +130 -0
  5. gllm_inference/builder/build_lm_invoker.pyi +213 -0
  6. gllm_inference/builder/build_lm_request_processor.pyi +88 -0
  7. gllm_inference/builder/build_output_parser.pyi +29 -0
  8. gllm_inference/catalog/__init__.pyi +4 -0
  9. gllm_inference/catalog/catalog.pyi +121 -0
  10. gllm_inference/catalog/lm_request_processor_catalog.pyi +112 -0
  11. gllm_inference/catalog/prompt_builder_catalog.pyi +82 -0
  12. gllm_inference/constants.pyi +12 -0
  13. gllm_inference/em_invoker/__init__.pyi +12 -0
  14. gllm_inference/em_invoker/azure_openai_em_invoker.pyi +88 -0
  15. gllm_inference/em_invoker/bedrock_em_invoker.pyi +118 -0
  16. gllm_inference/em_invoker/cohere_em_invoker.pyi +128 -0
  17. gllm_inference/em_invoker/em_invoker.pyi +90 -0
  18. gllm_inference/em_invoker/google_em_invoker.pyi +129 -0
  19. gllm_inference/em_invoker/jina_em_invoker.pyi +103 -0
  20. gllm_inference/em_invoker/langchain/__init__.pyi +3 -0
  21. gllm_inference/em_invoker/langchain/em_invoker_embeddings.pyi +84 -0
  22. gllm_inference/em_invoker/langchain_em_invoker.pyi +46 -0
  23. gllm_inference/em_invoker/openai_compatible_em_invoker.pyi +41 -0
  24. gllm_inference/em_invoker/openai_em_invoker.pyi +118 -0
  25. gllm_inference/em_invoker/schema/__init__.pyi +0 -0
  26. gllm_inference/em_invoker/schema/bedrock.pyi +29 -0
  27. gllm_inference/em_invoker/schema/cohere.pyi +20 -0
  28. gllm_inference/em_invoker/schema/google.pyi +9 -0
  29. gllm_inference/em_invoker/schema/jina.pyi +29 -0
  30. gllm_inference/em_invoker/schema/langchain.pyi +5 -0
  31. gllm_inference/em_invoker/schema/openai.pyi +7 -0
  32. gllm_inference/em_invoker/schema/openai_compatible.pyi +7 -0
  33. gllm_inference/em_invoker/schema/twelvelabs.pyi +17 -0
  34. gllm_inference/em_invoker/schema/voyage.pyi +15 -0
  35. gllm_inference/em_invoker/twelevelabs_em_invoker.pyi +101 -0
  36. gllm_inference/em_invoker/voyage_em_invoker.pyi +104 -0
  37. gllm_inference/exceptions/__init__.pyi +4 -0
  38. gllm_inference/exceptions/error_parser.pyi +41 -0
  39. gllm_inference/exceptions/exceptions.pyi +132 -0
  40. gllm_inference/exceptions/provider_error_map.pyi +24 -0
  41. gllm_inference/lm_invoker/__init__.pyi +14 -0
  42. gllm_inference/lm_invoker/anthropic_lm_invoker.pyi +318 -0
  43. gllm_inference/lm_invoker/azure_openai_lm_invoker.pyi +237 -0
  44. gllm_inference/lm_invoker/batch/__init__.pyi +3 -0
  45. gllm_inference/lm_invoker/batch/batch_operations.pyi +127 -0
  46. gllm_inference/lm_invoker/bedrock_lm_invoker.pyi +212 -0
  47. gllm_inference/lm_invoker/datasaur_lm_invoker.pyi +157 -0
  48. gllm_inference/lm_invoker/google_lm_invoker.pyi +327 -0
  49. gllm_inference/lm_invoker/langchain_lm_invoker.pyi +239 -0
  50. gllm_inference/lm_invoker/litellm_lm_invoker.pyi +224 -0
  51. gllm_inference/lm_invoker/lm_invoker.pyi +165 -0
  52. gllm_inference/lm_invoker/openai_chat_completions_lm_invoker.pyi +253 -0
  53. gllm_inference/lm_invoker/openai_compatible_lm_invoker.pyi +52 -0
  54. gllm_inference/lm_invoker/openai_lm_invoker.pyi +404 -0
  55. gllm_inference/lm_invoker/portkey_lm_invoker.pyi +296 -0
  56. gllm_inference/lm_invoker/schema/__init__.pyi +0 -0
  57. gllm_inference/lm_invoker/schema/anthropic.pyi +56 -0
  58. gllm_inference/lm_invoker/schema/bedrock.pyi +53 -0
  59. gllm_inference/lm_invoker/schema/datasaur.pyi +14 -0
  60. gllm_inference/lm_invoker/schema/google.pyi +24 -0
  61. gllm_inference/lm_invoker/schema/langchain.pyi +23 -0
  62. gllm_inference/lm_invoker/schema/openai.pyi +106 -0
  63. gllm_inference/lm_invoker/schema/openai_chat_completions.pyi +62 -0
  64. gllm_inference/lm_invoker/schema/portkey.pyi +31 -0
  65. gllm_inference/lm_invoker/schema/xai.pyi +31 -0
  66. gllm_inference/lm_invoker/xai_lm_invoker.pyi +253 -0
  67. gllm_inference/model/__init__.pyi +12 -0
  68. gllm_inference/model/em/__init__.pyi +0 -0
  69. gllm_inference/model/em/cohere_em.pyi +17 -0
  70. gllm_inference/model/em/google_em.pyi +16 -0
  71. gllm_inference/model/em/jina_em.pyi +22 -0
  72. gllm_inference/model/em/openai_em.pyi +15 -0
  73. gllm_inference/model/em/twelvelabs_em.pyi +13 -0
  74. gllm_inference/model/em/voyage_em.pyi +20 -0
  75. gllm_inference/model/lm/__init__.pyi +0 -0
  76. gllm_inference/model/lm/anthropic_lm.pyi +22 -0
  77. gllm_inference/model/lm/google_lm.pyi +18 -0
  78. gllm_inference/model/lm/openai_lm.pyi +27 -0
  79. gllm_inference/model/lm/xai_lm.pyi +19 -0
  80. gllm_inference/output_parser/__init__.pyi +3 -0
  81. gllm_inference/output_parser/json_output_parser.pyi +60 -0
  82. gllm_inference/output_parser/output_parser.pyi +27 -0
  83. gllm_inference/prompt_builder/__init__.pyi +3 -0
  84. gllm_inference/prompt_builder/format_strategy/__init__.pyi +4 -0
  85. gllm_inference/prompt_builder/format_strategy/format_strategy.pyi +55 -0
  86. gllm_inference/prompt_builder/format_strategy/jinja_format_strategy.pyi +45 -0
  87. gllm_inference/prompt_builder/format_strategy/string_format_strategy.pyi +20 -0
  88. gllm_inference/prompt_builder/prompt_builder.pyi +69 -0
  89. gllm_inference/prompt_formatter/__init__.pyi +7 -0
  90. gllm_inference/prompt_formatter/agnostic_prompt_formatter.pyi +49 -0
  91. gllm_inference/prompt_formatter/huggingface_prompt_formatter.pyi +55 -0
  92. gllm_inference/prompt_formatter/llama_prompt_formatter.pyi +59 -0
  93. gllm_inference/prompt_formatter/mistral_prompt_formatter.pyi +53 -0
  94. gllm_inference/prompt_formatter/openai_prompt_formatter.pyi +35 -0
  95. gllm_inference/prompt_formatter/prompt_formatter.pyi +30 -0
  96. gllm_inference/realtime_chat/__init__.pyi +3 -0
  97. gllm_inference/realtime_chat/google_realtime_chat.pyi +205 -0
  98. gllm_inference/realtime_chat/input_streamer/__init__.pyi +4 -0
  99. gllm_inference/realtime_chat/input_streamer/input_streamer.pyi +36 -0
  100. gllm_inference/realtime_chat/input_streamer/keyboard_input_streamer.pyi +27 -0
  101. gllm_inference/realtime_chat/input_streamer/linux_mic_input_streamer.pyi +36 -0
  102. gllm_inference/realtime_chat/output_streamer/__init__.pyi +4 -0
  103. gllm_inference/realtime_chat/output_streamer/console_output_streamer.pyi +21 -0
  104. gllm_inference/realtime_chat/output_streamer/linux_speaker_output_streamer.pyi +42 -0
  105. gllm_inference/realtime_chat/output_streamer/output_streamer.pyi +33 -0
  106. gllm_inference/realtime_chat/realtime_chat.pyi +28 -0
  107. gllm_inference/request_processor/__init__.pyi +4 -0
  108. gllm_inference/request_processor/lm_request_processor.pyi +101 -0
  109. gllm_inference/request_processor/uses_lm_mixin.pyi +130 -0
  110. gllm_inference/schema/__init__.pyi +18 -0
  111. gllm_inference/schema/activity.pyi +64 -0
  112. gllm_inference/schema/attachment.pyi +88 -0
  113. gllm_inference/schema/code_exec_result.pyi +14 -0
  114. gllm_inference/schema/config.pyi +15 -0
  115. gllm_inference/schema/enums.pyi +80 -0
  116. gllm_inference/schema/events.pyi +105 -0
  117. gllm_inference/schema/lm_input.pyi +4 -0
  118. gllm_inference/schema/lm_output.pyi +188 -0
  119. gllm_inference/schema/mcp.pyi +31 -0
  120. gllm_inference/schema/message.pyi +52 -0
  121. gllm_inference/schema/model_id.pyi +176 -0
  122. gllm_inference/schema/reasoning.pyi +15 -0
  123. gllm_inference/schema/token_usage.pyi +75 -0
  124. gllm_inference/schema/tool_call.pyi +14 -0
  125. gllm_inference/schema/tool_result.pyi +11 -0
  126. gllm_inference/schema/type_alias.pyi +11 -0
  127. gllm_inference/utils/__init__.pyi +5 -0
  128. gllm_inference/utils/io_utils.pyi +26 -0
  129. gllm_inference/utils/langchain.pyi +30 -0
  130. gllm_inference/utils/validation.pyi +12 -0
  131. gllm_inference.build/.gitignore +1 -0
  132. gllm_inference.cpython-312-darwin.so +0 -0
  133. gllm_inference.pyi +153 -0
  134. gllm_inference_binary-0.5.55.dist-info/METADATA +138 -0
  135. gllm_inference_binary-0.5.55.dist-info/RECORD +137 -0
  136. gllm_inference_binary-0.5.55.dist-info/WHEEL +5 -0
  137. gllm_inference_binary-0.5.55.dist-info/top_level.txt +1 -0
--- /dev/null
+++ b/gllm_inference/prompt_builder/format_strategy/jinja_format_strategy.pyi
@@ -0,0 +1,45 @@
+ from _typeshed import Incomplete
+ from gllm_inference.prompt_builder.format_strategy.format_strategy import BasePromptFormattingStrategy as BasePromptFormattingStrategy
+ from gllm_inference.schema import JinjaEnvType as JinjaEnvType
+ from jinja2.sandbox import SandboxedEnvironment
+ from typing import Any
+
+ JINJA_DEFAULT_BLACKLISTED_FILTERS: list[str]
+ JINJA_DEFAULT_SAFE_GLOBALS: dict[str, Any]
+ JINJA_DANGEROUS_PATTERNS: list[str]
+ PROMPT_BUILDER_VARIABLE_START_STRING: str
+ PROMPT_BUILDER_VARIABLE_END_STRING: str
+
+ class JinjaFormatStrategy(BasePromptFormattingStrategy):
+     """Jinja2 template engine for formatting prompts.
+
+     Attributes:
+         jinja_env (SandboxedEnvironment): The Jinja environment for rendering templates.
+         key_defaults (dict[str, str]): The default values for the keys.
+     """
+     jinja_env: Incomplete
+     def __init__(self, environment: JinjaEnvType | SandboxedEnvironment = ..., key_defaults: dict[str, str] | None = None) -> None:
+         """Initialize the JinjaFormatStrategy.
+
+         Args:
+             environment (JinjaEnvType | SandboxedEnvironment, optional): The environment for Jinja rendering.
+                 It can be one of the following:
+                 1. `JinjaEnvType.RESTRICTED`: Uses a minimal, restricted Jinja environment.
+                    Safest for most cases.
+                 2. `JinjaEnvType.JINJA_DEFAULT`: Uses the full Jinja environment. Allows more powerful templating,
+                    but with fewer safety restrictions.
+                 3. `SandboxedEnvironment` instance: A custom Jinja `SandboxedEnvironment` object provided by the
+                    user. Offers fine-grained control over template execution.
+                 Defaults to `JinjaEnvType.RESTRICTED`
+             key_defaults (dict[str, str], optional): The default values for the keys. Defaults to None, in which
+                 case no default values are used.
+         """
+     def extract_keys(self, template: str | None) -> set[str]:
+         """Extract keys from Jinja template using AST analysis.
+
+         Args:
+             template (str | None): The template to extract keys from.
+
+         Returns:
+             set[str]: The set of keys found in the template.
+         """
--- /dev/null
+++ b/gllm_inference/prompt_builder/format_strategy/string_format_strategy.pyi
@@ -0,0 +1,20 @@
+ from _typeshed import Incomplete
+ from gllm_inference.prompt_builder.format_strategy.format_strategy import BasePromptFormattingStrategy as BasePromptFormattingStrategy
+
+ KEY_EXTRACTOR_REGEX: Incomplete
+
+ class StringFormatStrategy(BasePromptFormattingStrategy):
+     """String format strategy using str.format() method.
+
+     Attributes:
+         key_defaults (dict[str, str]): The default values for the keys.
+     """
+     def extract_keys(self, template: str | None) -> set[str]:
+         """Extract keys from a template.
+
+         Args:
+             template (str | None): The template to extract keys from.
+
+         Returns:
+             set[str]: The set of keys found in the template.
+         """
--- /dev/null
+++ b/gllm_inference/prompt_builder/prompt_builder.pyi
@@ -0,0 +1,69 @@
+ from _typeshed import Incomplete
+ from gllm_inference.prompt_builder.format_strategy import JinjaFormatStrategy as JinjaFormatStrategy, StringFormatStrategy as StringFormatStrategy
+ from gllm_inference.schema import JinjaEnvType as JinjaEnvType, Message as Message, MessageContent as MessageContent, MessageRole as MessageRole
+ from jinja2.sandbox import SandboxedEnvironment as SandboxedEnvironment
+ from typing import Any
+
+ class PromptBuilder:
+     """A prompt builder class used in Gen AI applications.
+
+     Attributes:
+         system_template (str): The system prompt template. May contain placeholders enclosed in curly braces `{}`.
+         user_template (str): The user prompt template. May contain placeholders enclosed in curly braces `{}`.
+         prompt_key_set (set[str]): A set of expected keys that must be present in the prompt templates.
+         key_defaults (dict[str, str]): Default values for the keys in the prompt templates.
+         strategy (BasePromptFormattingStrategy): The format strategy to be used for formatting the prompt.
+     """
+     key_defaults: Incomplete
+     system_template: Incomplete
+     user_template: Incomplete
+     strategy: Incomplete
+     prompt_key_set: Incomplete
+     def __init__(self, system_template: str = '', user_template: str = '', key_defaults: dict[str, str] | None = None, ignore_extra_keys: bool | None = None, use_jinja: bool = False, jinja_env: JinjaEnvType | SandboxedEnvironment = ...) -> None:
+         """Initializes a new instance of the PromptBuilder class.
+
+         Args:
+             system_template (str, optional): The system prompt template. May contain placeholders enclosed in curly
+                 braces `{}`. Defaults to an empty string.
+             user_template (str, optional): The user prompt template. May contain placeholders enclosed in curly
+                 braces `{}`. Defaults to an empty string.
+             key_defaults (dict[str, str] | None, optional): Default values for the keys in the prompt templates.
+                 Applied when the corresponding keys are not provided in the runtime input.
+                 Defaults to None, in which case no default values will be assigned to the keys.
+             ignore_extra_keys (bool | None, optional): Deprecated parameter. Will be removed in v0.6. Extra keys
+                 will always raise a warning only instead of raising an error.
+             use_jinja (bool, optional): Whether to use Jinja for rendering the prompt templates.
+                 Defaults to False.
+             jinja_env (JinjaEnvType | SandboxedEnvironment, optional): The environment for Jinja rendering.
+                 It can be one of the following:
+                 1. `JinjaEnvType.RESTRICTED`: Uses a minimal, restricted Jinja environment.
+                    Safest for most cases.
+                 2. `JinjaEnvType.JINJA_DEFAULT`: Uses the full Jinja environment. Allows more powerful templating,
+                    but with fewer safety restrictions.
+                 3. `SandboxedEnvironment` instance: A custom Jinja `SandboxedEnvironment` object provided by the
+                    user. Offers fine-grained control over template execution.
+                 Defaults to `JinjaEnvType.RESTRICTED`
+
+         Raises:
+             ValueError: If both `system_template` and `user_template` are empty.
+         """
+     def format(self, history: list[Message] | None = None, extra_contents: list[MessageContent] | None = None, **kwargs: Any) -> list[Message]:
+         """Formats the prompt templates into a list of messages.
+
+         This method processes each prompt template, replacing the placeholders in the template content with the
+         corresponding values from `kwargs`. If any required key is missing from `kwargs`, it raises a `ValueError`.
+         It also handles the provided history and extra contents. It formats the prompt as a list of messages.
+
+         Args:
+             history (list[Message] | None, optional): The history to be included in the prompt. Defaults to None.
+             extra_contents (list[MessageContent] | None, optional): The extra contents to be included in the user
+                 message. Defaults to None.
+             **kwargs (Any): A dictionary of placeholder values to be injected into the prompt templates.
+                 Values must be either a string or an object that can be serialized to a string.
+
+         Returns:
+             list[Message]: A list of formatted messages.
+
+         Raises:
+             ValueError: If a required key for the prompt template is missing from `kwargs`.
+         """
--- /dev/null
+++ b/gllm_inference/prompt_formatter/__init__.pyi
@@ -0,0 +1,7 @@
+ from gllm_inference.prompt_formatter.agnostic_prompt_formatter import AgnosticPromptFormatter as AgnosticPromptFormatter
+ from gllm_inference.prompt_formatter.huggingface_prompt_formatter import HuggingFacePromptFormatter as HuggingFacePromptFormatter
+ from gllm_inference.prompt_formatter.llama_prompt_formatter import LlamaPromptFormatter as LlamaPromptFormatter
+ from gllm_inference.prompt_formatter.mistral_prompt_formatter import MistralPromptFormatter as MistralPromptFormatter
+ from gllm_inference.prompt_formatter.openai_prompt_formatter import OpenAIPromptFormatter as OpenAIPromptFormatter
+
+ __all__ = ['AgnosticPromptFormatter', 'HuggingFacePromptFormatter', 'LlamaPromptFormatter', 'MistralPromptFormatter', 'OpenAIPromptFormatter']
--- /dev/null
+++ b/gllm_inference/prompt_formatter/agnostic_prompt_formatter.pyi
@@ -0,0 +1,49 @@
+ from _typeshed import Incomplete
+ from gllm_inference.prompt_formatter.prompt_formatter import BasePromptFormatter as BasePromptFormatter
+ from gllm_inference.schema import MessageRole as MessageRole
+
+ class AgnosticPromptFormatter(BasePromptFormatter):
+     '''A prompt formatter that formats prompt without any specific model formatting.
+
+     The `AgnosticPromptFormatter` class formats a prompt by joining the content of the prompt templates using a
+     specified separator. It is designed to work independently of specific model types.
+
+     Attributes:
+         content_separator (str): A string used to separate each content in a message.
+         message_separator (str): A string used to separate each message.
+
+     Usage:
+         The `AgnosticPromptFormatter` can be used to format a prompt for any model.
+         The `content_separator` and `message_separator` can be customized to define the format of the prompt.
+
+     Usage example:
+         ```python
+         prompt = [
+             (MessageRole.USER, ["Hello", "how are you?"]),
+             (MessageRole.ASSISTANT, ["I\'m fine", "thank you!"]),
+             (MessageRole.USER, ["What is the capital of France?"]),
+         ]
+         prompt_formatter = AgnosticPromptFormatter(
+             message_separator="\\n###\\n",
+             content_separator="---"
+         )
+         print(prompt_formatter.format(prompt))
+         ```
+
+     Output example:
+         ```
+         Hello---how are you?
+         ###
+         I\'m fine---thank you!
+         ###
+         What is the capital of France?
+         ```
+     '''
+     message_separator: Incomplete
+     def __init__(self, message_separator: str = '\n', content_separator: str = '\n') -> None:
+         '''Initializes a new instance of the AgnosticPromptFormatter class.
+
+         Args:
+             message_separator (str, optional): A string used to separate each message. Defaults to "\\n".
+             content_separator (str, optional): A string used to separate each content in a message. Defaults to "\\n".
+         '''
--- /dev/null
+++ b/gllm_inference/prompt_formatter/huggingface_prompt_formatter.pyi
@@ -0,0 +1,55 @@
+ from gllm_inference.prompt_formatter.prompt_formatter import BasePromptFormatter as BasePromptFormatter
+ from gllm_inference.schema import MessageRole as MessageRole
+
+ TOKENIZER_LOAD_ERROR_MESSAGE: str
+
+ class HuggingFacePromptFormatter(BasePromptFormatter):
+     '''A prompt formatter that formats prompt using HuggingFace model\'s specific formatting.
+
+     The `HuggingFacePromptFormatter` class is designed to format prompt using a HuggingFace model\'s specific formatting.
+     It does so by using the model\'s tokenizer\'s `apply_chat_template` method.
+
+     Attributes:
+         content_separator (str): A string used to separate each content in a message.
+         tokenizer (PreTrainedTokenizer): The HuggingFace model tokenizer used for chat templating.
+
+     Usage:
+         The `HuggingFacePromptFormatter` can be used to format a prompt using a HuggingFace model\'s specific formatting.
+         The `content_separator` and `model_name_or_path` can be customized to define the format of the prompt.
+         The `model_name_or_path` defines the name of the HuggingFace model whose tokenizer will be used to format
+         the prompt using the `apply_chat_template` method.
+
+     Usage example:
+         ```python
+         prompt = [
+             (MessageRole.USER, ["Hello", "how are you?"]),
+             (MessageRole.ASSISTANT, ["I\'m fine", "thank you!"]),
+             (MessageRole.USER, ["What is the capital of France?"]),
+         ]
+         prompt_formatter = HuggingFacePromptFormatter(
+             model_name_or_path="mistralai/Mistral-7B-Instruct-v0.1",
+             content_separator="---"
+         )
+         print(prompt_formatter.format(prompt))
+         ```
+
+     Output example:
+         ```
+         <s>[INST] Hello---how are you? [/INST]I\'m fine---thank you!</s> [INST] What is the capital of France? [/INST]
+         ```
+
+     Using a gated model:
+         If you\'re trying to access the prompt builder template of a gated model, you\'d need to:
+         1. Request access to the gated repo using your HuggingFace account.
+         2. Login to HuggingFace in your system. This can be done as follows:
+             2.1. Install huggingface-hub: ```pip install huggingface-hub```
+             2.2. Login to HuggingFace: ```huggingface-cli login```
+             2.3. Enter your HuggingFace token.
+     '''
+     def __init__(self, model_name_or_path: str, content_separator: str = '\n') -> None:
+         '''Initializes a new instance of the HuggingFacePromptFormatter class.
+
+         Args:
+             model_name_or_path (str): The model name or path of the HuggingFace model tokenizer to be loaded.
+             content_separator (str, optional): A string used to separate each content in a message. Defaults to "\\n".
+         '''
--- /dev/null
+++ b/gllm_inference/prompt_formatter/llama_prompt_formatter.pyi
@@ -0,0 +1,59 @@
+ from gllm_inference.prompt_formatter import HuggingFacePromptFormatter as HuggingFacePromptFormatter
+
+ class LlamaPromptFormatter(HuggingFacePromptFormatter):
+     '''A prompt formatter that formats prompt using Llama model\'s specific formatting.
+
+     The `LlamaPromptFormatter` class is designed to format prompt using a Llama model\'s specific formatting.
+     It does so by using the model\'s tokenizer\'s `apply_chat_template` method.
+
+     Attributes:
+         content_separator (str): A string used to separate each content in a message.
+         tokenizer (PreTrainedTokenizer): The HuggingFace model tokenizer used for chat templating.
+
+     Usage:
+         The `LlamaPromptFormatter` can be used to format a prompt using a Llama model\'s specific formatting.
+         The `content_separator` and `model_name` can be customized to define the format of the prompt.
+         The `model_name` defines the name of the HuggingFace model whose tokenizer will be used to format
+         the prompt using the `apply_chat_template` method.
+
+     Usage example:
+         ```python
+         prompt = [
+             (MessageRole.USER, ["Hello", "how are you?"]),
+             (MessageRole.ASSISTANT, ["I\'m fine", "thank you!"]),
+             (MessageRole.USER, ["What is the capital of France?"]),
+         ]
+         prompt_formatter = LlamaPromptFormatter(
+             model_name_or_path="meta-llama/Meta-Llama-3.1-8B-Instruct",
+             content_separator="---"
+         )
+         print(prompt_formatter.format(prompt))
+         ```
+
+     Output example:
+         ```
+         <|begin_of_text|><|start_header_id|>user<|end_header_id|>
+
+         Hello---how are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+         I\'m fine---thank you!<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+         What is the capital of France?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+         ```
+
+     Using a gated model:
+         If you\'re trying to access the prompt builder template of a gated model, you\'d need to:
+         1. Request access to the gated repo using your HuggingFace account.
+         2. Login to HuggingFace in your system. This can be done as follows:
+             2.1. Install huggingface-hub: ```pip install huggingface-hub```
+             2.2. Login to HuggingFace: ```huggingface-cli login```
+             2.3. Enter your HuggingFace token.
+     '''
+     def __init__(self, model_name: str = 'Meta-Llama-3.1-8B-Instruct', content_separator: str = '\n') -> None:
+         '''Initializes a new instance of the LlamaPromptFormatter class.
+
+         Args:
+             model_name (str, optional): The name of the Llama model tokenizer to be loaded. Defaults to
+                 `Meta-Llama-3.1-8B-Instruct`.
+             content_separator (str, optional): A string used to separate each content in a message. Defaults to "\\n".
+         '''
--- /dev/null
+++ b/gllm_inference/prompt_formatter/mistral_prompt_formatter.pyi
@@ -0,0 +1,53 @@
+ from gllm_inference.prompt_formatter import HuggingFacePromptFormatter as HuggingFacePromptFormatter
+
+ class MistralPromptFormatter(HuggingFacePromptFormatter):
+     '''A prompt formatter that formats prompt using Mistral model\'s specific formatting.
+
+     The `MistralPromptFormatter` class is designed to format prompt using a Mistral model\'s specific formatting.
+     It does so by using the model\'s tokenizer\'s `apply_chat_template` method.
+
+     Attributes:
+         content_separator (str): A string used to separate each content in a message.
+         tokenizer (PreTrainedTokenizer): The HuggingFace model tokenizer used for chat templating.
+
+     Usage:
+         The `MistralPromptFormatter` can be used to format a prompt using a Mistral model\'s specific formatting.
+         The `content_separator` and `model_name` can be customized to define the format of the prompt.
+         The `model_name` defines the name of the HuggingFace model whose tokenizer will be used to format
+         the prompt using the `apply_chat_template` method.
+
+     Usage example:
+         ```python
+         prompt = [
+             (MessageRole.USER, ["Hello", "how are you?"]),
+             (MessageRole.ASSISTANT, ["I\'m fine", "thank you!"]),
+             (MessageRole.USER, ["What is the capital of France?"]),
+         ]
+         prompt_formatter = MistralPromptFormatter(
+             model_name_or_path="mistralai/Mistral-7B-Instruct-v0.1",
+             content_separator="---"
+         )
+         print(prompt_formatter.format(prompt))
+         ```
+
+     Output example:
+         ```
+         <s>[INST] Hello---how are you? [/INST]I\'m fine---thank you!</s> [INST] What is the capital of France? [/INST]
+         ```
+
+     Using a gated model:
+         If you\'re trying to access the prompt builder template of a gated model, you\'d need to:
+         1. Request access to the gated repo using your HuggingFace account.
+         2. Login to HuggingFace in your system. This can be done as follows:
+             2.1. Install huggingface-hub: ```pip install huggingface-hub```
+             2.2. Login to HuggingFace: ```huggingface-cli login```
+             2.3. Enter your HuggingFace token.
+     '''
+     def __init__(self, model_name: str = 'Mistral-7B-Instruct-v0.3', content_separator: str = '\n') -> None:
+         '''Initializes a new instance of the MistralPromptFormatter class.
+
+         Args:
+             model_name (str, optional): The name of the Mistral model tokenizer to be loaded. Defaults to
+                 `Mistral-7B-Instruct-v0.3`.
+             content_separator (str, optional): A string used to separate each content in a message. Defaults to "\\n".
+         '''
--- /dev/null
+++ b/gllm_inference/prompt_formatter/openai_prompt_formatter.pyi
@@ -0,0 +1,35 @@
+ from gllm_inference.prompt_formatter.prompt_formatter import BasePromptFormatter as BasePromptFormatter
+ from gllm_inference.schema import MessageRole as MessageRole
+
+ class OpenAIPromptFormatter(BasePromptFormatter):
+     '''A prompt formatter that formats prompt with OpenAI\'s specific formatting.
+
+     The `OpenAIPromptFormatter` class formats a prompt by utilizing OpenAI\'s specific formatting.
+
+     Attributes:
+         content_separator (str): A string used to separate each content in a message.
+
+     Usage:
+         The `OpenAIPromptFormatter` can be used to format a prompt for OpenAI\'s models.
+         The `content_separator` can be customized to define the format of the prompt.
+
+     Usage example:
+         ```python
+         prompt = [
+             (MessageRole.USER, ["Hello", "how are you?"]),
+             (MessageRole.ASSISTANT, ["I\'m fine", "thank you!"]),
+             (MessageRole.USER, ["What is the capital of France?"]),
+         ]
+         prompt_formatter = OpenAIPromptFormatter(
+             content_separator="---"
+         )
+         print(prompt_formatter.format(prompt))
+         ```
+
+     Output example:
+         ```
+         User: Hello---how are you?
+         Assistant: I\'m fine---thank you!
+         User: What is the capital of France?
+         ```
+     '''
--- /dev/null
+++ b/gllm_inference/prompt_formatter/prompt_formatter.pyi
@@ -0,0 +1,30 @@
+ import abc
+ from _typeshed import Incomplete
+ from abc import ABC
+ from gllm_inference.schema import Message as Message, MessageContent as MessageContent, MessageRole as MessageRole
+
+ class BasePromptFormatter(ABC, metaclass=abc.ABCMeta):
+     """A base class for prompt formatters used in Gen AI applications.
+
+     The prompt formatter class is used to format a prompt into a string with specific formatting.
+
+     Attributes:
+         content_separator (str): The separator to be used between the string in a single message.
+     """
+     content_separator: Incomplete
+     def __init__(self, content_separator: str = '\n') -> None:
+         '''Initializes a new instance of the BasePromptFormatter class.
+
+         Args:
+             content_separator (str, optional): The separator to be used between the string in a single message.
+                 Defaults to "\\n".
+         '''
+     def format(self, messages: list[Message]) -> str:
+         """Formats the prompt as a string.
+
+         Args:
+             messages (list[Message]): The messages to be formatted as a string.
+
+         Returns:
+             str: The formatted messages as a string.
+         """
--- /dev/null
+++ b/gllm_inference/realtime_chat/__init__.pyi
@@ -0,0 +1,3 @@
+ from gllm_inference.realtime_chat.google_realtime_chat import GoogleRealtimeChat as GoogleRealtimeChat
+
+ __all__ = ['GoogleRealtimeChat']
--- /dev/null
+++ b/gllm_inference/realtime_chat/google_realtime_chat.pyi
@@ -0,0 +1,205 @@
+ import asyncio
+ import logging
+ from _typeshed import Incomplete
+ from gllm_inference.constants import GOOGLE_SCOPES as GOOGLE_SCOPES
+ from gllm_inference.realtime_chat.input_streamer import KeyboardInputStreamer as KeyboardInputStreamer
+ from gllm_inference.realtime_chat.input_streamer.input_streamer import BaseInputStreamer as BaseInputStreamer
+ from gllm_inference.realtime_chat.output_streamer import ConsoleOutputStreamer as ConsoleOutputStreamer
+ from gllm_inference.realtime_chat.output_streamer.output_streamer import BaseOutputStreamer as BaseOutputStreamer
+ from gllm_inference.realtime_chat.realtime_chat import BaseRealtimeChat as BaseRealtimeChat
+ from pydantic import BaseModel
+ from typing import Literal
+
+ DEFAULT_POST_OUTPUT_AUDIO_DELAY: float
+ LIVE_CONNECT_CONFIG: Incomplete
+
+ class GoogleIOStreamerState(BaseModel):
+     '''[BETA] Defines the state of the GoogleIOStreamer with thread-safe properties.
+
+     Attributes:
+         is_streaming_output (bool): Whether the output is streaming.
+         console_mode (Literal["input", "user", "assistant"]): The current console mode.
+         terminated (bool): Whether the conversation is terminated.
+     '''
+     is_streaming_output: bool
+     console_mode: Literal['input', 'user', 'assistant']
+     terminated: bool
+     async def set_streaming_output(self, value: bool) -> None:
+         """Thread-safe setter for is_streaming_output.
+
+         Args:
+             value (bool): The value to set for is_streaming_output.
+         """
+     async def get_streaming_output(self) -> bool:
+         """Thread-safe getter for is_streaming_output.
+
+         Returns:
+             bool: The value of is_streaming_output.
+         """
+     async def set_console_mode(self, value: Literal['input', 'user', 'assistant']) -> None:
+         '''Thread-safe setter for console_mode.
+
+         Args:
+             value (Literal["input", "user", "assistant"]): The value to set for console_mode.
+         '''
+     async def get_console_mode(self) -> Literal['input', 'user', 'assistant']:
+         '''Thread-safe getter for console_mode.
+
+         Returns:
+             Literal["input", "user", "assistant"]: The value of console_mode.
+         '''
+     async def set_terminated(self, value: bool) -> None:
+         """Thread-safe setter for terminated.
+
+         Args:
+             value (bool): The value to set for terminated.
+         """
+     async def get_terminated(self) -> bool:
+         """Thread-safe getter for terminated.
+
+         Returns:
+             bool: The value of terminated.
+         """
+
+ class GoogleIOStreamer:
+     """[BETA] Defines the GoogleIOStreamer.
+
+     This class manages the realtime conversation lifecycle.
+     It handles the IO operations between the model and the input/output streamers.
+
+     Attributes:
+         session (AsyncSession): The session of the GoogleIOStreamer.
+         task_group (asyncio.TaskGroup): The task group of the GoogleIOStreamer.
+         input_queue (asyncio.Queue): The input queue of the GoogleIOStreamer.
+         output_queue (asyncio.Queue): The output queue of the GoogleIOStreamer.
+         input_streamers (list[BaseInputStreamer]): The input streamers of the GoogleIOStreamer.
+         output_streamers (list[BaseOutputStreamer]): The output streamers of the GoogleIOStreamer.
+         post_output_audio_delay (float): The delay in seconds to post the output audio.
+     """
+     session: AsyncSession
+     task_group: Incomplete
+     input_queue: Incomplete
+     output_queue: Incomplete
+     state: Incomplete
+     input_streamers: Incomplete
+     output_streamers: Incomplete
+     post_output_audio_delay: Incomplete
+     def __init__(self, session: AsyncSession, task_group: asyncio.TaskGroup, input_queue: asyncio.Queue, output_queue: asyncio.Queue, input_streamers: list[BaseInputStreamer], output_streamers: list[BaseOutputStreamer], post_output_audio_delay: float, logger: logging.Logger) -> None:
+         """Initializes a new instance of the GoogleIOStreamer class.
+
+         Args:
+             session (AsyncSession): The session of the GoogleIOStreamer.
+             task_group (asyncio.TaskGroup): The task group of the GoogleIOStreamer.
+             input_queue (asyncio.Queue): The input queue of the GoogleIOStreamer.
+             output_queue (asyncio.Queue): The output queue of the GoogleIOStreamer.
+             input_streamers (list[BaseInputStreamer]): The input streamers of the GoogleIOStreamer.
+             output_streamers (list[BaseOutputStreamer]): The output streamers of the GoogleIOStreamer.
+             post_output_audio_delay (float): The delay in seconds to post the output audio.
+             logger (logging.Logger): The logger of the GoogleIOStreamer.
+         """
+     async def start(self) -> None:
+         """Processes the realtime conversation.
+
+         This method is used to start the realtime conversation.
+         It initializes the input and output streamers, creates the necessary tasks, and starts the conversation.
+         When the conversation is terminated, it cleans up the input and output streamers.
+         """
+
+ class GoogleRealtimeChat(BaseRealtimeChat):
+     '''[BETA] A realtime chat module to interact with Gemini Live models.
+
+     Warning:
+         The \'GoogleRealtimeChat\' class is currently in beta and may be subject to changes in the future.
+         It is intended only for quick prototyping in local environments.
+         Please avoid using it in production environments.
+
+     Attributes:
+         model_name (str): The name of the language model.
+         client_params (dict[str, Any]): The Google client instance init parameters.
+
+     Basic usage:
+         The `GoogleRealtimeChat` can be started as follows:
+         ```python
+         realtime_chat = GoogleRealtimeChat(model_name="gemini-live-2.5-flash-preview")
+         await realtime_chat.invoke()
+         ```
+
+     Custom IO streamers:
+         The `GoogleRealtimeChat` can be used with custom IO streamers.
+         ```python
+         input_streamers = [KeyboardInputStreamer(), LinuxMicInputStreamer()]
+         output_streamers = [ConsoleOutputStreamer(), LinuxSpeakerOutputStreamer()]
+         realtime_chat = GoogleRealtimeChat(model_name="gemini-live-2.5-flash-preview")
+         await realtime_chat.start(input_streamers=input_streamers, output_streamers=output_streamers)
+         ```
+
+         In the above example, we added a capability to use a Linux system microphone and speaker,
+         allowing realtime audio input and output to the model.
+
+     Authentication:
+         The `GoogleRealtimeChat` can use either Google Gen AI or Google Vertex AI.
+
+         Google Gen AI is recommended for quick prototyping and development.
+         It requires a Gemini API key for authentication.
+
+         Usage example:
+         ```python
+         realtime_chat = GoogleRealtimeChat(
+             model_name="gemini-live-2.5-flash-preview",
+             api_key="your_api_key"
+         )
+         ```
+
+         Google Vertex AI is recommended to build production-ready applications.
+         It requires a service account JSON file for authentication.
+
+         Usage example:
+         ```python
+         realtime_chat = GoogleRealtimeChat(
+             model_name="gemini-live-2.5-flash-preview",
+             credentials_path="path/to/service_account.json"
+         )
+         ```
+
+         If neither `api_key` nor `credentials_path` is provided, Google Gen AI will be used by default.
+         The `GOOGLE_API_KEY` environment variable will be used for authentication.
+     '''
+     model_name: Incomplete
+     client_params: Incomplete
+     def __init__(self, model_name: str, api_key: str | None = None, credentials_path: str | None = None, project_id: str | None = None, location: str = 'us-central1') -> None:
+         '''Initializes a new instance of the GoogleRealtimeChat class.
+
+         Args:
+             model_name (str): The name of the model to use.
+             api_key (str | None, optional): Required for Google Gen AI authentication. Cannot be used together
+                 with `credentials_path`. Defaults to None.
+             credentials_path (str | None, optional): Required for Google Vertex AI authentication. Path to the service
+                 account credentials JSON file. Cannot be used together with `api_key`. Defaults to None.
+             project_id (str | None, optional): The Google Cloud project ID for Vertex AI. Only used when authenticating
+                 with `credentials_path`. Defaults to None, in which case it will be loaded from the credentials file.
+             location (str, optional): The location of the Google Cloud project for Vertex AI. Only used when
+                 authenticating with `credentials_path`. Defaults to "us-central1".
+
+         Note:
+             If neither `api_key` nor `credentials_path` is provided, Google Gen AI will be used by default.
+             The `GOOGLE_API_KEY` environment variable will be used for authentication.
+         '''
+     async def start(self, input_streamers: list[BaseInputStreamer] | None = None, output_streamers: list[BaseOutputStreamer] | None = None, post_output_audio_delay: float = ...) -> None:
+         """Starts the realtime conversation using the provided input and output streamers.
+
+         This method is used to start the realtime conversation using a `GoogleIOStreamer`.
+         The streamers are responsible for handling the input and output of the conversation.
+
+         Args:
+             input_streamers (list[BaseInputStreamer] | None, optional): The input streamers to use.
+                 Defaults to None, in which case a `KeyboardInputStreamer` will be used.
+             output_streamers (list[BaseOutputStreamer] | None, optional): The output streamers to use.
+                 Defaults to None, in which case a `ConsoleOutputStreamer` will be used.
+             post_output_audio_delay (float, optional): The delay in seconds to post the output audio.
+                 Defaults to 0.5 seconds.
+
+         Raises:
+             ValueError: If the `input_streamers` or `output_streamers` is an empty list.
+             ValueError: If the `post_output_audio_delay` is not greater than 0.
+             Exception: If the conversation fails to process.
+         """
--- /dev/null
+++ b/gllm_inference/realtime_chat/input_streamer/__init__.pyi
@@ -0,0 +1,4 @@
+ from gllm_inference.realtime_chat.input_streamer.keyboard_input_streamer import KeyboardInputStreamer as KeyboardInputStreamer
+ from gllm_inference.realtime_chat.input_streamer.linux_mic_input_streamer import LinuxMicInputStreamer as LinuxMicInputStreamer
+
+ __all__ = ['KeyboardInputStreamer', 'LinuxMicInputStreamer']