nvidia-nat-llama-index 1.3.0.dev2__py3-none-any.whl → 1.3.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nat/plugins/llama_index/embedder.py

@@ -28,7 +28,7 @@ async def azure_openai_llama_index(embedder_config: AzureOpenAIEmbedderModelConf
 
     from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
 
-    client = AzureOpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True))
+    client = AzureOpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True, exclude_none=True))
 
     if isinstance(embedder_config, RetryMixin):
         client = patch_with_retry(client,
@@ -40,17 +40,22 @@ async def azure_openai_llama_index(embedder_config: AzureOpenAIEmbedderModelConf
 
 
 @register_embedder_client(config_type=NIMEmbedderModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
-async def nim_llamaindex(embedder_config: NIMEmbedderModelConfig, _builder: Builder):
+async def nim_llama_index(embedder_config: NIMEmbedderModelConfig, _builder: Builder):
 
     from llama_index.embeddings.nvidia import NVIDIAEmbedding  # pylint: disable=no-name-in-module
 
-    config_obj = {
-        **embedder_config.model_dump(exclude={"type", "model_name"}, by_alias=True),
-        "model":
-            embedder_config.model_name,
-    }
+    client = NVIDIAEmbedding(
+        **embedder_config.model_dump(exclude={"type", "model_name"}, by_alias=True, exclude_none=True),
+        model=embedder_config.model_name,
+    )
 
-    yield NVIDIAEmbedding(**config_obj)
+    if isinstance(embedder_config, RetryMixin):
+        client = patch_with_retry(client,
+                                  retries=embedder_config.num_retries,
+                                  retry_codes=embedder_config.retry_on_status_codes,
+                                  retry_on_messages=embedder_config.retry_on_errors)
+
+    yield client
 
 
 @register_embedder_client(config_type=OpenAIEmbedderModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
@@ -58,7 +63,7 @@ async def openai_llama_index(embedder_config: OpenAIEmbedderModelConfig, _builde
 
     from llama_index.embeddings.openai import OpenAIEmbedding
 
-    client = OpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True))
+    client = OpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True, exclude_none=True))
 
     if isinstance(embedder_config, RetryMixin):
         client = patch_with_retry(client,
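
Note on the exclude_none=True changes in embedder.py: with Pydantic v2's model_dump, an optional field that was never set is still emitted as an explicit None, which would then be forwarded to the embedding client constructor as a keyword argument. Passing exclude_none=True drops those keys instead. A minimal sketch of the behavior (ExampleEmbedderConfig is a hypothetical stand-in, not a class from this package):

from pydantic import BaseModel

class ExampleEmbedderConfig(BaseModel):
    # hypothetical config mirroring the shape of the real embedder configs
    type: str = "openai"
    api_key: str = "sk-test"
    base_url: str | None = None

cfg = ExampleEmbedderConfig()
print(cfg.model_dump(exclude={"type"}))                     # {'api_key': 'sk-test', 'base_url': None}
print(cfg.model_dump(exclude={"type"}, exclude_none=True))  # {'api_key': 'sk-test'}

The same mechanism is what lets the llm.py changes below drop the hand-written "if base_url is None: del kwargs" cleanup.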
nat/plugins/llama_index/llm.py

@@ -13,33 +13,71 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from collections.abc import Sequence
+from typing import TypeVar
+
 from nat.builder.builder import Builder
 from nat.builder.framework_enum import LLMFrameworkEnum
 from nat.cli.register_workflow import register_llm_client
+from nat.data_models.llm import LLMBaseConfig
 from nat.data_models.retry_mixin import RetryMixin
+from nat.data_models.thinking_mixin import ThinkingMixin
 from nat.llm.aws_bedrock_llm import AWSBedrockModelConfig
 from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
+from nat.llm.litellm_llm import LiteLlmModelConfig
 from nat.llm.nim_llm import NIMModelConfig
 from nat.llm.openai_llm import OpenAIModelConfig
+from nat.llm.utils.thinking import BaseThinkingInjector
+from nat.llm.utils.thinking import FunctionArgumentWrapper
+from nat.llm.utils.thinking import patch_with_thinking
 from nat.utils.exception_handlers.automatic_retries import patch_with_retry
+from nat.utils.type_utils import override
 
+ModelType = TypeVar("ModelType")
 
-@register_llm_client(config_type=AWSBedrockModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
-async def aws_bedrock_llama_index(llm_config: AWSBedrockModelConfig, _builder: Builder):
 
-    from llama_index.llms.bedrock import Bedrock
+def _patch_llm_based_on_config(client: ModelType, llm_config: LLMBaseConfig) -> ModelType:
+
+    from llama_index.core.base.llms.types import ChatMessage
 
-    kwargs = llm_config.model_dump(exclude={"type", "max_tokens"}, by_alias=True)
+    class LlamaIndexThinkingInjector(BaseThinkingInjector):
 
-    llm = Bedrock(**kwargs)
+        @override
+        def inject(self, messages: Sequence[ChatMessage], *args, **kwargs) -> FunctionArgumentWrapper:
+            new_messages = [ChatMessage(role="system", content=self.system_prompt)] + list(messages)
+            return FunctionArgumentWrapper(new_messages, *args, **kwargs)
 
     if isinstance(llm_config, RetryMixin):
-        llm = patch_with_retry(llm,
-                               retries=llm_config.num_retries,
-                               retry_codes=llm_config.retry_on_status_codes,
-                               retry_on_messages=llm_config.retry_on_errors)
+        client = patch_with_retry(client,
+                                  retries=llm_config.num_retries,
+                                  retry_codes=llm_config.retry_on_status_codes,
+                                  retry_on_messages=llm_config.retry_on_errors)
+
+    if isinstance(llm_config, ThinkingMixin) and llm_config.thinking_system_prompt is not None:
+        client = patch_with_thinking(
+            client,
+            LlamaIndexThinkingInjector(
+                system_prompt=llm_config.thinking_system_prompt,
+                function_names=[
+                    "chat",
+                    "stream_chat",
+                    "achat",
+                    "astream_chat",
+                ],
+            ))
+
+    return client
+
+
+@register_llm_client(config_type=AWSBedrockModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def aws_bedrock_llama_index(llm_config: AWSBedrockModelConfig, _builder: Builder):
+
+    from llama_index.llms.bedrock import Bedrock
 
-    yield llm
+    # LlamaIndex uses context_size instead of max_tokens
+    llm = Bedrock(**llm_config.model_dump(exclude={"type", "top_p", "thinking"}, by_alias=True))
+
+    yield _patch_llm_based_on_config(llm, llm_config)
 
 
 @register_llm_client(config_type=AzureOpenAIModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
@@ -47,17 +85,9 @@ async def azure_openai_llama_index(llm_config: AzureOpenAIModelConfig, _builder:
 
     from llama_index.llms.azure_openai import AzureOpenAI
 
-    kwargs = llm_config.model_dump(exclude={"type"}, by_alias=True)
-
-    llm = AzureOpenAI(**kwargs)
-
-    if isinstance(llm_config, RetryMixin):
-        llm = patch_with_retry(llm,
-                               retries=llm_config.num_retries,
-                               retry_codes=llm_config.retry_on_status_codes,
-                               retry_on_messages=llm_config.retry_on_errors)
+    llm = AzureOpenAI(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True))
 
-    yield llm
+    yield _patch_llm_based_on_config(llm, llm_config)
 
 
 @register_llm_client(config_type=NIMModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
@@ -65,20 +95,9 @@ async def nim_llama_index(llm_config: NIMModelConfig, _builder: Builder):
 
     from llama_index.llms.nvidia import NVIDIA
 
-    kwargs = llm_config.model_dump(exclude={"type"}, by_alias=True)
-
-    if ("base_url" in kwargs and kwargs["base_url"] is None):
-        del kwargs["base_url"]
+    llm = NVIDIA(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
 
-    llm = NVIDIA(**kwargs)
-
-    if isinstance(llm_config, RetryMixin):
-        llm = patch_with_retry(llm,
-                               retries=llm_config.num_retries,
-                               retry_codes=llm_config.retry_on_status_codes,
-                               retry_on_messages=llm_config.retry_on_errors)
-
-    yield llm
+    yield _patch_llm_based_on_config(llm, llm_config)
 
 
 @register_llm_client(config_type=OpenAIModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
@@ -86,17 +105,16 @@ async def openai_llama_index(llm_config: OpenAIModelConfig, _builder: Builder):
 
     from llama_index.llms.openai import OpenAI
 
-    kwargs = llm_config.model_dump(exclude={"type"}, by_alias=True)
+    llm = OpenAI(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
 
-    if ("base_url" in kwargs and kwargs["base_url"] is None):
-        del kwargs["base_url"]
+    yield _patch_llm_based_on_config(llm, llm_config)
 
-    llm = OpenAI(**kwargs)
 
-    if isinstance(llm_config, RetryMixin):
-        llm = patch_with_retry(llm,
-                               retries=llm_config.num_retries,
-                               retry_codes=llm_config.retry_on_status_codes,
-                               retry_on_messages=llm_config.retry_on_errors)
+@register_llm_client(config_type=LiteLlmModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def litellm_llama_index(llm_config: LiteLlmModelConfig, _builder: Builder):
+
+    from llama_index.llms.litellm import LiteLLM
+
+    llm = LiteLLM(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
 
-    yield llm
+    yield _patch_llm_based_on_config(llm, llm_config)
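
The new _patch_llm_based_on_config helper centralizes what each client factory previously did by hand: retry patching for RetryMixin configs, plus injection of a "thinking" system prompt for ThinkingMixin configs. As a rough, self-contained sketch of the injection pattern only (the toolkit's actual implementation is patch_with_thinking in nat.llm.utils.thinking; the names below are illustrative):

import functools

def wrap_with_system_prompt(client, system_prompt, function_names=("chat", "achat")):
    # Replace each named chat method with a wrapper that prepends a
    # system message to the message list before delegating.
    for name in function_names:
        original = getattr(client, name, None)
        if original is None:
            continue

        def make_wrapper(fn):
            @functools.wraps(fn)
            def wrapper(messages, *args, **kwargs):
                new_messages = [{"role": "system", "content": system_prompt}, *messages]
                return fn(new_messages, *args, **kwargs)
            return wrapper

        setattr(client, name, make_wrapper(original))
    return client

In the actual code above, the wrapped methods are chat, stream_chat, achat, and astream_chat, and the injected message is a llama_index ChatMessage rather than a plain dict.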
nat/plugins/llama_index/register.py

@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# pylint: disable=unused-import
 # flake8: noqa
 # isort:skip_file
 
METADATA

@@ -1,20 +1,24 @@
 Metadata-Version: 2.4
 Name: nvidia-nat-llama-index
-Version: 1.3.0.dev2
+Version: 1.3.0rc2
 Summary: Subpackage for Llama-Index integration in NeMo Agent toolkit
 Keywords: ai,rag,agents
 Classifier: Programming Language :: Python
-Requires-Python: <3.13,>=3.11
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: <3.14,>=3.11
 Description-Content-Type: text/markdown
-Requires-Dist: nvidia-nat==v1.3.0-dev2
+Requires-Dist: nvidia-nat==v1.3.0-rc2
 Requires-Dist: llama-index-core~=0.12.21
-Requires-Dist: llama-index-embeddings-azure-openai~=0.3.1
+Requires-Dist: llama-index-embeddings-azure-openai~=0.3.9
 Requires-Dist: llama-index-embeddings-nvidia~=0.3.1
 Requires-Dist: llama-index-embeddings-openai~=0.3.1
-Requires-Dist: llama-index-llms-azure-openai~=0.3.1
+Requires-Dist: llama-index-llms-azure-openai~=0.3.2
 Requires-Dist: llama-index-llms-bedrock~=0.3.8
+Requires-Dist: llama-index-llms-litellm~=0.5.1
 Requires-Dist: llama-index-llms-nvidia~=0.3.1
-Requires-Dist: llama-index-llms-openai~=0.3.38
+Requires-Dist: llama-index-llms-openai~=0.3.42
 Requires-Dist: llama-index-readers-file~=0.4.4
 Requires-Dist: llama-index~=0.12.21
 
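The dependency bumps above all use PEP 440 compatible-release specifiers: ~=0.3.42 accepts any 0.3.x version at or above 0.3.42 but rejects 0.4.0. A quick check with the packaging library:

from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=0.3.42")  # equivalent to >=0.3.42, ==0.3.*
print(spec.contains("0.3.45"))   # True
print(spec.contains("0.4.0"))    # False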
nvidia_nat_llama_index-1.3.0rc2.dist-info/RECORD

@@ -0,0 +1,11 @@
+nat/meta/pypi.md,sha256=s9C3pgWB0HLIXTx5QPryNOWN0O2fIRIap0p9_zCHlTs,1112
+nat/plugins/llama_index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nat/plugins/llama_index/embedder.py,sha256=4c4uAa19UArqA_Npe5mwEnUhCaguTbyWcAntBzcPEeQ,3493
+nat/plugins/llama_index/llm.py,sha256=ms6qwcBISWhiiOhIUXFNuhpDenTIS5s7U47wB3fEgg0,4944
+nat/plugins/llama_index/register.py,sha256=1x_b8u6cuQwh4Iz_7TcIFWXvLIL9IIKUPE-zR9d6ug8,859
+nat/plugins/llama_index/tool_wrapper.py,sha256=VFKMIIeLdWqHwW2Ax11E2w-_9w3ow6Iuhra1Hk78RYM,1387
+nvidia_nat_llama_index-1.3.0rc2.dist-info/METADATA,sha256=maeF6QMZmaslavxxemYe4eoQ5LnAz6Bku_SL9foX63U,2105
+nvidia_nat_llama_index-1.3.0rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nvidia_nat_llama_index-1.3.0rc2.dist-info/entry_points.txt,sha256=2LqRRju5448P2v8B3y6TSPnk-nOd5T3AmV5JibCnoQc,68
+nvidia_nat_llama_index-1.3.0rc2.dist-info/top_level.txt,sha256=8-CJ2cP6-f0ZReXe5Hzqp-5pvzzHz-5Ds5H2bGqh1-U,4
+nvidia_nat_llama_index-1.3.0rc2.dist-info/RECORD,,
nvidia_nat_llama_index-1.3.0.dev2.dist-info/RECORD

@@ -1,11 +0,0 @@
-nat/meta/pypi.md,sha256=s9C3pgWB0HLIXTx5QPryNOWN0O2fIRIap0p9_zCHlTs,1112
-nat/plugins/llama_index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nat/plugins/llama_index/embedder.py,sha256=ROGlJJxwIZXV3Sg93Z9RIelOOhxuJcrw0B1id43tn_U,3130
-nat/plugins/llama_index/llm.py,sha256=PtXibAiZWCv1pjEeP5A77Tyi-rVUGCHc81jEoIdy3p8,3949
-nat/plugins/llama_index/register.py,sha256=ICmUCqrNvWTDFe6Zjm6B6KUkgqsyqfFPTHMGU74KsA4,891
-nat/plugins/llama_index/tool_wrapper.py,sha256=VFKMIIeLdWqHwW2Ax11E2w-_9w3ow6Iuhra1Hk78RYM,1387
-nvidia_nat_llama_index-1.3.0.dev2.dist-info/METADATA,sha256=uKukv46PucXHXNFo51uQa_FkYeR6JHtk91E5W8cIzxY,1908
-nvidia_nat_llama_index-1.3.0.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nvidia_nat_llama_index-1.3.0.dev2.dist-info/entry_points.txt,sha256=2LqRRju5448P2v8B3y6TSPnk-nOd5T3AmV5JibCnoQc,68
-nvidia_nat_llama_index-1.3.0.dev2.dist-info/top_level.txt,sha256=8-CJ2cP6-f0ZReXe5Hzqp-5pvzzHz-5Ds5H2bGqh1-U,4
-nvidia_nat_llama_index-1.3.0.dev2.dist-info/RECORD,,