nvidia-nat-llama-index 1.3.dev0__py3-none-any.whl → 1.3.0rc1__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
nat/plugins/llama_index/embedder.py
@@ -12,28 +12,58 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# pylint: disable=unused-argument
 
 from nat.builder.builder import Builder
 from nat.builder.framework_enum import LLMFrameworkEnum
 from nat.cli.register_workflow import register_embedder_client
 from nat.data_models.retry_mixin import RetryMixin
+from nat.embedder.azure_openai_embedder import AzureOpenAIEmbedderModelConfig
 from nat.embedder.nim_embedder import NIMEmbedderModelConfig
+from nat.embedder.openai_embedder import OpenAIEmbedderModelConfig
 from nat.utils.exception_handlers.automatic_retries import patch_with_retry
 
 
+@register_embedder_client(config_type=AzureOpenAIEmbedderModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def azure_openai_llama_index(embedder_config: AzureOpenAIEmbedderModelConfig, _builder: Builder):
+
+    from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
+
+    client = AzureOpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True, exclude_none=True))
+
+    if isinstance(embedder_config, RetryMixin):
+        client = patch_with_retry(client,
+                                  retries=embedder_config.num_retries,
+                                  retry_codes=embedder_config.retry_on_status_codes,
+                                  retry_on_messages=embedder_config.retry_on_errors)
+
+    yield client
+
+
 @register_embedder_client(config_type=NIMEmbedderModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
-async def nim_llamaindex(embedder_config: NIMEmbedderModelConfig, builder: Builder):
+async def nim_llama_index(embedder_config: NIMEmbedderModelConfig, _builder: Builder):
 
     from llama_index.embeddings.nvidia import NVIDIAEmbedding  # pylint: disable=no-name-in-module
 
-    config_obj = {
-        **embedder_config.model_dump(exclude={"type", "model_name"}, by_alias=True),
-        "model":
-            embedder_config.model_name,
-    }
+    client = NVIDIAEmbedding(
+        **embedder_config.model_dump(exclude={"type", "model_name"}, by_alias=True, exclude_none=True),
+        model=embedder_config.model_name,
+    )
+
+    if isinstance(embedder_config, RetryMixin):
+        client = patch_with_retry(client,
+                                  retries=embedder_config.num_retries,
+                                  retry_codes=embedder_config.retry_on_status_codes,
+                                  retry_on_messages=embedder_config.retry_on_errors)
+
+    yield client
+
+
+@register_embedder_client(config_type=OpenAIEmbedderModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def openai_llama_index(embedder_config: OpenAIEmbedderModelConfig, _builder: Builder):
+
+    from llama_index.embeddings.openai import OpenAIEmbedding
 
-    client = NVIDIAEmbedding(**config_obj)
+    client = OpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True, exclude_none=True))
 
     if isinstance(embedder_config, RetryMixin):
         client = patch_with_retry(client,
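
Note: every embedder client above follows the same pattern: the Pydantic config is serialized with model_dump(exclude={"type"}, by_alias=True, exclude_none=True) and the resulting dict is splatted into the framework constructor. A minimal sketch of what those flags do, using a hypothetical stand-in config (DemoEmbedderConfig and its fields are illustrative, not the actual NAT models):

    from pydantic import BaseModel, Field

    # Hypothetical stand-in for an embedder config. "type" acts as the
    # discriminator and is stripped before client construction.
    class DemoEmbedderConfig(BaseModel):
        model_config = {"populate_by_name": True, "protected_namespaces": ()}

        type: str = "demo"
        model_name: str = Field(alias="model")   # serialized under its alias
        api_key: str | None = None               # dropped when left unset

    cfg = DemoEmbedderConfig(model_name="nv-embedqa")

    # exclude={"type"} drops the discriminator, by_alias=True renames
    # model_name -> model, and exclude_none=True omits unset optional fields.
    kwargs = cfg.model_dump(exclude={"type"}, by_alias=True, exclude_none=True)
    assert kwargs == {"model": "nv-embedqa"}

The added exclude_none=True is what replaces the old manual del of None-valued keys such as base_url, visible in the llm.py hunk below.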
nat/plugins/llama_index/llm.py
@@ -13,71 +13,97 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from collections.abc import Sequence
+from typing import TypeVar
+
 from nat.builder.builder import Builder
 from nat.builder.framework_enum import LLMFrameworkEnum
 from nat.cli.register_workflow import register_llm_client
+from nat.data_models.llm import LLMBaseConfig
 from nat.data_models.retry_mixin import RetryMixin
+from nat.data_models.thinking_mixin import ThinkingMixin
 from nat.llm.aws_bedrock_llm import AWSBedrockModelConfig
+from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
 from nat.llm.nim_llm import NIMModelConfig
 from nat.llm.openai_llm import OpenAIModelConfig
+from nat.llm.utils.thinking import BaseThinkingInjector
+from nat.llm.utils.thinking import FunctionArgumentWrapper
+from nat.llm.utils.thinking import patch_with_thinking
 from nat.utils.exception_handlers.automatic_retries import patch_with_retry
+from nat.utils.type_utils import override
 
+ModelType = TypeVar("ModelType")
 
-@register_llm_client(config_type=NIMModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
-async def nim_llama_index(llm_config: NIMModelConfig, builder: Builder):
 
-    from llama_index.llms.nvidia import NVIDIA
+def _patch_llm_based_on_config(client: ModelType, llm_config: LLMBaseConfig) -> ModelType:
 
-    kwargs = llm_config.model_dump(exclude={"type"}, by_alias=True)
+    from llama_index.core.base.llms.types import ChatMessage
 
-    if ("base_url" in kwargs and kwargs["base_url"] is None):
-        del kwargs["base_url"]
+    class LlamaIndexThinkingInjector(BaseThinkingInjector):
 
-    llm = NVIDIA(**kwargs)
+        @override
+        def inject(self, messages: Sequence[ChatMessage], *args, **kwargs) -> FunctionArgumentWrapper:
+            new_messages = [ChatMessage(role="system", content=self.system_prompt)] + list(messages)
+            return FunctionArgumentWrapper(new_messages, *args, **kwargs)
 
     if isinstance(llm_config, RetryMixin):
-        llm = patch_with_retry(llm,
-                               retries=llm_config.num_retries,
-                               retry_codes=llm_config.retry_on_status_codes,
-                               retry_on_messages=llm_config.retry_on_errors)
+        client = patch_with_retry(client,
+                                  retries=llm_config.num_retries,
+                                  retry_codes=llm_config.retry_on_status_codes,
+                                  retry_on_messages=llm_config.retry_on_errors)
+
+    if isinstance(llm_config, ThinkingMixin) and llm_config.thinking_system_prompt is not None:
+        client = patch_with_thinking(
+            client,
+            LlamaIndexThinkingInjector(
+                system_prompt=llm_config.thinking_system_prompt,
+                function_names=[
+                    "chat",
+                    "stream_chat",
+                    "achat",
+                    "astream_chat",
+                ],
+            ))
+
+    return client
 
-    yield llm
 
+@register_llm_client(config_type=AWSBedrockModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def aws_bedrock_llama_index(llm_config: AWSBedrockModelConfig, _builder: Builder):
 
-@register_llm_client(config_type=OpenAIModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
-async def openai_llama_index(llm_config: OpenAIModelConfig, builder: Builder):
+    from llama_index.llms.bedrock import Bedrock
 
-    from llama_index.llms.openai import OpenAI
+    # LlamaIndex uses context_size instead of max_tokens
+    llm = Bedrock(**llm_config.model_dump(exclude={"type", "top_p", "thinking"}, by_alias=True))
 
-    kwargs = llm_config.model_dump(exclude={"type"}, by_alias=True)
+    yield _patch_llm_based_on_config(llm, llm_config)
 
-    if ("base_url" in kwargs and kwargs["base_url"] is None):
-        del kwargs["base_url"]
 
-    llm = OpenAI(**kwargs)
+@register_llm_client(config_type=AzureOpenAIModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def azure_openai_llama_index(llm_config: AzureOpenAIModelConfig, _builder: Builder):
 
-    if isinstance(llm_config, RetryMixin):
-        llm = patch_with_retry(llm,
-                               retries=llm_config.num_retries,
-                               retry_codes=llm_config.retry_on_status_codes,
-                               retry_on_messages=llm_config.retry_on_errors)
+    from llama_index.llms.azure_openai import AzureOpenAI
 
-    yield llm
+    llm = AzureOpenAI(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True))
 
+    yield _patch_llm_based_on_config(llm, llm_config)
 
-@register_llm_client(config_type=AWSBedrockModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
-async def aws_bedrock_llama_index(llm_config: AWSBedrockModelConfig, builder: Builder):
 
-    from llama_index.llms.bedrock import Bedrock
+@register_llm_client(config_type=NIMModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def nim_llama_index(llm_config: NIMModelConfig, _builder: Builder):
 
-    kwargs = llm_config.model_dump(exclude={"type", "max_tokens"}, by_alias=True)
+    from llama_index.llms.nvidia import NVIDIA
 
-    llm = Bedrock(**kwargs)
+    llm = NVIDIA(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
 
-    if isinstance(llm_config, RetryMixin):
-        llm = patch_with_retry(llm,
-                               retries=llm_config.num_retries,
-                               retry_codes=llm_config.retry_on_status_codes,
-                               retry_on_messages=llm_config.retry_on_errors)
+    yield _patch_llm_based_on_config(llm, llm_config)
+
+
+@register_llm_client(config_type=OpenAIModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
+async def openai_llama_index(llm_config: OpenAIModelConfig, _builder: Builder):
+
+    from llama_index.llms.openai import OpenAI
+
+    llm = OpenAI(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
 
-    yield llm
+    yield _patch_llm_based_on_config(llm, llm_config)
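
The new _patch_llm_based_on_config helper applies up to two wrappers to a freshly built client: retry patching, as before, and, when the config carries a thinking_system_prompt, injection of that prompt as a leading system message on the chat entry points (chat, stream_chat, achat, astream_chat). A rough sketch of the injection idea against plain callables; wrap_with_system_prompt, the dict-shaped messages, and the messages-first calling convention are assumptions for illustration, not the NAT API:

    from functools import wraps

    def wrap_with_system_prompt(client, method_names, system_prompt):
        """Shadow each named method with a version that prepends a system message."""
        for name in method_names:
            original = getattr(client, name)

            @wraps(original)
            def patched(messages, *args, _original=original, **kwargs):
                # The real injector builds a framework ChatMessage; plain dicts
                # stand in here to keep the sketch self-contained.
                injected = [{"role": "system", "content": system_prompt}, *messages]
                return _original(injected, *args, **kwargs)

            setattr(client, name, patched)
        return client

Binding _original as a default argument avoids the classic late-binding bug when patching several methods in one loop.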
nat/plugins/llama_index/register.py
@@ -13,12 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# pylint: disable=unused-import
 # flake8: noqa
 # isort:skip_file
 
 # Import any providers which need to be automatically registered here
 
+from . import embedder
 from . import llm
 from . import tool_wrapper
-from . import embedder
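
Importing the submodules is all register.py needs to do: each @register_llm_client / @register_embedder_client decorator runs at import time and records its factory in a registry keyed by config type. Schematically (REGISTRY and register_client are illustrative names, not the nat.cli.register_workflow API):

    # Toy registry showing why a bare import is enough to register providers.
    REGISTRY: dict[type, object] = {}

    def register_client(config_type: type):
        def decorator(fn):
            REGISTRY[config_type] = fn  # side effect fires at import time
            return fn
        return decorator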
nvidia_nat_llama_index-1.3.0rc1.dist-info/METADATA
@@ -1,18 +1,25 @@
 Metadata-Version: 2.4
 Name: nvidia-nat-llama-index
-Version: 1.3.dev0
+Version: 1.3.0rc1
 Summary: Subpackage for Llama-Index integration in NeMo Agent toolkit
 Keywords: ai,rag,agents
 Classifier: Programming Language :: Python
-Requires-Python: <3.13,>=3.11
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: <3.14,>=3.11
 Description-Content-Type: text/markdown
-Requires-Dist: nvidia-nat==v1.3-dev
-Requires-Dist: llama-index-core==0.12.21
-Requires-Dist: llama-index-embeddings-nvidia==0.3.1
-Requires-Dist: llama-index-llms-bedrock==0.3.8
-Requires-Dist: llama-index-llms-nvidia==0.3.1
-Requires-Dist: llama-index-readers-file==0.4.4
-Requires-Dist: llama-index==0.12.21
+Requires-Dist: nvidia-nat==v1.3.0-rc1
+Requires-Dist: llama-index-core~=0.12.21
+Requires-Dist: llama-index-embeddings-azure-openai~=0.3.9
+Requires-Dist: llama-index-embeddings-nvidia~=0.3.1
+Requires-Dist: llama-index-embeddings-openai~=0.3.1
+Requires-Dist: llama-index-llms-azure-openai~=0.3.2
+Requires-Dist: llama-index-llms-bedrock~=0.3.8
+Requires-Dist: llama-index-llms-nvidia~=0.3.1
+Requires-Dist: llama-index-llms-openai~=0.3.42
+Requires-Dist: llama-index-readers-file~=0.4.4
+Requires-Dist: llama-index~=0.12.21
 
 <!--
 SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
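
Besides the new Azure and OpenAI extras, the pins loosen from exact == versions to PEP 440 compatible-release specifiers: ~=0.12.21 means >=0.12.21 together with ==0.12.*, so later patch releases are accepted and the next minor release is not. This can be checked with the packaging library:

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet("~=0.12.21")   # equivalent to >=0.12.21, ==0.12.*
    assert "0.12.30" in spec           # later patches satisfy the pin
    assert "0.13.0" not in spec        # the next minor release does not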
nvidia_nat_llama_index-1.3.0rc1.dist-info/RECORD
@@ -0,0 +1,11 @@
+nat/meta/pypi.md,sha256=s9C3pgWB0HLIXTx5QPryNOWN0O2fIRIap0p9_zCHlTs,1112
+nat/plugins/llama_index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nat/plugins/llama_index/embedder.py,sha256=4c4uAa19UArqA_Npe5mwEnUhCaguTbyWcAntBzcPEeQ,3493
+nat/plugins/llama_index/llm.py,sha256=0M1m1sjGwVMNibiAzkH2_hnAfdC-Ptc6PM9IKDmfSL8,4500
+nat/plugins/llama_index/register.py,sha256=1x_b8u6cuQwh4Iz_7TcIFWXvLIL9IIKUPE-zR9d6ug8,859
+nat/plugins/llama_index/tool_wrapper.py,sha256=VFKMIIeLdWqHwW2Ax11E2w-_9w3ow6Iuhra1Hk78RYM,1387
+nvidia_nat_llama_index-1.3.0rc1.dist-info/METADATA,sha256=RwXDLjxwnnOjiWB-XC5y6olMd0ckHTzRIDlE3SAafyg,2058
+nvidia_nat_llama_index-1.3.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nvidia_nat_llama_index-1.3.0rc1.dist-info/entry_points.txt,sha256=2LqRRju5448P2v8B3y6TSPnk-nOd5T3AmV5JibCnoQc,68
+nvidia_nat_llama_index-1.3.0rc1.dist-info/top_level.txt,sha256=8-CJ2cP6-f0ZReXe5Hzqp-5pvzzHz-5Ds5H2bGqh1-U,4
+nvidia_nat_llama_index-1.3.0rc1.dist-info/RECORD,,
nvidia_nat_llama_index-1.3.dev0.dist-info/RECORD
@@ -1,11 +0,0 @@
-nat/meta/pypi.md,sha256=s9C3pgWB0HLIXTx5QPryNOWN0O2fIRIap0p9_zCHlTs,1112
-nat/plugins/llama_index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nat/plugins/llama_index/embedder.py,sha256=L7Wx9QJH2Sh0y6kmgisjtTZfDC9YcuuusKl0Tn_7JgA,1921
-nat/plugins/llama_index/llm.py,sha256=VyuIqks2G9Zcs9a0X7jJyASzSUdtL0Pns0v4_rFEsJE,3220
-nat/plugins/llama_index/register.py,sha256=DelI6uth8dO8h_7czPW4Akfr_ajXDBwhEa1dJzRf5u4,891
-nat/plugins/llama_index/tool_wrapper.py,sha256=VFKMIIeLdWqHwW2Ax11E2w-_9w3ow6Iuhra1Hk78RYM,1387
-nvidia_nat_llama_index-1.3.dev0.dist-info/METADATA,sha256=EP1nCqjUnjLBuvlh_JKpFIG_rebxwmKzo0MjDejM0MY,1694
-nvidia_nat_llama_index-1.3.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nvidia_nat_llama_index-1.3.dev0.dist-info/entry_points.txt,sha256=2LqRRju5448P2v8B3y6TSPnk-nOd5T3AmV5JibCnoQc,68
-nvidia_nat_llama_index-1.3.dev0.dist-info/top_level.txt,sha256=8-CJ2cP6-f0ZReXe5Hzqp-5pvzzHz-5Ds5H2bGqh1-U,4
-nvidia_nat_llama_index-1.3.dev0.dist-info/RECORD,,
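
For reference when reading the RECORD hunks above: each row is path,sha256=<digest>,size, where the digest is the urlsafe base64 of the file's SHA-256 with trailing '=' padding stripped (the RECORD file itself carries no hash, hence the trailing ',,'). A small sketch for recomputing one entry:

    import base64
    import hashlib

    def record_digest(path: str) -> str:
        """Recompute the sha256= token of a wheel RECORD row for verification."""
        with open(path, "rb") as f:
            digest = hashlib.sha256(f.read()).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()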