nvidia-nat-llama-index 1.3.0.dev2__py3-none-any.whl → 1.3.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ async def azure_openai_llama_index(embedder_config: AzureOpenAIEmbedderModelConf
28
28
 
29
29
  from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
30
30
 
31
- client = AzureOpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True))
31
+ client = AzureOpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True, exclude_none=True))
32
32
 
33
33
  if isinstance(embedder_config, RetryMixin):
34
34
  client = patch_with_retry(client,
@@ -40,17 +40,22 @@ async def azure_openai_llama_index(embedder_config: AzureOpenAIEmbedderModelConf
40
40
 
41
41
 
42
42
  @register_embedder_client(config_type=NIMEmbedderModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
43
- async def nim_llamaindex(embedder_config: NIMEmbedderModelConfig, _builder: Builder):
43
+ async def nim_llama_index(embedder_config: NIMEmbedderModelConfig, _builder: Builder):
44
44
 
45
45
  from llama_index.embeddings.nvidia import NVIDIAEmbedding # pylint: disable=no-name-in-module
46
46
 
47
- config_obj = {
48
- **embedder_config.model_dump(exclude={"type", "model_name"}, by_alias=True),
49
- "model":
50
- embedder_config.model_name,
51
- }
47
+ client = NVIDIAEmbedding(
48
+ **embedder_config.model_dump(exclude={"type", "model_name"}, by_alias=True, exclude_none=True),
49
+ model=embedder_config.model_name,
50
+ )
52
51
 
53
- yield NVIDIAEmbedding(**config_obj)
52
+ if isinstance(embedder_config, RetryMixin):
53
+ client = patch_with_retry(client,
54
+ retries=embedder_config.num_retries,
55
+ retry_codes=embedder_config.retry_on_status_codes,
56
+ retry_on_messages=embedder_config.retry_on_errors)
57
+
58
+ yield client
54
59
 
55
60
 
56
61
  @register_embedder_client(config_type=OpenAIEmbedderModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
@@ -58,7 +63,7 @@ async def openai_llama_index(embedder_config: OpenAIEmbedderModelConfig, _builde
58
63
 
59
64
  from llama_index.embeddings.openai import OpenAIEmbedding
60
65
 
61
- client = OpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True))
66
+ client = OpenAIEmbedding(**embedder_config.model_dump(exclude={"type"}, by_alias=True, exclude_none=True))
62
67
 
63
68
  if isinstance(embedder_config, RetryMixin):
64
69
  client = patch_with_retry(client,
@@ -13,33 +13,70 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
+ from collections.abc import Sequence
17
+ from typing import TypeVar
18
+
16
19
  from nat.builder.builder import Builder
17
20
  from nat.builder.framework_enum import LLMFrameworkEnum
18
21
  from nat.cli.register_workflow import register_llm_client
22
+ from nat.data_models.llm import LLMBaseConfig
19
23
  from nat.data_models.retry_mixin import RetryMixin
24
+ from nat.data_models.thinking_mixin import ThinkingMixin
20
25
  from nat.llm.aws_bedrock_llm import AWSBedrockModelConfig
21
26
  from nat.llm.azure_openai_llm import AzureOpenAIModelConfig
22
27
  from nat.llm.nim_llm import NIMModelConfig
23
28
  from nat.llm.openai_llm import OpenAIModelConfig
29
+ from nat.llm.utils.thinking import BaseThinkingInjector
30
+ from nat.llm.utils.thinking import FunctionArgumentWrapper
31
+ from nat.llm.utils.thinking import patch_with_thinking
24
32
  from nat.utils.exception_handlers.automatic_retries import patch_with_retry
33
+ from nat.utils.type_utils import override
25
34
 
35
+ ModelType = TypeVar("ModelType")
26
36
 
27
- @register_llm_client(config_type=AWSBedrockModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
28
- async def aws_bedrock_llama_index(llm_config: AWSBedrockModelConfig, _builder: Builder):
29
37
 
30
- from llama_index.llms.bedrock import Bedrock
38
+ def _patch_llm_based_on_config(client: ModelType, llm_config: LLMBaseConfig) -> ModelType:
39
+
40
+ from llama_index.core.base.llms.types import ChatMessage
31
41
 
32
- kwargs = llm_config.model_dump(exclude={"type", "max_tokens"}, by_alias=True)
42
+ class LlamaIndexThinkingInjector(BaseThinkingInjector):
33
43
 
34
- llm = Bedrock(**kwargs)
44
+ @override
45
+ def inject(self, messages: Sequence[ChatMessage], *args, **kwargs) -> FunctionArgumentWrapper:
46
+ new_messages = [ChatMessage(role="system", content=self.system_prompt)] + list(messages)
47
+ return FunctionArgumentWrapper(new_messages, *args, **kwargs)
35
48
 
36
49
  if isinstance(llm_config, RetryMixin):
37
- llm = patch_with_retry(llm,
38
- retries=llm_config.num_retries,
39
- retry_codes=llm_config.retry_on_status_codes,
40
- retry_on_messages=llm_config.retry_on_errors)
50
+ client = patch_with_retry(client,
51
+ retries=llm_config.num_retries,
52
+ retry_codes=llm_config.retry_on_status_codes,
53
+ retry_on_messages=llm_config.retry_on_errors)
54
+
55
+ if isinstance(llm_config, ThinkingMixin) and llm_config.thinking_system_prompt is not None:
56
+ client = patch_with_thinking(
57
+ client,
58
+ LlamaIndexThinkingInjector(
59
+ system_prompt=llm_config.thinking_system_prompt,
60
+ function_names=[
61
+ "chat",
62
+ "stream_chat",
63
+ "achat",
64
+ "astream_chat",
65
+ ],
66
+ ))
67
+
68
+ return client
69
+
70
+
71
+ @register_llm_client(config_type=AWSBedrockModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
72
+ async def aws_bedrock_llama_index(llm_config: AWSBedrockModelConfig, _builder: Builder):
73
+
74
+ from llama_index.llms.bedrock import Bedrock
75
+
76
+ # LlamaIndex uses context_size instead of max_tokens
77
+ llm = Bedrock(**llm_config.model_dump(exclude={"type", "top_p", "thinking"}, by_alias=True))
41
78
 
42
- yield llm
79
+ yield _patch_llm_based_on_config(llm, llm_config)
43
80
 
44
81
 
45
82
  @register_llm_client(config_type=AzureOpenAIModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
@@ -47,17 +84,9 @@ async def azure_openai_llama_index(llm_config: AzureOpenAIModelConfig, _builder:
47
84
 
48
85
  from llama_index.llms.azure_openai import AzureOpenAI
49
86
 
50
- kwargs = llm_config.model_dump(exclude={"type"}, by_alias=True)
87
+ llm = AzureOpenAI(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True))
51
88
 
52
- llm = AzureOpenAI(**kwargs)
53
-
54
- if isinstance(llm_config, RetryMixin):
55
- llm = patch_with_retry(llm,
56
- retries=llm_config.num_retries,
57
- retry_codes=llm_config.retry_on_status_codes,
58
- retry_on_messages=llm_config.retry_on_errors)
59
-
60
- yield llm
89
+ yield _patch_llm_based_on_config(llm, llm_config)
61
90
 
62
91
 
63
92
  @register_llm_client(config_type=NIMModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
@@ -65,20 +94,9 @@ async def nim_llama_index(llm_config: NIMModelConfig, _builder: Builder):
65
94
 
66
95
  from llama_index.llms.nvidia import NVIDIA
67
96
 
68
- kwargs = llm_config.model_dump(exclude={"type"}, by_alias=True)
97
+ llm = NVIDIA(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
69
98
 
70
- if ("base_url" in kwargs and kwargs["base_url"] is None):
71
- del kwargs["base_url"]
72
-
73
- llm = NVIDIA(**kwargs)
74
-
75
- if isinstance(llm_config, RetryMixin):
76
- llm = patch_with_retry(llm,
77
- retries=llm_config.num_retries,
78
- retry_codes=llm_config.retry_on_status_codes,
79
- retry_on_messages=llm_config.retry_on_errors)
80
-
81
- yield llm
99
+ yield _patch_llm_based_on_config(llm, llm_config)
82
100
 
83
101
 
84
102
  @register_llm_client(config_type=OpenAIModelConfig, wrapper_type=LLMFrameworkEnum.LLAMA_INDEX)
@@ -86,17 +104,6 @@ async def openai_llama_index(llm_config: OpenAIModelConfig, _builder: Builder):
86
104
 
87
105
  from llama_index.llms.openai import OpenAI
88
106
 
89
- kwargs = llm_config.model_dump(exclude={"type"}, by_alias=True)
90
-
91
- if ("base_url" in kwargs and kwargs["base_url"] is None):
92
- del kwargs["base_url"]
93
-
94
- llm = OpenAI(**kwargs)
95
-
96
- if isinstance(llm_config, RetryMixin):
97
- llm = patch_with_retry(llm,
98
- retries=llm_config.num_retries,
99
- retry_codes=llm_config.retry_on_status_codes,
100
- retry_on_messages=llm_config.retry_on_errors)
107
+ llm = OpenAI(**llm_config.model_dump(exclude={"type", "thinking"}, by_alias=True, exclude_none=True))
101
108
 
102
- yield llm
109
+ yield _patch_llm_based_on_config(llm, llm_config)
@@ -13,7 +13,6 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- # pylint: disable=unused-import
17
16
  # flake8: noqa
18
17
  # isort:skip_file
19
18
 
@@ -1,20 +1,23 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nvidia-nat-llama-index
3
- Version: 1.3.0.dev2
3
+ Version: 1.3.0rc1
4
4
  Summary: Subpackage for Llama-Index integration in NeMo Agent toolkit
5
5
  Keywords: ai,rag,agents
6
6
  Classifier: Programming Language :: Python
7
- Requires-Python: <3.13,>=3.11
7
+ Classifier: Programming Language :: Python :: 3.11
8
+ Classifier: Programming Language :: Python :: 3.12
9
+ Classifier: Programming Language :: Python :: 3.13
10
+ Requires-Python: <3.14,>=3.11
8
11
  Description-Content-Type: text/markdown
9
- Requires-Dist: nvidia-nat==v1.3.0-dev2
12
+ Requires-Dist: nvidia-nat==v1.3.0-rc1
10
13
  Requires-Dist: llama-index-core~=0.12.21
11
- Requires-Dist: llama-index-embeddings-azure-openai~=0.3.1
14
+ Requires-Dist: llama-index-embeddings-azure-openai~=0.3.9
12
15
  Requires-Dist: llama-index-embeddings-nvidia~=0.3.1
13
16
  Requires-Dist: llama-index-embeddings-openai~=0.3.1
14
- Requires-Dist: llama-index-llms-azure-openai~=0.3.1
17
+ Requires-Dist: llama-index-llms-azure-openai~=0.3.2
15
18
  Requires-Dist: llama-index-llms-bedrock~=0.3.8
16
19
  Requires-Dist: llama-index-llms-nvidia~=0.3.1
17
- Requires-Dist: llama-index-llms-openai~=0.3.38
20
+ Requires-Dist: llama-index-llms-openai~=0.3.42
18
21
  Requires-Dist: llama-index-readers-file~=0.4.4
19
22
  Requires-Dist: llama-index~=0.12.21
20
23
 
@@ -0,0 +1,11 @@
1
+ nat/meta/pypi.md,sha256=s9C3pgWB0HLIXTx5QPryNOWN0O2fIRIap0p9_zCHlTs,1112
2
+ nat/plugins/llama_index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ nat/plugins/llama_index/embedder.py,sha256=4c4uAa19UArqA_Npe5mwEnUhCaguTbyWcAntBzcPEeQ,3493
4
+ nat/plugins/llama_index/llm.py,sha256=0M1m1sjGwVMNibiAzkH2_hnAfdC-Ptc6PM9IKDmfSL8,4500
5
+ nat/plugins/llama_index/register.py,sha256=1x_b8u6cuQwh4Iz_7TcIFWXvLIL9IIKUPE-zR9d6ug8,859
6
+ nat/plugins/llama_index/tool_wrapper.py,sha256=VFKMIIeLdWqHwW2Ax11E2w-_9w3ow6Iuhra1Hk78RYM,1387
7
+ nvidia_nat_llama_index-1.3.0rc1.dist-info/METADATA,sha256=RwXDLjxwnnOjiWB-XC5y6olMd0ckHTzRIDlE3SAafyg,2058
8
+ nvidia_nat_llama_index-1.3.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ nvidia_nat_llama_index-1.3.0rc1.dist-info/entry_points.txt,sha256=2LqRRju5448P2v8B3y6TSPnk-nOd5T3AmV5JibCnoQc,68
10
+ nvidia_nat_llama_index-1.3.0rc1.dist-info/top_level.txt,sha256=8-CJ2cP6-f0ZReXe5Hzqp-5pvzzHz-5Ds5H2bGqh1-U,4
11
+ nvidia_nat_llama_index-1.3.0rc1.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- nat/meta/pypi.md,sha256=s9C3pgWB0HLIXTx5QPryNOWN0O2fIRIap0p9_zCHlTs,1112
2
- nat/plugins/llama_index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- nat/plugins/llama_index/embedder.py,sha256=ROGlJJxwIZXV3Sg93Z9RIelOOhxuJcrw0B1id43tn_U,3130
4
- nat/plugins/llama_index/llm.py,sha256=PtXibAiZWCv1pjEeP5A77Tyi-rVUGCHc81jEoIdy3p8,3949
5
- nat/plugins/llama_index/register.py,sha256=ICmUCqrNvWTDFe6Zjm6B6KUkgqsyqfFPTHMGU74KsA4,891
6
- nat/plugins/llama_index/tool_wrapper.py,sha256=VFKMIIeLdWqHwW2Ax11E2w-_9w3ow6Iuhra1Hk78RYM,1387
7
- nvidia_nat_llama_index-1.3.0.dev2.dist-info/METADATA,sha256=uKukv46PucXHXNFo51uQa_FkYeR6JHtk91E5W8cIzxY,1908
8
- nvidia_nat_llama_index-1.3.0.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
- nvidia_nat_llama_index-1.3.0.dev2.dist-info/entry_points.txt,sha256=2LqRRju5448P2v8B3y6TSPnk-nOd5T3AmV5JibCnoQc,68
10
- nvidia_nat_llama_index-1.3.0.dev2.dist-info/top_level.txt,sha256=8-CJ2cP6-f0ZReXe5Hzqp-5pvzzHz-5Ds5H2bGqh1-U,4
11
- nvidia_nat_llama_index-1.3.0.dev2.dist-info/RECORD,,