amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
- amd_gaia-0.15.1.dist-info/RECORD +178 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2177 -2177
- gaia/agents/base/api_agent.py +120 -120
- gaia/agents/base/console.py +1841 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +83 -83
- gaia/agents/blender/agent.py +556 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +835 -835
- gaia/agents/chat/app.py +1058 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1729 -1729
- gaia/agents/chat/tools/shell_tools.py +436 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2036 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +642 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1506 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1974 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +173 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +430 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5481 -5621
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/llm/__init__.py +9 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3236 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +120 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +163 -163
- gaia/talk/app.py +289 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.14.3.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -729
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
gaia/rag/app.py
CHANGED
|
@@ -1,293 +1,293 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
3
|
-
# SPDX-License-Identifier: MIT
|
|
4
|
-
|
|
5
|
-
"""
|
|
6
|
-
GAIA RAG Application - Simple PDF document Q&A
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import argparse
|
|
10
|
-
import os
|
|
11
|
-
import sys
|
|
12
|
-
from pathlib import Path
|
|
13
|
-
|
|
14
|
-
from gaia.rag.sdk import RAGSDK, RAGConfig
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def index_command(args):
|
|
18
|
-
"""Index PDF documents."""
|
|
19
|
-
config = RAGConfig(
|
|
20
|
-
model=args.model,
|
|
21
|
-
show_stats=args.verbose,
|
|
22
|
-
chunk_size=args.chunk_size,
|
|
23
|
-
max_chunks=args.max_chunks,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
rag = RAGSDK(config)
|
|
27
|
-
|
|
28
|
-
success_count = 0
|
|
29
|
-
for pdf_path in args.files:
|
|
30
|
-
if not os.path.exists(pdf_path):
|
|
31
|
-
print(f"❌ File not found: {pdf_path}")
|
|
32
|
-
continue
|
|
33
|
-
|
|
34
|
-
print(f"📄 Indexing: {pdf_path}")
|
|
35
|
-
if rag.index_document(pdf_path):
|
|
36
|
-
print(f"✅ Successfully indexed: {pdf_path}")
|
|
37
|
-
success_count += 1
|
|
38
|
-
else:
|
|
39
|
-
print(f"❌ Failed to index: {pdf_path}")
|
|
40
|
-
|
|
41
|
-
print(f"\n📊 Indexed {success_count}/{len(args.files)} documents")
|
|
42
|
-
|
|
43
|
-
# Show status
|
|
44
|
-
status = rag.get_status()
|
|
45
|
-
print(f"📚 Total chunks: {status['total_chunks']}")
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def query_command(args):
|
|
49
|
-
"""Query indexed documents."""
|
|
50
|
-
if not args.question:
|
|
51
|
-
print("❌ Question is required for query command")
|
|
52
|
-
return
|
|
53
|
-
|
|
54
|
-
# If PDF files provided, index them first
|
|
55
|
-
config = RAGConfig(
|
|
56
|
-
model=args.model,
|
|
57
|
-
show_stats=args.verbose,
|
|
58
|
-
chunk_size=args.chunk_size,
|
|
59
|
-
max_chunks=args.max_chunks,
|
|
60
|
-
)
|
|
61
|
-
|
|
62
|
-
rag = RAGSDK(config)
|
|
63
|
-
|
|
64
|
-
# Index documents if provided
|
|
65
|
-
if args.files:
|
|
66
|
-
print("📄 Indexing documents...")
|
|
67
|
-
for pdf_path in args.files:
|
|
68
|
-
if os.path.exists(pdf_path):
|
|
69
|
-
print(f" • {pdf_path}")
|
|
70
|
-
rag.index_document(pdf_path)
|
|
71
|
-
|
|
72
|
-
# Check if we have any indexed documents
|
|
73
|
-
status = rag.get_status()
|
|
74
|
-
if status["total_chunks"] == 0:
|
|
75
|
-
print("❌ No documents indexed. Please index documents first.")
|
|
76
|
-
print(" Use: gaia rag index document.pdf")
|
|
77
|
-
return
|
|
78
|
-
|
|
79
|
-
print(f"\n❓ Question: {args.question}")
|
|
80
|
-
print("🤔 Searching...")
|
|
81
|
-
|
|
82
|
-
try:
|
|
83
|
-
response = rag.query(args.question)
|
|
84
|
-
|
|
85
|
-
print("\n💬 Answer:")
|
|
86
|
-
print(response.text)
|
|
87
|
-
|
|
88
|
-
if args.verbose and response.chunks:
|
|
89
|
-
print(f"\n📖 Retrieved {len(response.chunks)} relevant chunks:")
|
|
90
|
-
for i, (chunk, score) in enumerate(
|
|
91
|
-
zip(response.chunks, response.chunk_scores)
|
|
92
|
-
):
|
|
93
|
-
print(f"\n Chunk {i+1} (relevance: {score:.3f}):")
|
|
94
|
-
print(f" {chunk[:200]}...")
|
|
95
|
-
|
|
96
|
-
if response.stats and args.verbose:
|
|
97
|
-
print(f"\n📊 Stats: {response.stats}")
|
|
98
|
-
|
|
99
|
-
except Exception as e:
|
|
100
|
-
print(f"❌ Query failed: {e}")
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def status_command(args): # pylint: disable=unused-argument
|
|
104
|
-
"""Show RAG system status."""
|
|
105
|
-
config = RAGConfig()
|
|
106
|
-
rag = RAGSDK(config)
|
|
107
|
-
|
|
108
|
-
status = rag.get_status()
|
|
109
|
-
|
|
110
|
-
print("📊 GAIA RAG Status")
|
|
111
|
-
print("=" * 30)
|
|
112
|
-
print(f"Indexed files: {status['indexed_files']}")
|
|
113
|
-
print(f"Total chunks: {status['total_chunks']}")
|
|
114
|
-
print(f"Cache directory: {status['cache_dir']}")
|
|
115
|
-
print(f"Embedding model: {status['embedding_model']}")
|
|
116
|
-
print("\nConfiguration:")
|
|
117
|
-
print(f" Chunk size: {status['config']['chunk_size']} tokens")
|
|
118
|
-
print(f" Chunk overlap: {status['config']['chunk_overlap']} tokens")
|
|
119
|
-
print(f" Max chunks per query: {status['config']['max_chunks']}")
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
def clear_cache_command(args):
|
|
123
|
-
"""Clear RAG cache."""
|
|
124
|
-
config = RAGConfig()
|
|
125
|
-
rag = RAGSDK(config)
|
|
126
|
-
|
|
127
|
-
if args.force or input("Clear RAG cache? (y/N): ").lower() == "y":
|
|
128
|
-
rag.clear_cache()
|
|
129
|
-
print("✅ Cache cleared")
|
|
130
|
-
else:
|
|
131
|
-
print("Cache not cleared")
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def quick_command(args):
|
|
135
|
-
"""Quick RAG query - index document and query in one step."""
|
|
136
|
-
if not args.question:
|
|
137
|
-
print("❌ Question is required for quick command")
|
|
138
|
-
return
|
|
139
|
-
|
|
140
|
-
if not args.file:
|
|
141
|
-
print("❌ PDF file is required for quick command")
|
|
142
|
-
return
|
|
143
|
-
|
|
144
|
-
if not os.path.exists(args.file):
|
|
145
|
-
print(f"❌ File not found: {args.file}")
|
|
146
|
-
return
|
|
147
|
-
|
|
148
|
-
# Configure RAG
|
|
149
|
-
config = RAGConfig(
|
|
150
|
-
model=args.model,
|
|
151
|
-
show_stats=args.verbose,
|
|
152
|
-
chunk_size=args.chunk_size,
|
|
153
|
-
max_chunks=args.max_chunks,
|
|
154
|
-
)
|
|
155
|
-
|
|
156
|
-
rag = RAGSDK(config)
|
|
157
|
-
|
|
158
|
-
# Index document
|
|
159
|
-
if args.verbose:
|
|
160
|
-
print(f"📄 Indexing: {args.file}")
|
|
161
|
-
else:
|
|
162
|
-
print(f"📄 Processing {Path(args.file).name}...")
|
|
163
|
-
|
|
164
|
-
if not rag.index_document(args.file):
|
|
165
|
-
print(f"❌ Failed to index: {args.file}")
|
|
166
|
-
return
|
|
167
|
-
|
|
168
|
-
if args.verbose:
|
|
169
|
-
print("✅ Indexed successfully")
|
|
170
|
-
|
|
171
|
-
# Query
|
|
172
|
-
print(f"\n❓ Question: {args.question}")
|
|
173
|
-
print("🤔 Generating answer...")
|
|
174
|
-
|
|
175
|
-
try:
|
|
176
|
-
response = rag.query(args.question)
|
|
177
|
-
|
|
178
|
-
print("\n💬 Answer:")
|
|
179
|
-
print(response.text)
|
|
180
|
-
|
|
181
|
-
if args.verbose and response.chunks:
|
|
182
|
-
print(f"\n📖 Retrieved {len(response.chunks)} relevant chunks:")
|
|
183
|
-
for i, (chunk, score) in enumerate(
|
|
184
|
-
zip(response.chunks, response.chunk_scores)
|
|
185
|
-
):
|
|
186
|
-
print(f"\n Chunk {i+1} (relevance: {score:.3f}):")
|
|
187
|
-
print(f" {chunk[:200]}...")
|
|
188
|
-
|
|
189
|
-
if response.stats and args.verbose:
|
|
190
|
-
print(f"\n📊 Stats: {response.stats}")
|
|
191
|
-
|
|
192
|
-
except Exception as e:
|
|
193
|
-
print(f"❌ Query failed: {e}")
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
def main():
|
|
197
|
-
"""Main entry point for RAG CLI."""
|
|
198
|
-
parser = argparse.ArgumentParser(
|
|
199
|
-
description="GAIA RAG - Simple PDF document Q&A",
|
|
200
|
-
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
201
|
-
epilog="""
|
|
202
|
-
Examples:
|
|
203
|
-
# Index a PDF document
|
|
204
|
-
gaia rag index document.pdf
|
|
205
|
-
|
|
206
|
-
# Query indexed documents
|
|
207
|
-
gaia rag query "What are the key features?"
|
|
208
|
-
|
|
209
|
-
# Query with on-the-fly indexing (index + query in one step)
|
|
210
|
-
gaia rag query "What is this document about?" document.pdf
|
|
211
|
-
|
|
212
|
-
# Show system status
|
|
213
|
-
gaia rag status
|
|
214
|
-
|
|
215
|
-
# Clear cache
|
|
216
|
-
gaia rag clear-cache
|
|
217
|
-
""",
|
|
218
|
-
)
|
|
219
|
-
|
|
220
|
-
# Add subcommands
|
|
221
|
-
subparsers = parser.add_subparsers(dest="command", help="Available commands")
|
|
222
|
-
|
|
223
|
-
# Index command
|
|
224
|
-
index_parser = subparsers.add_parser("index", help="Index PDF documents")
|
|
225
|
-
index_parser.add_argument("files", nargs="+", help="PDF files to index")
|
|
226
|
-
|
|
227
|
-
# Query command
|
|
228
|
-
query_parser = subparsers.add_parser("query", help="Query indexed documents")
|
|
229
|
-
query_parser.add_argument("question", help="Question to ask")
|
|
230
|
-
query_parser.add_argument("files", nargs="*", help="Additional PDF files to index")
|
|
231
|
-
|
|
232
|
-
# Status command
|
|
233
|
-
subparsers.add_parser("status", help="Show RAG system status")
|
|
234
|
-
|
|
235
|
-
# Clear cache command
|
|
236
|
-
clear_parser = subparsers.add_parser("clear-cache", help="Clear RAG cache")
|
|
237
|
-
clear_parser.add_argument(
|
|
238
|
-
"--force", action="store_true", help="Force clear without confirmation"
|
|
239
|
-
)
|
|
240
|
-
|
|
241
|
-
# Quick command (index + query in one step)
|
|
242
|
-
quick_parser = subparsers.add_parser(
|
|
243
|
-
"quick", help="Quick RAG query (index + query in one step)"
|
|
244
|
-
)
|
|
245
|
-
quick_parser.add_argument("file", help="PDF file to index and query")
|
|
246
|
-
quick_parser.add_argument("question", help="Question to ask")
|
|
247
|
-
|
|
248
|
-
# Common arguments for all commands
|
|
249
|
-
for subparser in [index_parser, query_parser, quick_parser]:
|
|
250
|
-
subparser.add_argument(
|
|
251
|
-
"--model", default="Llama-3.2-3B-Instruct-Hybrid", help="Model to use"
|
|
252
|
-
)
|
|
253
|
-
subparser.add_argument(
|
|
254
|
-
"--chunk-size", type=int, default=500, help="Chunk size in tokens"
|
|
255
|
-
)
|
|
256
|
-
subparser.add_argument(
|
|
257
|
-
"--max-chunks", type=int, default=3, help="Maximum chunks to retrieve"
|
|
258
|
-
)
|
|
259
|
-
subparser.add_argument(
|
|
260
|
-
"--verbose", "-v", action="store_true", help="Show detailed output"
|
|
261
|
-
)
|
|
262
|
-
|
|
263
|
-
args = parser.parse_args()
|
|
264
|
-
|
|
265
|
-
if not args.command:
|
|
266
|
-
parser.print_help()
|
|
267
|
-
return
|
|
268
|
-
|
|
269
|
-
try:
|
|
270
|
-
if args.command == "index":
|
|
271
|
-
index_command(args)
|
|
272
|
-
elif args.command == "query":
|
|
273
|
-
query_command(args)
|
|
274
|
-
elif args.command == "quick":
|
|
275
|
-
quick_command(args)
|
|
276
|
-
elif args.command == "status":
|
|
277
|
-
status_command(args)
|
|
278
|
-
elif args.command == "clear-cache":
|
|
279
|
-
clear_cache_command(args)
|
|
280
|
-
|
|
281
|
-
except KeyboardInterrupt:
|
|
282
|
-
print("\n\n⚠️ Operation interrupted")
|
|
283
|
-
except Exception as e:
|
|
284
|
-
print(f"❌ Error: {e}")
|
|
285
|
-
if hasattr(args, "verbose") and args.verbose:
|
|
286
|
-
import traceback
|
|
287
|
-
|
|
288
|
-
traceback.print_exc()
|
|
289
|
-
sys.exit(1)
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
if __name__ == "__main__":
|
|
293
|
-
main()
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
GAIA RAG Application - Simple PDF document Q&A
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import os
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from gaia.rag.sdk import RAGSDK, RAGConfig
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def index_command(args):
|
|
18
|
+
"""Index PDF documents."""
|
|
19
|
+
config = RAGConfig(
|
|
20
|
+
model=args.model,
|
|
21
|
+
show_stats=args.verbose,
|
|
22
|
+
chunk_size=args.chunk_size,
|
|
23
|
+
max_chunks=args.max_chunks,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
rag = RAGSDK(config)
|
|
27
|
+
|
|
28
|
+
success_count = 0
|
|
29
|
+
for pdf_path in args.files:
|
|
30
|
+
if not os.path.exists(pdf_path):
|
|
31
|
+
print(f"❌ File not found: {pdf_path}")
|
|
32
|
+
continue
|
|
33
|
+
|
|
34
|
+
print(f"📄 Indexing: {pdf_path}")
|
|
35
|
+
if rag.index_document(pdf_path):
|
|
36
|
+
print(f"✅ Successfully indexed: {pdf_path}")
|
|
37
|
+
success_count += 1
|
|
38
|
+
else:
|
|
39
|
+
print(f"❌ Failed to index: {pdf_path}")
|
|
40
|
+
|
|
41
|
+
print(f"\n📊 Indexed {success_count}/{len(args.files)} documents")
|
|
42
|
+
|
|
43
|
+
# Show status
|
|
44
|
+
status = rag.get_status()
|
|
45
|
+
print(f"📚 Total chunks: {status['total_chunks']}")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def query_command(args):
|
|
49
|
+
"""Query indexed documents."""
|
|
50
|
+
if not args.question:
|
|
51
|
+
print("❌ Question is required for query command")
|
|
52
|
+
return
|
|
53
|
+
|
|
54
|
+
# If PDF files provided, index them first
|
|
55
|
+
config = RAGConfig(
|
|
56
|
+
model=args.model,
|
|
57
|
+
show_stats=args.verbose,
|
|
58
|
+
chunk_size=args.chunk_size,
|
|
59
|
+
max_chunks=args.max_chunks,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
rag = RAGSDK(config)
|
|
63
|
+
|
|
64
|
+
# Index documents if provided
|
|
65
|
+
if args.files:
|
|
66
|
+
print("📄 Indexing documents...")
|
|
67
|
+
for pdf_path in args.files:
|
|
68
|
+
if os.path.exists(pdf_path):
|
|
69
|
+
print(f" • {pdf_path}")
|
|
70
|
+
rag.index_document(pdf_path)
|
|
71
|
+
|
|
72
|
+
# Check if we have any indexed documents
|
|
73
|
+
status = rag.get_status()
|
|
74
|
+
if status["total_chunks"] == 0:
|
|
75
|
+
print("❌ No documents indexed. Please index documents first.")
|
|
76
|
+
print(" Use: gaia rag index document.pdf")
|
|
77
|
+
return
|
|
78
|
+
|
|
79
|
+
print(f"\n❓ Question: {args.question}")
|
|
80
|
+
print("🤔 Searching...")
|
|
81
|
+
|
|
82
|
+
try:
|
|
83
|
+
response = rag.query(args.question)
|
|
84
|
+
|
|
85
|
+
print("\n💬 Answer:")
|
|
86
|
+
print(response.text)
|
|
87
|
+
|
|
88
|
+
if args.verbose and response.chunks:
|
|
89
|
+
print(f"\n📖 Retrieved {len(response.chunks)} relevant chunks:")
|
|
90
|
+
for i, (chunk, score) in enumerate(
|
|
91
|
+
zip(response.chunks, response.chunk_scores)
|
|
92
|
+
):
|
|
93
|
+
print(f"\n Chunk {i+1} (relevance: {score:.3f}):")
|
|
94
|
+
print(f" {chunk[:200]}...")
|
|
95
|
+
|
|
96
|
+
if response.stats and args.verbose:
|
|
97
|
+
print(f"\n📊 Stats: {response.stats}")
|
|
98
|
+
|
|
99
|
+
except Exception as e:
|
|
100
|
+
print(f"❌ Query failed: {e}")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def status_command(args): # pylint: disable=unused-argument
|
|
104
|
+
"""Show RAG system status."""
|
|
105
|
+
config = RAGConfig()
|
|
106
|
+
rag = RAGSDK(config)
|
|
107
|
+
|
|
108
|
+
status = rag.get_status()
|
|
109
|
+
|
|
110
|
+
print("📊 GAIA RAG Status")
|
|
111
|
+
print("=" * 30)
|
|
112
|
+
print(f"Indexed files: {status['indexed_files']}")
|
|
113
|
+
print(f"Total chunks: {status['total_chunks']}")
|
|
114
|
+
print(f"Cache directory: {status['cache_dir']}")
|
|
115
|
+
print(f"Embedding model: {status['embedding_model']}")
|
|
116
|
+
print("\nConfiguration:")
|
|
117
|
+
print(f" Chunk size: {status['config']['chunk_size']} tokens")
|
|
118
|
+
print(f" Chunk overlap: {status['config']['chunk_overlap']} tokens")
|
|
119
|
+
print(f" Max chunks per query: {status['config']['max_chunks']}")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def clear_cache_command(args):
|
|
123
|
+
"""Clear RAG cache."""
|
|
124
|
+
config = RAGConfig()
|
|
125
|
+
rag = RAGSDK(config)
|
|
126
|
+
|
|
127
|
+
if args.force or input("Clear RAG cache? (y/N): ").lower() == "y":
|
|
128
|
+
rag.clear_cache()
|
|
129
|
+
print("✅ Cache cleared")
|
|
130
|
+
else:
|
|
131
|
+
print("Cache not cleared")
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def quick_command(args):
|
|
135
|
+
"""Quick RAG query - index document and query in one step."""
|
|
136
|
+
if not args.question:
|
|
137
|
+
print("❌ Question is required for quick command")
|
|
138
|
+
return
|
|
139
|
+
|
|
140
|
+
if not args.file:
|
|
141
|
+
print("❌ PDF file is required for quick command")
|
|
142
|
+
return
|
|
143
|
+
|
|
144
|
+
if not os.path.exists(args.file):
|
|
145
|
+
print(f"❌ File not found: {args.file}")
|
|
146
|
+
return
|
|
147
|
+
|
|
148
|
+
# Configure RAG
|
|
149
|
+
config = RAGConfig(
|
|
150
|
+
model=args.model,
|
|
151
|
+
show_stats=args.verbose,
|
|
152
|
+
chunk_size=args.chunk_size,
|
|
153
|
+
max_chunks=args.max_chunks,
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
rag = RAGSDK(config)
|
|
157
|
+
|
|
158
|
+
# Index document
|
|
159
|
+
if args.verbose:
|
|
160
|
+
print(f"📄 Indexing: {args.file}")
|
|
161
|
+
else:
|
|
162
|
+
print(f"📄 Processing {Path(args.file).name}...")
|
|
163
|
+
|
|
164
|
+
if not rag.index_document(args.file):
|
|
165
|
+
print(f"❌ Failed to index: {args.file}")
|
|
166
|
+
return
|
|
167
|
+
|
|
168
|
+
if args.verbose:
|
|
169
|
+
print("✅ Indexed successfully")
|
|
170
|
+
|
|
171
|
+
# Query
|
|
172
|
+
print(f"\n❓ Question: {args.question}")
|
|
173
|
+
print("🤔 Generating answer...")
|
|
174
|
+
|
|
175
|
+
try:
|
|
176
|
+
response = rag.query(args.question)
|
|
177
|
+
|
|
178
|
+
print("\n💬 Answer:")
|
|
179
|
+
print(response.text)
|
|
180
|
+
|
|
181
|
+
if args.verbose and response.chunks:
|
|
182
|
+
print(f"\n📖 Retrieved {len(response.chunks)} relevant chunks:")
|
|
183
|
+
for i, (chunk, score) in enumerate(
|
|
184
|
+
zip(response.chunks, response.chunk_scores)
|
|
185
|
+
):
|
|
186
|
+
print(f"\n Chunk {i+1} (relevance: {score:.3f}):")
|
|
187
|
+
print(f" {chunk[:200]}...")
|
|
188
|
+
|
|
189
|
+
if response.stats and args.verbose:
|
|
190
|
+
print(f"\n📊 Stats: {response.stats}")
|
|
191
|
+
|
|
192
|
+
except Exception as e:
|
|
193
|
+
print(f"❌ Query failed: {e}")
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def main():
|
|
197
|
+
"""Main entry point for RAG CLI."""
|
|
198
|
+
parser = argparse.ArgumentParser(
|
|
199
|
+
description="GAIA RAG - Simple PDF document Q&A",
|
|
200
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
201
|
+
epilog="""
|
|
202
|
+
Examples:
|
|
203
|
+
# Index a PDF document
|
|
204
|
+
gaia rag index document.pdf
|
|
205
|
+
|
|
206
|
+
# Query indexed documents
|
|
207
|
+
gaia rag query "What are the key features?"
|
|
208
|
+
|
|
209
|
+
# Query with on-the-fly indexing (index + query in one step)
|
|
210
|
+
gaia rag query "What is this document about?" document.pdf
|
|
211
|
+
|
|
212
|
+
# Show system status
|
|
213
|
+
gaia rag status
|
|
214
|
+
|
|
215
|
+
# Clear cache
|
|
216
|
+
gaia rag clear-cache
|
|
217
|
+
""",
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
# Add subcommands
|
|
221
|
+
subparsers = parser.add_subparsers(dest="command", help="Available commands")
|
|
222
|
+
|
|
223
|
+
# Index command
|
|
224
|
+
index_parser = subparsers.add_parser("index", help="Index PDF documents")
|
|
225
|
+
index_parser.add_argument("files", nargs="+", help="PDF files to index")
|
|
226
|
+
|
|
227
|
+
# Query command
|
|
228
|
+
query_parser = subparsers.add_parser("query", help="Query indexed documents")
|
|
229
|
+
query_parser.add_argument("question", help="Question to ask")
|
|
230
|
+
query_parser.add_argument("files", nargs="*", help="Additional PDF files to index")
|
|
231
|
+
|
|
232
|
+
# Status command
|
|
233
|
+
subparsers.add_parser("status", help="Show RAG system status")
|
|
234
|
+
|
|
235
|
+
# Clear cache command
|
|
236
|
+
clear_parser = subparsers.add_parser("clear-cache", help="Clear RAG cache")
|
|
237
|
+
clear_parser.add_argument(
|
|
238
|
+
"--force", action="store_true", help="Force clear without confirmation"
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
# Quick command (index + query in one step)
|
|
242
|
+
quick_parser = subparsers.add_parser(
|
|
243
|
+
"quick", help="Quick RAG query (index + query in one step)"
|
|
244
|
+
)
|
|
245
|
+
quick_parser.add_argument("file", help="PDF file to index and query")
|
|
246
|
+
quick_parser.add_argument("question", help="Question to ask")
|
|
247
|
+
|
|
248
|
+
# Common arguments for all commands
|
|
249
|
+
for subparser in [index_parser, query_parser, quick_parser]:
|
|
250
|
+
subparser.add_argument(
|
|
251
|
+
"--model", default="Llama-3.2-3B-Instruct-Hybrid", help="Model to use"
|
|
252
|
+
)
|
|
253
|
+
subparser.add_argument(
|
|
254
|
+
"--chunk-size", type=int, default=500, help="Chunk size in tokens"
|
|
255
|
+
)
|
|
256
|
+
subparser.add_argument(
|
|
257
|
+
"--max-chunks", type=int, default=3, help="Maximum chunks to retrieve"
|
|
258
|
+
)
|
|
259
|
+
subparser.add_argument(
|
|
260
|
+
"--verbose", "-v", action="store_true", help="Show detailed output"
|
|
261
|
+
)
|
|
262
|
+
|
|
263
|
+
args = parser.parse_args()
|
|
264
|
+
|
|
265
|
+
if not args.command:
|
|
266
|
+
parser.print_help()
|
|
267
|
+
return
|
|
268
|
+
|
|
269
|
+
try:
|
|
270
|
+
if args.command == "index":
|
|
271
|
+
index_command(args)
|
|
272
|
+
elif args.command == "query":
|
|
273
|
+
query_command(args)
|
|
274
|
+
elif args.command == "quick":
|
|
275
|
+
quick_command(args)
|
|
276
|
+
elif args.command == "status":
|
|
277
|
+
status_command(args)
|
|
278
|
+
elif args.command == "clear-cache":
|
|
279
|
+
clear_cache_command(args)
|
|
280
|
+
|
|
281
|
+
except KeyboardInterrupt:
|
|
282
|
+
print("\n\n⚠️ Operation interrupted")
|
|
283
|
+
except Exception as e:
|
|
284
|
+
print(f"❌ Error: {e}")
|
|
285
|
+
if hasattr(args, "verbose") and args.verbose:
|
|
286
|
+
import traceback
|
|
287
|
+
|
|
288
|
+
traceback.print_exc()
|
|
289
|
+
sys.exit(1)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
if __name__ == "__main__":
|
|
293
|
+
main()
|