amd-gaia 0.15.0__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
- amd_gaia-0.15.1.dist-info/RECORD +178 -0
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2177 -2177
- gaia/agents/base/api_agent.py +120 -120
- gaia/agents/base/console.py +1841 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +83 -83
- gaia/agents/blender/agent.py +556 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +835 -835
- gaia/agents/chat/app.py +1058 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1729 -1729
- gaia/agents/chat/tools/shell_tools.py +436 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2036 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +642 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1506 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1974 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +173 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +430 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5481 -5632
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/llm/__init__.py +9 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3236 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +120 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +163 -163
- gaia/talk/app.py +289 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.15.0.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -723
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
gaia/eval/claude.py
CHANGED
|
@@ -1,542 +1,542 @@
|
|
|
1
|
-
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
|
|
4
|
-
import base64
|
|
5
|
-
import json
|
|
6
|
-
import os
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
|
|
9
|
-
try:
|
|
10
|
-
import anthropic
|
|
11
|
-
except ImportError:
|
|
12
|
-
anthropic = None
|
|
13
|
-
|
|
14
|
-
try:
|
|
15
|
-
from bs4 import BeautifulSoup
|
|
16
|
-
except ImportError:
|
|
17
|
-
BeautifulSoup = None
|
|
18
|
-
|
|
19
|
-
from dotenv import load_dotenv
|
|
20
|
-
|
|
21
|
-
from gaia.eval.config import DEFAULT_CLAUDE_MODEL, MODEL_PRICING
|
|
22
|
-
from gaia.logger import get_logger
|
|
23
|
-
|
|
24
|
-
load_dotenv()
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class ClaudeClient:
|
|
28
|
-
log = get_logger(__name__)
|
|
29
|
-
|
|
30
|
-
def __init__(self, model=None, max_tokens=1024, max_retries=3):
|
|
31
|
-
"""
|
|
32
|
-
Initialize Claude client with retry support.
|
|
33
|
-
|
|
34
|
-
Args:
|
|
35
|
-
model: Claude model to use (defaults to DEFAULT_CLAUDE_MODEL)
|
|
36
|
-
max_tokens: Maximum tokens in response (default: 1024)
|
|
37
|
-
max_retries: Maximum number of retry attempts for API calls with exponential backoff (default: 3)
|
|
38
|
-
"""
|
|
39
|
-
# Check for required dependencies
|
|
40
|
-
if anthropic is None:
|
|
41
|
-
error_msg = (
|
|
42
|
-
"\n❌ Error: Missing required package 'anthropic'\n\n"
|
|
43
|
-
"Please install the eval dependencies:\n"
|
|
44
|
-
' uv pip install -e ".[eval]"\n\n'
|
|
45
|
-
"Or install anthropic directly:\n"
|
|
46
|
-
" uv pip install anthropic\n"
|
|
47
|
-
)
|
|
48
|
-
raise ImportError(error_msg)
|
|
49
|
-
|
|
50
|
-
if BeautifulSoup is None:
|
|
51
|
-
error_msg = (
|
|
52
|
-
"\n❌ Error: Missing required package 'bs4' (BeautifulSoup4)\n\n"
|
|
53
|
-
"Please install the eval dependencies:\n"
|
|
54
|
-
' uv pip install -e ".[eval]"\n\n'
|
|
55
|
-
"Or install beautifulsoup4 directly:\n"
|
|
56
|
-
" uv pip install beautifulsoup4\n"
|
|
57
|
-
)
|
|
58
|
-
raise ImportError(error_msg)
|
|
59
|
-
|
|
60
|
-
if model is None:
|
|
61
|
-
model = DEFAULT_CLAUDE_MODEL
|
|
62
|
-
self.log = self.__class__.log # Use the class-level logger for instances
|
|
63
|
-
self.api_key = os.getenv("ANTHROPIC_API_KEY")
|
|
64
|
-
if not self.api_key:
|
|
65
|
-
error_msg = (
|
|
66
|
-
"ANTHROPIC_API_KEY not found in environment.\n"
|
|
67
|
-
"Please add your Anthropic API key to the .env file:\n"
|
|
68
|
-
" ANTHROPIC_API_KEY=your_api_key_here\n"
|
|
69
|
-
"Alternatively, export it as an environment variable:\n"
|
|
70
|
-
" export ANTHROPIC_API_KEY=your_api_key_here\n"
|
|
71
|
-
)
|
|
72
|
-
self.log.error(error_msg)
|
|
73
|
-
raise ValueError(error_msg)
|
|
74
|
-
# Initialize Anthropic client with retry support
|
|
75
|
-
# The SDK handles exponential backoff automatically
|
|
76
|
-
self.client = anthropic.Anthropic(
|
|
77
|
-
api_key=self.api_key,
|
|
78
|
-
max_retries=max_retries,
|
|
79
|
-
timeout=300.0, # 5 minute timeout for large documents
|
|
80
|
-
)
|
|
81
|
-
self.model = model
|
|
82
|
-
self.max_tokens = max_tokens
|
|
83
|
-
self.max_retries = max_retries
|
|
84
|
-
self.log.info(
|
|
85
|
-
f"Initialized ClaudeClient with model: {model}, max_retries: {max_retries}"
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
def calculate_cost(self, input_tokens, output_tokens):
|
|
89
|
-
"""
|
|
90
|
-
Calculate the cost of an API call based on token usage.
|
|
91
|
-
|
|
92
|
-
Args:
|
|
93
|
-
input_tokens (int): Number of input tokens
|
|
94
|
-
output_tokens (int): Number of output tokens
|
|
95
|
-
|
|
96
|
-
Returns:
|
|
97
|
-
dict: Cost breakdown with input_cost, output_cost, and total_cost
|
|
98
|
-
"""
|
|
99
|
-
# Get pricing for the current model, fallback to default if not found
|
|
100
|
-
pricing = MODEL_PRICING.get(self.model, MODEL_PRICING["default"])
|
|
101
|
-
|
|
102
|
-
# Calculate costs (convert tokens to millions)
|
|
103
|
-
input_cost = (input_tokens / 1_000_000) * pricing["input_per_mtok"]
|
|
104
|
-
output_cost = (output_tokens / 1_000_000) * pricing["output_per_mtok"]
|
|
105
|
-
total_cost = input_cost + output_cost
|
|
106
|
-
|
|
107
|
-
return {
|
|
108
|
-
"input_cost": round(input_cost, 6),
|
|
109
|
-
"output_cost": round(output_cost, 6),
|
|
110
|
-
"total_cost": round(total_cost, 6),
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
def get_completion(self, prompt):
|
|
114
|
-
self.log.debug("Getting completion from Claude")
|
|
115
|
-
self.log.debug(f"Prompt token count: {self.count_tokens(prompt)}")
|
|
116
|
-
try:
|
|
117
|
-
message = self.client.messages.create(
|
|
118
|
-
model=self.model,
|
|
119
|
-
max_tokens=self.max_tokens,
|
|
120
|
-
messages=[{"role": "user", "content": prompt}],
|
|
121
|
-
)
|
|
122
|
-
return message.content
|
|
123
|
-
except Exception as e:
|
|
124
|
-
self.log.error(f"Error getting completion: {e}")
|
|
125
|
-
raise
|
|
126
|
-
|
|
127
|
-
def get_completion_with_usage(self, prompt):
|
|
128
|
-
"""
|
|
129
|
-
Get completion from Claude and return both content and usage/cost information.
|
|
130
|
-
|
|
131
|
-
Args:
|
|
132
|
-
prompt (str): The prompt to send to Claude
|
|
133
|
-
|
|
134
|
-
Returns:
|
|
135
|
-
dict: Contains 'content', 'usage', and 'cost' keys
|
|
136
|
-
"""
|
|
137
|
-
self.log.info("Getting completion with usage tracking from Claude")
|
|
138
|
-
try:
|
|
139
|
-
message = self.client.messages.create(
|
|
140
|
-
model=self.model,
|
|
141
|
-
max_tokens=self.max_tokens,
|
|
142
|
-
messages=[{"role": "user", "content": prompt}],
|
|
143
|
-
)
|
|
144
|
-
|
|
145
|
-
# Extract usage information
|
|
146
|
-
usage = {
|
|
147
|
-
"input_tokens": message.usage.input_tokens,
|
|
148
|
-
"output_tokens": message.usage.output_tokens,
|
|
149
|
-
"total_tokens": message.usage.input_tokens
|
|
150
|
-
+ message.usage.output_tokens,
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
# Calculate cost
|
|
154
|
-
cost = self.calculate_cost(usage["input_tokens"], usage["output_tokens"])
|
|
155
|
-
|
|
156
|
-
self.log.info(
|
|
157
|
-
f"Usage: {usage['input_tokens']} input + {usage['output_tokens']} output = {usage['total_tokens']} total tokens"
|
|
158
|
-
)
|
|
159
|
-
self.log.info(
|
|
160
|
-
f"Cost: ${cost['input_cost']:.4f} input + ${cost['output_cost']:.4f} output = ${cost['total_cost']:.4f} total"
|
|
161
|
-
)
|
|
162
|
-
|
|
163
|
-
return {"content": message.content, "usage": usage, "cost": cost}
|
|
164
|
-
except Exception as e:
|
|
165
|
-
self.log.error(f"Error getting completion with usage: {e}")
|
|
166
|
-
raise
|
|
167
|
-
|
|
168
|
-
def list_models(self):
|
|
169
|
-
self.log.info("Retrieving available models")
|
|
170
|
-
try:
|
|
171
|
-
models = self.client.models.list(limit=20)
|
|
172
|
-
self.log.info(f"Successfully retrieved {len(models)} models")
|
|
173
|
-
return models
|
|
174
|
-
except Exception as e:
|
|
175
|
-
self.log.error(f"Error listing models: {e}")
|
|
176
|
-
raise
|
|
177
|
-
|
|
178
|
-
def count_tokens(self, prompt):
|
|
179
|
-
return self.client.messages.count_tokens(
|
|
180
|
-
model=self.model, messages=[{"role": "user", "content": prompt}]
|
|
181
|
-
)
|
|
182
|
-
|
|
183
|
-
def _convert_html_to_text(
|
|
184
|
-
self, file_path, save_text=False, output_dir="./output/claude"
|
|
185
|
-
):
|
|
186
|
-
"""
|
|
187
|
-
Convert HTML file content to plain text.
|
|
188
|
-
|
|
189
|
-
Args:
|
|
190
|
-
file_path (str): Path to the HTML file
|
|
191
|
-
save_text (bool): If True, saves extracted text to a file
|
|
192
|
-
|
|
193
|
-
Returns:
|
|
194
|
-
str: Extracted text content
|
|
195
|
-
"""
|
|
196
|
-
self.log.info("Converting HTML to text")
|
|
197
|
-
with open(file_path, "r", encoding="utf-8") as f:
|
|
198
|
-
soup = BeautifulSoup(f.read(), "html.parser")
|
|
199
|
-
text_content = soup.get_text(separator="\n", strip=True)
|
|
200
|
-
self.log.debug(f"Extracted {len(text_content)} characters of text")
|
|
201
|
-
|
|
202
|
-
if save_text:
|
|
203
|
-
# Create output directory if it doesn't exist
|
|
204
|
-
os.makedirs(output_dir, exist_ok=True)
|
|
205
|
-
|
|
206
|
-
filename = Path(file_path).stem
|
|
207
|
-
output_path = f"{output_dir}/{filename}.soup.txt"
|
|
208
|
-
with open(output_path, "w", encoding="utf-8") as f:
|
|
209
|
-
f.write(text_content)
|
|
210
|
-
self.log.info(f"Saved extracted text to: {output_path}")
|
|
211
|
-
|
|
212
|
-
return text_content
|
|
213
|
-
|
|
214
|
-
def analyze_file(
|
|
215
|
-
self,
|
|
216
|
-
file_path,
|
|
217
|
-
prompt,
|
|
218
|
-
media_type=None,
|
|
219
|
-
save_text=False,
|
|
220
|
-
output_dir="./output/claude",
|
|
221
|
-
):
|
|
222
|
-
"""
|
|
223
|
-
Analyze a file using Claude's file understanding capabilities.
|
|
224
|
-
|
|
225
|
-
Args:
|
|
226
|
-
file_path (str): Path to the file to analyze
|
|
227
|
-
prompt (str): The analysis prompt/question
|
|
228
|
-
media_type (str, optional): The MIME type of the file. If None, will try to infer from extension
|
|
229
|
-
save_text (bool, optional): If True, saves extracted text content to a file (for HTML files only)
|
|
230
|
-
output_dir (str, optional): The directory to save the output file
|
|
231
|
-
"""
|
|
232
|
-
self.log.info(f"Analyzing file: {file_path}")
|
|
233
|
-
ext = os.path.splitext(file_path)[1].lower()
|
|
234
|
-
|
|
235
|
-
try:
|
|
236
|
-
# For HTML files, extract text using BeautifulSoup
|
|
237
|
-
if ext in [".html", ".htm"]:
|
|
238
|
-
text_content = self._convert_html_to_text(
|
|
239
|
-
file_path, save_text, output_dir
|
|
240
|
-
)
|
|
241
|
-
message = self.client.messages.create(
|
|
242
|
-
model=self.model,
|
|
243
|
-
max_tokens=self.max_tokens,
|
|
244
|
-
messages=[
|
|
245
|
-
{
|
|
246
|
-
"role": "user",
|
|
247
|
-
"content": f"Document content:\n\n{text_content}\n\n{prompt}",
|
|
248
|
-
}
|
|
249
|
-
],
|
|
250
|
-
)
|
|
251
|
-
self.log.info("Successfully analyzed HTML content")
|
|
252
|
-
return message.content[0].text
|
|
253
|
-
|
|
254
|
-
# For other file types, use the original base64 encoding method
|
|
255
|
-
mime_types = {
|
|
256
|
-
".txt": "text/plain",
|
|
257
|
-
".pdf": "application/pdf",
|
|
258
|
-
".md": "text/markdown",
|
|
259
|
-
".csv": "text/csv",
|
|
260
|
-
".json": "application/json",
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
if media_type is None:
|
|
264
|
-
media_type = mime_types.get(ext, "application/octet-stream")
|
|
265
|
-
self.log.debug(f"Using media type: {media_type}")
|
|
266
|
-
|
|
267
|
-
with open(file_path, "rb") as f:
|
|
268
|
-
file_content = base64.b64encode(f.read()).decode("utf-8")
|
|
269
|
-
self.log.debug(f"File encoded, size: {len(file_content)} bytes")
|
|
270
|
-
|
|
271
|
-
self.log.info("Sending file for analysis")
|
|
272
|
-
message = self.client.messages.create(
|
|
273
|
-
model=self.model,
|
|
274
|
-
max_tokens=self.max_tokens,
|
|
275
|
-
messages=[
|
|
276
|
-
{
|
|
277
|
-
"role": "user",
|
|
278
|
-
"content": [
|
|
279
|
-
{
|
|
280
|
-
"type": "document",
|
|
281
|
-
"source": {
|
|
282
|
-
"type": "base64",
|
|
283
|
-
"media_type": media_type,
|
|
284
|
-
"data": file_content,
|
|
285
|
-
},
|
|
286
|
-
},
|
|
287
|
-
{"type": "text", "text": prompt},
|
|
288
|
-
],
|
|
289
|
-
}
|
|
290
|
-
],
|
|
291
|
-
)
|
|
292
|
-
self.log.info("Successfully analyzed file")
|
|
293
|
-
return message.content[0].text
|
|
294
|
-
|
|
295
|
-
except Exception as e:
|
|
296
|
-
self.log.error(f"Error analyzing file: {e}")
|
|
297
|
-
raise
|
|
298
|
-
|
|
299
|
-
def analyze_file_with_usage(
|
|
300
|
-
self,
|
|
301
|
-
file_path,
|
|
302
|
-
prompt,
|
|
303
|
-
media_type=None,
|
|
304
|
-
save_text=False,
|
|
305
|
-
output_dir="./output/claude",
|
|
306
|
-
):
|
|
307
|
-
"""
|
|
308
|
-
Analyze a file using Claude's file understanding capabilities with usage tracking.
|
|
309
|
-
|
|
310
|
-
Args:
|
|
311
|
-
file_path (str): Path to the file to analyze
|
|
312
|
-
prompt (str): The analysis prompt/question
|
|
313
|
-
media_type (str, optional): The MIME type of the file. If None, will try to infer from extension
|
|
314
|
-
save_text (bool, optional): If True, saves extracted text content to a file (for HTML files only)
|
|
315
|
-
output_dir (str, optional): The directory to save the output file
|
|
316
|
-
|
|
317
|
-
Returns:
|
|
318
|
-
dict: Contains 'content', 'usage', and 'cost' keys
|
|
319
|
-
"""
|
|
320
|
-
self.log.info(f"Analyzing file with usage tracking: {file_path}")
|
|
321
|
-
ext = os.path.splitext(file_path)[1].lower()
|
|
322
|
-
|
|
323
|
-
try:
|
|
324
|
-
# For text-based files, read content directly as text
|
|
325
|
-
if ext in [".html", ".htm", ".txt", ".md", ".csv", ".json"]:
|
|
326
|
-
if ext in [".html", ".htm"]:
|
|
327
|
-
text_content = self._convert_html_to_text(
|
|
328
|
-
file_path, save_text, output_dir
|
|
329
|
-
)
|
|
330
|
-
else:
|
|
331
|
-
# For other text files, read directly
|
|
332
|
-
with open(file_path, "r", encoding="utf-8") as f:
|
|
333
|
-
text_content = f.read()
|
|
334
|
-
self.log.debug(
|
|
335
|
-
f"Read text file, length: {len(text_content)} characters"
|
|
336
|
-
)
|
|
337
|
-
message = self.client.messages.create(
|
|
338
|
-
model=self.model,
|
|
339
|
-
max_tokens=self.max_tokens,
|
|
340
|
-
messages=[
|
|
341
|
-
{
|
|
342
|
-
"role": "user",
|
|
343
|
-
"content": f"Document content:\n\n{text_content}\n\n{prompt}",
|
|
344
|
-
}
|
|
345
|
-
],
|
|
346
|
-
)
|
|
347
|
-
self.log.info(f"Successfully analyzed text content ({ext} file)")
|
|
348
|
-
|
|
349
|
-
# Extract usage and calculate cost
|
|
350
|
-
usage = {
|
|
351
|
-
"input_tokens": message.usage.input_tokens,
|
|
352
|
-
"output_tokens": message.usage.output_tokens,
|
|
353
|
-
"total_tokens": message.usage.input_tokens
|
|
354
|
-
+ message.usage.output_tokens,
|
|
355
|
-
}
|
|
356
|
-
cost = self.calculate_cost(
|
|
357
|
-
usage["input_tokens"], usage["output_tokens"]
|
|
358
|
-
)
|
|
359
|
-
|
|
360
|
-
return {
|
|
361
|
-
"content": message.content[0].text,
|
|
362
|
-
"usage": usage,
|
|
363
|
-
"cost": cost,
|
|
364
|
-
}
|
|
365
|
-
|
|
366
|
-
# For binary file types (primarily PDFs), use base64 encoding with document format
|
|
367
|
-
mime_types = {
|
|
368
|
-
".pdf": "application/pdf",
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
if media_type is None:
|
|
372
|
-
media_type = mime_types.get(ext)
|
|
373
|
-
if media_type is None:
|
|
374
|
-
raise ValueError(
|
|
375
|
-
f"Unsupported file type: {ext}. Supported types: {list(mime_types.keys())}"
|
|
376
|
-
)
|
|
377
|
-
self.log.debug(f"Using media type: {media_type}")
|
|
378
|
-
|
|
379
|
-
with open(file_path, "rb") as f:
|
|
380
|
-
file_content = base64.b64encode(f.read()).decode("utf-8")
|
|
381
|
-
self.log.debug(f"File encoded, size: {len(file_content)} bytes")
|
|
382
|
-
|
|
383
|
-
self.log.info("Sending file for analysis")
|
|
384
|
-
message = self.client.messages.create(
|
|
385
|
-
model=self.model,
|
|
386
|
-
max_tokens=self.max_tokens,
|
|
387
|
-
messages=[
|
|
388
|
-
{
|
|
389
|
-
"role": "user",
|
|
390
|
-
"content": [
|
|
391
|
-
{
|
|
392
|
-
"type": "document",
|
|
393
|
-
"source": {
|
|
394
|
-
"type": "base64",
|
|
395
|
-
"media_type": media_type,
|
|
396
|
-
"data": file_content,
|
|
397
|
-
},
|
|
398
|
-
},
|
|
399
|
-
{"type": "text", "text": prompt},
|
|
400
|
-
],
|
|
401
|
-
}
|
|
402
|
-
],
|
|
403
|
-
)
|
|
404
|
-
self.log.info("Successfully analyzed file")
|
|
405
|
-
|
|
406
|
-
# Extract usage and calculate cost
|
|
407
|
-
usage = {
|
|
408
|
-
"input_tokens": message.usage.input_tokens,
|
|
409
|
-
"output_tokens": message.usage.output_tokens,
|
|
410
|
-
"total_tokens": message.usage.input_tokens
|
|
411
|
-
+ message.usage.output_tokens,
|
|
412
|
-
}
|
|
413
|
-
cost = self.calculate_cost(usage["input_tokens"], usage["output_tokens"])
|
|
414
|
-
|
|
415
|
-
return {"content": message.content[0].text, "usage": usage, "cost": cost}
|
|
416
|
-
|
|
417
|
-
except Exception as e:
|
|
418
|
-
self.log.error(f"Error analyzing file: {e}")
|
|
419
|
-
raise
|
|
420
|
-
|
|
421
|
-
def count_file_tokens(
|
|
422
|
-
self, file_path, prompt="", media_type=None, output_dir="./output/claude"
|
|
423
|
-
):
|
|
424
|
-
"""
|
|
425
|
-
Count tokens for a file and optional prompt combination.
|
|
426
|
-
|
|
427
|
-
Args:
|
|
428
|
-
file_path (str): Path to the file to analyze
|
|
429
|
-
prompt (str, optional): Additional prompt text to include in token count
|
|
430
|
-
media_type (str, optional): The MIME type of the file. If None, will try to infer from extension
|
|
431
|
-
|
|
432
|
-
Returns:
|
|
433
|
-
int: Total token count
|
|
434
|
-
"""
|
|
435
|
-
self.log.info(f"Counting tokens for file: {file_path}")
|
|
436
|
-
ext = os.path.splitext(file_path)[1].lower()
|
|
437
|
-
|
|
438
|
-
try:
|
|
439
|
-
# For text-based files, count tokens of extracted text
|
|
440
|
-
if ext in [".html", ".htm", ".txt", ".md", ".csv", ".json"]:
|
|
441
|
-
if ext in [".html", ".htm"]:
|
|
442
|
-
text_content = self._convert_html_to_text(
|
|
443
|
-
file_path, save_text=False, output_dir=output_dir
|
|
444
|
-
)
|
|
445
|
-
else:
|
|
446
|
-
# For other text files, read directly
|
|
447
|
-
with open(file_path, "r", encoding="utf-8") as f:
|
|
448
|
-
text_content = f.read()
|
|
449
|
-
|
|
450
|
-
content = f"Document content:\n\n{text_content}\n\n{prompt}"
|
|
451
|
-
token_count = self.count_tokens(content)
|
|
452
|
-
self.log.info(
|
|
453
|
-
f"Text file ({ext}) token count: {token_count.input_tokens}"
|
|
454
|
-
)
|
|
455
|
-
return token_count.input_tokens
|
|
456
|
-
|
|
457
|
-
# For binary file types (primarily PDFs), encode and count
|
|
458
|
-
mime_types = {
|
|
459
|
-
".pdf": "application/pdf",
|
|
460
|
-
}
|
|
461
|
-
|
|
462
|
-
if media_type is None:
|
|
463
|
-
media_type = mime_types.get(ext)
|
|
464
|
-
if media_type is None:
|
|
465
|
-
raise ValueError(
|
|
466
|
-
f"Unsupported file type: {ext}. Supported types: {list(mime_types.keys())}"
|
|
467
|
-
)
|
|
468
|
-
self.log.debug(f"Using media type: {media_type}")
|
|
469
|
-
|
|
470
|
-
with open(file_path, "rb") as f:
|
|
471
|
-
file_content = base64.b64encode(f.read()).decode("utf-8")
|
|
472
|
-
|
|
473
|
-
message_content = [
|
|
474
|
-
{
|
|
475
|
-
"type": "document",
|
|
476
|
-
"source": {
|
|
477
|
-
"type": "base64",
|
|
478
|
-
"media_type": media_type,
|
|
479
|
-
"data": file_content,
|
|
480
|
-
},
|
|
481
|
-
}
|
|
482
|
-
]
|
|
483
|
-
|
|
484
|
-
if prompt:
|
|
485
|
-
message_content.append({"type": "text", "text": prompt})
|
|
486
|
-
|
|
487
|
-
token_count = self.client.messages.count_tokens(
|
|
488
|
-
model=self.model,
|
|
489
|
-
messages=[{"role": "user", "content": message_content}],
|
|
490
|
-
)
|
|
491
|
-
|
|
492
|
-
self.log.info(f"File token count: {token_count.input_tokens}")
|
|
493
|
-
return token_count.input_tokens
|
|
494
|
-
|
|
495
|
-
except Exception as e:
|
|
496
|
-
self.log.error(f"Error counting tokens: {e}")
|
|
497
|
-
raise
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
# Example usage
|
|
501
|
-
if __name__ == "__main__":
|
|
502
|
-
client = ClaudeClient()
|
|
503
|
-
|
|
504
|
-
# Test file analysis with Blender introduction document
|
|
505
|
-
file_path = "./data/html/blender/introduction.html"
|
|
506
|
-
prompt = (
|
|
507
|
-
"Given this document, generate a set of short queries a user "
|
|
508
|
-
"may ask about the document and produce a set of ground truth "
|
|
509
|
-
"answers to be used in validating a RAG system. Include a "
|
|
510
|
-
"summary of the document in the queries. Return a json "
|
|
511
|
-
"formatted list of query-response pairs formatted as follows:"
|
|
512
|
-
"{'source': 'path/to/document', 'summary': 'summarized document', "
|
|
513
|
-
"'qa_pairs': [{'query': 'query1', 'response': 'response1'}, "
|
|
514
|
-
"{'query': 'query2', 'response': 'response2'}, ...]}"
|
|
515
|
-
)
|
|
516
|
-
|
|
517
|
-
analysis = client.analyze_file(
|
|
518
|
-
file_path, prompt, save_text=True, output_dir="./output/claude"
|
|
519
|
-
)
|
|
520
|
-
print(client.count_file_tokens(file_path, prompt))
|
|
521
|
-
|
|
522
|
-
# Prepare enhanced output with metadata
|
|
523
|
-
from datetime import datetime
|
|
524
|
-
|
|
525
|
-
output_data = {
|
|
526
|
-
"metadata": {
|
|
527
|
-
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
|
528
|
-
"model": client.model,
|
|
529
|
-
"source_file": file_path,
|
|
530
|
-
"prompt": prompt,
|
|
531
|
-
"token_count": client.count_file_tokens(file_path, prompt),
|
|
532
|
-
},
|
|
533
|
-
"analysis": json.loads(analysis), # Parse JSON string into dictionary
|
|
534
|
-
}
|
|
535
|
-
|
|
536
|
-
# Save analysis to JSON file
|
|
537
|
-
output_dir = "./output/claude"
|
|
538
|
-
os.makedirs(output_dir, exist_ok=True)
|
|
539
|
-
output_path = f"{output_dir}/{Path(file_path).stem}.out.json"
|
|
540
|
-
with open(output_path, "w", encoding="utf-8") as f:
|
|
541
|
-
json.dump(output_data, f, indent=2)
|
|
542
|
-
print(f"Analysis saved to: {output_path}")
|
|
1
|
+
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
import base64
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
import anthropic
|
|
11
|
+
except ImportError:
|
|
12
|
+
anthropic = None
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
from bs4 import BeautifulSoup
|
|
16
|
+
except ImportError:
|
|
17
|
+
BeautifulSoup = None
|
|
18
|
+
|
|
19
|
+
from dotenv import load_dotenv
|
|
20
|
+
|
|
21
|
+
from gaia.eval.config import DEFAULT_CLAUDE_MODEL, MODEL_PRICING
|
|
22
|
+
from gaia.logger import get_logger
|
|
23
|
+
|
|
24
|
+
load_dotenv()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ClaudeClient:
|
|
28
|
+
log = get_logger(__name__)
|
|
29
|
+
|
|
30
|
+
def __init__(self, model=None, max_tokens=1024, max_retries=3):
|
|
31
|
+
"""
|
|
32
|
+
Initialize Claude client with retry support.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
model: Claude model to use (defaults to DEFAULT_CLAUDE_MODEL)
|
|
36
|
+
max_tokens: Maximum tokens in response (default: 1024)
|
|
37
|
+
max_retries: Maximum number of retry attempts for API calls with exponential backoff (default: 3)
|
|
38
|
+
"""
|
|
39
|
+
# Check for required dependencies
|
|
40
|
+
if anthropic is None:
|
|
41
|
+
error_msg = (
|
|
42
|
+
"\n❌ Error: Missing required package 'anthropic'\n\n"
|
|
43
|
+
"Please install the eval dependencies:\n"
|
|
44
|
+
' uv pip install -e ".[eval]"\n\n'
|
|
45
|
+
"Or install anthropic directly:\n"
|
|
46
|
+
" uv pip install anthropic\n"
|
|
47
|
+
)
|
|
48
|
+
raise ImportError(error_msg)
|
|
49
|
+
|
|
50
|
+
if BeautifulSoup is None:
|
|
51
|
+
error_msg = (
|
|
52
|
+
"\n❌ Error: Missing required package 'bs4' (BeautifulSoup4)\n\n"
|
|
53
|
+
"Please install the eval dependencies:\n"
|
|
54
|
+
' uv pip install -e ".[eval]"\n\n'
|
|
55
|
+
"Or install beautifulsoup4 directly:\n"
|
|
56
|
+
" uv pip install beautifulsoup4\n"
|
|
57
|
+
)
|
|
58
|
+
raise ImportError(error_msg)
|
|
59
|
+
|
|
60
|
+
if model is None:
|
|
61
|
+
model = DEFAULT_CLAUDE_MODEL
|
|
62
|
+
self.log = self.__class__.log # Use the class-level logger for instances
|
|
63
|
+
self.api_key = os.getenv("ANTHROPIC_API_KEY")
|
|
64
|
+
if not self.api_key:
|
|
65
|
+
error_msg = (
|
|
66
|
+
"ANTHROPIC_API_KEY not found in environment.\n"
|
|
67
|
+
"Please add your Anthropic API key to the .env file:\n"
|
|
68
|
+
" ANTHROPIC_API_KEY=your_api_key_here\n"
|
|
69
|
+
"Alternatively, export it as an environment variable:\n"
|
|
70
|
+
" export ANTHROPIC_API_KEY=your_api_key_here\n"
|
|
71
|
+
)
|
|
72
|
+
self.log.error(error_msg)
|
|
73
|
+
raise ValueError(error_msg)
|
|
74
|
+
# Initialize Anthropic client with retry support
|
|
75
|
+
# The SDK handles exponential backoff automatically
|
|
76
|
+
self.client = anthropic.Anthropic(
|
|
77
|
+
api_key=self.api_key,
|
|
78
|
+
max_retries=max_retries,
|
|
79
|
+
timeout=300.0, # 5 minute timeout for large documents
|
|
80
|
+
)
|
|
81
|
+
self.model = model
|
|
82
|
+
self.max_tokens = max_tokens
|
|
83
|
+
self.max_retries = max_retries
|
|
84
|
+
self.log.info(
|
|
85
|
+
f"Initialized ClaudeClient with model: {model}, max_retries: {max_retries}"
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
def calculate_cost(self, input_tokens, output_tokens):
    """
    Turn token usage into a cost breakdown for the client's current model.

    Args:
        input_tokens (int): Number of input tokens
        output_tokens (int): Number of output tokens

    Returns:
        dict: ``input_cost``, ``output_cost`` and ``total_cost``, each rounded
            to six decimal places
    """
    # Models missing from the pricing table fall back to its "default" entry.
    rates = MODEL_PRICING.get(self.model, MODEL_PRICING["default"])

    # Rates are quoted per million tokens, so scale the raw counts first.
    prompt_cost = (input_tokens / 1_000_000) * rates["input_per_mtok"]
    completion_cost = (output_tokens / 1_000_000) * rates["output_per_mtok"]

    breakdown = {
        "input_cost": prompt_cost,
        "output_cost": completion_cost,
        "total_cost": prompt_cost + completion_cost,
    }
    return {label: round(amount, 6) for label, amount in breakdown.items()}
|
|
112
|
+
|
|
113
|
+
def get_completion(self, prompt):
    """
    Send a single-turn user prompt to Claude and return the response content.

    Args:
        prompt (str): The prompt to send to Claude

    Returns:
        The ``content`` attribute of the message returned by
        ``client.messages.create``, passed through as-is (it is not
        unwrapped to plain text here).

    Raises:
        Exception: Any error raised by the API call is logged and re-raised.
    """
    import logging  # local import: only needed for the level check below

    self.log.debug("Getting completion from Claude")
    # count_tokens() performs its own API request. An f-string is evaluated
    # before the logger can discard it, so only pay for that request when the
    # debug record will actually be emitted.
    # NOTE(review): assumes self.log is a standard logging.Logger - confirm.
    if self.log.isEnabledFor(logging.DEBUG):
        self.log.debug(f"Prompt token count: {self.count_tokens(prompt)}")
    try:
        message = self.client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        return message.content
    except Exception as e:
        self.log.error(f"Error getting completion: {e}")
        raise
|
|
126
|
+
|
|
127
|
+
def get_completion_with_usage(self, prompt):
    """
    Request a completion from Claude and report what it consumed.

    Args:
        prompt (str): The prompt to send to Claude

    Returns:
        dict: ``content`` (the response's ``content`` attribute, as-is),
            ``usage`` (input, output and total token counts) and ``cost``
            (the result of ``calculate_cost`` for those counts)

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    self.log.info("Getting completion with usage tracking from Claude")
    try:
        request = {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "messages": [{"role": "user", "content": prompt}],
        }
        reply = self.client.messages.create(**request)

        # Token accounting as reported by the API response.
        consumed = reply.usage
        usage = {
            "input_tokens": consumed.input_tokens,
            "output_tokens": consumed.output_tokens,
            "total_tokens": consumed.input_tokens + consumed.output_tokens,
        }

        cost = self.calculate_cost(usage["input_tokens"], usage["output_tokens"])

        self.log.info(
            f"Usage: {usage['input_tokens']} input + {usage['output_tokens']} output = {usage['total_tokens']} total tokens"
        )
        self.log.info(
            f"Cost: ${cost['input_cost']:.4f} input + ${cost['output_cost']:.4f} output = ${cost['total_cost']:.4f} total"
        )

        result = {"content": reply.content, "usage": usage, "cost": cost}
        return result
    except Exception as e:
        self.log.error(f"Error getting completion with usage: {e}")
        raise
|
|
167
|
+
|
|
168
|
+
def list_models(self):
    """
    Retrieve up to 20 models from the API.

    Returns:
        The object returned by ``client.models.list(limit=20)``, unchanged.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    self.log.info("Retrieving available models")
    try:
        models = self.client.models.list(limit=20)
        # NOTE(review): the SDK is expected to return a page object that
        # holds its items in ``.data`` and does not implement len(); count
        # through ``.data`` when present so logging cannot raise TypeError,
        # and fall back to len() for plain sequences.
        items = getattr(models, "data", None)
        count = len(items) if items is not None else len(models)
        self.log.info(f"Successfully retrieved {count} models")
        return models
    except Exception as e:
        self.log.error(f"Error listing models: {e}")
        raise
|
|
177
|
+
|
|
178
|
+
def count_tokens(self, prompt):
    """
    Ask the API to count the tokens of a single-turn user prompt.

    Args:
        prompt: Content of the user message to be counted

    Returns:
        The raw result of ``client.messages.count_tokens`` for the
        client's current model.
    """
    user_turn = {"role": "user", "content": prompt}
    return self.client.messages.count_tokens(model=self.model, messages=[user_turn])
|
|
182
|
+
|
|
183
|
+
def _convert_html_to_text(
    self, file_path, save_text=False, output_dir="./output/claude"
):
    """
    Extract the plain text of an HTML file, optionally writing it to disk.

    Args:
        file_path (str): Path to the HTML file (read as UTF-8)
        save_text (bool): If True, the extracted text is also written to
            ``<output_dir>/<file stem>.soup.txt``
        output_dir (str): Directory for the saved text; created if missing

    Returns:
        str: Extracted text content, one line per text node
    """
    self.log.info("Converting HTML to text")
    with open(file_path, "r", encoding="utf-8") as html_file:
        markup = html_file.read()
    parsed = BeautifulSoup(markup, "html.parser")
    text_content = parsed.get_text(separator="\n", strip=True)
    self.log.debug(f"Extracted {len(text_content)} characters of text")

    # Nothing more to do unless the caller asked for a copy on disk.
    if not save_text:
        return text_content

    os.makedirs(output_dir, exist_ok=True)
    filename = Path(file_path).stem
    output_path = f"{output_dir}/{filename}.soup.txt"
    with open(output_path, "w", encoding="utf-8") as text_file:
        text_file.write(text_content)
    self.log.info(f"Saved extracted text to: {output_path}")
    return text_content
|
|
213
|
+
|
|
214
|
+
def analyze_file(
    self,
    file_path,
    prompt,
    media_type=None,
    save_text=False,
    output_dir="./output/claude",
):
    """
    Analyze a file using Claude's file understanding capabilities.

    HTML files are converted to plain text, and other text-based files
    (.txt, .md, .csv, .json) are read as UTF-8; in both cases the text is
    sent inline ahead of the prompt, matching ``analyze_file_with_usage``.
    Any other file, or a non-HTML file for which the caller passes an
    explicit ``media_type``, is base64-encoded and sent as a document block.

    Args:
        file_path (str): Path to the file to analyze
        prompt (str): The analysis prompt/question
        media_type (str, optional): The MIME type of the file. If None, it is
            inferred from the extension (``application/octet-stream`` when
            the extension is unknown)
        save_text (bool, optional): If True, saves extracted text content to a file (for HTML files only)
        output_dir (str, optional): The directory to save the output file

    Returns:
        str: Text of the first content block in the response

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    self.log.info(f"Analyzing file: {file_path}")
    ext = os.path.splitext(file_path)[1].lower()

    try:
        is_html = ext in (".html", ".htm")
        text_content = None
        if is_html:
            # For HTML files, extract text using BeautifulSoup
            text_content = self._convert_html_to_text(
                file_path, save_text, output_dir
            )
        elif media_type is None and ext in (".txt", ".md", ".csv", ".json"):
            # Text files are sent inline rather than as a base64 document
            # block. An explicit media_type keeps the base64 path below.
            with open(file_path, "r", encoding="utf-8") as f:
                text_content = f.read()
            self.log.debug(
                f"Read text file, length: {len(text_content)} characters"
            )

        if text_content is not None:
            message = self.client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                messages=[
                    {
                        "role": "user",
                        "content": f"Document content:\n\n{text_content}\n\n{prompt}",
                    }
                ],
            )
            if is_html:
                self.log.info("Successfully analyzed HTML content")
            else:
                self.log.info(f"Successfully analyzed text content ({ext} file)")
            return message.content[0].text

        # For binary file types (primarily PDFs), use base64 encoding
        mime_types = {
            ".pdf": "application/pdf",
        }

        if media_type is None:
            media_type = mime_types.get(ext, "application/octet-stream")
        self.log.debug(f"Using media type: {media_type}")

        with open(file_path, "rb") as f:
            file_content = base64.b64encode(f.read()).decode("utf-8")
        self.log.debug(f"File encoded, size: {len(file_content)} bytes")

        self.log.info("Sending file for analysis")
        message = self.client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "document",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": file_content,
                            },
                        },
                        {"type": "text", "text": prompt},
                    ],
                }
            ],
        )
        self.log.info("Successfully analyzed file")
        return message.content[0].text

    except Exception as e:
        self.log.error(f"Error analyzing file: {e}")
        raise
|
|
298
|
+
|
|
299
|
+
def analyze_file_with_usage(
    self,
    file_path,
    prompt,
    media_type=None,
    save_text=False,
    output_dir="./output/claude",
):
    """
    Analyze a file with Claude and report token usage and cost with the answer.

    Text-based files (.html, .htm, .txt, .md, .csv, .json) are sent inline as
    text ahead of the prompt; HTML is converted to plain text first. Every
    other file is base64-encoded and sent as a document block.

    Args:
        file_path (str): Path to the file to analyze
        prompt (str): The analysis prompt/question
        media_type (str, optional): MIME type for non-text files. If None, it
            is inferred from the extension (only ``.pdf`` is known)
        save_text (bool, optional): If True, saves extracted text content to a file (for HTML files only)
        output_dir (str, optional): The directory to save the output file

    Returns:
        dict: ``content`` (text of the first response block), ``usage``
            (input, output and total token counts) and ``cost`` (the result
            of ``calculate_cost``)

    Raises:
        ValueError: If the file is not text-based, no ``media_type`` is given
            and the extension is not ``.pdf``
        Exception: Any failure (including the above) is logged and re-raised.
    """
    self.log.info(f"Analyzing file with usage tracking: {file_path}")
    ext = os.path.splitext(file_path)[1].lower()

    try:
        is_html = ext in (".html", ".htm")
        if is_html or ext in (".txt", ".md", ".csv", ".json"):
            # Text-based input: the document travels inline with the prompt.
            if is_html:
                text_content = self._convert_html_to_text(
                    file_path, save_text, output_dir
                )
            else:
                with open(file_path, "r", encoding="utf-8") as f:
                    text_content = f.read()
                self.log.debug(
                    f"Read text file, length: {len(text_content)} characters"
                )
            user_content = f"Document content:\n\n{text_content}\n\n{prompt}"
            success_note = f"Successfully analyzed text content ({ext} file)"
        else:
            # Binary input (primarily PDFs): base64 document block + prompt.
            mime_types = {
                ".pdf": "application/pdf",
            }
            if media_type is None:
                media_type = mime_types.get(ext)
                if media_type is None:
                    raise ValueError(
                        f"Unsupported file type: {ext}. Supported types: {list(mime_types.keys())}"
                    )
            self.log.debug(f"Using media type: {media_type}")

            with open(file_path, "rb") as f:
                file_content = base64.b64encode(f.read()).decode("utf-8")
            self.log.debug(f"File encoded, size: {len(file_content)} bytes")

            document_block = {
                "type": "document",
                "source": {
                    "type": "base64",
                    "media_type": media_type,
                    "data": file_content,
                },
            }
            user_content = [document_block, {"type": "text", "text": prompt}]
            self.log.info("Sending file for analysis")
            success_note = "Successfully analyzed file"

        message = self.client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            messages=[{"role": "user", "content": user_content}],
        )
        self.log.info(success_note)

        # Token accounting and cost are derived the same way for both paths.
        consumed = message.usage
        usage = {
            "input_tokens": consumed.input_tokens,
            "output_tokens": consumed.output_tokens,
            "total_tokens": consumed.input_tokens + consumed.output_tokens,
        }
        cost = self.calculate_cost(usage["input_tokens"], usage["output_tokens"])

        return {"content": message.content[0].text, "usage": usage, "cost": cost}

    except Exception as e:
        self.log.error(f"Error analyzing file: {e}")
        raise
|
|
420
|
+
|
|
421
|
+
def count_file_tokens(
    self, file_path, prompt="", media_type=None, output_dir="./output/claude"
):
    """
    Count the input tokens a file plus an optional prompt would consume.

    Text-based files (.html, .htm, .txt, .md, .csv, .json) are counted as
    inline text wrapped exactly as the analysis methods send them; HTML is
    converted to plain text first (nothing is saved to disk). Other files
    are base64-encoded and counted as a document block.

    Args:
        file_path (str): Path to the file to analyze
        prompt (str, optional): Additional prompt text to include in token count
        media_type (str, optional): MIME type for non-text files. If None, it
            is inferred from the extension (only ``.pdf`` is known)
        output_dir (str, optional): Forwarded to the HTML converter

    Returns:
        int: Total token count

    Raises:
        ValueError: If the file is not text-based, no ``media_type`` is given
            and the extension is not ``.pdf``
        Exception: Any failure (including the above) is logged and re-raised.
    """
    self.log.info(f"Counting tokens for file: {file_path}")
    ext = os.path.splitext(file_path)[1].lower()

    try:
        html_exts = (".html", ".htm")
        if ext in html_exts + (".txt", ".md", ".csv", ".json"):
            # Text-based input: count the same inline payload used for analysis.
            if ext in html_exts:
                text_content = self._convert_html_to_text(
                    file_path, save_text=False, output_dir=output_dir
                )
            else:
                with open(file_path, "r", encoding="utf-8") as f:
                    text_content = f.read()

            token_count = self.count_tokens(
                f"Document content:\n\n{text_content}\n\n{prompt}"
            )
            self.log.info(
                f"Text file ({ext}) token count: {token_count.input_tokens}"
            )
            return token_count.input_tokens

        # Binary input (primarily PDFs): encode and count as a document block.
        mime_types = {
            ".pdf": "application/pdf",
        }
        if media_type is None:
            media_type = mime_types.get(ext)
            if media_type is None:
                raise ValueError(
                    f"Unsupported file type: {ext}. Supported types: {list(mime_types.keys())}"
                )
        self.log.debug(f"Using media type: {media_type}")

        with open(file_path, "rb") as f:
            file_content = base64.b64encode(f.read()).decode("utf-8")

        document_block = {
            "type": "document",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": file_content,
            },
        }
        # The prompt is only included when it is non-empty.
        prompt_blocks = [{"type": "text", "text": prompt}] if prompt else []
        message_content = [document_block] + prompt_blocks

        token_count = self.client.messages.count_tokens(
            model=self.model,
            messages=[{"role": "user", "content": message_content}],
        )

        self.log.info(f"File token count: {token_count.input_tokens}")
        return token_count.input_tokens

    except Exception as e:
        self.log.error(f"Error counting tokens: {e}")
        raise
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
# Example usage
|
|
501
|
+
if __name__ == "__main__":
    # Demo: analyze one HTML document, then save the response together with
    # run metadata as JSON under ./output/claude.
    from datetime import datetime

    client = ClaudeClient()

    # Test file analysis with Blender introduction document
    file_path = "./data/html/blender/introduction.html"
    prompt = (
        "Given this document, generate a set of short queries a user "
        "may ask about the document and produce a set of ground truth "
        "answers to be used in validating a RAG system. Include a "
        "summary of the document in the queries. Return a json "
        "formatted list of query-response pairs formatted as follows:"
        "{'source': 'path/to/document', 'summary': 'summarized document', "
        "'qa_pairs': [{'query': 'query1', 'response': 'response1'}, "
        "{'query': 'query2', 'response': 'response2'}, ...]}"
    )
    output_dir = "./output/claude"

    analysis = client.analyze_file(
        file_path, prompt, save_text=True, output_dir=output_dir
    )

    # Count once and reuse the value: every count re-reads the file and
    # issues another API request.
    token_count = client.count_file_tokens(file_path, prompt)
    print(token_count)

    # The model is asked for JSON but may return something that is not
    # strictly valid; keep the raw text rather than losing the response.
    try:
        parsed_analysis = json.loads(analysis)
    except json.JSONDecodeError:
        parsed_analysis = analysis

    # Prepare enhanced output with metadata
    output_data = {
        "metadata": {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "model": client.model,
            "source_file": file_path,
            "prompt": prompt,
            "token_count": token_count,
        },
        "analysis": parsed_analysis,
    }

    # Save analysis to JSON file
    os.makedirs(output_dir, exist_ok=True)
    output_path = f"{output_dir}/{Path(file_path).stem}.out.json"
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2)
    print(f"Analysis saved to: {output_path}")
|