aixtools 0.3.10__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aixtools might be problematic. Click here for more details.

aixtools/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.3.10'
32
- __version_tuple__ = version_tuple = (0, 3, 10)
31
+ __version__ = version = '0.4.0'
32
+ __version_tuple__ = version_tuple = (0, 4, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
aixtools/a2a/app.py CHANGED
@@ -32,6 +32,7 @@ from starlette.responses import RedirectResponse
32
32
  from aixtools.a2a.auth_middleware import AuthMiddleware
33
33
  from aixtools.auth.auth import AccessTokenAuthProvider
34
34
  from aixtools.context import session_id_var, user_id_var
35
+ from aixtools.utils import config
35
36
 
36
37
 
37
38
  class AgentWorkerWithMetadataParser(AgentWorker):
@@ -140,6 +141,11 @@ def build_a2a_starlette_app(
140
141
  http_handler=request_handler,
141
142
  )
142
143
 
144
+ if config.SKIP_MCP_AUTHORIZATION:
145
+ auth_provider = None
146
+ elif auth_provider is None:
147
+ auth_provider = AccessTokenAuthProvider()
148
+
143
149
  app = server.build()
144
150
  if auth_provider:
145
151
  app.add_middleware(AuthMiddleware, provider=auth_provider)
aixtools/agents/prompt.py CHANGED
@@ -1,68 +1,114 @@
1
1
  """Prompt building utilities for Pydantic AI agent, including file handling and context management."""
2
2
 
3
3
  import mimetypes
4
- from pathlib import Path
4
+ from dataclasses import dataclass
5
+ from pathlib import Path, PurePosixPath
6
+ from typing import Optional
5
7
 
8
+ from markitdown import MarkItDown
6
9
  from pydantic_ai import BinaryContent
7
10
 
8
11
  from aixtools.context import SessionIdTuple
12
+ from aixtools.logging.logging_config import get_logger
9
13
  from aixtools.server import container_to_host_path
14
+ from aixtools.utils.config import (
15
+ EXTRACTABLE_DOCUMENT_TYPES,
16
+ IMAGE_ATTACHMENT_TYPES,
17
+ MAX_EXTRACTED_TEXT_SIZE,
18
+ MAX_IMAGE_ATTACHMENT_SIZE,
19
+ )
10
20
  from aixtools.utils.files import is_text_content
11
21
 
12
- CLAUDE_MAX_FILE_SIZE_IN_CONTEXT = 4 * 1024 * 1024 # Claude limit 4.5 MB for PDF files
13
- CLAUDE_IMAGE_MAX_FILE_SIZE_IN_CONTEXT = (
14
- 5 * 1024 * 1024
15
- ) # Claude limit 5 MB for images, to avoid large image files in context
22
+ logger = get_logger(__name__)
23
+
24
+
25
+ @dataclass
26
+ class FileExtractionResult:
27
+ """Result of file content extraction.
28
+
29
+ Attributes:
30
+ content: Extracted file content (str for text/documents, BinaryContent for images, None on failure)
31
+ success: True if file was successfully read or extracted, False on any failure
32
+ error_message: Error description if extraction failed, None otherwise
33
+ was_extracted: True if document extraction via markitdown was used successfully
34
+ """
35
+
36
+ content: str | BinaryContent | None
37
+ success: bool
38
+ error_message: str | None = None
39
+ was_extracted: bool = False
16
40
 
17
41
 
18
42
  def should_be_included_into_context(
19
43
  file_content: BinaryContent | str | None,
20
- file_size: int,
21
44
  *,
22
- max_img_size_bytes: int = CLAUDE_IMAGE_MAX_FILE_SIZE_IN_CONTEXT,
23
- max_file_size_bytes: int = CLAUDE_MAX_FILE_SIZE_IN_CONTEXT,
45
+ max_image_size_bytes: int = MAX_IMAGE_ATTACHMENT_SIZE,
46
+ max_extracted_text_size_bytes: int = MAX_EXTRACTED_TEXT_SIZE,
24
47
  ) -> bool:
25
- """Decide whether a file content should be included into the model context based on its type and size."""
26
- if not isinstance(file_content, BinaryContent):
48
+ """Check if file content should be included in model context based on type and size limits."""
49
+ if file_content is None:
27
50
  return False
28
51
 
29
- if file_content.media_type.startswith("text/"):
30
- return False
31
-
32
- # Exclude archive files as they're not supported by OpenAI models
33
- archive_types = {
34
- "application/zip",
35
- "application/x-tar",
36
- "application/gzip",
37
- "application/x-gzip",
38
- "application/x-rar-compressed",
39
- "application/x-7z-compressed",
40
- }
41
- if file_content.media_type in archive_types:
42
- return False
52
+ # Handle extracted text (strings)
53
+ if isinstance(file_content, str):
54
+ text_size = len(file_content.encode("utf-8"))
55
+ return text_size < max_extracted_text_size_bytes
43
56
 
44
- if file_content.is_image and file_size < max_img_size_bytes:
45
- return True
57
+ # Handle binary content (images only)
58
+ if isinstance(file_content, BinaryContent):
59
+ if file_content.media_type not in IMAGE_ATTACHMENT_TYPES:
60
+ return False
61
+ image_size = len(file_content.data)
62
+ return image_size < max_image_size_bytes
46
63
 
47
- return file_size < max_file_size_bytes
64
+ return False
48
65
 
49
66
 
50
- def file_to_binary_content(file_path: str | Path, mime_type: str = "") -> str | BinaryContent:
51
- """
52
- Read a file and return its content as either a UTF-8 string (for text files)
53
- or BinaryContent (for binary files).
54
- """
55
- with open(file_path, "rb") as f:
56
- data = f.read()
57
-
67
+ def file_to_binary_content(file_path: str | Path, mime_type: Optional[str] = None) -> FileExtractionResult:
68
+ """Read file and extract text from documents (PDF, DOCX, XLSX, PPTX) using markitdown."""
58
69
  if not mime_type:
59
70
  mime_type, _ = mimetypes.guess_type(file_path)
60
71
  mime_type = mime_type or "application/octet-stream"
61
72
 
62
- if is_text_content(data, mime_type):
63
- return data.decode("utf-8")
73
+ # Extract text from supported document types using markitdown
74
+ if mime_type in EXTRACTABLE_DOCUMENT_TYPES:
75
+ try:
76
+ markitdown = MarkItDown()
77
+ result = markitdown.convert(str(file_path))
78
+ return FileExtractionResult(
79
+ content=result.text_content, success=True, error_message=None, was_extracted=True
80
+ )
81
+ except Exception as e: # pylint: disable=broad-exception-caught
82
+ error_msg = f"Extraction failed: {type(e).__name__}: {str(e)}"
83
+ logger.error("Document extraction failed for %s: %s", file_path, error_msg)
84
+ return FileExtractionResult(content=None, success=False, error_message=error_msg)
85
+
86
+ # Read the file data for non-document types
87
+ try:
88
+ with open(file_path, "rb") as f:
89
+ data = f.read()
90
+
91
+ # Return as string if it's text content
92
+ if is_text_content(data, mime_type):
93
+ return FileExtractionResult(content=data.decode("utf-8"), success=True)
94
+
95
+ # Return as binary content for images and other binary files
96
+ return FileExtractionResult(content=BinaryContent(data=data, media_type=mime_type), success=True)
97
+ except Exception as e: # pylint: disable=broad-exception-caught
98
+ error_msg = f"Failed to read file: {type(e).__name__}: {str(e)}"
99
+ logger.error("File reading failed for %s: %s", file_path, error_msg)
100
+ return FileExtractionResult(content=None, success=False, error_message=error_msg)
101
+
64
102
 
65
- return BinaryContent(data=data, media_type=mime_type)
103
+ def truncate_extracted_text(text: str, max_bytes: int = MAX_EXTRACTED_TEXT_SIZE) -> str:
104
+ """Truncate text to max_bytes with warning prefix."""
105
+ truncated_bytes = text.encode("utf-8")[:max_bytes]
106
+ truncated_text = truncated_bytes.decode("utf-8", errors="ignore")
107
+
108
+ total_chars = len(text)
109
+ truncated_chars = len(truncated_text)
110
+
111
+ return f"[TRUNCATED - showing first {truncated_chars} of {total_chars} characters]\n\n{truncated_text}"
66
112
 
67
113
 
68
114
  def build_user_input(
@@ -75,20 +121,52 @@ def build_user_input(
75
121
  return user_text
76
122
 
77
123
  attachment_info_lines = []
78
- binary_attachments = []
124
+ binary_attachments: list[str | BinaryContent] = []
79
125
 
80
126
  for workspace_path in file_paths:
81
- host_path = container_to_host_path(workspace_path, ctx=session_tuple)
127
+ # Convert Path to PurePosixPath for container_to_host_path
128
+ workspace_posix_path = PurePosixPath(workspace_path)
129
+ host_path = container_to_host_path(workspace_posix_path, ctx=session_tuple)
130
+
131
+ # Handle None return from container_to_host_path
132
+ if host_path is None:
133
+ attachment_info = (
134
+ f"* {workspace_path.name} (path in workspace: {workspace_path}) -- conversion failed: invalid path"
135
+ )
136
+ attachment_info_lines.append(attachment_info)
137
+ continue
138
+
82
139
  file_size = host_path.stat().st_size
83
140
  mime_type, _ = mimetypes.guess_type(host_path)
84
141
  mime_type = mime_type or "application/octet-stream"
85
142
 
86
143
  attachment_info = f"* {workspace_path.name} (file_size={file_size} bytes) (path in workspace: {workspace_path})"
87
- binary_content = file_to_binary_content(host_path, mime_type)
144
+ extraction_result = file_to_binary_content(host_path, mime_type)
145
+
146
+ # Handle extraction failure - exclude from attachments
147
+ if not extraction_result.success:
148
+ attachment_info += f" -- extraction failed: {extraction_result.error_message}"
149
+ attachment_info_lines.append(attachment_info)
150
+ continue
151
+
152
+ # Handle successful extraction
153
+ if extraction_result.was_extracted:
154
+ attachment_info += " -- extracted as text"
88
155
 
89
- if should_be_included_into_context(binary_content, file_size):
90
- binary_attachments.append(binary_content)
156
+ # Check if content should be included in context
157
+ if should_be_included_into_context(extraction_result.content) and extraction_result.content is not None:
158
+ binary_attachments.append(extraction_result.content)
91
159
  attachment_info += f" -- provided to model context at index {len(binary_attachments) - 1}"
160
+ elif (
161
+ isinstance(extraction_result.content, str) and extraction_result.content and extraction_result.was_extracted
162
+ ):
163
+ # Truncate large extracted text and include with warning (only for extracted documents)
164
+ truncated_content = truncate_extracted_text(extraction_result.content)
165
+ binary_attachments.append(truncated_content)
166
+ attachment_info += f" -- truncated and provided to model context at index {len(binary_attachments) - 1}"
167
+ elif extraction_result.content is not None:
168
+ # Content exists but excluded from context (e.g., images too large, non-extracted text)
169
+ attachment_info += " -- too large for context"
92
170
 
93
171
  attachment_info_lines.append(attachment_info)
94
172
 
aixtools/utils/config.py CHANGED
@@ -146,3 +146,26 @@ APP_DEFAULT_SCOPE = get_variable_env("APP_DEFAULT_SCOPE", allow_empty=True)
146
146
  AUTH_TEST_TOKEN = get_variable_env("AUTH_TEST_TOKEN", allow_empty=True)
147
147
 
148
148
  MCP_TOOLS_MAX_RETRIES = int(get_variable_env("MCP_TOOLS_MAX_RETRIES", default=10))
149
+
150
+
151
+ # File attachment limits and supported types for model context
152
+ # Maximum extracted document text size (5MB default, planned for future use)
153
+ MAX_EXTRACTED_TEXT_SIZE = int(get_variable_env("MAX_EXTRACTED_TEXT_SIZE", default=str(5 * 1024 * 1024)))
154
+ # Maximum image attachment size (2MB default)
155
+ MAX_IMAGE_ATTACHMENT_SIZE = int(get_variable_env("MAX_IMAGE_ATTACHMENT_SIZE", default=str(2 * 1024 * 1024)))
156
+ # Image MIME types that can be attached to model context
157
+ IMAGE_ATTACHMENT_TYPES = {
158
+ "image/png",
159
+ "image/jpeg",
160
+ "image/jpg",
161
+ "image/gif",
162
+ "image/webp",
163
+ }
164
+ # Document MIME types that can be extracted as text
165
+ EXTRACTABLE_DOCUMENT_TYPES = {
166
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation", # .pptx
167
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # .docx
168
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # .xlsx
169
+ "application/vnd.ms-excel", # .xls
170
+ "application/pdf", # .pdf
171
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aixtools
3
- Version: 0.3.10
3
+ Version: 0.4.0
4
4
  Summary: Tools for AI exploration and debugging
5
5
  Requires-Python: >=3.11.2
6
6
  Description-Content-Type: text/markdown
@@ -26,6 +26,7 @@ Requires-Dist: rich>=14.0.0
26
26
  Requires-Dist: ruff>=0.11.6
27
27
  Requires-Dist: streamlit>=1.44.1
28
28
  Requires-Dist: watchdog>=6.0.0
29
+ Requires-Dist: markitdown[docx,pdf,pptx,xls,xlsx]>=0.1.3
29
30
  Provides-Extra: test
30
31
  Requires-Dist: pyyaml; extra == "test"
31
32
  Provides-Extra: feature
@@ -1,5 +1,5 @@
1
1
  aixtools/__init__.py,sha256=9NGHm7LjsQmsvjTZvw6QFJexSvAU4bCoN_KBk9SCa00,260
2
- aixtools/_version.py,sha256=IwCXstmG50R88HCesfawu5BpJEPg8WIKehd-k6CRLJs,706
2
+ aixtools/_version.py,sha256=2_0GUP7yBCXRus-qiJKxQD62z172WSs1sQ6DVpPsbmM,704
3
3
  aixtools/app.py,sha256=JzQ0nrv_bjDQokllIlGHOV0HEb-V8N6k_nGQH-TEsVU,5227
4
4
  aixtools/chainlit.md,sha256=yC37Ly57vjKyiIvK4oUvf4DYxZCwH7iocTlx7bLeGLU,761
5
5
  aixtools/context.py,sha256=I_MD40ZnvRm5WPKAKqBUAdXIf8YaurkYUUHSVVy-QvU,598
@@ -17,7 +17,7 @@ aixtools/.chainlit/translations/nl.json,sha256=R3e-WxkQXAiuQgnnXjFWhwzpn1EA9xJ8g
17
17
  aixtools/.chainlit/translations/ta.json,sha256=pxa2uLEEDjiGiT6MFcCJ_kNh5KoFViHFptcJjc79Llc,17224
18
18
  aixtools/.chainlit/translations/te.json,sha256=0qGj-ODEHVOcxfVVX5IszS1QBCKSXuU1okANP_EbvBQ,16885
19
19
  aixtools/.chainlit/translations/zh-CN.json,sha256=EWxhT2_6CW9z0F6SI2llr3RsaL2omH1QZWHVG2n5POA,8664
20
- aixtools/a2a/app.py,sha256=ugx9FR8QadpEIgy79V6vx1zoamm6ldds6PYKePPQ3sA,5809
20
+ aixtools/a2a/app.py,sha256=7sxzgWcAq0IulQh8iukVFy5w8CVbx8Y3kEe0qnM0yeQ,5993
21
21
  aixtools/a2a/auth_middleware.py,sha256=TdVaUn4qhV0_QTCfavx8hmWJVPzCGsPtiQmH4icxC2Y,1660
22
22
  aixtools/a2a/utils.py,sha256=EHr3IyyBJn23ni-JcfAf6i3VpQmPs0g1TSnAZazvY_8,4039
23
23
  aixtools/a2a/google_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,7 +32,7 @@ aixtools/agents/nodes_to_md.py,sha256=hAT8dgiZTG4uGoSgeRZkIJ7zgkQUNpdIr8KSFhjWAH
32
32
  aixtools/agents/nodes_to_message.py,sha256=ZqcmxUNf4esiCTRk37wWP1LquhqNsCmydvMr4kjZEjw,1012
33
33
  aixtools/agents/nodes_to_str.py,sha256=UkOu5Nry827J4H_ohQU3tPBfJxtr3p6FfCfWoUy5uIs,4325
34
34
  aixtools/agents/print_nodes.py,sha256=wVTngNfqM0As845WTRz6G3Rei_Gr3HuBlvu-G_eXuig,1665
35
- aixtools/agents/prompt.py,sha256=p9OYnyJ4-MyGXwHPrQeJBhZ2a3RV2HqhtdUUCrTMsAQ,3361
35
+ aixtools/agents/prompt.py,sha256=oZl6_3SelyoSysLpF6AAmLHLHhwyPYCtX8hJ2pRUnhw,7396
36
36
  aixtools/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  aixtools/auth/auth.py,sha256=w8RODphhpBYN3aPQ6x-btILDZ6InQOX1AK5sbDBEtRs,8736
38
38
  aixtools/compliance/__init__.py,sha256=vnw0zEdySIJWvDAJ8DCRRaWmY_agEOz1qlpAdhmtiuo,191
@@ -86,7 +86,7 @@ aixtools/tools/doctor/mcp_tool_doctor.py,sha256=sX2q5GfNkmUYxnXrqMpeGIwGfeL1LpYJ
86
86
  aixtools/tools/doctor/tool_doctor.py,sha256=EY1pshjLGLD0j6cc1ZFtbc0G19I5IbOZwHFDqypE49Q,2661
87
87
  aixtools/tools/doctor/tool_recommendation.py,sha256=LYyVOSXdAorWiY4P-ucSA1vLlV5BTEfX4GzBXNE_X0M,1569
88
88
  aixtools/utils/__init__.py,sha256=xT6almZBQYMfj4h7Hq9QXDHyVXbOOTxqLsmJsxYYnSw,757
89
- aixtools/utils/config.py,sha256=OGTlvkc8hxL9GJk9AqEWTUIKLntl6TeL10Ni7t6C0nE,5575
89
+ aixtools/utils/config.py,sha256=R4lXaa2lPWAoWCLcELBBG4-ZRSEWojLtsEUt6Ym0-9c,6566
90
90
  aixtools/utils/config_util.py,sha256=3Ya4Qqhj1RJ1qtTTykQ6iayf5uxlpigPXgEJlTi1wn4,2229
91
91
  aixtools/utils/enum_with_description.py,sha256=zjSzWxG74eR4x7dpmb74pLTYCWNSMvauHd7_9LpDYIw,1088
92
92
  aixtools/utils/files.py,sha256=8JnxwHJRJcjWCdFpjzWmo0po2fRg8esj4H7sOxElYXU,517
@@ -96,8 +96,8 @@ aixtools/utils/chainlit/cl_agent_show.py,sha256=vaRuowp4BRvhxEr5hw0zHEJ7iaSF_5bo
96
96
  aixtools/utils/chainlit/cl_utils.py,sha256=fxaxdkcZg6uHdM8uztxdPowg3a2f7VR7B26VPY4t-3c,5738
97
97
  aixtools/vault/__init__.py,sha256=fsr_NuX3GZ9WZ7dGfe0gp_5-z3URxAfwVRXw7Xyc0dU,141
98
98
  aixtools/vault/vault.py,sha256=9dZLWdZQk9qN_Q9Djkofw9LUKnJqnrX5H0fGusVLBhA,6037
99
- aixtools-0.3.10.dist-info/METADATA,sha256=AhCPj7Q3xRPJCpAAAfoB7grM12n4eawP4WbtJfcMY9Q,27958
100
- aixtools-0.3.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
101
- aixtools-0.3.10.dist-info/entry_points.txt,sha256=q8412TG4T0S8K0SKeWp2vkVPIDYQs0jNoHqcQ7qxOiA,155
102
- aixtools-0.3.10.dist-info/top_level.txt,sha256=wBn-rw9bCtxrR4AYEYgjilNCUVmKY0LWby9Zan2PRJM,9
103
- aixtools-0.3.10.dist-info/RECORD,,
99
+ aixtools-0.4.0.dist-info/METADATA,sha256=SwUbnt5yIcMRsi8fmzcaUh5MTBjUqkSoZJZ7vQYmVR0,28014
100
+ aixtools-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
101
+ aixtools-0.4.0.dist-info/entry_points.txt,sha256=q8412TG4T0S8K0SKeWp2vkVPIDYQs0jNoHqcQ7qxOiA,155
102
+ aixtools-0.4.0.dist-info/top_level.txt,sha256=wBn-rw9bCtxrR4AYEYgjilNCUVmKY0LWby9Zan2PRJM,9
103
+ aixtools-0.4.0.dist-info/RECORD,,