aixtools 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aixtools might be problematic. Click here for more details.

aixtools/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.3.7'
32
- __version_tuple__ = version_tuple = (0, 3, 7)
31
+ __version__ = version = '0.3.9'
32
+ __version_tuple__ = version_tuple = (0, 3, 9)
33
33
 
34
34
  __commit_id__ = commit_id = None
aixtools/agents/prompt.py CHANGED
@@ -1,114 +1,68 @@
1
1
  """Prompt building utilities for Pydantic AI agent, including file handling and context management."""
2
2
 
3
3
  import mimetypes
4
- from dataclasses import dataclass
5
- from pathlib import Path, PurePosixPath
6
- from typing import Optional
4
+ from pathlib import Path
7
5
 
8
- from markitdown import MarkItDown
9
6
  from pydantic_ai import BinaryContent
10
7
 
11
8
  from aixtools.context import SessionIdTuple
12
- from aixtools.logging.logging_config import get_logger
13
9
  from aixtools.server import container_to_host_path
14
- from aixtools.utils.config import (
15
- EXTRACTABLE_DOCUMENT_TYPES,
16
- IMAGE_ATTACHMENT_TYPES,
17
- MAX_EXTRACTED_TEXT_SIZE,
18
- MAX_IMAGE_ATTACHMENT_SIZE,
19
- )
20
10
  from aixtools.utils.files import is_text_content
21
11
 
22
- logger = get_logger(__name__)
23
-
24
-
25
- @dataclass
26
- class FileExtractionResult:
27
- """Result of file content extraction.
28
-
29
- Attributes:
30
- content: Extracted file content (str for text/documents, BinaryContent for images, None on failure)
31
- success: True if file was successfully read or extracted, False on any failure
32
- error_message: Error description if extraction failed, None otherwise
33
- was_extracted: True if document extraction via markitdown was used successfully
34
- """
35
-
36
- content: str | BinaryContent | None
37
- success: bool
38
- error_message: str | None = None
39
- was_extracted: bool = False
12
+ CLAUDE_MAX_FILE_SIZE_IN_CONTEXT = 4 * 1024 * 1024 # Claude limit 4.5 MB for PDF files
13
+ CLAUDE_IMAGE_MAX_FILE_SIZE_IN_CONTEXT = (
14
+ 5 * 1024 * 1024
15
+ ) # Claude limit 5 MB for images, to avoid large image files in context
40
16
 
41
17
 
42
18
  def should_be_included_into_context(
43
19
  file_content: BinaryContent | str | None,
20
+ file_size: int,
44
21
  *,
45
- max_image_size_bytes: int = MAX_IMAGE_ATTACHMENT_SIZE,
46
- max_extracted_text_size_bytes: int = MAX_EXTRACTED_TEXT_SIZE,
22
+ max_img_size_bytes: int = CLAUDE_IMAGE_MAX_FILE_SIZE_IN_CONTEXT,
23
+ max_file_size_bytes: int = CLAUDE_MAX_FILE_SIZE_IN_CONTEXT,
47
24
  ) -> bool:
48
- """Check if file content should be included in model context based on type and size limits."""
49
- if file_content is None:
25
+ """Decide whether a file content should be included into the model context based on its type and size."""
26
+ if not isinstance(file_content, BinaryContent):
50
27
  return False
51
28
 
52
- # Handle extracted text (strings)
53
- if isinstance(file_content, str):
54
- text_size = len(file_content.encode("utf-8"))
55
- return text_size < max_extracted_text_size_bytes
29
+ if file_content.media_type.startswith("text/"):
30
+ return False
31
+
32
+ # Exclude archive files as they're not supported by OpenAI models
33
+ archive_types = {
34
+ "application/zip",
35
+ "application/x-tar",
36
+ "application/gzip",
37
+ "application/x-gzip",
38
+ "application/x-rar-compressed",
39
+ "application/x-7z-compressed",
40
+ }
41
+ if file_content.media_type in archive_types:
42
+ return False
56
43
 
57
- # Handle binary content (images only)
58
- if isinstance(file_content, BinaryContent):
59
- if file_content.media_type not in IMAGE_ATTACHMENT_TYPES:
60
- return False
61
- image_size = len(file_content.data)
62
- return image_size < max_image_size_bytes
44
+ if file_content.is_image and file_size < max_img_size_bytes:
45
+ return True
63
46
 
64
- return False
47
+ return file_size < max_file_size_bytes
65
48
 
66
49
 
67
- def file_to_binary_content(file_path: str | Path, mime_type: Optional[str] = None) -> FileExtractionResult:
68
- """Read file and extract text from documents (PDF, DOCX, XLSX, PPTX) using markitdown."""
50
+ def file_to_binary_content(file_path: str | Path, mime_type: str = "") -> str | BinaryContent:
51
+ """
52
+ Read a file and return its content as either a UTF-8 string (for text files)
53
+ or BinaryContent (for binary files).
54
+ """
55
+ with open(file_path, "rb") as f:
56
+ data = f.read()
57
+
69
58
  if not mime_type:
70
59
  mime_type, _ = mimetypes.guess_type(file_path)
71
60
  mime_type = mime_type or "application/octet-stream"
72
61
 
73
- # Extract text from supported document types using markitdown
74
- if mime_type in EXTRACTABLE_DOCUMENT_TYPES:
75
- try:
76
- markitdown = MarkItDown()
77
- result = markitdown.convert(str(file_path))
78
- return FileExtractionResult(
79
- content=result.text_content, success=True, error_message=None, was_extracted=True
80
- )
81
- except Exception as e: # pylint: disable=broad-exception-caught
82
- error_msg = f"Extraction failed: {type(e).__name__}: {str(e)}"
83
- logger.error("Document extraction failed for %s: %s", file_path, error_msg)
84
- return FileExtractionResult(content=None, success=False, error_message=error_msg)
85
-
86
- # Read the file data for non-document types
87
- try:
88
- with open(file_path, "rb") as f:
89
- data = f.read()
90
-
91
- # Return as string if it's text content
92
- if is_text_content(data, mime_type):
93
- return FileExtractionResult(content=data.decode("utf-8"), success=True)
94
-
95
- # Return as binary content for images and other binary files
96
- return FileExtractionResult(content=BinaryContent(data=data, media_type=mime_type), success=True)
97
- except Exception as e: # pylint: disable=broad-exception-caught
98
- error_msg = f"Failed to read file: {type(e).__name__}: {str(e)}"
99
- logger.error("File reading failed for %s: %s", file_path, error_msg)
100
- return FileExtractionResult(content=None, success=False, error_message=error_msg)
101
-
62
+ if is_text_content(data, mime_type):
63
+ return data.decode("utf-8")
102
64
 
103
- def truncate_extracted_text(text: str, max_bytes: int = MAX_EXTRACTED_TEXT_SIZE) -> str:
104
- """Truncate text to max_bytes with warning prefix."""
105
- truncated_bytes = text.encode("utf-8")[:max_bytes]
106
- truncated_text = truncated_bytes.decode("utf-8", errors="ignore")
107
-
108
- total_chars = len(text)
109
- truncated_chars = len(truncated_text)
110
-
111
- return f"[TRUNCATED - showing first {truncated_chars} of {total_chars} characters]\n\n{truncated_text}"
65
+ return BinaryContent(data=data, media_type=mime_type)
112
66
 
113
67
 
114
68
  def build_user_input(
@@ -121,52 +75,20 @@ def build_user_input(
121
75
  return user_text
122
76
 
123
77
  attachment_info_lines = []
124
- binary_attachments: list[str | BinaryContent] = []
78
+ binary_attachments = []
125
79
 
126
80
  for workspace_path in file_paths:
127
- # Convert Path to PurePosixPath for container_to_host_path
128
- workspace_posix_path = PurePosixPath(workspace_path)
129
- host_path = container_to_host_path(workspace_posix_path, ctx=session_tuple)
130
-
131
- # Handle None return from container_to_host_path
132
- if host_path is None:
133
- attachment_info = (
134
- f"* {workspace_path.name} (path in workspace: {workspace_path}) -- conversion failed: invalid path"
135
- )
136
- attachment_info_lines.append(attachment_info)
137
- continue
138
-
81
+ host_path = container_to_host_path(workspace_path, ctx=session_tuple)
139
82
  file_size = host_path.stat().st_size
140
83
  mime_type, _ = mimetypes.guess_type(host_path)
141
84
  mime_type = mime_type or "application/octet-stream"
142
85
 
143
86
  attachment_info = f"* {workspace_path.name} (file_size={file_size} bytes) (path in workspace: {workspace_path})"
144
- extraction_result = file_to_binary_content(host_path, mime_type)
145
-
146
- # Handle extraction failure - exclude from attachments
147
- if not extraction_result.success:
148
- attachment_info += f" -- extraction failed: {extraction_result.error_message}"
149
- attachment_info_lines.append(attachment_info)
150
- continue
151
-
152
- # Handle successful extraction
153
- if extraction_result.was_extracted:
154
- attachment_info += " -- extracted as text"
87
+ binary_content = file_to_binary_content(host_path, mime_type)
155
88
 
156
- # Check if content should be included in context
157
- if should_be_included_into_context(extraction_result.content) and extraction_result.content is not None:
158
- binary_attachments.append(extraction_result.content)
89
+ if should_be_included_into_context(binary_content, file_size):
90
+ binary_attachments.append(binary_content)
159
91
  attachment_info += f" -- provided to model context at index {len(binary_attachments) - 1}"
160
- elif (
161
- isinstance(extraction_result.content, str) and extraction_result.content and extraction_result.was_extracted
162
- ):
163
- # Truncate large extracted text and include with warning (only for extracted documents)
164
- truncated_content = truncate_extracted_text(extraction_result.content)
165
- binary_attachments.append(truncated_content)
166
- attachment_info += f" -- truncated and provided to model context at index {len(binary_attachments) - 1}"
167
- elif extraction_result.content is not None:
168
- # Content exists but excluded from context (e.g., images too large, non-extracted text)
169
- attachment_info += " -- too large for context"
170
92
 
171
93
  attachment_info_lines.append(attachment_info)
172
94
 
@@ -53,8 +53,13 @@ class ContextFilter(logging.Filter): # pylint: disable=too-few-public-methods
53
53
  except ImportError:
54
54
  pass
55
55
 
56
- if not user_id and not session_id:
57
- user_id, session_id = self._extract_from_mcp_context()
56
+ mcp_user_id = None
57
+ mcp_session_id = None
58
+ if not user_id or not session_id:
59
+ mcp_user_id, mcp_session_id = self._extract_from_mcp_context()
60
+
61
+ user_id = user_id or mcp_user_id
62
+ session_id = session_id or mcp_session_id
58
63
 
59
64
  context = ""
60
65
  if session_id and not str(session_id).startswith("default"):
aixtools/utils/config.py CHANGED
@@ -146,26 +146,3 @@ APP_DEFAULT_SCOPE = get_variable_env("APP_DEFAULT_SCOPE", allow_empty=True)
146
146
  AUTH_TEST_TOKEN = get_variable_env("AUTH_TEST_TOKEN", allow_empty=True)
147
147
 
148
148
  MCP_TOOLS_MAX_RETRIES = int(get_variable_env("MCP_TOOLS_MAX_RETRIES", default=10))
149
-
150
-
151
- # File attachment limits and supported types for model context
152
- # Maximum extracted document text size (5MB default, planned for future use)
153
- MAX_EXTRACTED_TEXT_SIZE = int(get_variable_env("MAX_EXTRACTED_TEXT_SIZE", default=str(5 * 1024 * 1024)))
154
- # Maximum image attachment size (2MB default)
155
- MAX_IMAGE_ATTACHMENT_SIZE = int(get_variable_env("MAX_IMAGE_ATTACHMENT_SIZE", default=str(2 * 1024 * 1024)))
156
- # Image MIME types that can be attached to model context
157
- IMAGE_ATTACHMENT_TYPES = {
158
- "image/png",
159
- "image/jpeg",
160
- "image/jpg",
161
- "image/gif",
162
- "image/webp",
163
- }
164
- # Document MIME types that can be extracted as text
165
- EXTRACTABLE_DOCUMENT_TYPES = {
166
- "application/vnd.openxmlformats-officedocument.presentationml.presentation", # .pptx
167
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # .docx
168
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # .xlsx
169
- "application/vnd.ms-excel", # .xls
170
- "application/pdf", # .pdf
171
- }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aixtools
3
- Version: 0.3.7
3
+ Version: 0.3.9
4
4
  Summary: Tools for AI exploration and debugging
5
5
  Requires-Python: >=3.11.2
6
6
  Description-Content-Type: text/markdown
@@ -26,7 +26,6 @@ Requires-Dist: rich>=14.0.0
26
26
  Requires-Dist: ruff>=0.11.6
27
27
  Requires-Dist: streamlit>=1.44.1
28
28
  Requires-Dist: watchdog>=6.0.0
29
- Requires-Dist: markitdown[docx,pdf,pptx,xls,xlsx]>=0.1.3
30
29
  Provides-Extra: test
31
30
  Requires-Dist: pyyaml; extra == "test"
32
31
  Provides-Extra: feature
@@ -1,5 +1,5 @@
1
1
  aixtools/__init__.py,sha256=9NGHm7LjsQmsvjTZvw6QFJexSvAU4bCoN_KBk9SCa00,260
2
- aixtools/_version.py,sha256=CszCydqJjxQ_CbTrTy0L1k2j2gCfwJlahui0bCcdNp4,704
2
+ aixtools/_version.py,sha256=ExraCdacEuQ4u39MvW3tM7oAUHWlw1IoMTF_3HHD4gA,704
3
3
  aixtools/app.py,sha256=JzQ0nrv_bjDQokllIlGHOV0HEb-V8N6k_nGQH-TEsVU,5227
4
4
  aixtools/chainlit.md,sha256=yC37Ly57vjKyiIvK4oUvf4DYxZCwH7iocTlx7bLeGLU,761
5
5
  aixtools/context.py,sha256=I_MD40ZnvRm5WPKAKqBUAdXIf8YaurkYUUHSVVy-QvU,598
@@ -32,7 +32,7 @@ aixtools/agents/nodes_to_md.py,sha256=hAT8dgiZTG4uGoSgeRZkIJ7zgkQUNpdIr8KSFhjWAH
32
32
  aixtools/agents/nodes_to_message.py,sha256=ZqcmxUNf4esiCTRk37wWP1LquhqNsCmydvMr4kjZEjw,1012
33
33
  aixtools/agents/nodes_to_str.py,sha256=UkOu5Nry827J4H_ohQU3tPBfJxtr3p6FfCfWoUy5uIs,4325
34
34
  aixtools/agents/print_nodes.py,sha256=wVTngNfqM0As845WTRz6G3Rei_Gr3HuBlvu-G_eXuig,1665
35
- aixtools/agents/prompt.py,sha256=oZl6_3SelyoSysLpF6AAmLHLHhwyPYCtX8hJ2pRUnhw,7396
35
+ aixtools/agents/prompt.py,sha256=p9OYnyJ4-MyGXwHPrQeJBhZ2a3RV2HqhtdUUCrTMsAQ,3361
36
36
  aixtools/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  aixtools/auth/auth.py,sha256=AUc9Kw8ZrjiIxsp7Vg1bexg8mIuMbRv8XNU3pynfMdU,8487
38
38
  aixtools/compliance/__init__.py,sha256=vnw0zEdySIJWvDAJ8DCRRaWmY_agEOz1qlpAdhmtiuo,191
@@ -54,7 +54,7 @@ aixtools/log_view/filters.py,sha256=xbgSzzUnkqHoS_A_p4lRDMFhXqFvXCjwkcIEBQ_L29g,
54
54
  aixtools/log_view/log_utils.py,sha256=PGUUB039x6w7nOLg_M6Jk1aDIvcjyKP4f3YF5kavQ88,914
55
55
  aixtools/log_view/node_summary.py,sha256=EJjnBqdBWI-_bI-4nfTxwaost3mtiufb5cK7T54cfuQ,9299
56
56
  aixtools/logfilters/__init__.py,sha256=pTD8ujCqjPWBCeB7yv7lmCtnA2KXOnkIv0HExDagkXs,129
57
- aixtools/logfilters/context_filter.py,sha256=zR3Bnv3fCqXLeb7bCFTmlnWhC6dFIvUb-u712tOnUPk,2259
57
+ aixtools/logfilters/context_filter.py,sha256=7Tfus83Ja2CmhIyd4xEAnGNAdAVSC1shb7_cRfilZQc,2415
58
58
  aixtools/logging/__init__.py,sha256=b5oYyGQDUHHxhRtzqKUaQPv8hQeWw54rzDXSV8lDY1w,613
59
59
  aixtools/logging/log_objects.py,sha256=gohsgcfyr8vsY7G_hfmj973-Ek1_PN-bMMLEUA-4u6U,7708
60
60
  aixtools/logging/logging_config.py,sha256=LvxV3C75-I0096PpcCIbgM-Cp998LzWXeMM14HYbU20,4985
@@ -86,7 +86,7 @@ aixtools/tools/doctor/mcp_tool_doctor.py,sha256=sX2q5GfNkmUYxnXrqMpeGIwGfeL1LpYJ
86
86
  aixtools/tools/doctor/tool_doctor.py,sha256=EY1pshjLGLD0j6cc1ZFtbc0G19I5IbOZwHFDqypE49Q,2661
87
87
  aixtools/tools/doctor/tool_recommendation.py,sha256=LYyVOSXdAorWiY4P-ucSA1vLlV5BTEfX4GzBXNE_X0M,1569
88
88
  aixtools/utils/__init__.py,sha256=xT6almZBQYMfj4h7Hq9QXDHyVXbOOTxqLsmJsxYYnSw,757
89
- aixtools/utils/config.py,sha256=R4lXaa2lPWAoWCLcELBBG4-ZRSEWojLtsEUt6Ym0-9c,6566
89
+ aixtools/utils/config.py,sha256=OGTlvkc8hxL9GJk9AqEWTUIKLntl6TeL10Ni7t6C0nE,5575
90
90
  aixtools/utils/config_util.py,sha256=3Ya4Qqhj1RJ1qtTTykQ6iayf5uxlpigPXgEJlTi1wn4,2229
91
91
  aixtools/utils/enum_with_description.py,sha256=zjSzWxG74eR4x7dpmb74pLTYCWNSMvauHd7_9LpDYIw,1088
92
92
  aixtools/utils/files.py,sha256=8JnxwHJRJcjWCdFpjzWmo0po2fRg8esj4H7sOxElYXU,517
@@ -96,8 +96,8 @@ aixtools/utils/chainlit/cl_agent_show.py,sha256=vaRuowp4BRvhxEr5hw0zHEJ7iaSF_5bo
96
96
  aixtools/utils/chainlit/cl_utils.py,sha256=fxaxdkcZg6uHdM8uztxdPowg3a2f7VR7B26VPY4t-3c,5738
97
97
  aixtools/vault/__init__.py,sha256=fsr_NuX3GZ9WZ7dGfe0gp_5-z3URxAfwVRXw7Xyc0dU,141
98
98
  aixtools/vault/vault.py,sha256=9dZLWdZQk9qN_Q9Djkofw9LUKnJqnrX5H0fGusVLBhA,6037
99
- aixtools-0.3.7.dist-info/METADATA,sha256=YAAB9h7I10KSNbxjUsIkryI5H50DeUEtq-xJYTZzFvc,28014
100
- aixtools-0.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
101
- aixtools-0.3.7.dist-info/entry_points.txt,sha256=q8412TG4T0S8K0SKeWp2vkVPIDYQs0jNoHqcQ7qxOiA,155
102
- aixtools-0.3.7.dist-info/top_level.txt,sha256=wBn-rw9bCtxrR4AYEYgjilNCUVmKY0LWby9Zan2PRJM,9
103
- aixtools-0.3.7.dist-info/RECORD,,
99
+ aixtools-0.3.9.dist-info/METADATA,sha256=0RHde52mA6rl82UF1PF4fBTHYS1uM5Vh1EHCdQqpOWA,27957
100
+ aixtools-0.3.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
101
+ aixtools-0.3.9.dist-info/entry_points.txt,sha256=q8412TG4T0S8K0SKeWp2vkVPIDYQs0jNoHqcQ7qxOiA,155
102
+ aixtools-0.3.9.dist-info/top_level.txt,sha256=wBn-rw9bCtxrR4AYEYgjilNCUVmKY0LWby9Zan2PRJM,9
103
+ aixtools-0.3.9.dist-info/RECORD,,