aip-agents-binary 0.5.15__py3-none-macosx_13_0_arm64.whl → 0.5.17__py3-none-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,8 @@ References:
12
12
 
13
13
  try:
14
14
  from aip_agents.tools.document_loader.base_reader import ( # noqa: F401
15
+ DOCPROC_AVAILABLE,
16
+ DOCPROC_MISSING_MESSAGE,
15
17
  BaseDocumentReaderTool,
16
18
  DocumentReaderInput,
17
19
  )
@@ -28,10 +30,15 @@ try:
28
30
  "ExcelReaderTool",
29
31
  "PDFSplitter",
30
32
  ]
33
+ if not DOCPROC_AVAILABLE:
34
+ import warnings
35
+
36
+ warnings.warn(DOCPROC_MISSING_MESSAGE, ImportWarning)
31
37
  except ImportError:
32
38
  import warnings
33
39
 
34
40
  warnings.warn(
35
- "Document loader tools not available. Install with: pip install aip-agents[document-loader]", ImportWarning
41
+ "Document loader tools not available. Install gllm-docproc to enable them.",
42
+ ImportWarning,
36
43
  )
37
44
  __all__ = []
@@ -10,8 +10,8 @@ References:
10
10
  import gc
11
11
  import os
12
12
  from abc import ABC, abstractmethod
13
+ from typing import TYPE_CHECKING, Any
13
14
 
14
- from gllm_docproc.loader.pipeline_loader import PipelineLoader
15
15
  from langchain_core.runnables import RunnableConfig
16
16
  from langchain_core.tools import BaseTool
17
17
  from pydantic import BaseModel, Field
@@ -21,6 +21,46 @@ from aip_agents.utils.logger import get_logger
21
21
 
22
22
  logger = get_logger(__name__)
23
23
 
24
+ DOCPROC_MISSING_MESSAGE = (
25
+ "gllm-docproc is required for document loader tools but is not installed. "
26
+ "Install it from your internal registry to enable document processing."
27
+ )
28
+
29
+ try:
30
+ from gllm_docproc.loader.pipeline_loader import PipelineLoader
31
+ except ImportError:
32
+ PipelineLoader = None # type: ignore[assignment]
33
+
34
+ if TYPE_CHECKING:
35
+ from gllm_docproc.loader.pipeline_loader import PipelineLoader as PipelineLoaderType
36
+ else:
37
+ PipelineLoaderType = Any
38
+
39
+
40
+ class _MissingDocprocLoader:
41
+ """Fallback loader that errors when document processing is attempted."""
42
+
43
+ def __init__(self) -> None:
44
+ self.loaders: list[object] = []
45
+
46
+ def add_loader(self, loader: object) -> None:
47
+ self.loaders.append(loader)
48
+
49
+ def load(self, *_args: object, **_kwargs: object) -> list[dict[str, str]]:
50
+ raise ImportError(DOCPROC_MISSING_MESSAGE)
51
+
52
+ def clear_cache(self) -> None:
53
+ self.loaders.clear()
54
+
55
+
56
+ def _build_pipeline_loader() -> "PipelineLoader":
57
+ if PipelineLoader is None:
58
+ return _MissingDocprocLoader()
59
+ return PipelineLoader() # type: ignore[misc]
60
+
61
+
62
+ DOCPROC_AVAILABLE = PipelineLoader is not None
63
+
24
64
 
25
65
  class BaseDocumentConfig(BaseModel):
26
66
  """Base tool configuration schema for document processing with batching functionality.
@@ -78,7 +118,7 @@ class BaseDocumentReaderTool(BaseTool, ABC):
78
118
  description: str = "Read a document file and extract its text content."
79
119
  args_schema: type[BaseModel] = DocumentReaderInput
80
120
  tool_config_schema: type[BaseModel] = BaseDocumentConfig
81
- loader: PipelineLoader = Field(default_factory=PipelineLoader)
121
+ loader: PipelineLoaderType = Field(default_factory=_build_pipeline_loader)
82
122
 
83
123
  def __init__(self):
84
124
  """Initialize the base document reader tool."""
@@ -1,11 +1,24 @@
1
1
  from _typeshed import Incomplete
2
2
  from abc import ABC
3
3
  from aip_agents.utils.logger import get_logger as get_logger
4
- from gllm_docproc.loader.pipeline_loader import PipelineLoader
4
+ from gllm_docproc.loader.pipeline_loader import PipelineLoader as PipelineLoaderType
5
5
  from langchain_core.tools import BaseTool
6
6
  from pydantic import BaseModel
7
+ from typing import Any
7
8
 
8
9
  logger: Incomplete
10
+ DOCPROC_MISSING_MESSAGE: str
11
+ PipelineLoaderType = Any
12
+
13
+ class _MissingDocprocLoader:
14
+ """Fallback loader that errors when document processing is attempted."""
15
+ loaders: list[object]
16
+ def __init__(self) -> None: ...
17
+ def add_loader(self, loader: object) -> None: ...
18
+ def load(self, *_args: object, **_kwargs: object) -> list[dict[str, str]]: ...
19
+ def clear_cache(self) -> None: ...
20
+
21
+ DOCPROC_AVAILABLE: Incomplete
9
22
 
10
23
  class BaseDocumentConfig(BaseModel):
11
24
  """Base tool configuration schema for document processing with batching functionality.
@@ -50,7 +63,7 @@ class BaseDocumentReaderTool(BaseTool, ABC):
50
63
  description: str
51
64
  args_schema: type[BaseModel]
52
65
  tool_config_schema: type[BaseModel]
53
- loader: PipelineLoader
66
+ loader: PipelineLoaderType
54
67
  def __init__(self) -> None:
55
68
  """Initialize the base document reader tool."""
56
69
  def cleanup_memory(self) -> None:
@@ -8,9 +8,19 @@ References:
8
8
  reader/docx_reader_tool.py
9
9
  """
10
10
 
11
- from gllm_docproc.loader.docx import DOCX2PythonLoader, PythonDOCXTableLoader
11
+ try:
12
+ from gllm_docproc.loader.docx import DOCX2PythonLoader, PythonDOCXTableLoader
13
+ except ImportError as exc:
14
+ DOCX2PythonLoader = None # type: ignore[assignment]
15
+ PythonDOCXTableLoader = None # type: ignore[assignment]
16
+ _DOCPROC_IMPORT_ERROR: Exception | None = exc
17
+ else:
18
+ _DOCPROC_IMPORT_ERROR = None
12
19
 
13
- from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool
20
+ from aip_agents.tools.document_loader.base_reader import DOCPROC_MISSING_MESSAGE, BaseDocumentReaderTool
21
+ from aip_agents.utils.logger import get_logger
22
+
23
+ logger = get_logger(__name__)
14
24
 
15
25
 
16
26
  class DocxReaderTool(BaseDocumentReaderTool):
@@ -20,6 +30,11 @@ class DocxReaderTool(BaseDocumentReaderTool):
20
30
  description: str = "Read a Word document and extract its text content. Input should be the path to the Word file."
21
31
 
22
32
  def _setup_loader(self):
33
+ if DOCX2PythonLoader is None or PythonDOCXTableLoader is None:
34
+ logger.warning(DOCPROC_MISSING_MESSAGE)
35
+ if _DOCPROC_IMPORT_ERROR is not None:
36
+ logger.debug("gllm_docproc import failed: %s", _DOCPROC_IMPORT_ERROR)
37
+ return
23
38
  self.loader.add_loader(DOCX2PythonLoader())
24
39
  self.loader.add_loader(PythonDOCXTableLoader())
25
40
 
@@ -1,4 +1,8 @@
1
- from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool as BaseDocumentReaderTool
1
+ from _typeshed import Incomplete
2
+ from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool as BaseDocumentReaderTool, DOCPROC_MISSING_MESSAGE as DOCPROC_MISSING_MESSAGE
3
+ from aip_agents.utils.logger import get_logger as get_logger
4
+
5
+ logger: Incomplete
2
6
 
3
7
  class DocxReaderTool(BaseDocumentReaderTool):
4
8
  """Tool to read and extract text from Word documents."""
@@ -10,9 +10,15 @@ Authors:
10
10
  import zipfile
11
11
  from pathlib import Path
12
12
 
13
- from gllm_docproc.loader.xlsx import OpenpyxlLoader
14
-
15
- from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool
13
+ try:
14
+ from gllm_docproc.loader.xlsx import OpenpyxlLoader
15
+ except ImportError as exc:
16
+ OpenpyxlLoader = None # type: ignore[assignment]
17
+ _DOCPROC_IMPORT_ERROR: Exception | None = exc
18
+ else:
19
+ _DOCPROC_IMPORT_ERROR = None
20
+
21
+ from aip_agents.tools.document_loader.base_reader import DOCPROC_MISSING_MESSAGE, BaseDocumentReaderTool
16
22
  from aip_agents.utils.logger import get_logger
17
23
 
18
24
  logger = get_logger(__name__)
@@ -50,6 +56,11 @@ class ExcelReaderTool(BaseDocumentReaderTool):
50
56
  This method initializes the OpenpyxlLoader which handles extraction
51
57
  of content from Excel files and formatting as Markdown tables.
52
58
  """
59
+ if OpenpyxlLoader is None:
60
+ logger.warning(DOCPROC_MISSING_MESSAGE)
61
+ if _DOCPROC_IMPORT_ERROR is not None:
62
+ logger.debug("gllm_docproc import failed: %s", _DOCPROC_IMPORT_ERROR)
63
+ return
53
64
  self.loader.add_loader(OpenpyxlLoader())
54
65
 
55
66
  def _run_with_batching(self, file_path: str, batch_size: int) -> str:
@@ -1,5 +1,5 @@
1
1
  from _typeshed import Incomplete
2
- from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool as BaseDocumentReaderTool
2
+ from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool as BaseDocumentReaderTool, DOCPROC_MISSING_MESSAGE as DOCPROC_MISSING_MESSAGE
3
3
  from aip_agents.utils.logger import get_logger as get_logger
4
4
 
5
5
  logger: Incomplete
@@ -8,9 +8,16 @@ References:
8
8
  reader/pdf_reader_tool.py
9
9
  """
10
10
 
11
- from gllm_docproc.loader.pdf import PDFPlumberLoader, PyMuPDFLoader
12
-
13
- from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool
11
+ try:
12
+ from gllm_docproc.loader.pdf import PDFPlumberLoader, PyMuPDFLoader
13
+ except ImportError as exc:
14
+ PDFPlumberLoader = None # type: ignore[assignment]
15
+ PyMuPDFLoader = None # type: ignore[assignment]
16
+ _DOCPROC_IMPORT_ERROR: Exception | None = exc
17
+ else:
18
+ _DOCPROC_IMPORT_ERROR = None
19
+
20
+ from aip_agents.tools.document_loader.base_reader import DOCPROC_MISSING_MESSAGE, BaseDocumentReaderTool
14
21
  from aip_agents.tools.document_loader.pdf_splitter import PDFSplitter
15
22
  from aip_agents.utils.logger import get_logger
16
23
 
@@ -24,6 +31,11 @@ class PDFReaderTool(BaseDocumentReaderTool):
24
31
  description: str = "Read a PDF file and extract its text content. Input should be the path to the PDF file."
25
32
 
26
33
  def _setup_loader(self):
34
+ if PDFPlumberLoader is None or PyMuPDFLoader is None:
35
+ logger.warning(DOCPROC_MISSING_MESSAGE)
36
+ if _DOCPROC_IMPORT_ERROR is not None:
37
+ logger.debug("gllm_docproc import failed: %s", _DOCPROC_IMPORT_ERROR)
38
+ return
27
39
  self.loader.add_loader(PyMuPDFLoader())
28
40
  self.loader.add_loader(PDFPlumberLoader())
29
41
 
@@ -43,7 +55,7 @@ class PDFReaderTool(BaseDocumentReaderTool):
43
55
  Raises:
44
56
  FileNotFoundError: If the input PDF file doesn't exist
45
57
  ValueError: If batch_size is invalid or PDF processing fails
46
- Exception: For other unexpected errors during PDF splitting
58
+ RuntimeError: For other unexpected errors during PDF splitting
47
59
  """
48
60
  logger.info(f"Splitting PDF file '{file_path}' with batch_size={batch_size}")
49
61
 
@@ -64,4 +76,4 @@ class PDFReaderTool(BaseDocumentReaderTool):
64
76
  raise
65
77
  except Exception as e:
66
78
  logger.error(f"Unexpected error splitting PDF '{file_path}': {str(e)}")
67
- raise Exception(f"Failed to split PDF file: {str(e)}") from e
79
+ raise RuntimeError(f"Failed to split PDF file: {str(e)}") from e
@@ -1,5 +1,5 @@
1
1
  from _typeshed import Incomplete
2
- from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool as BaseDocumentReaderTool
2
+ from aip_agents.tools.document_loader.base_reader import BaseDocumentReaderTool as BaseDocumentReaderTool, DOCPROC_MISSING_MESSAGE as DOCPROC_MISSING_MESSAGE
3
3
  from aip_agents.tools.document_loader.pdf_splitter import PDFSplitter as PDFSplitter
4
4
  from aip_agents.utils.logger import get_logger as get_logger
5
5
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aip-agents-binary
3
- Version: 0.5.15
3
+ Version: 0.5.17
4
4
  Summary: A library for managing agents in Gen AI applications.
5
5
  Author-email: Raymond Christopher <raymond.christopher@gdplabs.id>
6
6
  Requires-Python: <3.13,>=3.11
@@ -38,7 +38,6 @@ Requires-Dist: browser-use==0.5.9; extra == "browser-use-tool"
38
38
  Requires-Dist: steel-sdk>=0.7.0; extra == "browser-use-tool"
39
39
  Requires-Dist: json-repair>=0.52.3; extra == "browser-use-tool"
40
40
  Provides-Extra: document-loader
41
- Requires-Dist: gllm-docproc-binary[docx,pdf,xlsx]<0.8.0,>=0.7.20; extra == "document-loader"
42
41
  Requires-Dist: PyPDF2<4.0.0,>=3.0.0; extra == "document-loader"
43
42
  Requires-Dist: unidecode<2.0.0,>=1.3.0; extra == "document-loader"
44
43
  Provides-Extra: gl-connector
@@ -50,9 +49,8 @@ Requires-Dist: steel-sdk>=0.7.0; extra == "local"
50
49
  Requires-Dist: json-repair>=0.52.3; extra == "local"
51
50
  Requires-Dist: PyPDF2<4.0.0,>=3.0.0; extra == "local"
52
51
  Requires-Dist: unidecode<2.0.0,>=1.3.0; extra == "local"
52
+ Requires-Dist: gllm-docproc-binary<0.8.0,>=0.7.20; extra == "local"
53
53
  Requires-Dist: bosa-connectors-binary<0.4.0,>=0.3.1; extra == "local"
54
- Requires-Dist: gllm-docproc-binary[docx,pdf,xlsx]<0.8.0,>=0.7.20; extra == "local"
55
- Requires-Dist: gllm-datastore-binary[chroma,elasticsearch]==0.5.40; extra == "local"
56
54
  Provides-Extra: dev
57
55
  Requires-Dist: coverage<8.0.0,>=7.4.4; extra == "dev"
58
56
  Requires-Dist: mypy<2.0.0,>=1.15.0; extra == "dev"
@@ -414,16 +414,16 @@ aip_agents/tools/code_sandbox/e2b_cloud_sandbox_extended.py,sha256=H91-1a3keS6Si
414
414
  aip_agents/tools/code_sandbox/e2b_cloud_sandbox_extended.pyi,sha256=ZnW7S-Wla5J-ZD8P92-sXuzTGZ2-z5zyv4aH97FlshI,3554
415
415
  aip_agents/tools/code_sandbox/e2b_sandbox_tool.py,sha256=sTFTQd9Li9lB2iXgsARfpJEfFz9O2_sdFYFh8gFvC6A,16954
416
416
  aip_agents/tools/code_sandbox/e2b_sandbox_tool.pyi,sha256=ylrBQaqNBa1Jppld-mHjaskSa9SOjsQD8TdcK2bnl4s,1165
417
- aip_agents/tools/document_loader/__init__.py,sha256=Av6CDxzGuLhlpLtwHjKaEGHVVf06nIo50Vpn30BetG4,1206
417
+ aip_agents/tools/document_loader/__init__.py,sha256=rnQFLYJqvit5eegGIWGdjOUoEJiyOh3nW0mLSRd7xfE,1375
418
418
  aip_agents/tools/document_loader/__init__.pyi,sha256=RYZb-EdfR1btPxFBUwfmrOWs51aVgwJuw9epguxnVgQ,650
419
- aip_agents/tools/document_loader/base_reader.py,sha256=JxqG-VXzDHUesGoCGqmzX_fzEG2f15DwanpbhDHko9k,9192
420
- aip_agents/tools/document_loader/base_reader.pyi,sha256=SlXiQdIUXeNATyxSvTI9Z5v9DVVUkqkuzMn2k5MPHRU,2606
421
- aip_agents/tools/document_loader/docx_reader_tool.py,sha256=7JNLDk8w0boRvg-ltBt3csTo8FyPdr_KunayMrm-JJk,2032
422
- aip_agents/tools/document_loader/docx_reader_tool.pyi,sha256=__bJ8jrlZOJsTo33YeLuTicuhGGOje5ROg7AXxjS3fI,249
423
- aip_agents/tools/document_loader/excel_reader_tool.py,sha256=GQOF4DC4eUxeobRvSPmt_I0oFQ2cD2e6Px-oWO21Y1Q,6153
424
- aip_agents/tools/document_loader/excel_reader_tool.pyi,sha256=-I5YT2OpOO0eWQPf5Ly0vbR3n1xgVREroq9qvie0I40,875
425
- aip_agents/tools/document_loader/pdf_reader_tool.py,sha256=YTUunsVHAB7FfOSjZMGXhRvlU7-YLYemJw3GCMkWkZE,2575
426
- aip_agents/tools/document_loader/pdf_reader_tool.pyi,sha256=VWXiWU1FlO0eh_Si8siXzmPkPRB63dTaR3Z2oggLUyc,442
419
+ aip_agents/tools/document_loader/base_reader.py,sha256=fUK5yry8VnTLgu04LiW-fZRjpWLcIcs5nIzOwBUS4AQ,10341
420
+ aip_agents/tools/document_loader/base_reader.pyi,sha256=OLn3MKD831u97rOiPGjoL6-VcJSOS-rDhhlPiXs7eY8,3085
421
+ aip_agents/tools/document_loader/docx_reader_tool.py,sha256=EOGgXdjC3BXKPtu-6ABzGQp0xL8Z9L9WiMM7aUDPREE,2656
422
+ aip_agents/tools/document_loader/docx_reader_tool.pyi,sha256=LKhrTT7xSVZr-zvrJQzhcIoUe_grQTXsl9DRxPS7oNk,415
423
+ aip_agents/tools/document_loader/excel_reader_tool.py,sha256=uu1aK81SlrRR_8hoEGApRH3_QM-CpSgMBOX0tWPwESE,6599
424
+ aip_agents/tools/document_loader/excel_reader_tool.pyi,sha256=my50Gc51IYNyekVBLRFksbsDG6lmK85o2d-exvff9r4,927
425
+ aip_agents/tools/document_loader/pdf_reader_tool.py,sha256=36U_r7E0n_nLl4xUXC4nDXmr3L6kASfgKHuhtJFZeqI,3109
426
+ aip_agents/tools/document_loader/pdf_reader_tool.pyi,sha256=ZKr5E3ePTJWHdLfM3_7Ji9NGDpL29uKLFhLl-smZQqQ,494
427
427
  aip_agents/tools/document_loader/pdf_splitter.py,sha256=-QyrlnN4AXDqY_dxeUzxcgCVJJiD1vYUYR7swfhM_RQ,5752
428
428
  aip_agents/tools/document_loader/pdf_splitter.pyi,sha256=3IiRDQWDz8QR5LH_sni9O-N2qJFheFjKOQMAhiWxB7E,716
429
429
  aip_agents/tools/gl_connector/__init__.py,sha256=tpQ0vF7gMiyO32jTBndOAVor5MRDNff7yh_SC7Frv_U,136
@@ -538,7 +538,7 @@ aip_agents/utils/pii/pii_helper.py,sha256=g0yRzakfA2AA6vjUNLWHqFlcxyLql6MXQ90NN3
538
538
  aip_agents/utils/pii/pii_helper.pyi,sha256=dulZs150ikbAL3Bw2YLcz3_g4DsGmL3lciwf8mKxEjI,2939
539
539
  aip_agents/utils/pii/uuid_deanonymizer_mapping.py,sha256=Gks8l8t0cuS9pzoQnrpiK1CaLmWYksjOnTeiHh3_7EE,7348
540
540
  aip_agents/utils/pii/uuid_deanonymizer_mapping.pyi,sha256=gnWfD1rWZh_tloJjgKiZ6f6iNUuBaHpKqCSiP0d-9bs,3084
541
- aip_agents_binary-0.5.15.dist-info/METADATA,sha256=_rnBCf9SJp55WwHGQ18LO3dVpcX7xp98U050dNAzrQo,22786
542
- aip_agents_binary-0.5.15.dist-info/WHEEL,sha256=PaP4PvkDyiSc4C6Dhw6ccQmfxsWFrSv-lJQjBshu0hw,105
543
- aip_agents_binary-0.5.15.dist-info/top_level.txt,sha256=PEz8vcwC1bH4UrkhF0LkIYCNfXGWZUHdSklbvkBe25E,11
544
- aip_agents_binary-0.5.15.dist-info/RECORD,,
541
+ aip_agents_binary-0.5.17.dist-info/METADATA,sha256=2RWWD_Yd2YUYRVCLHiCZCB5mlISRwe0rw71BU3mLHiU,22593
542
+ aip_agents_binary-0.5.17.dist-info/WHEEL,sha256=PaP4PvkDyiSc4C6Dhw6ccQmfxsWFrSv-lJQjBshu0hw,105
543
+ aip_agents_binary-0.5.17.dist-info/top_level.txt,sha256=PEz8vcwC1bH4UrkhF0LkIYCNfXGWZUHdSklbvkBe25E,11
544
+ aip_agents_binary-0.5.17.dist-info/RECORD,,