dtSpark 1.0.10__tar.gz → 1.1.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. {dtspark-1.0.10 → dtspark-1.1.0a1}/PKG-INFO +7 -1
  2. {dtspark-1.0.10 → dtspark-1.1.0a1}/pyproject.toml +7 -0
  3. dtspark-1.1.0a1/src/dtSpark/_version.txt +1 -0
  4. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/core/application.py +226 -0
  5. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/mcp_integration/tool_selector.py +5 -0
  6. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/resources/config.yaml.template +42 -0
  7. dtspark-1.1.0a1/src/dtSpark/tools/builtin.py +2271 -0
  8. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/endpoints/chat.py +147 -0
  9. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/endpoints/main_menu.py +75 -0
  10. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/templates/chat.html +300 -0
  11. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/templates/main_menu.html +71 -29
  12. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark.egg-info/PKG-INFO +7 -1
  13. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark.egg-info/SOURCES.txt +1 -0
  14. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark.egg-info/requires.txt +10 -0
  15. dtspark-1.1.0a1/tests/test_document_archive_tools.py +846 -0
  16. dtspark-1.0.10/src/dtSpark/_version.txt +0 -1
  17. dtspark-1.0.10/src/dtSpark/tools/builtin.py +0 -833
  18. {dtspark-1.0.10 → dtspark-1.1.0a1}/LICENSE +0 -0
  19. {dtspark-1.0.10 → dtspark-1.1.0a1}/MANIFEST.in +0 -0
  20. {dtspark-1.0.10 → dtspark-1.1.0a1}/README.md +0 -0
  21. {dtspark-1.0.10 → dtspark-1.1.0a1}/setup.cfg +0 -0
  22. {dtspark-1.0.10 → dtspark-1.1.0a1}/setup.py +0 -0
  23. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/__init__.py +0 -0
  24. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/_description.txt +0 -0
  25. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/_full_name.txt +0 -0
  26. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/_licence.txt +0 -0
  27. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/_metadata.yaml +0 -0
  28. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/_name.txt +0 -0
  29. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/aws/__init__.py +0 -0
  30. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/aws/authentication.py +0 -0
  31. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/aws/bedrock.py +0 -0
  32. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/aws/costs.py +0 -0
  33. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/aws/pricing.py +0 -0
  34. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/cli_interface.py +0 -0
  35. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/conversation_manager.py +0 -0
  36. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/core/__init__.py +0 -0
  37. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/core/context_compaction.py +0 -0
  38. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/daemon/__init__.py +0 -0
  39. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/daemon/__main__.py +0 -0
  40. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/daemon/action_monitor.py +0 -0
  41. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/daemon/daemon_app.py +0 -0
  42. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/daemon/daemon_manager.py +0 -0
  43. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/daemon/execution_coordinator.py +0 -0
  44. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/daemon/pid_file.py +0 -0
  45. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/__init__.py +0 -0
  46. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/autonomous_actions.py +0 -0
  47. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/backends.py +0 -0
  48. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/connection.py +0 -0
  49. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/conversations.py +0 -0
  50. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/credential_prompt.py +0 -0
  51. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/files.py +0 -0
  52. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/mcp_ops.py +0 -0
  53. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/messages.py +0 -0
  54. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/schema.py +0 -0
  55. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/tool_permissions.py +0 -0
  56. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/database/usage.py +0 -0
  57. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/files/__init__.py +0 -0
  58. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/files/manager.py +0 -0
  59. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/launch.py +0 -0
  60. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/limits/__init__.py +0 -0
  61. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/limits/costs.py +0 -0
  62. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/limits/tokens.py +0 -0
  63. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/llm/__init__.py +0 -0
  64. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/llm/anthropic_direct.py +0 -0
  65. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/llm/base.py +0 -0
  66. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/llm/context_limits.py +0 -0
  67. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/llm/manager.py +0 -0
  68. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/llm/ollama.py +0 -0
  69. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/mcp_integration/__init__.py +0 -0
  70. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/mcp_integration/manager.py +0 -0
  71. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/safety/__init__.py +0 -0
  72. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/safety/llm_service.py +0 -0
  73. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/safety/patterns.py +0 -0
  74. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/safety/prompt_inspector.py +0 -0
  75. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/safety/violation_logger.py +0 -0
  76. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/scheduler/__init__.py +0 -0
  77. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/scheduler/creation_tools.py +0 -0
  78. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/scheduler/execution_queue.py +0 -0
  79. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/scheduler/executor.py +0 -0
  80. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/scheduler/manager.py +0 -0
  81. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/tools/__init__.py +0 -0
  82. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/__init__.py +0 -0
  83. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/auth.py +0 -0
  84. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/dependencies.py +0 -0
  85. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/endpoints/__init__.py +0 -0
  86. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/endpoints/autonomous_actions.py +0 -0
  87. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/endpoints/conversations.py +0 -0
  88. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/endpoints/streaming.py +0 -0
  89. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/server.py +0 -0
  90. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/session.py +0 -0
  91. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/ssl_utils.py +0 -0
  92. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/static/css/dark-theme.css +0 -0
  93. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/static/js/actions.js +0 -0
  94. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/static/js/chat.js +0 -0
  95. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/static/js/main.js +0 -0
  96. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/static/js/sse-client.js +0 -0
  97. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/templates/actions.html +0 -0
  98. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/templates/base.html +0 -0
  99. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/templates/conversations.html +0 -0
  100. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/templates/goodbye.html +0 -0
  101. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/templates/login.html +0 -0
  102. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/templates/new_conversation.html +0 -0
  103. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark/web/web_interface.py +0 -0
  104. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark.egg-info/dependency_links.txt +0 -0
  105. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark.egg-info/entry_points.txt +0 -0
  106. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark.egg-info/not-zip-safe +0 -0
  107. {dtspark-1.0.10 → dtspark-1.1.0a1}/src/dtSpark.egg-info/top_level.txt +0 -0
  108. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/README.md +0 -0
  109. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/debug_bulk_api.py +0 -0
  110. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/diagnose_aws_costs.py +0 -0
  111. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_builtin_tools.py +0 -0
  112. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_builtin_tools_integration.py +0 -0
  113. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_bulk_pricing.py +0 -0
  114. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_filesystem_tools.py +0 -0
  115. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_mcp_server.py +0 -0
  116. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_ollama_context.py +0 -0
  117. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_ollama_conversation.py +0 -0
  118. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_ollama_integration.py +0 -0
  119. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_pricing_integration.py +0 -0
  120. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_prompt_inspection.py +0 -0
  121. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_status_indicator.py +0 -0
  122. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_tool_selector.py +0 -0
  123. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_web_auth.py +0 -0
  124. {dtspark-1.0.10 → dtspark-1.1.0a1}/tests/test_web_session.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dtSpark
3
- Version: 1.0.10
3
+ Version: 1.1.0a1
4
4
  Summary: Secure Personal AI Research Kit - Multi-provider LLM CLI/Web interface with MCP tool integration
5
5
  Home-page: https://github.com/digital-thought/dtSpark
6
6
  Author: Matthew Westwood-Hill
@@ -49,6 +49,12 @@ Requires-Dist: cryptography>=41.0.0
49
49
  Requires-Dist: anthropic>=0.18.0
50
50
  Requires-Dist: APScheduler>=3.10.0
51
51
  Requires-Dist: markdown>=3.4.0
52
+ Requires-Dist: python-docx>=0.8.11
53
+ Requires-Dist: openpyxl>=3.1.0
54
+ Requires-Dist: python-pptx>=0.6.21
55
+ Requires-Dist: pdfplumber>=0.10.0
56
+ Requires-Dist: python-magic-bin>=0.4.14; sys_platform == "win32"
57
+ Requires-Dist: python-magic>=0.4.27; sys_platform != "win32"
52
58
  Provides-Extra: dev
53
59
  Requires-Dist: pytest>=7.0.0; extra == "dev"
54
60
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
@@ -64,6 +64,13 @@ dependencies = [
64
64
  "anthropic>=0.18.0",
65
65
  "APScheduler>=3.10.0",
66
66
  "markdown>=3.4.0",
67
+ # MS Office document handling
68
+ "python-docx>=0.8.11",
69
+ "openpyxl>=3.1.0",
70
+ "python-pptx>=0.6.21",
71
+ "pdfplumber>=0.10.0",
72
+ "python-magic-bin>=0.4.14;sys_platform=='win32'",
73
+ "python-magic>=0.4.27;sys_platform!='win32'",
67
74
  ]
68
75
 
69
76
  [project.optional-dependencies]
@@ -0,0 +1 @@
1
+ 1.1.0a1
@@ -779,6 +779,27 @@ class AWSBedrockCLI(AbstractApp):
779
779
  'enabled': self.settings.get('embedded_tools.filesystem.enabled', False),
780
780
  'allowed_path': self.settings.get('embedded_tools.filesystem.allowed_path', './'),
781
781
  'access_mode': self.settings.get('embedded_tools.filesystem.access_mode', 'read')
782
+ },
783
+ 'documents': {
784
+ 'enabled': self.settings.get('embedded_tools.documents.enabled', False),
785
+ 'allowed_path': self.settings.get('embedded_tools.documents.allowed_path', './'),
786
+ 'access_mode': self.settings.get('embedded_tools.documents.access_mode', 'read'),
787
+ 'max_file_size_mb': self.settings.get('embedded_tools.documents.max_file_size_mb', 50),
788
+ 'reading': {
789
+ 'max_pdf_pages': self.settings.get('embedded_tools.documents.reading.max_pdf_pages', 100),
790
+ 'max_excel_rows': self.settings.get('embedded_tools.documents.reading.max_excel_rows', 10000)
791
+ },
792
+ 'creation': {
793
+ 'templates_path': self.settings.get('embedded_tools.documents.creation.templates_path'),
794
+ 'default_author': self.settings.get('embedded_tools.documents.creation.default_author')
795
+ }
796
+ },
797
+ 'archives': {
798
+ 'enabled': self.settings.get('embedded_tools.archives.enabled', False),
799
+ 'allowed_path': self.settings.get('embedded_tools.archives.allowed_path', './'),
800
+ 'access_mode': self.settings.get('embedded_tools.archives.access_mode', 'read'),
801
+ 'max_file_size_mb': self.settings.get('embedded_tools.archives.max_file_size_mb', 100),
802
+ 'max_files_to_list': self.settings.get('embedded_tools.archives.max_files_to_list', 1000)
782
803
  }
783
804
  }
784
805
  }
@@ -1655,6 +1676,122 @@ class AWSBedrockCLI(AbstractApp):
1655
1676
  )
1656
1677
  filesystem_access_mode = access_mode_choices[access_mode_choice]
1657
1678
 
1679
+ # ═══════════════════════════════════════════════════════════════
1680
+ # Embedded Document Tools (MS Office & PDF)
1681
+ # ═══════════════════════════════════════════════════════════════
1682
+ cli.console.print()
1683
+ enable_document_tools = Confirm.ask("Enable embedded document tools (MS Office & PDF)?", default=False)
1684
+
1685
+ # Default values
1686
+ document_allowed_path = "./running"
1687
+ document_access_mode = "read"
1688
+ document_max_file_size = "50"
1689
+ document_max_pdf_pages = "100"
1690
+ document_max_excel_rows = "10000"
1691
+ document_templates_path = ""
1692
+ document_default_author = ""
1693
+
1694
+ if enable_document_tools:
1695
+ cli.console.print()
1696
+ cli.console.print("[dim]Document tools allow reading and creating MS Office documents (Word, Excel, PowerPoint) and PDFs.[/dim]")
1697
+ cli.console.print()
1698
+
1699
+ document_allowed_path = Prompt.ask(
1700
+ "Allowed directory path for documents",
1701
+ default="./running"
1702
+ )
1703
+
1704
+ # Access mode
1705
+ doc_access_mode_choices = {
1706
+ "1": "read",
1707
+ "2": "read_write"
1708
+ }
1709
+ cli.console.print()
1710
+ cli.console.print(" [1] Read - Read documents only")
1711
+ cli.console.print(" [2] Read/Write - Read and create documents")
1712
+ cli.console.print()
1713
+ doc_access_mode_choice = Prompt.ask(
1714
+ "Select access mode",
1715
+ choices=["1", "2"],
1716
+ default="1"
1717
+ )
1718
+ document_access_mode = doc_access_mode_choices[doc_access_mode_choice]
1719
+
1720
+ document_max_file_size = Prompt.ask(
1721
+ "Maximum file size in MB",
1722
+ default="50"
1723
+ )
1724
+
1725
+ document_max_pdf_pages = Prompt.ask(
1726
+ "Maximum PDF pages to read",
1727
+ default="100"
1728
+ )
1729
+
1730
+ document_max_excel_rows = Prompt.ask(
1731
+ "Maximum Excel rows to read",
1732
+ default="10000"
1733
+ )
1734
+
1735
+ if document_access_mode == "read_write":
1736
+ cli.console.print()
1737
+ cli.console.print("[dim]Templates allow creating documents with placeholder substitution.[/dim]")
1738
+ document_templates_path = Prompt.ask(
1739
+ "Templates directory path (leave empty to disable)",
1740
+ default=""
1741
+ )
1742
+ document_default_author = Prompt.ask(
1743
+ "Default author for created documents (leave empty to disable)",
1744
+ default=""
1745
+ )
1746
+
1747
+ # ═══════════════════════════════════════════════════════════════
1748
+ # Embedded Archive Tools
1749
+ # ═══════════════════════════════════════════════════════════════
1750
+ cli.console.print()
1751
+ enable_archive_tools = Confirm.ask("Enable embedded archive tools (ZIP, TAR)?", default=False)
1752
+
1753
+ # Default values
1754
+ archive_allowed_path = "./running"
1755
+ archive_access_mode = "read"
1756
+ archive_max_file_size = "100"
1757
+ archive_max_files_to_list = "1000"
1758
+
1759
+ if enable_archive_tools:
1760
+ cli.console.print()
1761
+ cli.console.print("[dim]Archive tools allow reading and extracting ZIP and TAR archives.[/dim]")
1762
+ cli.console.print()
1763
+
1764
+ archive_allowed_path = Prompt.ask(
1765
+ "Allowed directory path for archives",
1766
+ default="./running"
1767
+ )
1768
+
1769
+ # Access mode
1770
+ archive_access_mode_choices = {
1771
+ "1": "read",
1772
+ "2": "read_write"
1773
+ }
1774
+ cli.console.print()
1775
+ cli.console.print(" [1] Read - List contents and read files from archives")
1776
+ cli.console.print(" [2] Read/Write - Read and extract archives to disk")
1777
+ cli.console.print()
1778
+ archive_access_mode_choice = Prompt.ask(
1779
+ "Select access mode",
1780
+ choices=["1", "2"],
1781
+ default="1"
1782
+ )
1783
+ archive_access_mode = archive_access_mode_choices[archive_access_mode_choice]
1784
+
1785
+ archive_max_file_size = Prompt.ask(
1786
+ "Maximum archive file size in MB",
1787
+ default="100"
1788
+ )
1789
+
1790
+ archive_max_files_to_list = Prompt.ask(
1791
+ "Maximum files to list from archive",
1792
+ default="1000"
1793
+ )
1794
+
1658
1795
  # ═══════════════════════════════════════════════════════════════
1659
1796
  # Tool Permissions
1660
1797
  # ═══════════════════════════════════════════════════════════════
@@ -2040,6 +2177,95 @@ class AWSBedrockCLI(AbstractApp):
2040
2177
  config_content
2041
2178
  )
2042
2179
 
2180
+ # Embedded Document Tools
2181
+ config_content = re.sub(
2182
+ r'(documents:\s*\n\s+enabled:\s+)(true|false)',
2183
+ f'\\g<1>{str(enable_document_tools).lower()}',
2184
+ config_content
2185
+ )
2186
+ if enable_document_tools:
2187
+ # Allowed path
2188
+ escaped_doc_path = document_allowed_path.replace('\\', '/')
2189
+ config_content = re.sub(
2190
+ r'(documents:\s*\n\s+enabled:\s+(?:true|false)\s*\n\s+allowed_path:\s+)[^\s#]+',
2191
+ f'\\g<1>{escaped_doc_path}',
2192
+ config_content
2193
+ )
2194
+ # Access mode
2195
+ config_content = re.sub(
2196
+ r'(documents:\s*\n\s+enabled:\s+(?:true|false)\s*\n\s+allowed_path:\s+[^\s#]+\s*\n\s+access_mode:\s+)(read|read_write)',
2197
+ f'\\g<1>{document_access_mode}',
2198
+ config_content
2199
+ )
2200
+ # Max file size
2201
+ config_content = re.sub(
2202
+ r'(documents:.*?max_file_size_mb:\s+)\d+',
2203
+ f'\\g<1>{document_max_file_size}',
2204
+ config_content,
2205
+ flags=re.DOTALL
2206
+ )
2207
+ # Max PDF pages
2208
+ config_content = re.sub(
2209
+ r'(max_pdf_pages:\s+)\d+',
2210
+ f'\\g<1>{document_max_pdf_pages}',
2211
+ config_content
2212
+ )
2213
+ # Max Excel rows
2214
+ config_content = re.sub(
2215
+ r'(max_excel_rows:\s+)\d+',
2216
+ f'\\g<1>{document_max_excel_rows}',
2217
+ config_content
2218
+ )
2219
+ # Templates path (if provided)
2220
+ if document_templates_path:
2221
+ escaped_templates_path = document_templates_path.replace('\\', '/')
2222
+ config_content = re.sub(
2223
+ r'(templates_path:\s+)(null|[^\s#]+)',
2224
+ f'\\g<1>{escaped_templates_path}',
2225
+ config_content
2226
+ )
2227
+ # Default author (if provided)
2228
+ if document_default_author:
2229
+ config_content = re.sub(
2230
+ r'(default_author:\s+)(null|[^\s#]+)',
2231
+ f'\\g<1>{document_default_author}',
2232
+ config_content
2233
+ )
2234
+
2235
+ # Embedded Archive Tools
2236
+ config_content = re.sub(
2237
+ r'(archives:\s*\n\s+enabled:\s+)(true|false)',
2238
+ f'\\g<1>{str(enable_archive_tools).lower()}',
2239
+ config_content
2240
+ )
2241
+ if enable_archive_tools:
2242
+ # Allowed path
2243
+ escaped_archive_path = archive_allowed_path.replace('\\', '/')
2244
+ config_content = re.sub(
2245
+ r'(archives:\s*\n\s+enabled:\s+(?:true|false)\s*\n\s+allowed_path:\s+)[^\s#]+',
2246
+ f'\\g<1>{escaped_archive_path}',
2247
+ config_content
2248
+ )
2249
+ # Access mode
2250
+ config_content = re.sub(
2251
+ r'(archives:\s*\n\s+enabled:\s+(?:true|false)\s*\n\s+allowed_path:\s+[^\s#]+\s*\n\s+access_mode:\s+)(read|read_write)',
2252
+ f'\\g<1>{archive_access_mode}',
2253
+ config_content
2254
+ )
2255
+ # Max file size
2256
+ config_content = re.sub(
2257
+ r'(archives:.*?max_file_size_mb:\s+)\d+',
2258
+ f'\\g<1>{archive_max_file_size}',
2259
+ config_content,
2260
+ flags=re.DOTALL
2261
+ )
2262
+ # Max files to list
2263
+ config_content = re.sub(
2264
+ r'(max_files_to_list:\s+)\d+',
2265
+ f'\\g<1>{archive_max_files_to_list}',
2266
+ config_content
2267
+ )
2268
+
2043
2269
  # Tool Permissions
2044
2270
  config_content = re.sub(
2045
2271
  r'(tool_permissions:\s*\n\s+auto_approve:\s+)(true|false)',
@@ -21,6 +21,8 @@ class ToolSelector:
21
21
  'aws_infrastructure': ['ec2', 's3', 'lambda', 'cloudwatch', 'iam', 'vpc', 'rds', 'dynamodb', 'diagram'],
22
22
  'elasticsearch': ['elasticsearch', 'search', 'index', 'query', 'aggregation'],
23
23
  'ragstore': ['ragstore', 'rag', 'embedding', 'vector', 'semantic'],
24
+ 'documents': ['word', 'excel', 'powerpoint', 'pdf', 'document', 'docx', 'xlsx', 'pptx', 'spreadsheet'],
25
+ 'archives': ['archive', 'zip', 'tar', 'extract', 'compress', 'tgz'],
24
26
  }
25
27
 
26
28
  # Keywords in user messages that trigger specific categories
@@ -36,6 +38,9 @@ class ToolSelector:
36
38
  'vpc', 'subnet', 'instance', 'bucket', 'function', 'diagram'],
37
39
  'elasticsearch': ['elasticsearch', 'search', 'query', 'index', 'log', 'aggregate'],
38
40
  'ragstore': ['ragstore', 'rag', 'embedding', 'semantic', 'vector', 'similarity'],
41
+ 'documents': ['document', 'word', 'excel', 'powerpoint', 'pdf', 'docx', 'xlsx', 'pptx',
42
+ 'spreadsheet', 'presentation', 'template', 'office'],
43
+ 'archives': ['archive', 'zip', 'tar', 'extract', 'unzip', 'compressed', 'tgz'],
39
44
  }
40
45
 
41
46
  def __init__(self, max_tools_per_request: int = 30):
@@ -603,6 +603,48 @@ embedded_tools:
603
603
  # - Use read-only mode when write operations are not needed
604
604
  # - Consider using specific subdirectories rather than root paths
605
605
 
606
+ # Document Tools (MS Office and PDF)
607
+ # Tools for reading and creating Microsoft Office documents and PDFs
608
+ documents:
609
+ enabled: false # Set to true to enable document reading/creation tools
610
+ allowed_path: ./ # Root path for document operations (files must be within this path)
611
+ access_mode: read # Access mode: "read" (read-only) or "read_write" (read and create)
612
+ # read: Only read operations (read_word_document, read_excel_document, etc.)
613
+ # read_write: Adds create operations (create_word_document, etc.)
614
+ max_file_size_mb: 50 # Maximum file size for document operations (in megabytes)
615
+
616
+ # Document reading options
617
+ reading:
618
+ max_pdf_pages: 100 # Maximum pages to extract from PDF documents
619
+ max_excel_rows: 10000 # Maximum rows to read from Excel spreadsheets
620
+
621
+ # Document creation options (only applies when access_mode is 'read_write')
622
+ creation:
623
+ templates_path: null # Optional: path to directory containing document templates
624
+ # Templates use {{placeholder_name}} syntax for replacements
625
+ default_author: null # Optional: default author name for created documents
626
+
627
+ # Supported formats:
628
+ # - Word: .docx (read and create)
629
+ # - Excel: .xlsx (read and create)
630
+ # - PowerPoint: .pptx (read and create)
631
+ # - PDF: .pdf (read only)
632
+
633
+ # Archive Tools
634
+ # Tools for reading and extracting compressed archive files
635
+ archives:
636
+ enabled: false # Set to true to enable archive tools
637
+ allowed_path: ./ # Root path for archive operations (files must be within this path)
638
+ access_mode: read # Access mode: "read" (list/read only) or "read_write" (includes extraction)
639
+ # read: Only list_archive_contents and read_archive_file operations
640
+ # read_write: Adds extract_archive operation
641
+ max_file_size_mb: 100 # Maximum archive size for operations (in megabytes)
642
+ max_files_to_list: 1000 # Maximum number of files to list from an archive
643
+
644
+ # Supported formats:
645
+ # - ZIP: .zip
646
+ # - TAR: .tar, .tar.gz, .tgz, .tar.bz2
647
+
606
648
  # Tool Permissions
607
649
  # Controls how tool usage permissions are handled
608
650
  tool_permissions: