dtSpark 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dtSpark/_version.txt CHANGED
@@ -1 +1 @@
1
- 1.0.10
1
+ 1.0.11
@@ -1655,6 +1655,122 @@ class AWSBedrockCLI(AbstractApp):
1655
1655
  )
1656
1656
  filesystem_access_mode = access_mode_choices[access_mode_choice]
1657
1657
 
1658
+ # ═══════════════════════════════════════════════════════════════
1659
+ # Embedded Document Tools (MS Office & PDF)
1660
+ # ═══════════════════════════════════════════════════════════════
1661
+ cli.console.print()
1662
+ enable_document_tools = Confirm.ask("Enable embedded document tools (MS Office & PDF)?", default=False)
1663
+
1664
+ # Default values
1665
+ document_allowed_path = "./running"
1666
+ document_access_mode = "read"
1667
+ document_max_file_size = "50"
1668
+ document_max_pdf_pages = "100"
1669
+ document_max_excel_rows = "10000"
1670
+ document_templates_path = ""
1671
+ document_default_author = ""
1672
+
1673
+ if enable_document_tools:
1674
+ cli.console.print()
1675
+ cli.console.print("[dim]Document tools allow reading and creating MS Office documents (Word, Excel, PowerPoint) and PDFs.[/dim]")
1676
+ cli.console.print()
1677
+
1678
+ document_allowed_path = Prompt.ask(
1679
+ "Allowed directory path for documents",
1680
+ default="./running"
1681
+ )
1682
+
1683
+ # Access mode
1684
+ doc_access_mode_choices = {
1685
+ "1": "read",
1686
+ "2": "read_write"
1687
+ }
1688
+ cli.console.print()
1689
+ cli.console.print(" [1] Read - Read documents only")
1690
+ cli.console.print(" [2] Read/Write - Read and create documents")
1691
+ cli.console.print()
1692
+ doc_access_mode_choice = Prompt.ask(
1693
+ "Select access mode",
1694
+ choices=["1", "2"],
1695
+ default="1"
1696
+ )
1697
+ document_access_mode = doc_access_mode_choices[doc_access_mode_choice]
1698
+
1699
+ document_max_file_size = Prompt.ask(
1700
+ "Maximum file size in MB",
1701
+ default="50"
1702
+ )
1703
+
1704
+ document_max_pdf_pages = Prompt.ask(
1705
+ "Maximum PDF pages to read",
1706
+ default="100"
1707
+ )
1708
+
1709
+ document_max_excel_rows = Prompt.ask(
1710
+ "Maximum Excel rows to read",
1711
+ default="10000"
1712
+ )
1713
+
1714
+ if document_access_mode == "read_write":
1715
+ cli.console.print()
1716
+ cli.console.print("[dim]Templates allow creating documents with placeholder substitution.[/dim]")
1717
+ document_templates_path = Prompt.ask(
1718
+ "Templates directory path (leave empty to disable)",
1719
+ default=""
1720
+ )
1721
+ document_default_author = Prompt.ask(
1722
+ "Default author for created documents (leave empty to disable)",
1723
+ default=""
1724
+ )
1725
+
1726
+ # ═══════════════════════════════════════════════════════════════
1727
+ # Embedded Archive Tools
1728
+ # ═══════════════════════════════════════════════════════════════
1729
+ cli.console.print()
1730
+ enable_archive_tools = Confirm.ask("Enable embedded archive tools (ZIP, TAR)?", default=False)
1731
+
1732
+ # Default values
1733
+ archive_allowed_path = "./running"
1734
+ archive_access_mode = "read"
1735
+ archive_max_file_size = "100"
1736
+ archive_max_files_to_list = "1000"
1737
+
1738
+ if enable_archive_tools:
1739
+ cli.console.print()
1740
+ cli.console.print("[dim]Archive tools allow reading and extracting ZIP and TAR archives.[/dim]")
1741
+ cli.console.print()
1742
+
1743
+ archive_allowed_path = Prompt.ask(
1744
+ "Allowed directory path for archives",
1745
+ default="./running"
1746
+ )
1747
+
1748
+ # Access mode
1749
+ archive_access_mode_choices = {
1750
+ "1": "read",
1751
+ "2": "read_write"
1752
+ }
1753
+ cli.console.print()
1754
+ cli.console.print(" [1] Read - List contents and read files from archives")
1755
+ cli.console.print(" [2] Read/Write - Read and extract archives to disk")
1756
+ cli.console.print()
1757
+ archive_access_mode_choice = Prompt.ask(
1758
+ "Select access mode",
1759
+ choices=["1", "2"],
1760
+ default="1"
1761
+ )
1762
+ archive_access_mode = archive_access_mode_choices[archive_access_mode_choice]
1763
+
1764
+ archive_max_file_size = Prompt.ask(
1765
+ "Maximum archive file size in MB",
1766
+ default="100"
1767
+ )
1768
+
1769
+ archive_max_files_to_list = Prompt.ask(
1770
+ "Maximum files to list from archive",
1771
+ default="1000"
1772
+ )
1773
+
1658
1774
  # ═══════════════════════════════════════════════════════════════
1659
1775
  # Tool Permissions
1660
1776
  # ═══════════════════════════════════════════════════════════════
@@ -2040,6 +2156,95 @@ class AWSBedrockCLI(AbstractApp):
2040
2156
  config_content
2041
2157
  )
2042
2158
 
2159
+ # Embedded Document Tools
2160
+ config_content = re.sub(
2161
+ r'(documents:\s*\n\s+enabled:\s+)(true|false)',
2162
+ f'\\g<1>{str(enable_document_tools).lower()}',
2163
+ config_content
2164
+ )
2165
+ if enable_document_tools:
2166
+ # Allowed path
2167
+ escaped_doc_path = document_allowed_path.replace('\\', '/')
2168
+ config_content = re.sub(
2169
+ r'(documents:\s*\n\s+enabled:\s+(?:true|false)\s*\n\s+allowed_path:\s+)[^\s#]+',
2170
+ f'\\g<1>{escaped_doc_path}',
2171
+ config_content
2172
+ )
2173
+ # Access mode
2174
+ config_content = re.sub(
2175
+ r'(documents:\s*\n\s+enabled:\s+(?:true|false)\s*\n\s+allowed_path:\s+[^\s#]+\s*\n\s+access_mode:\s+)(read|read_write)',
2176
+ f'\\g<1>{document_access_mode}',
2177
+ config_content
2178
+ )
2179
+ # Max file size
2180
+ config_content = re.sub(
2181
+ r'(documents:.*?max_file_size_mb:\s+)\d+',
2182
+ f'\\g<1>{document_max_file_size}',
2183
+ config_content,
2184
+ flags=re.DOTALL
2185
+ )
2186
+ # Max PDF pages
2187
+ config_content = re.sub(
2188
+ r'(max_pdf_pages:\s+)\d+',
2189
+ f'\\g<1>{document_max_pdf_pages}',
2190
+ config_content
2191
+ )
2192
+ # Max Excel rows
2193
+ config_content = re.sub(
2194
+ r'(max_excel_rows:\s+)\d+',
2195
+ f'\\g<1>{document_max_excel_rows}',
2196
+ config_content
2197
+ )
2198
+ # Templates path (if provided)
2199
+ if document_templates_path:
2200
+ escaped_templates_path = document_templates_path.replace('\\', '/')
2201
+ config_content = re.sub(
2202
+ r'(templates_path:\s+)(null|[^\s#]+)',
2203
+ f'\\g<1>{escaped_templates_path}',
2204
+ config_content
2205
+ )
2206
+ # Default author (if provided)
2207
+ if document_default_author:
2208
+ config_content = re.sub(
2209
+ r'(default_author:\s+)(null|[^\s#]+)',
2210
+ f'\\g<1>{document_default_author}',
2211
+ config_content
2212
+ )
2213
+
2214
+ # Embedded Archive Tools
2215
+ config_content = re.sub(
2216
+ r'(archives:\s*\n\s+enabled:\s+)(true|false)',
2217
+ f'\\g<1>{str(enable_archive_tools).lower()}',
2218
+ config_content
2219
+ )
2220
+ if enable_archive_tools:
2221
+ # Allowed path
2222
+ escaped_archive_path = archive_allowed_path.replace('\\', '/')
2223
+ config_content = re.sub(
2224
+ r'(archives:\s*\n\s+enabled:\s+(?:true|false)\s*\n\s+allowed_path:\s+)[^\s#]+',
2225
+ f'\\g<1>{escaped_archive_path}',
2226
+ config_content
2227
+ )
2228
+ # Access mode
2229
+ config_content = re.sub(
2230
+ r'(archives:\s*\n\s+enabled:\s+(?:true|false)\s*\n\s+allowed_path:\s+[^\s#]+\s*\n\s+access_mode:\s+)(read|read_write)',
2231
+ f'\\g<1>{archive_access_mode}',
2232
+ config_content
2233
+ )
2234
+ # Max file size
2235
+ config_content = re.sub(
2236
+ r'(archives:.*?max_file_size_mb:\s+)\d+',
2237
+ f'\\g<1>{archive_max_file_size}',
2238
+ config_content,
2239
+ flags=re.DOTALL
2240
+ )
2241
+ # Max files to list
2242
+ config_content = re.sub(
2243
+ r'(max_files_to_list:\s+)\d+',
2244
+ f'\\g<1>{archive_max_files_to_list}',
2245
+ config_content
2246
+ )
2247
+
2043
2248
  # Tool Permissions
2044
2249
  config_content = re.sub(
2045
2250
  r'(tool_permissions:\s*\n\s+auto_approve:\s+)(true|false)',
@@ -21,6 +21,8 @@ class ToolSelector:
21
21
  'aws_infrastructure': ['ec2', 's3', 'lambda', 'cloudwatch', 'iam', 'vpc', 'rds', 'dynamodb', 'diagram'],
22
22
  'elasticsearch': ['elasticsearch', 'search', 'index', 'query', 'aggregation'],
23
23
  'ragstore': ['ragstore', 'rag', 'embedding', 'vector', 'semantic'],
24
+ 'documents': ['word', 'excel', 'powerpoint', 'pdf', 'document', 'docx', 'xlsx', 'pptx', 'spreadsheet'],
25
+ 'archives': ['archive', 'zip', 'tar', 'extract', 'compress', 'tgz'],
24
26
  }
25
27
 
26
28
  # Keywords in user messages that trigger specific categories
@@ -36,6 +38,9 @@ class ToolSelector:
36
38
  'vpc', 'subnet', 'instance', 'bucket', 'function', 'diagram'],
37
39
  'elasticsearch': ['elasticsearch', 'search', 'query', 'index', 'log', 'aggregate'],
38
40
  'ragstore': ['ragstore', 'rag', 'embedding', 'semantic', 'vector', 'similarity'],
41
+ 'documents': ['document', 'word', 'excel', 'powerpoint', 'pdf', 'docx', 'xlsx', 'pptx',
42
+ 'spreadsheet', 'presentation', 'template', 'office'],
43
+ 'archives': ['archive', 'zip', 'tar', 'extract', 'unzip', 'compressed', 'tgz'],
39
44
  }
40
45
 
41
46
  def __init__(self, max_tools_per_request: int = 30):
@@ -603,6 +603,48 @@ embedded_tools:
603
603
  # - Use read-only mode when write operations are not needed
604
604
  # - Consider using specific subdirectories rather than root paths
605
605
 
606
+ # Document Tools (MS Office and PDF)
607
+ # Tools for reading and creating Microsoft Office documents and PDFs
608
+ documents:
609
+ enabled: false # Set to true to enable document reading/creation tools
610
+ allowed_path: ./ # Root path for document operations (files must be within this path)
611
+ access_mode: read # Access mode: "read" (read-only) or "read_write" (read and create)
612
+ # read: Only read operations (read_word_document, read_excel_document, etc.)
613
+ # read_write: Adds create operations (create_word_document, etc.)
614
+ max_file_size_mb: 50 # Maximum file size for document operations (in megabytes)
615
+
616
+ # Document reading options
617
+ reading:
618
+ max_pdf_pages: 100 # Maximum pages to extract from PDF documents
619
+ max_excel_rows: 10000 # Maximum rows to read from Excel spreadsheets
620
+
621
+ # Document creation options (only applies when access_mode is 'read_write')
622
+ creation:
623
+ templates_path: null # Optional: path to directory containing document templates
624
+ # Templates use {{placeholder_name}} syntax for replacements
625
+ default_author: null # Optional: default author name for created documents
626
+
627
+ # Supported formats:
628
+ # - Word: .docx (read and create)
629
+ # - Excel: .xlsx (read and create)
630
+ # - PowerPoint: .pptx (read and create)
631
+ # - PDF: .pdf (read only)
632
+
633
+ # Archive Tools
634
+ # Tools for reading and extracting compressed archive files
635
+ archives:
636
+ enabled: false # Set to true to enable archive tools
637
+ allowed_path: ./ # Root path for archive operations (files must be within this path)
638
+ access_mode: read # Access mode: "read" (list/read only) or "read_write" (includes extraction)
639
+ # read: Only list_archive_contents and read_archive_file operations
640
+ # read_write: Adds extract_archive operation
641
+ max_file_size_mb: 100 # Maximum archive size for operations (in megabytes)
642
+ max_files_to_list: 1000 # Maximum number of files to list from an archive
643
+
644
+ # Supported formats:
645
+ # - ZIP: .zip
646
+ # - TAR: .tar, .tar.gz, .tgz, .tar.bz2
647
+
606
648
  # Tool Permissions
607
649
  # Controls how tool usage permissions are handled
608
650
  tool_permissions: