ostruct-cli 0.8.8__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/cli/__init__.py +3 -15
- ostruct/cli/attachment_processor.py +455 -0
- ostruct/cli/attachment_template_bridge.py +973 -0
- ostruct/cli/cli.py +187 -33
- ostruct/cli/click_options.py +775 -692
- ostruct/cli/code_interpreter.py +195 -12
- ostruct/cli/commands/__init__.py +0 -3
- ostruct/cli/commands/run.py +289 -62
- ostruct/cli/config.py +23 -22
- ostruct/cli/constants.py +89 -0
- ostruct/cli/errors.py +191 -6
- ostruct/cli/explicit_file_processor.py +0 -15
- ostruct/cli/file_info.py +118 -14
- ostruct/cli/file_list.py +82 -1
- ostruct/cli/file_search.py +68 -2
- ostruct/cli/help_json.py +235 -0
- ostruct/cli/mcp_integration.py +13 -16
- ostruct/cli/params.py +217 -0
- ostruct/cli/plan_assembly.py +335 -0
- ostruct/cli/plan_printing.py +385 -0
- ostruct/cli/progress_reporting.py +8 -56
- ostruct/cli/quick_ref_help.py +128 -0
- ostruct/cli/rich_config.py +299 -0
- ostruct/cli/runner.py +397 -190
- ostruct/cli/security/__init__.py +2 -0
- ostruct/cli/security/allowed_checker.py +41 -0
- ostruct/cli/security/normalization.py +13 -9
- ostruct/cli/security/security_manager.py +558 -17
- ostruct/cli/security/types.py +15 -0
- ostruct/cli/template_debug.py +283 -261
- ostruct/cli/template_debug_help.py +233 -142
- ostruct/cli/template_env.py +46 -5
- ostruct/cli/template_filters.py +415 -8
- ostruct/cli/template_processor.py +240 -619
- ostruct/cli/template_rendering.py +49 -73
- ostruct/cli/template_validation.py +2 -1
- ostruct/cli/token_validation.py +35 -15
- ostruct/cli/types.py +15 -19
- ostruct/cli/unicode_compat.py +283 -0
- ostruct/cli/upload_manager.py +448 -0
- ostruct/cli/utils.py +30 -0
- ostruct/cli/validators.py +272 -54
- {ostruct_cli-0.8.8.dist-info → ostruct_cli-1.0.0.dist-info}/METADATA +292 -126
- ostruct_cli-1.0.0.dist-info/RECORD +80 -0
- ostruct/cli/commands/quick_ref.py +0 -54
- ostruct/cli/template_optimizer.py +0 -478
- ostruct_cli-0.8.8.dist-info/RECORD +0 -71
- {ostruct_cli-0.8.8.dist-info → ostruct_cli-1.0.0.dist-info}/LICENSE +0 -0
- {ostruct_cli-0.8.8.dist-info → ostruct_cli-1.0.0.dist-info}/WHEEL +0 -0
- {ostruct_cli-0.8.8.dist-info → ostruct_cli-1.0.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,80 @@
|
|
1
|
+
ostruct/__init__.py,sha256=X6zo6V7ZNMv731Wi388aTVQngD1410ExGwGx4J6lpyo,187
|
2
|
+
ostruct/cli/__init__.py,sha256=njg70kPnRja8KqP_y93CIWPyeNj8EzH8AhHotn0jJAc,820
|
3
|
+
ostruct/cli/attachment_processor.py,sha256=_RcE8tdJ5ey5jsL5UBsOadKrAqBjr71nLqZsv8b2kwg,16469
|
4
|
+
ostruct/cli/attachment_template_bridge.py,sha256=iehtZ3AR1QoQP_fQxiVH_jV4z-3bCsfvRkwoMz8DEWs,35058
|
5
|
+
ostruct/cli/base_errors.py,sha256=o-877bJJA8yJWISRPy0KyL6wDu1-_avddmQIfVePuFM,5989
|
6
|
+
ostruct/cli/cache_manager.py,sha256=ej3KrRfkKKZ_lEp2JswjbJ5bW2ncsvna9NeJu81cqqs,5192
|
7
|
+
ostruct/cli/cli.py,sha256=RCHjwx2mbRx9N-L1GeMgROqskvipjpwmavVh2z_Etds,8746
|
8
|
+
ostruct/cli/click_options.py,sha256=NFcs6amLBtVLOjR6smNbGmDHwBpAXrpxQy5KCiLKPo4,34203
|
9
|
+
ostruct/cli/code_interpreter.py,sha256=M8gyPy17s5lu8TsvXMDes4wi78iwqz_yoAQ4axeuhwM,23264
|
10
|
+
ostruct/cli/commands/__init__.py,sha256=KuEoh7UF9CWis3VHZ6LAwoRcZrpG8oq9VSQRl1NjEo8,622
|
11
|
+
ostruct/cli/commands/list_models.py,sha256=yeuQpUGAmRr4uOHS7teuVHkC9dkqN0yKDOEw_n-ehi0,4662
|
12
|
+
ostruct/cli/commands/run.py,sha256=oq2ZXCQehFJnUV34nC9TEaJL8Qb2aZrHAoLu0x3TR2M,16805
|
13
|
+
ostruct/cli/commands/update_registry.py,sha256=7DQrPlCJScPVgE2HbFAM7UMap-EdYu58AQWfpI-H7Gw,2483
|
14
|
+
ostruct/cli/config.py,sha256=KxkJGxvRiiDr7oinwZVaDqtzRcYyQZ1xcoCtvykq6F4,10143
|
15
|
+
ostruct/cli/constants.py,sha256=zyUwq4fladlCsfPnBPh7rSqfSwarbCL3I5HM1dIEf6I,2467
|
16
|
+
ostruct/cli/cost_estimation.py,sha256=08hyE-kM5QYzj7y-KB3lMD_RxCMoM_Ve3-IQlSpJAo4,4483
|
17
|
+
ostruct/cli/errors.py,sha256=vqdkGx2Znze61ms6BByOYg4D3ScBEK7OFP73PeJJufo,30865
|
18
|
+
ostruct/cli/exit_codes.py,sha256=gdvB1dpu0LalEUZrofpk5K6aTQ24n5AfkAK5umludHU,365
|
19
|
+
ostruct/cli/explicit_file_processor.py,sha256=mKYQCW4bodk8Ida0sGw8m5MmbiaMCAYvoHUXzU1KDSE,19210
|
20
|
+
ostruct/cli/field_utils.py,sha256=bcRi1qQ0Ac2UCfrKSQ677_yu-VzaShm_zN7QLf98qc0,1939
|
21
|
+
ostruct/cli/file_info.py,sha256=rNQSkUDUL3qbalovRu1unTixLVDzJ0HU01eFQYlMXlM,21443
|
22
|
+
ostruct/cli/file_list.py,sha256=AmnQnXyI1djVbp_pv2VwK2VAqZQpSiTIXPbwu3_wCRQ,23431
|
23
|
+
ostruct/cli/file_search.py,sha256=6JvThbrauznfr-nX4M_VG7kRQbSLG9YOQomLR9_IqPE,17566
|
24
|
+
ostruct/cli/file_utils.py,sha256=JZprQ-1LHQzI3eBfeCIS6VmxTa2fGUZHygGC8gcwpJM,24367
|
25
|
+
ostruct/cli/help_json.py,sha256=IDArJBtPTTMM2KVt36t1PBY72totKH9tVAsRnPuEPcA,8223
|
26
|
+
ostruct/cli/json_extract.py,sha256=ZleIxat8g-JnA9VVqWgJaKxN7QzL25itQ8aP0Gb5e4Q,2650
|
27
|
+
ostruct/cli/mcp_integration.py,sha256=zC24TZScr-Vio2Zq0lPBJVmATVWoIm2pfWwnO3ztQJ8,18791
|
28
|
+
ostruct/cli/model_creation.py,sha256=HGo8Qv7eBF8Co463IR7RcbTCQcaOvd_cBGuRodRCAa4,23261
|
29
|
+
ostruct/cli/model_validation.py,sha256=j2az3q88-Ljm2cMMgZ8p_-gcp1vKQnWCknnw0y0YlAw,6675
|
30
|
+
ostruct/cli/params.py,sha256=D4j_I6vZYN-lOeqHWyAmLSxAT3u77CUXVvf6sx27Uvo,6353
|
31
|
+
ostruct/cli/path_utils.py,sha256=j44q1OoLkqMErgK-qEuhuIZ1VyzqRIvNgxR1et9PoXA,4813
|
32
|
+
ostruct/cli/plan_assembly.py,sha256=pCz2fH3MNbUVODMw43_fb-e61vgo5N0GBrZ1VlShs1M,11656
|
33
|
+
ostruct/cli/plan_printing.py,sha256=_OSrnaLKi0nLdSSHnf6exaIhJT5YPedKh_8nYgqkluI,12837
|
34
|
+
ostruct/cli/progress.py,sha256=rj9nVEco5UeZORMbzd7mFJpFGJjbH9KbBFh5oTE5Anw,3415
|
35
|
+
ostruct/cli/progress_reporting.py,sha256=cQoMsRCBlIdRIefGwpBEX744H9pfpD5TdT8sB180hfI,11665
|
36
|
+
ostruct/cli/quick_ref_help.py,sha256=_AWPcdxDj9v6DOX6yecahv7dfV38-TwB4sK5HsWmIbo,5014
|
37
|
+
ostruct/cli/registry_updates.py,sha256=ohiHdlfrocvThpR_ZjMyqulDKFjRM1hIFKOlNzpaqHg,5138
|
38
|
+
ostruct/cli/rich_config.py,sha256=DChXWG97pQRm_QZCTdOIpBhvuIfJXDlqT48vnBdnYKc,9904
|
39
|
+
ostruct/cli/runner.py,sha256=a7Zkp4Wn9x6sMmtVtGEwLQ8i8ctTvKo2w36n6WpbuLM,74672
|
40
|
+
ostruct/cli/schema_utils.py,sha256=9LnsjxEKg6RIfXQB3nS3pyDggm5n-4-thXf92897gJU,3590
|
41
|
+
ostruct/cli/schema_validation.py,sha256=ohEuxJ0KF93qphj0JSZDnrxDn0C2ZU37g-U2JY03onM,8154
|
42
|
+
ostruct/cli/security/__init__.py,sha256=Ix0AXDgxJaR0TtR-sG1353DtUF1BPujgjcaA28Cvgh4,898
|
43
|
+
ostruct/cli/security/allowed_checker.py,sha256=ceNUEDiTirClq9Kvosytlm01JWj0J37JdiRzLsw3zD8,3462
|
44
|
+
ostruct/cli/security/base.py,sha256=q9YUdHEj2eg5w8GEw5403E9OQKIjZbEiaWsvYFnCGLw,1359
|
45
|
+
ostruct/cli/security/case_manager.py,sha256=I_ZJSyntLuGx5qVzze559CI-OxsaNPSibkAN8zZ7PvE,2345
|
46
|
+
ostruct/cli/security/errors.py,sha256=8jYJFRQyEXIH3Wd2ATWORVoqbDg7qwu0TsuROpsqNfU,5254
|
47
|
+
ostruct/cli/security/normalization.py,sha256=6UoFAl8cbvOUETMauyvmYogaoVhJm9WybGcS84biwh8,5488
|
48
|
+
ostruct/cli/security/safe_joiner.py,sha256=PHowCeBAkfHfPqRwuO5Com0OemGuq3cHkdu2p9IYNT0,7107
|
49
|
+
ostruct/cli/security/security_manager.py,sha256=VQbWxOp76mm8OCpCTtkzBrBh5nb-dKWQbxdZdZdFShM,36419
|
50
|
+
ostruct/cli/security/symlink_resolver.py,sha256=wtZdJ_T_0FOy6B1P5ty1odEXQk9vr8BzlWeAFD4huJE,16744
|
51
|
+
ostruct/cli/security/types.py,sha256=38qBfUSaYB0NXyOq9DytTg4OnievLSsA4ysI88YdaFY,3545
|
52
|
+
ostruct/cli/security/windows_paths.py,sha256=qxC2H2kLwtmQ7YePYde3UrmOJcGnsLEebDLh242sUaI,13453
|
53
|
+
ostruct/cli/sentinel.py,sha256=69faYPrhVJmEpYNLsCtf1HF96aan3APqXZdIjxBNZYo,798
|
54
|
+
ostruct/cli/serialization.py,sha256=ec0UswDE2onwtZVUoZaMCsGv6zW_tSKdBng2qVo6Ucs,704
|
55
|
+
ostruct/cli/services.py,sha256=nLYUbF3DDNuilh7j9q_atUOjTAWta7bxTS3G-zkveaA,21621
|
56
|
+
ostruct/cli/template_debug.py,sha256=PEU9w10lIWPaMHbY2_uPr2HwM3154V1aIJ8mc_A5rSQ,25283
|
57
|
+
ostruct/cli/template_debug_help.py,sha256=KqKYFIbE81HxTHWT-A2naLdk8PUsTNHTXkLusleGUD4,9178
|
58
|
+
ostruct/cli/template_env.py,sha256=mxJPoA4DwbJKQIg2wLmqEsXarR1HrDiR8PhwKNKnAEQ,3535
|
59
|
+
ostruct/cli/template_extensions.py,sha256=_lomtDGMGxMfpw05v_-daJ0JbhRm_r_-uEJlPAjbpkI,2699
|
60
|
+
ostruct/cli/template_filters.py,sha256=ulo5jRo62zmkApYvDUjO5imUS5osec7eoeYwJsbuMKk,34566
|
61
|
+
ostruct/cli/template_io.py,sha256=yUWO-8rZnSdX97DTMSEX8fG9CP1ISsOhm2NZN3Fab9A,8821
|
62
|
+
ostruct/cli/template_processor.py,sha256=Y9GMCuA5lASse_AzDCgu8VdcmsAhxQn8qyG546cvBE0,30150
|
63
|
+
ostruct/cli/template_rendering.py,sha256=at8RiT9v0peJUBRwDgX1-WJ6pDDMDO0H33MwuWlkowM,15831
|
64
|
+
ostruct/cli/template_schema.py,sha256=ckH4rUZnEgfm_BHS9LnMGr8LtDxRmZ0C6UBVrSp8KTc,19604
|
65
|
+
ostruct/cli/template_utils.py,sha256=MZdXXjL-x-IXX-5Y8GWopGNBkDE2ItLdCuCl0QWFR_U,14968
|
66
|
+
ostruct/cli/template_validation.py,sha256=3wUBoZJZKt0C5TVaT0nmjH_ehyw4Gn1yEBnMDDE8qLU,14327
|
67
|
+
ostruct/cli/token_utils.py,sha256=r4KPEO3Sec18Q6mU0aClK6XGShvusgUggXEQgEPPlaA,1369
|
68
|
+
ostruct/cli/token_validation.py,sha256=DGc5yyScy0iTvvuF2DaDUgRQaIqVIcm9ZZtuVEyy_oI,10276
|
69
|
+
ostruct/cli/types.py,sha256=qQ5mc4OAK8ChVrgaJ9VSn3gS75byrOonjlsc9WdgibE,2782
|
70
|
+
ostruct/cli/unattended_operation.py,sha256=kI95SSVJC_taxORXQYrce_qLEnuKc6edwn9tMOye-qs,9383
|
71
|
+
ostruct/cli/unicode_compat.py,sha256=bDeVZdWkDv74jVt4AsBl10tRra0v7kV04SNAMyR3q0U,9508
|
72
|
+
ostruct/cli/upload_manager.py,sha256=m8BpVRJc7Ag9Ln5bAQPGMtisHSP0m7VVcXhHnqZ5CjI,16368
|
73
|
+
ostruct/cli/utils.py,sha256=DV8KAx46rl3GgXLydVhrP6A1xY7ofyir3hEKUslSfek,2149
|
74
|
+
ostruct/cli/validators.py,sha256=SRJozxcXgIdbjF0Jv5z2rSbOzeXiTYpzbZKc_WF-5HE,23882
|
75
|
+
ostruct/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
76
|
+
ostruct_cli-1.0.0.dist-info/LICENSE,sha256=DmGAkaYzhrdzTB9Y2Rvfzd3mJiF9ZrTOhxN8t6wrfHA,1098
|
77
|
+
ostruct_cli-1.0.0.dist-info/METADATA,sha256=X6YXB3suiMvexCSUvF0FQUpJk6iotxfF00-F3vmeXqQ,31311
|
78
|
+
ostruct_cli-1.0.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
79
|
+
ostruct_cli-1.0.0.dist-info/entry_points.txt,sha256=NFq9IuqHVTem0j9zKjV8C1si_zGcP1RL6Wbvt9fUDXw,48
|
80
|
+
ostruct_cli-1.0.0.dist-info/RECORD,,
|
@@ -1,54 +0,0 @@
|
|
1
|
-
"""Quick reference command for ostruct CLI."""
|
2
|
-
|
3
|
-
import click
|
4
|
-
|
5
|
-
|
6
|
-
@click.command("quick-ref")
|
7
|
-
def quick_reference() -> None:
|
8
|
-
"""Show quick reference for file routing and common usage patterns."""
|
9
|
-
quick_ref = """
|
10
|
-
🚀 OSTRUCT QUICK REFERENCE
|
11
|
-
|
12
|
-
📁 FILE ROUTING:
|
13
|
-
-ft FILE 📄 Template access only (config files, small data)
|
14
|
-
-fc FILE 💻 Code Interpreter upload (data files, scripts)
|
15
|
-
-fs FILE 🔍 File Search vector store (documents, manuals)
|
16
|
-
|
17
|
-
-dt DIR 📁 Template directory (config dirs, reference data)
|
18
|
-
-dc DIR 📂 Code execution directory (datasets, code repos)
|
19
|
-
-ds DIR 📁 Search directory (documentation, knowledge)
|
20
|
-
|
21
|
-
🔄 ADVANCED ROUTING:
|
22
|
-
--file-for code-interpreter data.csv Single tool, single file
|
23
|
-
--file-for file-search docs.pdf Single tool, single file
|
24
|
-
--file-for code-interpreter shared.json --file-for file-search shared.json Multi-tool routing
|
25
|
-
|
26
|
-
🏷️ VARIABLES:
|
27
|
-
-V name=value Simple string variables
|
28
|
-
-J config='{"key":"value"}' JSON structured data
|
29
|
-
|
30
|
-
🔌 TOOLS:
|
31
|
-
--web-search 🌐 Real-time web search for current info
|
32
|
-
--mcp-server label@https://server.com/sse MCP server integration
|
33
|
-
--timeout 7200 2-hour timeout for long operations
|
34
|
-
|
35
|
-
⚡ COMMON PATTERNS:
|
36
|
-
# Basic template rendering
|
37
|
-
ostruct run template.j2 schema.json -V env=prod
|
38
|
-
|
39
|
-
# Data analysis with Code Interpreter
|
40
|
-
ostruct run analysis.j2 schema.json -fc data.csv -V task=analyze
|
41
|
-
|
42
|
-
# Document search + processing
|
43
|
-
ostruct run search.j2 schema.json -fs docs/ -ft config.yaml
|
44
|
-
|
45
|
-
# Multi-tool workflow
|
46
|
-
ostruct run workflow.j2 schema.json -fc raw_data.csv -fs knowledge/ -ft config.json
|
47
|
-
|
48
|
-
# Current information research
|
49
|
-
ostruct run research.j2 schema.json --web-search -V topic="latest AI developments"
|
50
|
-
|
51
|
-
📖 Full help: ostruct run --help
|
52
|
-
📖 Documentation: https://ostruct.readthedocs.io
|
53
|
-
"""
|
54
|
-
click.echo(quick_ref)
|
@@ -1,478 +0,0 @@
|
|
1
|
-
"""Automatic template optimization system for improved LLM performance."""
|
2
|
-
|
3
|
-
import re
|
4
|
-
from dataclasses import dataclass
|
5
|
-
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Match, Optional, Set
|
7
|
-
|
8
|
-
try:
|
9
|
-
from jinja2 import Environment
|
10
|
-
from jinja2.nodes import For, Name
|
11
|
-
|
12
|
-
JINJA2_AVAILABLE = True
|
13
|
-
except ImportError:
|
14
|
-
JINJA2_AVAILABLE = False
|
15
|
-
|
16
|
-
|
17
|
-
@dataclass
|
18
|
-
class OptimizationResult:
|
19
|
-
"""Result of template optimization process."""
|
20
|
-
|
21
|
-
optimized_template: str
|
22
|
-
transformations: List[str]
|
23
|
-
files_moved: List[str]
|
24
|
-
inline_files_kept: List[str]
|
25
|
-
optimization_time_ms: float
|
26
|
-
|
27
|
-
@property
|
28
|
-
def has_optimizations(self) -> bool:
|
29
|
-
"""Check if any optimizations were applied."""
|
30
|
-
return bool(self.transformations)
|
31
|
-
|
32
|
-
|
33
|
-
class TemplateOptimizer:
|
34
|
-
"""Automatic prompt optimization using pure text manipulation.
|
35
|
-
|
36
|
-
This optimizer applies prompt engineering best practices to improve LLM performance:
|
37
|
-
- Moves large file content to structured appendices
|
38
|
-
- Keeps small values inline for context
|
39
|
-
- Generates natural language references
|
40
|
-
- Maintains deterministic and fast processing
|
41
|
-
"""
|
42
|
-
|
43
|
-
def __init__(self) -> None:
|
44
|
-
"""Initialize the template optimizer."""
|
45
|
-
self.file_references: Dict[str, str] = {}
|
46
|
-
self.dir_references: Dict[str, str] = {}
|
47
|
-
self.optimization_log: List[str] = []
|
48
|
-
self.inline_threshold = (
|
49
|
-
200 # Characters threshold for inline vs appendix
|
50
|
-
)
|
51
|
-
self.small_value_threshold = (
|
52
|
-
50 # Very small values stay inline regardless
|
53
|
-
)
|
54
|
-
|
55
|
-
def _get_loop_variables(self, template_content: str) -> Set[str]:
|
56
|
-
"""Extract loop variables from template to avoid optimizing them.
|
57
|
-
|
58
|
-
Args:
|
59
|
-
template_content: Template content to analyze
|
60
|
-
|
61
|
-
Returns:
|
62
|
-
Set of variable names used in for loops
|
63
|
-
"""
|
64
|
-
if not JINJA2_AVAILABLE:
|
65
|
-
# Fallback: return common loop variable names
|
66
|
-
return {
|
67
|
-
"file",
|
68
|
-
"item",
|
69
|
-
"element",
|
70
|
-
"entry",
|
71
|
-
"record",
|
72
|
-
"row",
|
73
|
-
"line",
|
74
|
-
}
|
75
|
-
|
76
|
-
try:
|
77
|
-
env = Environment()
|
78
|
-
ast = env.parse(template_content)
|
79
|
-
loop_vars = set()
|
80
|
-
|
81
|
-
def visit_node(node: Any) -> None:
|
82
|
-
"""Recursively visit AST nodes to find loop variables."""
|
83
|
-
if isinstance(node, For):
|
84
|
-
# Handle single loop variable: {% for item in items %}
|
85
|
-
if isinstance(node.target, Name):
|
86
|
-
loop_vars.add(node.target.name)
|
87
|
-
# Handle tuple unpacking: {% for key, value in items %}
|
88
|
-
elif hasattr(node.target, "items"):
|
89
|
-
for item in node.target.items:
|
90
|
-
if isinstance(item, Name):
|
91
|
-
loop_vars.add(item.name)
|
92
|
-
|
93
|
-
# Recursively visit child nodes
|
94
|
-
for child in node.iter_child_nodes():
|
95
|
-
visit_node(child)
|
96
|
-
|
97
|
-
visit_node(ast)
|
98
|
-
return loop_vars
|
99
|
-
|
100
|
-
except Exception:
|
101
|
-
# If AST parsing fails, return common loop variable names as fallback
|
102
|
-
return {
|
103
|
-
"file",
|
104
|
-
"item",
|
105
|
-
"element",
|
106
|
-
"entry",
|
107
|
-
"record",
|
108
|
-
"row",
|
109
|
-
"line",
|
110
|
-
}
|
111
|
-
|
112
|
-
def optimize_for_llm(
|
113
|
-
self, template_content: str, step_tracker: Optional[Any] = None
|
114
|
-
) -> OptimizationResult:
|
115
|
-
"""Apply prompt engineering best practices for files and directories.
|
116
|
-
|
117
|
-
Args:
|
118
|
-
template_content: Original template content
|
119
|
-
step_tracker: Optional step tracker for detailed optimization logging
|
120
|
-
|
121
|
-
Returns:
|
122
|
-
OptimizationResult with optimized template and metadata
|
123
|
-
"""
|
124
|
-
import time
|
125
|
-
|
126
|
-
start_time = time.time()
|
127
|
-
|
128
|
-
# Reset state for new optimization
|
129
|
-
self.file_references.clear()
|
130
|
-
self.dir_references.clear()
|
131
|
-
self.optimization_log.clear()
|
132
|
-
|
133
|
-
# Log initial step
|
134
|
-
if step_tracker:
|
135
|
-
step_tracker.log_step(
|
136
|
-
"Initial template loaded",
|
137
|
-
"",
|
138
|
-
template_content,
|
139
|
-
"Starting optimization process with original template",
|
140
|
-
)
|
141
|
-
|
142
|
-
# Get loop variables to avoid optimizing them
|
143
|
-
loop_variables = self._get_loop_variables(template_content)
|
144
|
-
|
145
|
-
optimized = template_content
|
146
|
-
inline_files_kept = []
|
147
|
-
|
148
|
-
# Step 1: Find and optimize file_content() calls
|
149
|
-
file_pattern = r'{{\s*([^}]*\.content|file_content\([^)]+\)|[^}]*\[["\'][^"\']*["\']\]\.content)\s*}}'
|
150
|
-
|
151
|
-
def replace_file_reference(match: Match[str]) -> str:
|
152
|
-
full_match = match.group(0)
|
153
|
-
content_expr = match.group(1)
|
154
|
-
|
155
|
-
# Extract file path from various patterns
|
156
|
-
file_path = self._extract_file_path(content_expr, loop_variables)
|
157
|
-
if not file_path:
|
158
|
-
return full_match # Keep original if can't parse
|
159
|
-
|
160
|
-
# Check if content should stay inline
|
161
|
-
try:
|
162
|
-
if self._should_stay_inline(file_path):
|
163
|
-
inline_files_kept.append(file_path)
|
164
|
-
return full_match # Keep inline
|
165
|
-
|
166
|
-
# Generate reference for large files
|
167
|
-
reference = self._generate_file_reference_text(file_path)
|
168
|
-
self.file_references[file_path] = reference
|
169
|
-
self.optimization_log.append(f"Moved {file_path} to appendix")
|
170
|
-
return reference
|
171
|
-
|
172
|
-
except Exception:
|
173
|
-
# If any error, keep original
|
174
|
-
return full_match
|
175
|
-
|
176
|
-
# Track file content optimization step
|
177
|
-
pre_file_opt = optimized
|
178
|
-
optimized = re.sub(file_pattern, replace_file_reference, optimized)
|
179
|
-
|
180
|
-
if step_tracker and pre_file_opt != optimized:
|
181
|
-
step_tracker.log_step(
|
182
|
-
"File content optimization",
|
183
|
-
pre_file_opt,
|
184
|
-
optimized,
|
185
|
-
f"Moved {len(self.file_references)} files to appendix, kept {len(inline_files_kept)} inline",
|
186
|
-
)
|
187
|
-
|
188
|
-
# Step 2: Optimize directory content references
|
189
|
-
dir_pattern = (
|
190
|
-
r"{{\s*([^}]*\.(files|content)|[^}]*\.files\[[^]]*\])\s*}}"
|
191
|
-
)
|
192
|
-
|
193
|
-
def replace_dir_reference_with_loop_check(match: Match[str]) -> str:
|
194
|
-
return self._replace_dir_reference(match, loop_variables)
|
195
|
-
|
196
|
-
# Track directory optimization step
|
197
|
-
pre_dir_opt = optimized
|
198
|
-
optimized = re.sub(
|
199
|
-
dir_pattern, replace_dir_reference_with_loop_check, optimized
|
200
|
-
)
|
201
|
-
|
202
|
-
if step_tracker and pre_dir_opt != optimized:
|
203
|
-
step_tracker.log_step(
|
204
|
-
"Directory reference optimization",
|
205
|
-
pre_dir_opt,
|
206
|
-
optimized,
|
207
|
-
f"Converted {len(self.dir_references)} directory references to natural language",
|
208
|
-
)
|
209
|
-
|
210
|
-
# Step 3: Build comprehensive appendix if we moved files
|
211
|
-
if self.file_references or self.dir_references:
|
212
|
-
pre_appendix = optimized
|
213
|
-
appendix = self._build_complete_appendix()
|
214
|
-
optimized += "\n\n" + appendix
|
215
|
-
self.optimization_log.append(
|
216
|
-
"Built structured appendix with moved content"
|
217
|
-
)
|
218
|
-
|
219
|
-
if step_tracker:
|
220
|
-
step_tracker.log_step(
|
221
|
-
"Appendix generation",
|
222
|
-
pre_appendix,
|
223
|
-
optimized,
|
224
|
-
f"Generated appendix with {len(self.file_references)} files and {len(self.dir_references)} directories",
|
225
|
-
)
|
226
|
-
|
227
|
-
optimization_time = (time.time() - start_time) * 1000
|
228
|
-
|
229
|
-
return OptimizationResult(
|
230
|
-
optimized_template=optimized,
|
231
|
-
transformations=self.optimization_log.copy(),
|
232
|
-
files_moved=list(self.file_references.keys()),
|
233
|
-
inline_files_kept=inline_files_kept,
|
234
|
-
optimization_time_ms=optimization_time,
|
235
|
-
)
|
236
|
-
|
237
|
-
def _extract_file_path(
|
238
|
-
self, content_expr: str, loop_variables: Set[str]
|
239
|
-
) -> Optional[str]:
|
240
|
-
"""Extract file path from various Jinja2 content expressions.
|
241
|
-
|
242
|
-
Args:
|
243
|
-
content_expr: Jinja2 expression containing file reference
|
244
|
-
loop_variables: Set of loop variables to avoid
|
245
|
-
|
246
|
-
Returns:
|
247
|
-
Extracted file path or None if not found
|
248
|
-
"""
|
249
|
-
# Pattern 1: file_content('path') or file_content("path")
|
250
|
-
match = re.search(r'file_content\(["\']([^"\']+)["\']\)', content_expr)
|
251
|
-
if match:
|
252
|
-
return match.group(1)
|
253
|
-
|
254
|
-
# Pattern 2: variable.content - handle ALL single-file variables
|
255
|
-
match = re.search(r"(\w+)\.content", content_expr)
|
256
|
-
if match:
|
257
|
-
var_name = match.group(1)
|
258
|
-
# Skip if this is a loop variable
|
259
|
-
if var_name in loop_variables:
|
260
|
-
return None
|
261
|
-
# Return placeholder for ANY .content access to prevent directory misclassification
|
262
|
-
return f"${var_name}"
|
263
|
-
|
264
|
-
# Pattern 3: files['filename'].content
|
265
|
-
match = re.search(
|
266
|
-
r'files\[["\']([^"\']+)["\']\]\.content', content_expr
|
267
|
-
)
|
268
|
-
if match:
|
269
|
-
return match.group(1)
|
270
|
-
|
271
|
-
return None
|
272
|
-
|
273
|
-
def _should_stay_inline(self, file_path: str) -> bool:
|
274
|
-
"""Determine if file content should stay inline.
|
275
|
-
|
276
|
-
Args:
|
277
|
-
file_path: Path to the file
|
278
|
-
|
279
|
-
Returns:
|
280
|
-
True if content should stay inline, False if moved to appendix
|
281
|
-
"""
|
282
|
-
# Variable references (${var}) should stay inline for now
|
283
|
-
if file_path.startswith("$"):
|
284
|
-
return True
|
285
|
-
|
286
|
-
try:
|
287
|
-
# Check actual file size if path exists
|
288
|
-
path = Path(file_path)
|
289
|
-
if path.exists() and path.is_file():
|
290
|
-
content = path.read_text(encoding="utf-8", errors="ignore")
|
291
|
-
content_size = len(content.strip())
|
292
|
-
|
293
|
-
# Very small files stay inline
|
294
|
-
if content_size <= self.small_value_threshold:
|
295
|
-
return True
|
296
|
-
|
297
|
-
# Check if it's a simple value (no newlines, short)
|
298
|
-
if (
|
299
|
-
content_size <= self.inline_threshold
|
300
|
-
and "\n" not in content.strip()
|
301
|
-
):
|
302
|
-
return True
|
303
|
-
|
304
|
-
return False
|
305
|
-
|
306
|
-
except Exception:
|
307
|
-
pass
|
308
|
-
|
309
|
-
# Default: move to appendix for unknown files
|
310
|
-
return False
|
311
|
-
|
312
|
-
def _generate_file_reference_text(self, file_path: str) -> str:
|
313
|
-
"""Generate natural language references using pattern matching.
|
314
|
-
|
315
|
-
Args:
|
316
|
-
file_path: Path to the file
|
317
|
-
|
318
|
-
Returns:
|
319
|
-
Natural language reference text
|
320
|
-
"""
|
321
|
-
filename = Path(file_path).name.lower()
|
322
|
-
|
323
|
-
# Pattern-based natural language generation
|
324
|
-
if "config" in filename:
|
325
|
-
return f"the configuration details in <file:{file_path}>"
|
326
|
-
elif "rule" in filename or "policy" in filename:
|
327
|
-
return f"the business rules defined in <file:{file_path}>"
|
328
|
-
elif "schema" in filename:
|
329
|
-
return f"the schema definition in <file:{file_path}>"
|
330
|
-
elif "example" in filename or "sample" in filename:
|
331
|
-
return f"the example shown in <file:{file_path}>"
|
332
|
-
elif filename.endswith((".json", ".yaml", ".yml")):
|
333
|
-
return f"the structured data from <file:{file_path}>"
|
334
|
-
elif filename.endswith((".py", ".js", ".ts", ".java", ".cpp", ".c")):
|
335
|
-
return f"the code implementation in <file:{file_path}>"
|
336
|
-
elif filename.endswith((".md", ".txt", ".rst")):
|
337
|
-
return f"the documentation in <file:{file_path}>"
|
338
|
-
elif filename.endswith((".csv", ".xlsx", ".tsv")):
|
339
|
-
return f"the data from <file:{file_path}>"
|
340
|
-
elif "readme" in filename:
|
341
|
-
return f"the project overview in <file:{file_path}>"
|
342
|
-
elif "changelog" in filename or "history" in filename:
|
343
|
-
return f"the change history in <file:{file_path}>"
|
344
|
-
else:
|
345
|
-
return f"the content of <file:{file_path}>"
|
346
|
-
|
347
|
-
def _replace_dir_reference(
|
348
|
-
self, match: Match[str], loop_variables: Set[str]
|
349
|
-
) -> str:
|
350
|
-
"""Replace directory content references with natural language.
|
351
|
-
|
352
|
-
Args:
|
353
|
-
match: Regex match object for directory reference
|
354
|
-
loop_variables: Set of loop variables to avoid
|
355
|
-
|
356
|
-
Returns:
|
357
|
-
Natural language reference or original text
|
358
|
-
"""
|
359
|
-
full_match = match.group(0)
|
360
|
-
content_expr = match.group(1)
|
361
|
-
|
362
|
-
# Extract directory variable name
|
363
|
-
dir_match = re.search(r"(\w+)\.(?:files|content)", content_expr)
|
364
|
-
if not dir_match:
|
365
|
-
return full_match
|
366
|
-
|
367
|
-
dir_var = dir_match.group(1)
|
368
|
-
|
369
|
-
# Skip if this is a loop variable
|
370
|
-
if dir_var in loop_variables:
|
371
|
-
return full_match
|
372
|
-
|
373
|
-
# Skip if this is a single-file variable accessed via .content
|
374
|
-
# These should be handled by the file pattern, not directory pattern
|
375
|
-
if content_expr.endswith(".content"):
|
376
|
-
# Check if this would be handled by _extract_file_path
|
377
|
-
file_path = self._extract_file_path(content_expr, loop_variables)
|
378
|
-
if file_path:
|
379
|
-
return full_match # Let file pattern handle it
|
380
|
-
|
381
|
-
reference = f"the files and subdirectories in <dir:{dir_var}>"
|
382
|
-
self.dir_references[dir_var] = reference
|
383
|
-
self.optimization_log.append(
|
384
|
-
f"Moved directory {dir_var} content to appendix"
|
385
|
-
)
|
386
|
-
|
387
|
-
return reference
|
388
|
-
|
389
|
-
def _build_complete_appendix(self) -> str:
|
390
|
-
"""Build comprehensive appendix with moved content.
|
391
|
-
|
392
|
-
Returns:
|
393
|
-
Formatted appendix section
|
394
|
-
"""
|
395
|
-
appendix_lines = [
|
396
|
-
"=" * 50,
|
397
|
-
"APPENDIX: Referenced Files and Directories",
|
398
|
-
"=" * 50,
|
399
|
-
]
|
400
|
-
|
401
|
-
if self.file_references:
|
402
|
-
appendix_lines.extend(["", "FILES:"])
|
403
|
-
for file_path, reference in self.file_references.items():
|
404
|
-
appendix_lines.append(f" <file:{file_path}>")
|
405
|
-
appendix_lines.append(f" Referenced as: {reference}")
|
406
|
-
# Note: Actual file content would be injected here during template rendering
|
407
|
-
appendix_lines.append(
|
408
|
-
f" {{{{ file_content('{file_path}') }}}}"
|
409
|
-
)
|
410
|
-
appendix_lines.append("")
|
411
|
-
|
412
|
-
if self.dir_references:
|
413
|
-
appendix_lines.extend(["DIRECTORIES:"])
|
414
|
-
for dir_var, reference in self.dir_references.items():
|
415
|
-
appendix_lines.append(f" <dir:{dir_var}>")
|
416
|
-
appendix_lines.append(f" Referenced as: {reference}")
|
417
|
-
appendix_lines.append(f" {{{{ {dir_var} }}}}")
|
418
|
-
appendix_lines.append("")
|
419
|
-
|
420
|
-
return "\n".join(appendix_lines)
|
421
|
-
|
422
|
-
def get_optimization_stats(self) -> Dict[str, int]:
|
423
|
-
"""Get statistics about the last optimization.
|
424
|
-
|
425
|
-
Returns:
|
426
|
-
Dictionary with optimization statistics
|
427
|
-
"""
|
428
|
-
return {
|
429
|
-
"files_moved": len(self.file_references),
|
430
|
-
"directories_moved": len(self.dir_references),
|
431
|
-
"total_transformations": len(self.optimization_log),
|
432
|
-
}
|
433
|
-
|
434
|
-
|
435
|
-
def optimize_template_for_llm(
|
436
|
-
template_content: str, step_tracker: Optional[Any] = None
|
437
|
-
) -> OptimizationResult:
|
438
|
-
"""Convenience function for template optimization.
|
439
|
-
|
440
|
-
Args:
|
441
|
-
template_content: Original template content
|
442
|
-
step_tracker: Optional step tracker for detailed optimization logging
|
443
|
-
|
444
|
-
Returns:
|
445
|
-
OptimizationResult with optimized template
|
446
|
-
"""
|
447
|
-
optimizer = TemplateOptimizer()
|
448
|
-
return optimizer.optimize_for_llm(template_content, step_tracker)
|
449
|
-
|
450
|
-
|
451
|
-
def is_optimization_beneficial(
|
452
|
-
template_content: str, threshold_chars: int = 1000
|
453
|
-
) -> bool:
|
454
|
-
"""Check if template optimization would be beneficial.
|
455
|
-
|
456
|
-
Args:
|
457
|
-
template_content: Template content to analyze
|
458
|
-
threshold_chars: Minimum size to consider optimization
|
459
|
-
|
460
|
-
Returns:
|
461
|
-
True if optimization would likely help
|
462
|
-
"""
|
463
|
-
# Check template size
|
464
|
-
if len(template_content) < threshold_chars:
|
465
|
-
return False
|
466
|
-
|
467
|
-
# Check for file content patterns
|
468
|
-
file_patterns = [
|
469
|
-
r"{{\s*[^}]*\.content\s*}}",
|
470
|
-
r"{{\s*file_content\([^)]+\)\s*}}",
|
471
|
-
r"{{\s*[^}]*\.files\s*}}",
|
472
|
-
]
|
473
|
-
|
474
|
-
for pattern in file_patterns:
|
475
|
-
if re.search(pattern, template_content):
|
476
|
-
return True
|
477
|
-
|
478
|
-
return False
|