ostruct-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct_cli-0.1.0/LICENSE +21 -0
- ostruct_cli-0.1.0/PKG-INFO +182 -0
- ostruct_cli-0.1.0/README.md +157 -0
- ostruct_cli-0.1.0/pyproject.toml +110 -0
- ostruct_cli-0.1.0/src/ostruct/__init__.py +0 -0
- ostruct_cli-0.1.0/src/ostruct/cli/__init__.py +19 -0
- ostruct_cli-0.1.0/src/ostruct/cli/cache_manager.py +175 -0
- ostruct_cli-0.1.0/src/ostruct/cli/cli.py +2033 -0
- ostruct_cli-0.1.0/src/ostruct/cli/errors.py +329 -0
- ostruct_cli-0.1.0/src/ostruct/cli/file_info.py +316 -0
- ostruct_cli-0.1.0/src/ostruct/cli/file_list.py +151 -0
- ostruct_cli-0.1.0/src/ostruct/cli/file_utils.py +518 -0
- ostruct_cli-0.1.0/src/ostruct/cli/path_utils.py +123 -0
- ostruct_cli-0.1.0/src/ostruct/cli/progress.py +105 -0
- ostruct_cli-0.1.0/src/ostruct/cli/security.py +311 -0
- ostruct_cli-0.1.0/src/ostruct/cli/security_types.py +49 -0
- ostruct_cli-0.1.0/src/ostruct/cli/template_env.py +55 -0
- ostruct_cli-0.1.0/src/ostruct/cli/template_extensions.py +51 -0
- ostruct_cli-0.1.0/src/ostruct/cli/template_filters.py +650 -0
- ostruct_cli-0.1.0/src/ostruct/cli/template_io.py +261 -0
- ostruct_cli-0.1.0/src/ostruct/cli/template_rendering.py +347 -0
- ostruct_cli-0.1.0/src/ostruct/cli/template_schema.py +565 -0
- ostruct_cli-0.1.0/src/ostruct/cli/template_utils.py +288 -0
- ostruct_cli-0.1.0/src/ostruct/cli/template_validation.py +375 -0
- ostruct_cli-0.1.0/src/ostruct/cli/utils.py +31 -0
- ostruct_cli-0.1.0/src/ostruct/py.typed +0 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 Yaniv Golan
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,182 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: ostruct-cli
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: CLI for OpenAI Structured Output
|
5
|
+
Author: Yaniv Golan
|
6
|
+
Author-email: yaniv@golan.name
|
7
|
+
Requires-Python: >=3.9,<4.0
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
14
|
+
Requires-Dist: cachetools (>=5.3.2,<6.0.0)
|
15
|
+
Requires-Dist: chardet (>=5.0.0,<6.0.0)
|
16
|
+
Requires-Dist: ijson (>=3.2.3,<4.0.0)
|
17
|
+
Requires-Dist: jsonschema (>=4.23.0,<5.0.0)
|
18
|
+
Requires-Dist: openai-structured (>=1.0.0,<2.0.0)
|
19
|
+
Requires-Dist: pydantic (>=2.6.3,<3.0.0)
|
20
|
+
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
21
|
+
Requires-Dist: tiktoken (>=0.6.0,<0.7.0)
|
22
|
+
Requires-Dist: typing-extensions (>=4.9.0,<5.0.0)
|
23
|
+
Description-Content-Type: text/markdown
|
24
|
+
|
25
|
+
# ostruct-cli
|
26
|
+
|
27
|
+
[PyPI version](https://badge.fury.io/py/ostruct-cli)
|
28
|
+
[PyPI project](https://pypi.org/project/ostruct-cli/)
|
29
|
+
[Documentation status](https://ostruct-cli.readthedocs.io/en/latest/?badge=latest)
|
30
|
+
[CI](https://github.com/yaniv-golan/ostruct/actions/workflows/ci.yml)
|
31
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
32
|
+
|
33
|
+
Command-line interface for working with OpenAI models and structured output, powered by the [openai-structured](https://github.com/yaniv-golan/openai-structured) library.
|
34
|
+
|
35
|
+
## Features
|
36
|
+
|
37
|
+
- Generate structured output from natural language using OpenAI models
|
38
|
+
- Rich template system for defining output schemas
|
39
|
+
- Automatic token counting and context window management
|
40
|
+
- Streaming support for real-time output
|
41
|
+
- Caching system for cost optimization
|
42
|
+
- Secure handling of sensitive data
|
43
|
+
|
44
|
+
## Installation
|
45
|
+
|
46
|
+
```bash
|
47
|
+
pip install ostruct-cli
|
48
|
+
```
|
49
|
+
|
50
|
+
## Quick Start
|
51
|
+
|
52
|
+
1. Set your OpenAI API key:
|
53
|
+
|
54
|
+
```bash
|
55
|
+
export OPENAI_API_KEY=your-api-key
|
56
|
+
```
|
57
|
+
|
58
|
+
2. Create a task template file `task.j2`:
|
59
|
+
|
60
|
+
```
|
61
|
+
Extract information about the person: {{ stdin }}
|
62
|
+
```
|
63
|
+
|
64
|
+
3. Create a schema file `schema.json`:
|
65
|
+
|
66
|
+
```json
|
67
|
+
{
|
68
|
+
"type": "object",
|
69
|
+
"properties": {
|
70
|
+
"name": {
|
71
|
+
"type": "string",
|
72
|
+
"description": "The person's full name"
|
73
|
+
},
|
74
|
+
"age": {
|
75
|
+
"type": "integer",
|
76
|
+
"description": "The person's age"
|
77
|
+
},
|
78
|
+
"occupation": {
|
79
|
+
"type": "string",
|
80
|
+
"description": "The person's job or profession"
|
81
|
+
}
|
82
|
+
},
|
83
|
+
"required": ["name", "age", "occupation"]
|
84
|
+
}
|
85
|
+
```
|
86
|
+
|
87
|
+
4. Run the CLI:
|
88
|
+
|
89
|
+
```bash
|
90
|
+
echo "John Smith is a 35 year old software engineer" | ostruct --task @task.j2 --schema schema.json
|
91
|
+
```
|
92
|
+
|
93
|
+
Output:
|
94
|
+
|
95
|
+
```json
|
96
|
+
{
|
97
|
+
"name": "John Smith",
|
98
|
+
"age": 35,
|
99
|
+
"occupation": "software engineer"
|
100
|
+
}
|
101
|
+
```
|
102
|
+
|
103
|
+
### About Template Files
|
104
|
+
|
105
|
+
Template files use the `.j2` extension to indicate they contain Jinja2 template syntax. This convention:
|
106
|
+
|
107
|
+
- Enables proper syntax highlighting in most editors
|
108
|
+
- Makes it clear the file contains template logic
|
109
|
+
- Follows industry standards for Jinja2 templates
|
110
|
+
|
111
|
+
While the CLI accepts templates with any extension (when prefixed with `@`), we recommend using `.j2` for better tooling support and clarity.
|
112
|
+
|
113
|
+
## Debug Options
|
114
|
+
|
115
|
+
- `--show-model-schema`: Display the generated Pydantic model schema
|
116
|
+
- `--debug-validation`: Show detailed schema validation debugging
|
117
|
+
- `--verbose-schema`: Enable verbose schema debugging output
|
118
|
+
- `--debug-openai-stream`: Enable low-level debug output for OpenAI streaming (very verbose)
|
119
|
+
- `--progress-level {none,basic,detailed}`: Set progress reporting level (default: basic)
|
120
|
+
|
121
|
+
All debug and error logs are written to:
|
122
|
+
|
123
|
+
- `~/.ostruct/logs/ostruct.log`: General application logs
|
124
|
+
- `~/.ostruct/logs/openai_stream.log`: OpenAI streaming operations logs
|
125
|
+
|
126
|
+
For more detailed documentation and examples, visit our [documentation](https://ostruct-cli.readthedocs.io/).
|
127
|
+
|
128
|
+
## Development
|
129
|
+
|
130
|
+
To contribute or report issues, please visit our [GitHub repository](https://github.com/yaniv-golan/ostruct).
|
131
|
+
|
132
|
+
## Development Setup
|
133
|
+
|
134
|
+
1. Clone the repository:
|
135
|
+
|
136
|
+
```bash
|
137
|
+
git clone https://github.com/yaniv-golan/ostruct.git
|
138
|
+
cd ostruct
|
139
|
+
```
|
140
|
+
|
141
|
+
2. Install Poetry if you haven't already:
|
142
|
+
|
143
|
+
```bash
|
144
|
+
curl -sSL https://install.python-poetry.org | python3 -
|
145
|
+
```
|
146
|
+
|
147
|
+
3. Install dependencies:
|
148
|
+
|
149
|
+
```bash
|
150
|
+
poetry install
|
151
|
+
```
|
152
|
+
|
153
|
+
4. Install openai-structured in editable mode:
|
154
|
+
|
155
|
+
```bash
|
156
|
+
poetry add --editable ../openai-structured # Adjust path as needed
|
157
|
+
```
|
158
|
+
|
159
|
+
5. Run tests:
|
160
|
+
|
161
|
+
```bash
|
162
|
+
poetry run pytest
|
163
|
+
```
|
164
|
+
|
165
|
+
## Contributing
|
166
|
+
|
167
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
168
|
+
|
169
|
+
## License
|
170
|
+
|
171
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
172
|
+
|
173
|
+
## Migration from openai-structured
|
174
|
+
|
175
|
+
If you were previously using the CLI bundled with openai-structured (pre-1.0.0), this is its new home. The migration is straightforward:
|
176
|
+
|
177
|
+
1. Update openai-structured to version 1.0.0 or later
|
178
|
+
2. Install ostruct-cli
|
179
|
+
3. Replace any `openai-structured` CLI commands with `ostruct`
|
180
|
+
|
181
|
+
The functionality remains the same, just moved to a dedicated package for better maintenance and focus.
|
182
|
+
|
@@ -0,0 +1,157 @@
|
|
1
|
+
# ostruct-cli
|
2
|
+
|
3
|
+
[PyPI version](https://badge.fury.io/py/ostruct-cli)
|
4
|
+
[PyPI project](https://pypi.org/project/ostruct-cli/)
|
5
|
+
[Documentation status](https://ostruct-cli.readthedocs.io/en/latest/?badge=latest)
|
6
|
+
[CI](https://github.com/yaniv-golan/ostruct/actions/workflows/ci.yml)
|
7
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
8
|
+
|
9
|
+
Command-line interface for working with OpenAI models and structured output, powered by the [openai-structured](https://github.com/yaniv-golan/openai-structured) library.
|
10
|
+
|
11
|
+
## Features
|
12
|
+
|
13
|
+
- Generate structured output from natural language using OpenAI models
|
14
|
+
- Rich template system for defining output schemas
|
15
|
+
- Automatic token counting and context window management
|
16
|
+
- Streaming support for real-time output
|
17
|
+
- Caching system for cost optimization
|
18
|
+
- Secure handling of sensitive data
|
19
|
+
|
20
|
+
## Installation
|
21
|
+
|
22
|
+
```bash
|
23
|
+
pip install ostruct-cli
|
24
|
+
```
|
25
|
+
|
26
|
+
## Quick Start
|
27
|
+
|
28
|
+
1. Set your OpenAI API key:
|
29
|
+
|
30
|
+
```bash
|
31
|
+
export OPENAI_API_KEY=your-api-key
|
32
|
+
```
|
33
|
+
|
34
|
+
2. Create a task template file `task.j2`:
|
35
|
+
|
36
|
+
```
|
37
|
+
Extract information about the person: {{ stdin }}
|
38
|
+
```
|
39
|
+
|
40
|
+
3. Create a schema file `schema.json`:
|
41
|
+
|
42
|
+
```json
|
43
|
+
{
|
44
|
+
"type": "object",
|
45
|
+
"properties": {
|
46
|
+
"name": {
|
47
|
+
"type": "string",
|
48
|
+
"description": "The person's full name"
|
49
|
+
},
|
50
|
+
"age": {
|
51
|
+
"type": "integer",
|
52
|
+
"description": "The person's age"
|
53
|
+
},
|
54
|
+
"occupation": {
|
55
|
+
"type": "string",
|
56
|
+
"description": "The person's job or profession"
|
57
|
+
}
|
58
|
+
},
|
59
|
+
"required": ["name", "age", "occupation"]
|
60
|
+
}
|
61
|
+
```
|
62
|
+
|
63
|
+
4. Run the CLI:
|
64
|
+
|
65
|
+
```bash
|
66
|
+
echo "John Smith is a 35 year old software engineer" | ostruct --task @task.j2 --schema schema.json
|
67
|
+
```
|
68
|
+
|
69
|
+
Output:
|
70
|
+
|
71
|
+
```json
|
72
|
+
{
|
73
|
+
"name": "John Smith",
|
74
|
+
"age": 35,
|
75
|
+
"occupation": "software engineer"
|
76
|
+
}
|
77
|
+
```
|
78
|
+
|
79
|
+
### About Template Files
|
80
|
+
|
81
|
+
Template files use the `.j2` extension to indicate they contain Jinja2 template syntax. This convention:
|
82
|
+
|
83
|
+
- Enables proper syntax highlighting in most editors
|
84
|
+
- Makes it clear the file contains template logic
|
85
|
+
- Follows industry standards for Jinja2 templates
|
86
|
+
|
87
|
+
While the CLI accepts templates with any extension (when prefixed with `@`), we recommend using `.j2` for better tooling support and clarity.
|
88
|
+
|
89
|
+
## Debug Options
|
90
|
+
|
91
|
+
- `--show-model-schema`: Display the generated Pydantic model schema
|
92
|
+
- `--debug-validation`: Show detailed schema validation debugging
|
93
|
+
- `--verbose-schema`: Enable verbose schema debugging output
|
94
|
+
- `--debug-openai-stream`: Enable low-level debug output for OpenAI streaming (very verbose)
|
95
|
+
- `--progress-level {none,basic,detailed}`: Set progress reporting level (default: basic)
|
96
|
+
|
97
|
+
All debug and error logs are written to:
|
98
|
+
|
99
|
+
- `~/.ostruct/logs/ostruct.log`: General application logs
|
100
|
+
- `~/.ostruct/logs/openai_stream.log`: OpenAI streaming operations logs
|
101
|
+
|
102
|
+
For more detailed documentation and examples, visit our [documentation](https://ostruct-cli.readthedocs.io/).
|
103
|
+
|
104
|
+
## Development
|
105
|
+
|
106
|
+
To contribute or report issues, please visit our [GitHub repository](https://github.com/yaniv-golan/ostruct).
|
107
|
+
|
108
|
+
## Development Setup
|
109
|
+
|
110
|
+
1. Clone the repository:
|
111
|
+
|
112
|
+
```bash
|
113
|
+
git clone https://github.com/yaniv-golan/ostruct.git
|
114
|
+
cd ostruct
|
115
|
+
```
|
116
|
+
|
117
|
+
2. Install Poetry if you haven't already:
|
118
|
+
|
119
|
+
```bash
|
120
|
+
curl -sSL https://install.python-poetry.org | python3 -
|
121
|
+
```
|
122
|
+
|
123
|
+
3. Install dependencies:
|
124
|
+
|
125
|
+
```bash
|
126
|
+
poetry install
|
127
|
+
```
|
128
|
+
|
129
|
+
4. Install openai-structured in editable mode:
|
130
|
+
|
131
|
+
```bash
|
132
|
+
poetry add --editable ../openai-structured # Adjust path as needed
|
133
|
+
```
|
134
|
+
|
135
|
+
5. Run tests:
|
136
|
+
|
137
|
+
```bash
|
138
|
+
poetry run pytest
|
139
|
+
```
|
140
|
+
|
141
|
+
## Contributing
|
142
|
+
|
143
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
144
|
+
|
145
|
+
## License
|
146
|
+
|
147
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
148
|
+
|
149
|
+
## Migration from openai-structured
|
150
|
+
|
151
|
+
If you were previously using the CLI bundled with openai-structured (pre-1.0.0), this is its new home. The migration is straightforward:
|
152
|
+
|
153
|
+
1. Update openai-structured to version 1.0.0 or later
|
154
|
+
2. Install ostruct-cli
|
155
|
+
3. Replace any `openai-structured` CLI commands with `ostruct`
|
156
|
+
|
157
|
+
The functionality remains the same, just moved to a dedicated package for better maintenance and focus.
|
@@ -0,0 +1,110 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["poetry-core"]
|
3
|
+
build-backend = "poetry.core.masonry.api"
|
4
|
+
|
5
|
+
[tool.poetry]
|
6
|
+
name = "ostruct-cli"
|
7
|
+
version = "0.1.0"
|
8
|
+
description = "CLI for OpenAI Structured Output"
|
9
|
+
authors = ["Yaniv Golan <yaniv@golan.name>"]
|
10
|
+
readme = "README.md"
|
11
|
+
packages = [{include = "ostruct", from = "src"}]
|
12
|
+
|
13
|
+
[tool.poetry.dependencies]
|
14
|
+
python = ">=3.9,<4.0"
|
15
|
+
pydantic = "^2.6.3"
|
16
|
+
jsonschema = "^4.23.0"
|
17
|
+
chardet = "^5.0.0"
|
18
|
+
cachetools = "^5.3.2"
|
19
|
+
ijson = "^3.2.3"
|
20
|
+
typing-extensions = "^4.9.0"
|
21
|
+
tiktoken = "^0.6.0"
|
22
|
+
pyyaml = "^6.0.2"
|
23
|
+
openai-structured = "^1.0.0"
|
24
|
+
|
25
|
+
[tool.poetry.scripts]
|
26
|
+
ostruct = "ostruct.cli.cli:main"
|
27
|
+
|
28
|
+
[tool.poetry.group.dev.dependencies]
|
29
|
+
pytest = "^8.3.4"
|
30
|
+
flake8 = "^6.0"
|
31
|
+
flake8-pyproject = "^1.2.3"
|
32
|
+
black = "24.8.0"
|
33
|
+
isort = "^5.13"
|
34
|
+
mypy = "^1.0"
|
35
|
+
pytest-asyncio = "^0.25.2"
|
36
|
+
pytest-mock = "^3.14.0"
|
37
|
+
build = "^1.2.2.post1"
|
38
|
+
twine = "^6.0.1"
|
39
|
+
python-dotenv = "^1.0.1"
|
40
|
+
types-jsonschema = "^4.23.0.20241208"
|
41
|
+
anyio = {version = "3.7.1", extras = ["trio"]}
|
42
|
+
sphinx = ">=7.0,<8.0"
|
43
|
+
types-pyyaml = "^6.0.12.20241230"
|
44
|
+
types-pygments = "^2.19.0.20250107"
|
45
|
+
types-chardet = "^5.0.4.6"
|
46
|
+
pyfakefs = "^5.7.4"
|
47
|
+
types-cachetools = "^5.5.0.20240820"
|
48
|
+
|
49
|
+
[tool.poetry.group.docs]
|
50
|
+
optional = true
|
51
|
+
|
52
|
+
[tool.poetry.group.docs.dependencies]
|
53
|
+
sphinx = "^7.0"
|
54
|
+
sphinx-rtd-theme = "^1.0"
|
55
|
+
myst-parser = "^2.0.0"
|
56
|
+
|
57
|
+
[tool.flake8]
|
58
|
+
max-line-length = 120
|
59
|
+
extend-ignore = ["E203"]
|
60
|
+
|
61
|
+
[tool.mypy]
|
62
|
+
plugins = ["pydantic.mypy"]
|
63
|
+
strict = true
|
64
|
+
exclude = [
|
65
|
+
"docs/",
|
66
|
+
"examples/"
|
67
|
+
]
|
68
|
+
packages = ["ostruct", "tests"]
|
69
|
+
python_version = "3.9"
|
70
|
+
warn_unused_configs = true
|
71
|
+
disallow_untyped_defs = true
|
72
|
+
check_untyped_defs = true
|
73
|
+
warn_redundant_casts = true
|
74
|
+
warn_unused_ignores = true
|
75
|
+
warn_return_any = true
|
76
|
+
warn_unreachable = true
|
77
|
+
show_error_codes = true
|
78
|
+
ignore_missing_imports = false
|
79
|
+
|
80
|
+
[tool.black]
|
81
|
+
line-length = 79
|
82
|
+
target-version = ["py39"]
|
83
|
+
include = '\.pyi?$'
|
84
|
+
preview = false
|
85
|
+
required-version = "24.8.0"
|
86
|
+
|
87
|
+
[tool.isort]
|
88
|
+
profile = "black"
|
89
|
+
multi_line_output = 3
|
90
|
+
line_length = 79
|
91
|
+
|
92
|
+
[tool.pytest.ini_options]
|
93
|
+
asyncio_mode = "strict"
|
94
|
+
testpaths = ["tests"]
|
95
|
+
python_files = ["test_*.py"]
|
96
|
+
markers = [
|
97
|
+
"live: mark test as a live test that should use real API key",
|
98
|
+
"asyncio: mark test as requiring async loop",
|
99
|
+
]
|
100
|
+
asyncio_default_fixture_loop_scope = "function"
|
101
|
+
|
102
|
+
[tool.ruff]
|
103
|
+
target-version = "py39"
|
104
|
+
|
105
|
+
[tool.poetry.group.examples]
|
106
|
+
optional = true
|
107
|
+
|
108
|
+
[tool.poetry.group.examples.dependencies]
|
109
|
+
tenacity = "^8.2.3"
|
110
|
+
asyncio-throttle = "^1.0.2"
|
File without changes
|
@@ -0,0 +1,19 @@
|
|
1
|
+
"""Command-line interface for making structured OpenAI API calls."""
|
2
|
+
|
3
|
+
from .cli import (
|
4
|
+
ExitCode,
|
5
|
+
_main,
|
6
|
+
validate_schema_file,
|
7
|
+
validate_task_template,
|
8
|
+
validate_variable_mapping,
|
9
|
+
)
|
10
|
+
from .path_utils import validate_path_mapping
|
11
|
+
|
12
|
+
__all__ = [
|
13
|
+
"ExitCode",
|
14
|
+
"_main",
|
15
|
+
"validate_path_mapping",
|
16
|
+
"validate_schema_file",
|
17
|
+
"validate_task_template",
|
18
|
+
"validate_variable_mapping",
|
19
|
+
]
|
@@ -0,0 +1,175 @@
|
|
1
|
+
"""Cache management for file content.
|
2
|
+
|
3
|
+
This module provides a thread-safe cache manager for file content
|
4
|
+
with LRU eviction and automatic invalidation on file changes.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
from dataclasses import dataclass
|
9
|
+
from typing import Any, Optional, Tuple
|
10
|
+
|
11
|
+
from cachetools import LRUCache
|
12
|
+
from cachetools.keys import hashkey
|
13
|
+
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
# Type alias for cache keys
|
17
|
+
CacheKey = Tuple[Any, ...]
|
18
|
+
|
19
|
+
|
20
|
+
@dataclass(frozen=True)
class CacheEntry:
    """Immutable record describing one cached file.

    The dataclass is frozen, so an entry handed out by the cache cannot be
    altered afterwards by whoever receives it.
    """

    # File content as text
    content: str
    # File encoding, when one is known
    encoding: Optional[str]
    # Content hash, when one was computed
    hash_value: Optional[str]
    # Modification time with nanosecond precision
    mtime_ns: int
    # File size in bytes as reported by stat
    size: int
|
33
|
+
|
34
|
+
|
35
|
+
class FileCache:
    """LRU cache for file content, bounded by total bytes and entry count.

    Entries are keyed by file path. A lookup only succeeds when the
    caller-supplied modification time and size match the values stored with
    the entry, so content of a changed file is not served from the cache.

    NOTE(review): this class was described as thread-safe, but it performs
    no locking of its own; confirm that callers synchronize externally
    before sharing one instance across threads.
    """

    # Upper bound on the number of entries held by the underlying LRUCache.
    _MAX_ENTRIES = 1024

    def __init__(self, max_size_bytes: int = 50 * 1024 * 1024) -> None:  # 50MB default
        """Initialize cache with maximum size in bytes.

        Args:
            max_size_bytes: Maximum cache size in bytes
        """
        self._max_size = max_size_bytes
        # Running total of the ``size`` of every entry currently stored.
        self._current_size = 0
        self._cache: LRUCache[CacheKey, CacheEntry] = LRUCache(
            maxsize=self._MAX_ENTRIES
        )
        logger.debug(
            "Initialized FileCache with max_size=%d bytes, maxsize=%d entries",
            max_size_bytes,
            self._MAX_ENTRIES,
        )

    def _remove_entry(self, key: CacheKey) -> None:
        """Remove entry from cache and update size.

        Does nothing when the key is not present.

        Args:
            key: Cache key to remove
        """
        entry = self._cache.pop(key, None)
        if entry is None:
            return
        self._current_size -= entry.size
        logger.debug(
            "Removed cache entry: key=%s, size=%d, new_total_size=%d",
            key,
            entry.size,
            self._current_size,
        )

    def get(
        self, path: str, current_mtime_ns: int, current_size: int
    ) -> Optional[CacheEntry]:
        """Get cache entry if it exists and is valid.

        An entry whose stored mtime or size differs from the supplied
        values is dropped from the cache before returning.

        Args:
            path: Absolute path to the file
            current_mtime_ns: Current modification time in nanoseconds
            current_size: Current file size in bytes

        Returns:
            CacheEntry if valid cache exists, None otherwise
        """
        key = hashkey(path)
        entry = self._cache.get(key)

        if entry is None:
            logger.debug("Cache miss for %s: no entry found", path)
            return None

        # Check if file has been modified using both mtime and size
        mtime_changed = entry.mtime_ns != current_mtime_ns
        size_changed = entry.size != current_size
        if mtime_changed or size_changed:
            logger.info(
                "Cache invalidated for %s: mtime_ns=%d->%d (%s), size=%d->%d (%s)",
                path,
                entry.mtime_ns,
                current_mtime_ns,
                "changed" if mtime_changed else "same",
                entry.size,
                current_size,
                "changed" if size_changed else "same",
            )
            self._remove_entry(key)
            return None

        logger.debug(
            "Cache hit for %s: mtime_ns=%d, size=%d",
            path,
            entry.mtime_ns,
            entry.size,
        )
        return entry

    def put(
        self,
        path: str,
        content: str,
        encoding: Optional[str],
        hash_value: Optional[str],
        mtime_ns: int,
        size: int,
    ) -> None:
        """Add or update cache entry.

        A file larger than the whole byte budget is not cached. Otherwise
        any existing entry for the path is replaced, and older entries are
        evicted until the new one fits within both the byte budget and the
        entry-count limit.

        Args:
            path: Absolute path to the file
            content: File content
            encoding: File encoding
            hash_value: Content hash
            mtime_ns: File modification time in nanoseconds
            size: File size in bytes from stat
        """
        if size > self._max_size:
            logger.warning(
                "File %s size (%d bytes) exceeds cache max size (%d bytes)",
                path,
                size,
                self._max_size,
            )
            return

        key = hashkey(path)
        self._remove_entry(key)

        entry = CacheEntry(content, encoding, hash_value, mtime_ns, size)

        # Evict entries if needed. The entry-count limit is enforced here as
        # well: otherwise LRUCache would silently drop its own LRU item on
        # insertion and _current_size would keep counting the dropped bytes.
        evicted_count = 0
        while self._cache and (
            self._current_size + size > self._max_size
            or len(self._cache) >= self._cache.maxsize
        ):
            evicted_key, evicted = self._cache.popitem()
            self._current_size -= evicted.size
            evicted_count += 1
            logger.debug(
                "Evicted cache entry: key=%s, size=%d, new_total_size=%d",
                evicted_key,
                evicted.size,
                self._current_size,
            )

        if evicted_count > 0:
            logger.info(
                "Evicted %d entries to make room for %s (size=%d)",
                evicted_count,
                path,
                size,
            )

        self._cache[key] = entry
        self._current_size += size
        logger.debug(
            "Added cache entry: path=%s, size=%d, total_size=%d/%d",
            path,
            size,
            self._current_size,
            self._max_size,
        )
|