feather-structured-output 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+ # Temporary file for partial code execution
204
+ tempCodeRunnerFile.py
205
+
206
+ # Ruff stuff:
207
+ .ruff_cache/
208
+
209
+ # PyPI configuration file
210
+ .pypirc
211
+
212
+ # Local task runner configuration with publishing tokens
213
+ mise.toml
214
+
215
+ # Marimo
216
+ marimo/_static/
217
+ marimo/_lsp/
218
+ __marimo__/
219
+
220
+ # Streamlit
221
+ .streamlit/secrets.toml
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 第一無重工
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,219 @@
1
+ Metadata-Version: 2.4
2
+ Name: feather-structured-output
3
+ Version: 0.1.1
4
+ Summary: Lightweight library for turning LLM text responses into validated Python dictionaries
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.13
7
+ Description-Content-Type: text/markdown
8
+
9
+ # feather-structured-output
10
+
11
+ `feather-structured-output` is a lightweight Python library for turning LLM text
12
+ responses into validated Python dictionaries.
13
+
14
+ It does not call an LLM, retry requests, or manage conversation history. It only
15
+ handles schema definition, short JSON prompt generation, JSON extraction, field
16
+ validation, normalization, and retry-friendly error messages.
17
+
18
+ ## Installation
19
+
20
+ This repository uses `uv`. To run the test suite:
21
+
22
+ ```powershell
23
+ uv run pytest
24
+ ```
25
+
26
+ ## Basic Usage
27
+
28
+ ```python
29
+ from feather_structured_output import IntField, Schema, StringField
30
+
31
+ schema = Schema(
32
+ instruction="Extract the following JSON.",
33
+ fields=[
34
+ StringField(
35
+ source_label="Company name",
36
+ output_name="name",
37
+ description="Use the formal company name.",
38
+ example="Acme Corp",
39
+ ),
40
+ IntField(
41
+ source_label="Quantity",
42
+ output_name="quantity",
43
+ gt=0,
44
+ ),
45
+ ],
46
+ )
47
+
48
+ prompt = schema.dump_prompt()
49
+ ```
50
+
51
+ `prompt` becomes a compact JSON example with comments:
52
+
53
+ ```text
54
+ Extract the following JSON.
55
+
56
+ {
57
+ "name": "Acme Corp", // Company name。String. Use the formal company name. Required.
58
+ "quantity": 1 // Quantity。Integer. Greater than 0. Required.
59
+ }
60
+ ```
61
+
62
+ Pass that prompt to any LLM client yourself, then validate the response:
63
+
64
+ ```python
65
+ result = schema.extract('{"name": "Acme Corp", "quantity": 3}')
66
+
67
+ if result.ok:
68
+ data = result.data
69
+ else:
70
+ retry_prompt = result.error_message
71
+ ```
72
+
73
+ ## Fields
74
+
75
+ Available field types:
76
+
77
+ - `StringField`
78
+ - `IntField`
79
+ - `FloatField`
80
+ - `BooleanField`
81
+ - `ObjectField`
82
+ - `UnionField`
83
+
84
+ Common field options:
85
+
86
+ - `source_label`: label from the source document or UI; optional
87
+ - `output_name`: JSON key; if omitted, `source_label` is used
88
+ - `description`: extra instruction for the LLM
89
+ - `example`: sample value in the generated prompt
90
+ - `allowed_values`: fixed allowed values for the field
91
+ - `presence`: missing value policy
92
+ - `repeatable`, `min_items`, `max_items`: array support
93
+
94
+ At least one of `output_name` or `source_label` is required when a field is used
95
+ in a `Schema`.
96
+
97
+ ## Presence Policy
98
+
99
+ ```python
100
+ from feather_structured_output import PresencePolicy, StringField
101
+
102
+ StringField(
103
+ output_name="note",
104
+ presence=PresencePolicy.NULLABLE,
105
+ )
106
+ ```
107
+
108
+ Policies:
109
+
110
+ - `REQUIRED`: value is required
111
+ - `OMITTABLE`: missing value is omitted from normalized data
112
+ - `NULLABLE`: missing value is normalized to `None`
113
+ - `BLANKABLE`: missing value is normalized to `""`
114
+
115
+ `""` is treated as a missing value during validation.
116
+
117
+ ## Arrays and Objects
118
+
119
+ ```python
120
+ from feather_structured_output import IntField, ObjectField, StringField
121
+
122
+ items = ObjectField(
123
+ output_name="items",
124
+ repeatable=True,
125
+ min_items=1,
126
+ fields=[
127
+ StringField(output_name="name"),
128
+ IntField(output_name="quantity", gt=0),
129
+ ],
130
+ )
131
+ ```
132
+
133
+ ## Fixed Values and Unions
134
+
135
+ Use `allowed_values` for fixed values. Use `UnionField` when a value may match
136
+ any one of several field shapes.
137
+
138
+ ```python
139
+ from feather_structured_output import IntField, StringField, UnionField
140
+
141
+ dog_age = UnionField(
142
+ output_name="dog_age",
143
+ variants=[
144
+ StringField(allowed_values=["N/A"]),
145
+ StringField(allowed_values=["-"]),
146
+ IntField(gt=0),
147
+ ],
148
+ )
149
+ ```
150
+
151
+ ## JSON Extraction
152
+
153
+ `Schema.extract()` accepts raw JSON or a single fenced `json` code block. Text
154
+ outside that code block is ignored.
155
+
156
+ ````python
157
+ result = schema.extract(
158
+ """Here is the result:
159
+ ```json
160
+ {"name": "Acme Corp", "quantity": 3}
161
+ ```"""
162
+ )
163
+ ````
164
+
165
+ Multiple JSON code blocks, non-JSON code blocks, invalid JSON, and non-object
166
+ JSON values return an `ExtractResult` with validation errors.
167
+
168
+ ## Custom Validators
169
+
170
+ Use validators for cross-field rules.
171
+
172
+ ```python
173
+ from feather_structured_output import IntField, Schema, StringField, ValidationError
174
+
175
+
176
+ def validate_octopus_legs(data: dict[str, object]) -> list[ValidationError]:
177
+ if data.get("pet") == "octopus" and data.get("legs") != 8:
178
+ return [
179
+ ValidationError(
180
+ path="legs",
181
+ message={
182
+ "en": "legs must be 8 when pet is octopus.",
183
+ "ja": "pet が octopus の場合、legs は 8 である必要があります。",
184
+ },
185
+ actual=data.get("legs"),
186
+ )
187
+ ]
188
+ return []
189
+
190
+
191
+ schema = Schema(
192
+ fields=[
193
+ StringField(output_name="pet"),
194
+ IntField(output_name="legs", ge=0),
195
+ ],
196
+ validators=[validate_octopus_legs],
197
+ )
198
+ ```
199
+
200
+ Validators run only after field validation succeeds. Exceptions raised inside a
201
+ validator are not converted to LLM-facing validation errors.
202
+
203
+ ## Error Messages and Locale
204
+
205
+ English is the default. Japanese retry messages are available with
206
+ `locale="ja"`.
207
+
208
+ ```python
209
+ schema = Schema(
210
+ locale="ja",
211
+ fields=[IntField(output_name="quantity", gt=0)],
212
+ )
213
+
214
+ result = schema.extract('{"quantity": 0}')
215
+ print(result.error_message)
216
+ ```
217
+
218
+ Internal message templates are split by locale under
219
+ `src/feather_structured_output/locales/`.
@@ -0,0 +1,211 @@
1
+ # feather-structured-output
2
+
3
+ `feather-structured-output` is a lightweight Python library for turning LLM text
4
+ responses into validated Python dictionaries.
5
+
6
+ It does not call an LLM, retry requests, or manage conversation history. It only
7
+ handles schema definition, short JSON prompt generation, JSON extraction, field
8
+ validation, normalization, and retry-friendly error messages.
9
+
10
+ ## Installation
11
+
12
+ This repository uses `uv`. To run the test suite:
13
+
14
+ ```powershell
15
+ uv run pytest
16
+ ```
17
+
18
+ ## Basic Usage
19
+
20
+ ```python
21
+ from feather_structured_output import IntField, Schema, StringField
22
+
23
+ schema = Schema(
24
+ instruction="Extract the following JSON.",
25
+ fields=[
26
+ StringField(
27
+ source_label="Company name",
28
+ output_name="name",
29
+ description="Use the formal company name.",
30
+ example="Acme Corp",
31
+ ),
32
+ IntField(
33
+ source_label="Quantity",
34
+ output_name="quantity",
35
+ gt=0,
36
+ ),
37
+ ],
38
+ )
39
+
40
+ prompt = schema.dump_prompt()
41
+ ```
42
+
43
+ `prompt` becomes a compact JSON example with comments:
44
+
45
+ ```text
46
+ Extract the following JSON.
47
+
48
+ {
49
+ "name": "Acme Corp", // Company name。String. Use the formal company name. Required.
50
+ "quantity": 1 // Quantity。Integer. Greater than 0. Required.
51
+ }
52
+ ```
53
+
54
+ Pass that prompt to any LLM client yourself, then validate the response:
55
+
56
+ ```python
57
+ result = schema.extract('{"name": "Acme Corp", "quantity": 3}')
58
+
59
+ if result.ok:
60
+ data = result.data
61
+ else:
62
+ retry_prompt = result.error_message
63
+ ```
64
+
65
+ ## Fields
66
+
67
+ Available field types:
68
+
69
+ - `StringField`
70
+ - `IntField`
71
+ - `FloatField`
72
+ - `BooleanField`
73
+ - `ObjectField`
74
+ - `UnionField`
75
+
76
+ Common field options:
77
+
78
+ - `source_label`: label from the source document or UI; optional
79
+ - `output_name`: JSON key; if omitted, `source_label` is used
80
+ - `description`: extra instruction for the LLM
81
+ - `example`: sample value in the generated prompt
82
+ - `allowed_values`: fixed allowed values for the field
83
+ - `presence`: missing value policy
84
+ - `repeatable`, `min_items`, `max_items`: array support
85
+
86
+ At least one of `output_name` or `source_label` is required when a field is used
87
+ in a `Schema`.
88
+
89
+ ## Presence Policy
90
+
91
+ ```python
92
+ from feather_structured_output import PresencePolicy, StringField
93
+
94
+ StringField(
95
+ output_name="note",
96
+ presence=PresencePolicy.NULLABLE,
97
+ )
98
+ ```
99
+
100
+ Policies:
101
+
102
+ - `REQUIRED`: value is required
103
+ - `OMITTABLE`: missing value is omitted from normalized data
104
+ - `NULLABLE`: missing value is normalized to `None`
105
+ - `BLANKABLE`: missing value is normalized to `""`
106
+
107
+ `""` is treated as a missing value during validation.
108
+
109
+ ## Arrays and Objects
110
+
111
+ ```python
112
+ from feather_structured_output import IntField, ObjectField, StringField
113
+
114
+ items = ObjectField(
115
+ output_name="items",
116
+ repeatable=True,
117
+ min_items=1,
118
+ fields=[
119
+ StringField(output_name="name"),
120
+ IntField(output_name="quantity", gt=0),
121
+ ],
122
+ )
123
+ ```
124
+
125
+ ## Fixed Values and Unions
126
+
127
+ Use `allowed_values` for fixed values. Use `UnionField` when a value may match
128
+ any one of several field shapes.
129
+
130
+ ```python
131
+ from feather_structured_output import IntField, StringField, UnionField
132
+
133
+ dog_age = UnionField(
134
+ output_name="dog_age",
135
+ variants=[
136
+ StringField(allowed_values=["N/A"]),
137
+ StringField(allowed_values=["-"]),
138
+ IntField(gt=0),
139
+ ],
140
+ )
141
+ ```
142
+
143
+ ## JSON Extraction
144
+
145
+ `Schema.extract()` accepts raw JSON or a single fenced `json` code block. Text
146
+ outside that code block is ignored.
147
+
148
+ ````python
149
+ result = schema.extract(
150
+ """Here is the result:
151
+ ```json
152
+ {"name": "Acme Corp", "quantity": 3}
153
+ ```"""
154
+ )
155
+ ````
156
+
157
+ Multiple JSON code blocks, non-JSON code blocks, invalid JSON, and non-object
158
+ JSON values return an `ExtractResult` with validation errors.
159
+
160
+ ## Custom Validators
161
+
162
+ Use validators for cross-field rules.
163
+
164
+ ```python
165
+ from feather_structured_output import IntField, Schema, StringField, ValidationError
166
+
167
+
168
+ def validate_octopus_legs(data: dict[str, object]) -> list[ValidationError]:
169
+ if data.get("pet") == "octopus" and data.get("legs") != 8:
170
+ return [
171
+ ValidationError(
172
+ path="legs",
173
+ message={
174
+ "en": "legs must be 8 when pet is octopus.",
175
+ "ja": "pet が octopus の場合、legs は 8 である必要があります。",
176
+ },
177
+ actual=data.get("legs"),
178
+ )
179
+ ]
180
+ return []
181
+
182
+
183
+ schema = Schema(
184
+ fields=[
185
+ StringField(output_name="pet"),
186
+ IntField(output_name="legs", ge=0),
187
+ ],
188
+ validators=[validate_octopus_legs],
189
+ )
190
+ ```
191
+
192
+ Validators run only after field validation succeeds. Exceptions raised inside a
193
+ validator are not converted to LLM-facing validation errors.
194
+
195
+ ## Error Messages and Locale
196
+
197
+ English is the default. Japanese retry messages are available with
198
+ `locale="ja"`.
199
+
200
+ ```python
201
+ schema = Schema(
202
+ locale="ja",
203
+ fields=[IntField(output_name="quantity", gt=0)],
204
+ )
205
+
206
+ result = schema.extract('{"quantity": 0}')
207
+ print(result.error_message)
208
+ ```
209
+
210
+ Internal message templates are split by locale under
211
+ `src/feather_structured_output/locales/`.
@@ -0,0 +1,39 @@
1
+ [project]
2
+ name = "feather-structured-output"
3
+ dynamic = ["version"]
4
+ description = "Lightweight library for turning LLM text responses into validated Python dictionaries"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = []
8
+
9
+ [dependency-groups]
10
+ dev = [
11
+ "pytest>=8.3",
12
+ ]
13
+ integration = [
14
+ "pydantic>=2",
15
+ "pymupdf>=1.24",
16
+ "strands-agents[openai]>=1.0",
17
+ ]
18
+
19
+ [build-system]
20
+ requires = ["hatchling", "hatch-vcs"]
21
+ build-backend = "hatchling.build"
22
+
23
+ [tool.hatch.version]
24
+ source = "vcs"
25
+ fallback-version = "0.0.0"
26
+
27
+ [tool.hatch.build.targets.wheel]
28
+ packages = ["src/feather_structured_output"]
29
+
30
+ [tool.hatch.build.targets.sdist]
31
+ include = [
32
+ "/src",
33
+ ]
34
+
35
+ [tool.pytest.ini_options]
36
+ pythonpath = ["src"]
37
+ markers = [
38
+ "integration: tests that require external services such as LM Studio",
39
+ ]