headson 0.6.4__tar.gz → 0.6.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of headson might be problematic. Click here for more details.
- {headson-0.6.4 → headson-0.6.6}/Cargo.lock +1 -1
- {headson-0.6.4 → headson-0.6.6}/Cargo.toml +1 -1
- {headson-0.6.4 → headson-0.6.6}/PKG-INFO +29 -9
- {headson-0.6.4 → headson-0.6.6}/README.md +28 -8
- headson-0.6.6/docs/assets/tapes/demo.gif +0 -0
- {headson-0.6.4 → headson-0.6.6}/pyproject.toml +1 -1
- {headson-0.6.4 → headson-0.6.6}/python/Cargo.lock +2 -2
- {headson-0.6.4 → headson-0.6.6}/python/Cargo.toml +1 -1
- {headson-0.6.4 → headson-0.6.6}/python/README.md +9 -9
- {headson-0.6.4 → headson-0.6.6}/python/src/lib.rs +5 -3
- {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/text/mod.rs +1 -0
- headson-0.6.6/src/lib.rs +337 -0
- {headson-0.6.4 → headson-0.6.6}/src/main.rs +192 -77
- {headson-0.6.4 → headson-0.6.6}/src/order/build.rs +1 -1
- {headson-0.6.4 → headson-0.6.6}/src/order/types.rs +5 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/mod.rs +55 -6
- {headson-0.6.4 → headson-0.6.6}/src/serialization/types.rs +3 -0
- headson-0.6.6/src/utils/measure.rs +50 -0
- {headson-0.6.4 → headson-0.6.6}/src/utils/mod.rs +1 -0
- headson-0.6.4/docs/assets/tapes/demo.gif +0 -0
- headson-0.6.4/src/lib.rs +0 -176
- {headson-0.6.4 → headson-0.6.6}/docs/assets/algorithm.svg +0 -0
- {headson-0.6.4 → headson-0.6.6}/docs/assets/logo.png +0 -0
- {headson-0.6.4 → headson-0.6.6}/docs/assets/logo.svg +0 -0
- {headson-0.6.4 → headson-0.6.6}/python/headson/__init__.py +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/format.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/builder.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/mod.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/samplers/default.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/samplers/head.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/samplers/mod.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/samplers/tail.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/mod.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/yaml/mod.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/mod.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/ingest/sampling/mod.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/order/mod.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/order/scoring.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/order/snapshots/headson__order__build__tests__order_empty_array_order.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/order/snapshots/headson__order__build__tests__order_single_string_array_order.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/color.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/fileset.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/output.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty_yaml.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__arena_render_single.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__arena_render_single_yaml.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_internal_gaps_yaml.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_head.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_tail.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_head.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_tail.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_head.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_tail.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_json.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_yaml.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/core.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/js.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/json.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/mod.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/pseudo.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/text.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/yaml.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/snapshots/headson__order__tests__order_empty_array_order.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/snapshots/headson__order__tests__order_single_string_array_order.snap +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/utils/graph.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/utils/json.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/utils/search.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/utils/text.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/src/utils/tree_arena.rs +0 -0
- {headson-0.6.4 → headson-0.6.6}/tests/fixtures/json/JSONTestSuite/LICENSE +0 -0
- {headson-0.6.4 → headson-0.6.6}/tests/fixtures/json/JSONTestSuite/README.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: headson
|
|
3
|
-
Version: 0.6.4
|
|
3
|
+
Version: 0.6.6
|
|
4
4
|
Classifier: Programming Language :: Python
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Programming Language :: Rust
|
|
@@ -20,7 +20,7 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
|
20
20
|
<br/>
|
|
21
21
|
</p>
|
|
22
22
|
|
|
23
|
-
`head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict
|
|
23
|
+
`head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict byte budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
|
|
24
24
|
|
|
25
25
|
Available as:
|
|
26
26
|
- CLI (see [Usage](#usage))
|
|
@@ -70,7 +70,8 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
|
|
|
70
70
|
|
|
71
71
|
Common flags:
|
|
72
72
|
|
|
73
|
-
- `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
|
|
73
|
+
- `-c, --bytes <BYTES>`: per‑file output budget (bytes). For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
|
|
74
|
+
- `-u, --chars <CHARS>`: per‑file output budget (Unicode code points). Behaves like `--bytes` but counts characters instead of bytes.
|
|
74
75
|
- `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
|
|
75
76
|
- `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
|
|
76
77
|
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
|
|
@@ -97,6 +98,25 @@ Notes:
|
|
|
97
98
|
- Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
|
|
98
99
|
- Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
|
|
99
100
|
|
|
101
|
+
## Budget Modes
|
|
102
|
+
|
|
103
|
+
- Bytes (`-c/--bytes`, `-C/--global-bytes`)
|
|
104
|
+
- Measures UTF‑8 bytes in the output.
|
|
105
|
+
- Default per‑file budget is 500 bytes when neither `--lines` nor `--chars` is provided.
|
|
106
|
+
- Multiple inputs: total default budget is `<BYTES> * number_of_inputs`; `--global-bytes` caps the total.
|
|
107
|
+
|
|
108
|
+
- Characters (`-u/--chars`)
|
|
109
|
+
- Measures Unicode code points (not grapheme clusters).
|
|
110
|
+
|
|
111
|
+
- Lines (`-n/--lines`, `-N/--global-lines`)
|
|
112
|
+
- Caps the number of lines in the output.
|
|
113
|
+
- Incompatible with `--no-newline`.
|
|
114
|
+
- Multiple inputs: defaults to `<LINES> * number_of_inputs`; `--global-lines` caps the total.
|
|
115
|
+
|
|
116
|
+
- Interactions and precedence
|
|
117
|
+
- All active budgets are enforced simultaneously. The render must satisfy all of: bytes (if set), chars (if set), and lines (if set). The strictest cap wins.
|
|
118
|
+
- When only lines are specified, no implicit byte cap applies. When neither lines nor chars are specified, a 500‑byte default applies.
|
|
119
|
+
|
|
100
120
|
Quick one‑liners:
|
|
101
121
|
|
|
102
122
|
- Peek a big JSON stream (keeps structure):
|
|
@@ -190,11 +210,11 @@ A thin Python extension module is available on PyPI as `headson`.
|
|
|
190
210
|
|
|
191
211
|
- Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
|
|
192
212
|
- API:
|
|
193
|
-
- `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json",
|
|
213
|
+
- `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", byte_budget: int | None = None, skew: str = "balanced") -> str`
|
|
194
214
|
- `format`: `"auto" | "json" | "yaml"` (auto maps to JSON family for single inputs)
|
|
195
215
|
- `style`: `"strict" | "default" | "detailed"`
|
|
196
216
|
- `input_format`: `"json" | "yaml"` (ingestion)
|
|
197
|
-
- `
|
|
217
|
+
- `byte_budget`: maximum output size in bytes (default: 500)
|
|
198
218
|
- `skew`: `"balanced" | "head" | "tail"` (affects display styles; strict JSON remains unannotated)
|
|
199
219
|
|
|
200
220
|
Examples:
|
|
@@ -204,7 +224,7 @@ import json
|
|
|
204
224
|
import headson
|
|
205
225
|
|
|
206
226
|
data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
|
|
207
|
-
preview = headson.summarize(json.dumps(data), format="json", style="strict",
|
|
227
|
+
preview = headson.summarize(json.dumps(data), format="json", style="strict", byte_budget=200)
|
|
208
228
|
print(preview)
|
|
209
229
|
|
|
210
230
|
# Prefer the tail of arrays (annotations show with style="default"/"detailed")
|
|
@@ -213,14 +233,14 @@ print(
|
|
|
213
233
|
json.dumps(list(range(100))),
|
|
214
234
|
format="json",
|
|
215
235
|
style="detailed",
|
|
216
|
-
|
|
236
|
+
byte_budget=80,
|
|
217
237
|
skew="tail",
|
|
218
238
|
)
|
|
219
239
|
)
|
|
220
240
|
|
|
221
241
|
# YAML support
|
|
222
242
|
doc = "root:\n items: [1,2,3,4,5,6,7,8,9,10]\n"
|
|
223
|
-
print(headson.summarize(doc, format="yaml", style="default", input_format="yaml",
|
|
243
|
+
print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", byte_budget=60))
|
|
224
244
|
```
|
|
225
245
|
|
|
226
246
|
# Algorithm
|
|
@@ -230,7 +250,7 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
|
|
|
230
250
|
## Footnotes
|
|
231
251
|
- <sup><b>[1]</b></sup> <b>Optimized tree representation</b>: An arena‑style tree stored in flat, contiguous buffers. Each node records its kind and value plus index ranges into shared child and key arrays. Arrays are ingested in a single pass and may be deterministically pre‑sampled: the first element is always kept; additional elements are selected via a fixed per‑index inclusion test; for kept elements, original indices are stored and full lengths are counted. This enables accurate omission info and internal gap markers later, while minimizing pointer chasing.
|
|
232
252
|
- <sup><b>[2]</b></sup> <b>Priority order</b>: Nodes are scored so previews surface representative structure and values first. Arrays can favor head/mid/tail coverage (default) or strictly the head; tail preference flips head/tail when configured. Object properties are ordered by key, and strings expand by grapheme with early characters prioritized over very deep expansions.
|
|
233
|
-
- <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the
|
|
253
|
+
- <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the byte budget, looping between “choose N” and a render attempt to converge quickly.
|
|
234
254
|
- <sup><b>[4]</b></sup> <b>Render attempt</b>: Serializes the currently included nodes using the selected template. Omission summaries and per-file section headers appear in display templates (pseudo/js); json remains strict. For arrays, display templates may insert internal gap markers between non‑contiguous kept items using original indices.
|
|
235
255
|
- <sup><b>[5]</b></sup> <b>Diagram source</b>: The Algorithm diagram is generated from `docs/diagrams/algorithm.mmd`. Regenerate the SVG with `cargo make diagrams` before releasing.
|
|
236
256
|
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
<br/>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
`head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict
|
|
9
|
+
`head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict byte budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
|
|
10
10
|
|
|
11
11
|
Available as:
|
|
12
12
|
- CLI (see [Usage](#usage))
|
|
@@ -56,7 +56,8 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
|
|
|
56
56
|
|
|
57
57
|
Common flags:
|
|
58
58
|
|
|
59
|
-
- `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
|
|
59
|
+
- `-c, --bytes <BYTES>`: per‑file output budget (bytes). For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
|
|
60
|
+
- `-u, --chars <CHARS>`: per‑file output budget (Unicode code points). Behaves like `--bytes` but counts characters instead of bytes.
|
|
60
61
|
- `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
|
|
61
62
|
- `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
|
|
62
63
|
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
|
|
@@ -83,6 +84,25 @@ Notes:
|
|
|
83
84
|
- Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
|
|
84
85
|
- Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
|
|
85
86
|
|
|
87
|
+
## Budget Modes
|
|
88
|
+
|
|
89
|
+
- Bytes (`-c/--bytes`, `-C/--global-bytes`)
|
|
90
|
+
- Measures UTF‑8 bytes in the output.
|
|
91
|
+
- Default per‑file budget is 500 bytes when neither `--lines` nor `--chars` is provided.
|
|
92
|
+
- Multiple inputs: total default budget is `<BYTES> * number_of_inputs`; `--global-bytes` caps the total.
|
|
93
|
+
|
|
94
|
+
- Characters (`-u/--chars`)
|
|
95
|
+
- Measures Unicode code points (not grapheme clusters).
|
|
96
|
+
|
|
97
|
+
- Lines (`-n/--lines`, `-N/--global-lines`)
|
|
98
|
+
- Caps the number of lines in the output.
|
|
99
|
+
- Incompatible with `--no-newline`.
|
|
100
|
+
- Multiple inputs: defaults to `<LINES> * number_of_inputs`; `--global-lines` caps the total.
|
|
101
|
+
|
|
102
|
+
- Interactions and precedence
|
|
103
|
+
- All active budgets are enforced simultaneously. The render must satisfy all of: bytes (if set), chars (if set), and lines (if set). The strictest cap wins.
|
|
104
|
+
- When only lines are specified, no implicit byte cap applies. When neither lines nor chars are specified, a 500‑byte default applies.
|
|
105
|
+
|
|
86
106
|
Quick one‑liners:
|
|
87
107
|
|
|
88
108
|
- Peek a big JSON stream (keeps structure):
|
|
@@ -176,11 +196,11 @@ A thin Python extension module is available on PyPI as `headson`.
|
|
|
176
196
|
|
|
177
197
|
- Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
|
|
178
198
|
- API:
|
|
179
|
-
- `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json",
|
|
199
|
+
- `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", byte_budget: int | None = None, skew: str = "balanced") -> str`
|
|
180
200
|
- `format`: `"auto" | "json" | "yaml"` (auto maps to JSON family for single inputs)
|
|
181
201
|
- `style`: `"strict" | "default" | "detailed"`
|
|
182
202
|
- `input_format`: `"json" | "yaml"` (ingestion)
|
|
183
|
-
- `
|
|
203
|
+
- `byte_budget`: maximum output size in bytes (default: 500)
|
|
184
204
|
- `skew`: `"balanced" | "head" | "tail"` (affects display styles; strict JSON remains unannotated)
|
|
185
205
|
|
|
186
206
|
Examples:
|
|
@@ -190,7 +210,7 @@ import json
|
|
|
190
210
|
import headson
|
|
191
211
|
|
|
192
212
|
data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
|
|
193
|
-
preview = headson.summarize(json.dumps(data), format="json", style="strict",
|
|
213
|
+
preview = headson.summarize(json.dumps(data), format="json", style="strict", byte_budget=200)
|
|
194
214
|
print(preview)
|
|
195
215
|
|
|
196
216
|
# Prefer the tail of arrays (annotations show with style="default"/"detailed")
|
|
@@ -199,14 +219,14 @@ print(
|
|
|
199
219
|
json.dumps(list(range(100))),
|
|
200
220
|
format="json",
|
|
201
221
|
style="detailed",
|
|
202
|
-
|
|
222
|
+
byte_budget=80,
|
|
203
223
|
skew="tail",
|
|
204
224
|
)
|
|
205
225
|
)
|
|
206
226
|
|
|
207
227
|
# YAML support
|
|
208
228
|
doc = "root:\n items: [1,2,3,4,5,6,7,8,9,10]\n"
|
|
209
|
-
print(headson.summarize(doc, format="yaml", style="default", input_format="yaml",
|
|
229
|
+
print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", byte_budget=60))
|
|
210
230
|
```
|
|
211
231
|
|
|
212
232
|
# Algorithm
|
|
@@ -216,7 +236,7 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
|
|
|
216
236
|
## Footnotes
|
|
217
237
|
- <sup><b>[1]</b></sup> <b>Optimized tree representation</b>: An arena‑style tree stored in flat, contiguous buffers. Each node records its kind and value plus index ranges into shared child and key arrays. Arrays are ingested in a single pass and may be deterministically pre‑sampled: the first element is always kept; additional elements are selected via a fixed per‑index inclusion test; for kept elements, original indices are stored and full lengths are counted. This enables accurate omission info and internal gap markers later, while minimizing pointer chasing.
|
|
218
238
|
- <sup><b>[2]</b></sup> <b>Priority order</b>: Nodes are scored so previews surface representative structure and values first. Arrays can favor head/mid/tail coverage (default) or strictly the head; tail preference flips head/tail when configured. Object properties are ordered by key, and strings expand by grapheme with early characters prioritized over very deep expansions.
|
|
219
|
-
- <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the
|
|
239
|
+
- <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the byte budget, looping between “choose N” and a render attempt to converge quickly.
|
|
220
240
|
- <sup><b>[4]</b></sup> <b>Render attempt</b>: Serializes the currently included nodes using the selected template. Omission summaries and per-file section headers appear in display templates (pseudo/js); json remains strict. For arrays, display templates may insert internal gap markers between non‑contiguous kept items using original indices.
|
|
221
241
|
- <sup><b>[5]</b></sup> <b>Diagram source</b>: The Algorithm diagram is generated from `docs/diagrams/algorithm.mmd`. Regenerate the SVG with `cargo make diagrams` before releasing.
|
|
222
242
|
|
|
Binary file
|
|
@@ -214,7 +214,7 @@ dependencies = [
|
|
|
214
214
|
|
|
215
215
|
[[package]]
|
|
216
216
|
name = "headson"
|
|
217
|
-
version = "0.6.4"
|
|
217
|
+
version = "0.6.6"
|
|
218
218
|
dependencies = [
|
|
219
219
|
"anyhow",
|
|
220
220
|
"clap",
|
|
@@ -228,7 +228,7 @@ dependencies = [
|
|
|
228
228
|
|
|
229
229
|
[[package]]
|
|
230
230
|
name = "headson-python"
|
|
231
|
-
version = "0.6.4"
|
|
231
|
+
version = "0.6.6"
|
|
232
232
|
dependencies = [
|
|
233
233
|
"anyhow",
|
|
234
234
|
"headson",
|
|
@@ -4,11 +4,11 @@ Minimal Python API for the `headson` preview renderer.
|
|
|
4
4
|
|
|
5
5
|
API
|
|
6
6
|
|
|
7
|
-
- `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json",
|
|
7
|
+
- `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", byte_budget: int | None = None, skew: str = "balanced") -> str`
|
|
8
8
|
- `format`: output format — `"auto" | "json" | "yaml" | "text"`.
|
|
9
9
|
- `style`: output style — `"strict" | "default" | "detailed"`.
|
|
10
10
|
- `input_format`: ingestion format — `"json" | "yaml" | "text"`.
|
|
11
|
-
- `
|
|
11
|
+
- `byte_budget`: maximum output size in bytes (defaults to 500 if not set).
|
|
12
12
|
- `skew`: one of `"balanced" | "head" | "tail"`.
|
|
13
13
|
- `balanced` (default), `head` keeps first N, `tail` keeps last N. Display styles place omission markers accordingly; strict JSON remains unannotated.
|
|
14
14
|
- Notes:
|
|
@@ -20,26 +20,26 @@ Examples:
|
|
|
20
20
|
import headson
|
|
21
21
|
|
|
22
22
|
# Human-friendly JSON (Pseudo) with a small budget
|
|
23
|
-
print(headson.summarize('{"a": 1, "b": [1,2,3]}', format="json", style="default",
|
|
23
|
+
print(headson.summarize('{"a": 1, "b": [1,2,3]}', format="json", style="default", byte_budget=80))
|
|
24
24
|
|
|
25
25
|
# Strict JSON stays valid JSON
|
|
26
|
-
print(headson.summarize('{"a": 1, "b": {"c": 2}}', format="json", style="strict",
|
|
26
|
+
print(headson.summarize('{"a": 1, "b": {"c": 2}}', format="json", style="strict", byte_budget=10_000))
|
|
27
27
|
|
|
28
28
|
# Annotated JSON (JS) with tail skew: prefer the end of arrays when truncating
|
|
29
29
|
arr = ','.join(str(i) for i in range(100))
|
|
30
|
-
print(headson.summarize('{"arr": [' + arr + ']}', format="json", style="detailed",
|
|
30
|
+
print(headson.summarize('{"arr": [' + arr + ']}', format="json", style="detailed", byte_budget=60, skew="tail"))
|
|
31
31
|
|
|
32
32
|
# YAML styles: strict (no comments), default (… comments), detailed (counts)
|
|
33
33
|
doc = 'root:\n items: [1,2,3,4,5,6,7,8,9,10]\n'
|
|
34
|
-
print(headson.summarize(doc, format="yaml", style="strict", input_format="yaml",
|
|
35
|
-
print(headson.summarize(doc, format="yaml", style="default", input_format="yaml",
|
|
36
|
-
print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml",
|
|
34
|
+
print(headson.summarize(doc, format="yaml", style="strict", input_format="yaml", byte_budget=60))
|
|
35
|
+
print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", byte_budget=60))
|
|
36
|
+
print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml", byte_budget=60))
|
|
37
37
|
|
|
38
38
|
# Note: tail mode affects only display styles; strict JSON stays strict.
|
|
39
39
|
|
|
40
40
|
# Text: render raw lines with omission markers depending on style
|
|
41
41
|
text = "one\ntwo\nthree\n"
|
|
42
|
-
print(headson.summarize(text, format="text", style="default", input_format="text",
|
|
42
|
+
print(headson.summarize(text, format="text", style="default", input_format="text", byte_budget=10))
|
|
43
43
|
```
|
|
44
44
|
|
|
45
45
|
Install for development:
|
|
@@ -59,6 +59,7 @@ fn render_config_with_sampler(
|
|
|
59
59
|
color_mode: ColorMode::Auto,
|
|
60
60
|
color_enabled: false,
|
|
61
61
|
style: s,
|
|
62
|
+
string_free_prefix_graphemes: None,
|
|
62
63
|
})
|
|
63
64
|
}
|
|
64
65
|
|
|
@@ -85,6 +86,7 @@ fn priority_config(
|
|
|
85
86
|
prefer_tail_arrays,
|
|
86
87
|
array_bias: headson_core::ArrayBias::HeadMidTail,
|
|
87
88
|
array_sampler: sampler,
|
|
89
|
+
line_budget_only: false,
|
|
88
90
|
}
|
|
89
91
|
}
|
|
90
92
|
|
|
@@ -93,19 +95,19 @@ fn to_pyerr(e: anyhow::Error) -> PyErr {
|
|
|
93
95
|
}
|
|
94
96
|
|
|
95
97
|
#[pyfunction]
|
|
96
|
-
#[pyo3(signature = (text, *, format="auto", style="default",
|
|
98
|
+
#[pyo3(signature = (text, *, format="auto", style="default", byte_budget=None, skew="balanced", input_format="json"))]
|
|
97
99
|
fn summarize(
|
|
98
100
|
py: Python<'_>,
|
|
99
101
|
text: &str,
|
|
100
102
|
format: &str,
|
|
101
103
|
style: &str,
|
|
102
|
-
|
|
104
|
+
byte_budget: Option<usize>,
|
|
103
105
|
skew: &str,
|
|
104
106
|
input_format: &str,
|
|
105
107
|
) -> PyResult<String> {
|
|
106
108
|
let sampler = parse_skew(skew).map_err(to_pyerr)?;
|
|
107
109
|
let cfg = render_config_with_sampler(format, style, sampler).map_err(to_pyerr)?;
|
|
108
|
-
let budget =
|
|
110
|
+
let budget = byte_budget.unwrap_or(500);
|
|
109
111
|
let per_file_for_priority = budget.max(1);
|
|
110
112
|
let prio = priority_config(per_file_for_priority, sampler);
|
|
111
113
|
let input = text.as_bytes().to_vec();
|