headson 0.6.4__tar.gz → 0.6.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of headson might be problematic. Click here for more details.

Files changed (72) hide show
  1. {headson-0.6.4 → headson-0.6.6}/Cargo.lock +1 -1
  2. {headson-0.6.4 → headson-0.6.6}/Cargo.toml +1 -1
  3. {headson-0.6.4 → headson-0.6.6}/PKG-INFO +29 -9
  4. {headson-0.6.4 → headson-0.6.6}/README.md +28 -8
  5. headson-0.6.6/docs/assets/tapes/demo.gif +0 -0
  6. {headson-0.6.4 → headson-0.6.6}/pyproject.toml +1 -1
  7. {headson-0.6.4 → headson-0.6.6}/python/Cargo.lock +2 -2
  8. {headson-0.6.4 → headson-0.6.6}/python/Cargo.toml +1 -1
  9. {headson-0.6.4 → headson-0.6.6}/python/README.md +9 -9
  10. {headson-0.6.4 → headson-0.6.6}/python/src/lib.rs +5 -3
  11. {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/text/mod.rs +1 -0
  12. headson-0.6.6/src/lib.rs +337 -0
  13. {headson-0.6.4 → headson-0.6.6}/src/main.rs +192 -77
  14. {headson-0.6.4 → headson-0.6.6}/src/order/build.rs +1 -1
  15. {headson-0.6.4 → headson-0.6.6}/src/order/types.rs +5 -0
  16. {headson-0.6.4 → headson-0.6.6}/src/serialization/mod.rs +55 -6
  17. {headson-0.6.4 → headson-0.6.6}/src/serialization/types.rs +3 -0
  18. headson-0.6.6/src/utils/measure.rs +50 -0
  19. {headson-0.6.4 → headson-0.6.6}/src/utils/mod.rs +1 -0
  20. headson-0.6.4/docs/assets/tapes/demo.gif +0 -0
  21. headson-0.6.4/src/lib.rs +0 -176
  22. {headson-0.6.4 → headson-0.6.6}/docs/assets/algorithm.svg +0 -0
  23. {headson-0.6.4 → headson-0.6.6}/docs/assets/logo.png +0 -0
  24. {headson-0.6.4 → headson-0.6.6}/docs/assets/logo.svg +0 -0
  25. {headson-0.6.4 → headson-0.6.6}/python/headson/__init__.py +0 -0
  26. {headson-0.6.4 → headson-0.6.6}/src/format.rs +0 -0
  27. {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/builder.rs +0 -0
  28. {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/mod.rs +0 -0
  29. {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/samplers/default.rs +0 -0
  30. {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/samplers/head.rs +0 -0
  31. {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/samplers/mod.rs +0 -0
  32. {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/json/samplers/tail.rs +0 -0
  33. {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/mod.rs +0 -0
  34. {headson-0.6.4 → headson-0.6.6}/src/ingest/formats/yaml/mod.rs +0 -0
  35. {headson-0.6.4 → headson-0.6.6}/src/ingest/mod.rs +0 -0
  36. {headson-0.6.4 → headson-0.6.6}/src/ingest/sampling/mod.rs +0 -0
  37. {headson-0.6.4 → headson-0.6.6}/src/order/mod.rs +0 -0
  38. {headson-0.6.4 → headson-0.6.6}/src/order/scoring.rs +0 -0
  39. {headson-0.6.4 → headson-0.6.6}/src/order/snapshots/headson__order__build__tests__order_empty_array_order.snap +0 -0
  40. {headson-0.6.4 → headson-0.6.6}/src/order/snapshots/headson__order__build__tests__order_single_string_array_order.snap +0 -0
  41. {headson-0.6.4 → headson-0.6.6}/src/serialization/color.rs +0 -0
  42. {headson-0.6.4 → headson-0.6.6}/src/serialization/fileset.rs +0 -0
  43. {headson-0.6.4 → headson-0.6.6}/src/serialization/output.rs +0 -0
  44. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty.snap +0 -0
  45. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty_yaml.snap +0 -0
  46. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__arena_render_single.snap +0 -0
  47. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__arena_render_single_yaml.snap +0 -0
  48. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_internal_gaps_yaml.snap +0 -0
  49. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_head.snap +0 -0
  50. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_tail.snap +0 -0
  51. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_head.snap +0 -0
  52. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_tail.snap +0 -0
  53. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_head.snap +0 -0
  54. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_tail.snap +0 -0
  55. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_json.snap +0 -0
  56. {headson-0.6.4 → headson-0.6.6}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_yaml.snap +0 -0
  57. {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/core.rs +0 -0
  58. {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/js.rs +0 -0
  59. {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/json.rs +0 -0
  60. {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/mod.rs +0 -0
  61. {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/pseudo.rs +0 -0
  62. {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/text.rs +0 -0
  63. {headson-0.6.4 → headson-0.6.6}/src/serialization/templates/yaml.rs +0 -0
  64. {headson-0.6.4 → headson-0.6.6}/src/snapshots/headson__order__tests__order_empty_array_order.snap +0 -0
  65. {headson-0.6.4 → headson-0.6.6}/src/snapshots/headson__order__tests__order_single_string_array_order.snap +0 -0
  66. {headson-0.6.4 → headson-0.6.6}/src/utils/graph.rs +0 -0
  67. {headson-0.6.4 → headson-0.6.6}/src/utils/json.rs +0 -0
  68. {headson-0.6.4 → headson-0.6.6}/src/utils/search.rs +0 -0
  69. {headson-0.6.4 → headson-0.6.6}/src/utils/text.rs +0 -0
  70. {headson-0.6.4 → headson-0.6.6}/src/utils/tree_arena.rs +0 -0
  71. {headson-0.6.4 → headson-0.6.6}/tests/fixtures/json/JSONTestSuite/LICENSE +0 -0
  72. {headson-0.6.4 → headson-0.6.6}/tests/fixtures/json/JSONTestSuite/README.md +0 -0
@@ -298,7 +298,7 @@ dependencies = [
298
298
 
299
299
  [[package]]
300
300
  name = "headson"
301
- version = "0.6.4"
301
+ version = "0.6.6"
302
302
  dependencies = [
303
303
  "anyhow",
304
304
  "assert_cmd",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson"
3
- version = "0.6.4"
3
+ version = "0.6.6"
4
4
  edition = "2024"
5
5
  description = "Budget‑constrained JSON preview renderer"
6
6
  readme = "README.md"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: headson
3
- Version: 0.6.4
3
+ Version: 0.6.6
4
4
  Classifier: Programming Language :: Python
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Rust
@@ -20,7 +20,7 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
20
20
  <br/>
21
21
  </p>
22
22
 
23
- `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
23
+ `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict byte budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
24
24
 
25
25
  Available as:
26
26
  - CLI (see [Usage](#usage))
@@ -70,7 +70,8 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
70
70
 
71
71
  Common flags:
72
72
 
73
- - `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
73
+ - `-c, --bytes <BYTES>`: per‑file output budget (bytes). For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
74
+ - `-u, --chars <CHARS>`: per‑file output budget (Unicode code points). Behaves like `--bytes` but counts characters instead of bytes.
74
75
  - `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
75
76
  - `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
76
77
  - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
@@ -97,6 +98,25 @@ Notes:
97
98
  - Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
98
99
  - Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
99
100
 
101
+ ## Budget Modes
102
+
103
+ - Bytes (`-c/--bytes`, `-C/--global-bytes`)
104
+ - Measures UTF‑8 bytes in the output.
105
+ - Default per‑file budget is 500 bytes when neither `--lines` nor `--chars` is provided.
106
+ - Multiple inputs: total default budget is `<BYTES> * number_of_inputs`; `--global-bytes` caps the total.
107
+
108
+ - Characters (`-u/--chars`)
109
+ - Measures Unicode code points (not grapheme clusters).
110
+
111
+ - Lines (`-n/--lines`, `-N/--global-lines`)
112
+ - Caps the number of lines in the output.
113
+ - Incompatible with `--no-newline`.
114
+ - Multiple inputs: defaults to `<LINES> * number_of_inputs`; `--global-lines` caps the total.
115
+
116
+ - Interactions and precedence
117
+ - All active budgets are enforced simultaneously. The render must satisfy all of: bytes (if set), chars (if set), and lines (if set). The strictest cap wins.
118
+ - When only lines are specified, no implicit byte cap applies. When neither lines nor chars are specified, a 500‑byte default applies.
119
+
100
120
  Quick one‑liners:
101
121
 
102
122
  - Peek a big JSON stream (keeps structure):
@@ -190,11 +210,11 @@ A thin Python extension module is available on PyPI as `headson`.
190
210
 
191
211
  - Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
192
212
  - API:
193
- - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
213
+ - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", byte_budget: int | None = None, skew: str = "balanced") -> str`
194
214
  - `format`: `"auto" | "json" | "yaml"` (auto maps to JSON family for single inputs)
195
215
  - `style`: `"strict" | "default" | "detailed"`
196
216
  - `input_format`: `"json" | "yaml"` (ingestion)
197
- - `character_budget`: maximum output size in characters (default: 500)
217
+ - `byte_budget`: maximum output size in bytes (default: 500)
198
218
  - `skew`: `"balanced" | "head" | "tail"` (affects display styles; strict JSON remains unannotated)
199
219
 
200
220
  Examples:
@@ -204,7 +224,7 @@ import json
204
224
  import headson
205
225
 
206
226
  data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
207
- preview = headson.summarize(json.dumps(data), format="json", style="strict", character_budget=200)
227
+ preview = headson.summarize(json.dumps(data), format="json", style="strict", byte_budget=200)
208
228
  print(preview)
209
229
 
210
230
  # Prefer the tail of arrays (annotations show with style="default"/"detailed")
@@ -213,14 +233,14 @@ print(
213
233
  json.dumps(list(range(100))),
214
234
  format="json",
215
235
  style="detailed",
216
- character_budget=80,
236
+ byte_budget=80,
217
237
  skew="tail",
218
238
  )
219
239
  )
220
240
 
221
241
  # YAML support
222
242
  doc = "root:\n items: [1,2,3,4,5,6,7,8,9,10]\n"
223
- print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", character_budget=60))
243
+ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", byte_budget=60))
224
244
  ```
225
245
 
226
246
  # Algorithm
@@ -230,7 +250,7 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
230
250
  ## Footnotes
231
251
  - <sup><b>[1]</b></sup> <b>Optimized tree representation</b>: An arena‑style tree stored in flat, contiguous buffers. Each node records its kind and value plus index ranges into shared child and key arrays. Arrays are ingested in a single pass and may be deterministically pre‑sampled: the first element is always kept; additional elements are selected via a fixed per‑index inclusion test; for kept elements, original indices are stored and full lengths are counted. This enables accurate omission info and internal gap markers later, while minimizing pointer chasing.
232
252
  - <sup><b>[2]</b></sup> <b>Priority order</b>: Nodes are scored so previews surface representative structure and values first. Arrays can favor head/mid/tail coverage (default) or strictly the head; tail preference flips head/tail when configured. Object properties are ordered by key, and strings expand by grapheme with early characters prioritized over very deep expansions.
233
- - <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the character budget, looping between “choose N” and a render attempt to converge quickly.
253
+ - <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the byte budget, looping between “choose N” and a render attempt to converge quickly.
234
254
  - <sup><b>[4]</b></sup> <b>Render attempt</b>: Serializes the currently included nodes using the selected template. Omission summaries and per-file section headers appear in display templates (pseudo/js); json remains strict. For arrays, display templates may insert internal gap markers between non‑contiguous kept items using original indices.
235
255
  - <sup><b>[5]</b></sup> <b>Diagram source</b>: The Algorithm diagram is generated from `docs/diagrams/algorithm.mmd`. Regenerate the SVG with `cargo make diagrams` before releasing.
236
256
 
@@ -6,7 +6,7 @@
6
6
  <br/>
7
7
  </p>
8
8
 
9
- `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
9
+ `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict byte budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
10
10
 
11
11
  Available as:
12
12
  - CLI (see [Usage](#usage))
@@ -56,7 +56,8 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
56
56
 
57
57
  Common flags:
58
58
 
59
- - `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
59
+ - `-c, --bytes <BYTES>`: per‑file output budget (bytes). For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
60
+ - `-u, --chars <CHARS>`: per‑file output budget (Unicode code points). Behaves like `--bytes` but counts characters instead of bytes.
60
61
  - `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
61
62
  - `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
62
63
  - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
@@ -83,6 +84,25 @@ Notes:
83
84
  - Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
84
85
  - Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
85
86
 
87
+ ## Budget Modes
88
+
89
+ - Bytes (`-c/--bytes`, `-C/--global-bytes`)
90
+ - Measures UTF‑8 bytes in the output.
91
+ - Default per‑file budget is 500 bytes when neither `--lines` nor `--chars` is provided.
92
+ - Multiple inputs: total default budget is `<BYTES> * number_of_inputs`; `--global-bytes` caps the total.
93
+
94
+ - Characters (`-u/--chars`)
95
+ - Measures Unicode code points (not grapheme clusters).
96
+
97
+ - Lines (`-n/--lines`, `-N/--global-lines`)
98
+ - Caps the number of lines in the output.
99
+ - Incompatible with `--no-newline`.
100
+ - Multiple inputs: defaults to `<LINES> * number_of_inputs`; `--global-lines` caps the total.
101
+
102
+ - Interactions and precedence
103
+ - All active budgets are enforced simultaneously. The render must satisfy all of: bytes (if set), chars (if set), and lines (if set). The strictest cap wins.
104
+ - When only lines are specified, no implicit byte cap applies. When neither lines nor chars are specified, a 500‑byte default applies.
105
+
86
106
  Quick one‑liners:
87
107
 
88
108
  - Peek a big JSON stream (keeps structure):
@@ -176,11 +196,11 @@ A thin Python extension module is available on PyPI as `headson`.
176
196
 
177
197
  - Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
178
198
  - API:
179
- - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
199
+ - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", byte_budget: int | None = None, skew: str = "balanced") -> str`
180
200
  - `format`: `"auto" | "json" | "yaml"` (auto maps to JSON family for single inputs)
181
201
  - `style`: `"strict" | "default" | "detailed"`
182
202
  - `input_format`: `"json" | "yaml"` (ingestion)
183
- - `character_budget`: maximum output size in characters (default: 500)
203
+ - `byte_budget`: maximum output size in bytes (default: 500)
184
204
  - `skew`: `"balanced" | "head" | "tail"` (affects display styles; strict JSON remains unannotated)
185
205
 
186
206
  Examples:
@@ -190,7 +210,7 @@ import json
190
210
  import headson
191
211
 
192
212
  data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
193
- preview = headson.summarize(json.dumps(data), format="json", style="strict", character_budget=200)
213
+ preview = headson.summarize(json.dumps(data), format="json", style="strict", byte_budget=200)
194
214
  print(preview)
195
215
 
196
216
  # Prefer the tail of arrays (annotations show with style="default"/"detailed")
@@ -199,14 +219,14 @@ print(
199
219
  json.dumps(list(range(100))),
200
220
  format="json",
201
221
  style="detailed",
202
- character_budget=80,
222
+ byte_budget=80,
203
223
  skew="tail",
204
224
  )
205
225
  )
206
226
 
207
227
  # YAML support
208
228
  doc = "root:\n items: [1,2,3,4,5,6,7,8,9,10]\n"
209
- print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", character_budget=60))
229
+ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", byte_budget=60))
210
230
  ```
211
231
 
212
232
  # Algorithm
@@ -216,7 +236,7 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
216
236
  ## Footnotes
217
237
  - <sup><b>[1]</b></sup> <b>Optimized tree representation</b>: An arena‑style tree stored in flat, contiguous buffers. Each node records its kind and value plus index ranges into shared child and key arrays. Arrays are ingested in a single pass and may be deterministically pre‑sampled: the first element is always kept; additional elements are selected via a fixed per‑index inclusion test; for kept elements, original indices are stored and full lengths are counted. This enables accurate omission info and internal gap markers later, while minimizing pointer chasing.
218
238
  - <sup><b>[2]</b></sup> <b>Priority order</b>: Nodes are scored so previews surface representative structure and values first. Arrays can favor head/mid/tail coverage (default) or strictly the head; tail preference flips head/tail when configured. Object properties are ordered by key, and strings expand by grapheme with early characters prioritized over very deep expansions.
219
- - <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the character budget, looping between “choose N” and a render attempt to converge quickly.
239
+ - <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the byte budget, looping between “choose N” and a render attempt to converge quickly.
220
240
  - <sup><b>[4]</b></sup> <b>Render attempt</b>: Serializes the currently included nodes using the selected template. Omission summaries and per-file section headers appear in display templates (pseudo/js); json remains strict. For arrays, display templates may insert internal gap markers between non‑contiguous kept items using original indices.
221
241
  - <sup><b>[5]</b></sup> <b>Diagram source</b>: The Algorithm diagram is generated from `docs/diagrams/algorithm.mmd`. Regenerate the SVG with `cargo make diagrams` before releasing.
222
242
 
Binary file
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "headson"
7
- version = "0.6.4"
7
+ version = "0.6.6"
8
8
  description = "Budget‑constrained JSON preview renderer (Python bindings)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -214,7 +214,7 @@ dependencies = [
214
214
 
215
215
  [[package]]
216
216
  name = "headson"
217
- version = "0.6.4"
217
+ version = "0.6.6"
218
218
  dependencies = [
219
219
  "anyhow",
220
220
  "clap",
@@ -228,7 +228,7 @@ dependencies = [
228
228
 
229
229
  [[package]]
230
230
  name = "headson-python"
231
- version = "0.6.4"
231
+ version = "0.6.6"
232
232
  dependencies = [
233
233
  "anyhow",
234
234
  "headson",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson-python"
3
- version = "0.6.4"
3
+ version = "0.6.6"
4
4
  edition = "2021"
5
5
  publish = false
6
6
  readme = "README.md"
@@ -4,11 +4,11 @@ Minimal Python API for the `headson` preview renderer.
4
4
 
5
5
  API
6
6
 
7
- - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
7
+ - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", byte_budget: int | None = None, skew: str = "balanced") -> str`
8
8
  - `format`: output format — `"auto" | "json" | "yaml" | "text"`.
9
9
  - `style`: output style — `"strict" | "default" | "detailed"`.
10
10
  - `input_format`: ingestion format — `"json" | "yaml" | "text"`.
11
- - `character_budget`: maximum output size in characters (defaults to 500 if not set).
11
+ - `byte_budget`: maximum output size in bytes (defaults to 500 if not set).
12
12
  - `skew`: one of `"balanced" | "head" | "tail"`.
13
13
  - `balanced` (default), `head` keeps first N, `tail` keeps last N. Display styles place omission markers accordingly; strict JSON remains unannotated.
14
14
  - Notes:
@@ -20,26 +20,26 @@ Examples:
20
20
  import headson
21
21
 
22
22
  # Human-friendly JSON (Pseudo) with a small budget
23
- print(headson.summarize('{"a": 1, "b": [1,2,3]}', format="json", style="default", character_budget=80))
23
+ print(headson.summarize('{"a": 1, "b": [1,2,3]}', format="json", style="default", byte_budget=80))
24
24
 
25
25
  # Strict JSON stays valid JSON
26
- print(headson.summarize('{"a": 1, "b": {"c": 2}}', format="json", style="strict", character_budget=10_000))
26
+ print(headson.summarize('{"a": 1, "b": {"c": 2}}', format="json", style="strict", byte_budget=10_000))
27
27
 
28
28
  # Annotated JSON (JS) with tail skew: prefer the end of arrays when truncating
29
29
  arr = ','.join(str(i) for i in range(100))
30
- print(headson.summarize('{"arr": [' + arr + ']}', format="json", style="detailed", character_budget=60, skew="tail"))
30
+ print(headson.summarize('{"arr": [' + arr + ']}', format="json", style="detailed", byte_budget=60, skew="tail"))
31
31
 
32
32
  # YAML styles: strict (no comments), default (… comments), detailed (counts)
33
33
  doc = 'root:\n items: [1,2,3,4,5,6,7,8,9,10]\n'
34
- print(headson.summarize(doc, format="yaml", style="strict", input_format="yaml", character_budget=60))
35
- print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", character_budget=60))
36
- print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml", character_budget=60))
34
+ print(headson.summarize(doc, format="yaml", style="strict", input_format="yaml", byte_budget=60))
35
+ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", byte_budget=60))
36
+ print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml", byte_budget=60))
37
37
 
38
38
  # Note: tail mode affects only display styles; strict JSON stays strict.
39
39
 
40
40
  # Text: render raw lines with omission markers depending on style
41
41
  text = "one\ntwo\nthree\n"
42
- print(headson.summarize(text, format="text", style="default", input_format="text", character_budget=10))
42
+ print(headson.summarize(text, format="text", style="default", input_format="text", byte_budget=10))
43
43
  ```
44
44
 
45
45
  Install for development:
@@ -59,6 +59,7 @@ fn render_config_with_sampler(
59
59
  color_mode: ColorMode::Auto,
60
60
  color_enabled: false,
61
61
  style: s,
62
+ string_free_prefix_graphemes: None,
62
63
  })
63
64
  }
64
65
 
@@ -85,6 +86,7 @@ fn priority_config(
85
86
  prefer_tail_arrays,
86
87
  array_bias: headson_core::ArrayBias::HeadMidTail,
87
88
  array_sampler: sampler,
89
+ line_budget_only: false,
88
90
  }
89
91
  }
90
92
 
@@ -93,19 +95,19 @@ fn to_pyerr(e: anyhow::Error) -> PyErr {
93
95
  }
94
96
 
95
97
  #[pyfunction]
96
- #[pyo3(signature = (text, *, format="auto", style="default", character_budget=None, skew="balanced", input_format="json"))]
98
+ #[pyo3(signature = (text, *, format="auto", style="default", byte_budget=None, skew="balanced", input_format="json"))]
97
99
  fn summarize(
98
100
  py: Python<'_>,
99
101
  text: &str,
100
102
  format: &str,
101
103
  style: &str,
102
- character_budget: Option<usize>,
104
+ byte_budget: Option<usize>,
103
105
  skew: &str,
104
106
  input_format: &str,
105
107
  ) -> PyResult<String> {
106
108
  let sampler = parse_skew(skew).map_err(to_pyerr)?;
107
109
  let cfg = render_config_with_sampler(format, style, sampler).map_err(to_pyerr)?;
108
- let budget = character_budget.unwrap_or(500);
110
+ let budget = byte_budget.unwrap_or(500);
109
111
  let per_file_for_priority = budget.max(1);
110
112
  let prio = priority_config(per_file_for_priority, sampler);
111
113
  let input = text.as_bytes().to_vec();
@@ -220,6 +220,7 @@ mod tests {
220
220
  color_mode: crate::serialization::types::ColorMode::Off,
221
221
  color_enabled: false,
222
222
  style: Style::Default,
223
+ string_free_prefix_graphemes: None,
223
224
  };
224
225
  let prio = PriorityConfig::new(100, 100);
225
226
  (cfg, prio)