headson 0.6.3__tar.gz → 0.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {headson-0.6.3 → headson-0.6.5}/Cargo.lock +1 -1
  2. {headson-0.6.3 → headson-0.6.5}/Cargo.toml +1 -1
  3. {headson-0.6.3 → headson-0.6.5}/PKG-INFO +25 -20
  4. {headson-0.6.3 → headson-0.6.5}/README.md +24 -19
  5. headson-0.6.5/docs/assets/tapes/demo.gif +0 -0
  6. {headson-0.6.3 → headson-0.6.5}/pyproject.toml +1 -1
  7. {headson-0.6.3 → headson-0.6.5}/python/Cargo.lock +2 -2
  8. {headson-0.6.3 → headson-0.6.5}/python/Cargo.toml +1 -1
  9. {headson-0.6.3 → headson-0.6.5}/python/README.md +9 -9
  10. {headson-0.6.3 → headson-0.6.5}/python/src/lib.rs +5 -3
  11. {headson-0.6.3/src/json_ingest → headson-0.6.5/src/ingest/formats/json}/builder.rs +2 -5
  12. {headson-0.6.3/src/json_ingest → headson-0.6.5/src/ingest/formats/json}/mod.rs +41 -6
  13. {headson-0.6.3/src/json_ingest → headson-0.6.5/src/ingest/formats/json}/samplers/default.rs +4 -6
  14. {headson-0.6.3/src/json_ingest → headson-0.6.5/src/ingest/formats/json}/samplers/head.rs +2 -1
  15. {headson-0.6.3/src/json_ingest → headson-0.6.5/src/ingest/formats/json}/samplers/mod.rs +2 -20
  16. {headson-0.6.3/src/json_ingest → headson-0.6.5/src/ingest/formats/json}/samplers/tail.rs +6 -3
  17. headson-0.6.5/src/ingest/formats/mod.rs +9 -0
  18. {headson-0.6.3/src/text_ingest → headson-0.6.5/src/ingest/formats/text}/mod.rs +97 -14
  19. {headson-0.6.3/src/yaml_ingest → headson-0.6.5/src/ingest/formats/yaml}/mod.rs +124 -78
  20. {headson-0.6.3 → headson-0.6.5}/src/ingest/mod.rs +14 -8
  21. headson-0.6.5/src/ingest/sampling/mod.rs +118 -0
  22. {headson-0.6.3 → headson-0.6.5}/src/lib.rs +162 -15
  23. {headson-0.6.3 → headson-0.6.5}/src/main.rs +154 -70
  24. {headson-0.6.3 → headson-0.6.5}/src/order/build.rs +2 -2
  25. {headson-0.6.3 → headson-0.6.5}/src/order/types.rs +5 -0
  26. {headson-0.6.3 → headson-0.6.5}/src/serialization/mod.rs +79 -27
  27. {headson-0.6.3 → headson-0.6.5}/src/serialization/types.rs +3 -0
  28. headson-0.6.5/src/utils/measure.rs +42 -0
  29. {headson-0.6.3 → headson-0.6.5}/src/utils/mod.rs +1 -0
  30. headson-0.6.3/docs/assets/tapes/demo.gif +0 -0
  31. headson-0.6.3/src/ingest/json.rs +0 -37
  32. headson-0.6.3/src/ingest/text.rs +0 -45
  33. headson-0.6.3/src/ingest/yaml.rs +0 -39
  34. {headson-0.6.3 → headson-0.6.5}/docs/assets/algorithm.svg +0 -0
  35. {headson-0.6.3 → headson-0.6.5}/docs/assets/logo.png +0 -0
  36. {headson-0.6.3 → headson-0.6.5}/docs/assets/logo.svg +0 -0
  37. {headson-0.6.3 → headson-0.6.5}/python/headson/__init__.py +0 -0
  38. {headson-0.6.3 → headson-0.6.5}/src/format.rs +0 -0
  39. {headson-0.6.3 → headson-0.6.5}/src/order/mod.rs +0 -0
  40. {headson-0.6.3 → headson-0.6.5}/src/order/scoring.rs +0 -0
  41. {headson-0.6.3 → headson-0.6.5}/src/order/snapshots/headson__order__build__tests__order_empty_array_order.snap +0 -0
  42. {headson-0.6.3 → headson-0.6.5}/src/order/snapshots/headson__order__build__tests__order_single_string_array_order.snap +0 -0
  43. {headson-0.6.3 → headson-0.6.5}/src/serialization/color.rs +0 -0
  44. {headson-0.6.3 → headson-0.6.5}/src/serialization/fileset.rs +0 -0
  45. {headson-0.6.3 → headson-0.6.5}/src/serialization/output.rs +0 -0
  46. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty.snap +0 -0
  47. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty_yaml.snap +0 -0
  48. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__arena_render_single.snap +0 -0
  49. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__arena_render_single_yaml.snap +0 -0
  50. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__array_internal_gaps_yaml.snap +0 -0
  51. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_head.snap +0 -0
  52. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_tail.snap +0 -0
  53. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_head.snap +0 -0
  54. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_tail.snap +0 -0
  55. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_head.snap +0 -0
  56. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_tail.snap +0 -0
  57. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_json.snap +0 -0
  58. {headson-0.6.3 → headson-0.6.5}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_yaml.snap +0 -0
  59. {headson-0.6.3 → headson-0.6.5}/src/serialization/templates/core.rs +0 -0
  60. {headson-0.6.3 → headson-0.6.5}/src/serialization/templates/js.rs +0 -0
  61. {headson-0.6.3 → headson-0.6.5}/src/serialization/templates/json.rs +0 -0
  62. {headson-0.6.3 → headson-0.6.5}/src/serialization/templates/mod.rs +0 -0
  63. {headson-0.6.3 → headson-0.6.5}/src/serialization/templates/pseudo.rs +0 -0
  64. {headson-0.6.3 → headson-0.6.5}/src/serialization/templates/text.rs +0 -0
  65. {headson-0.6.3 → headson-0.6.5}/src/serialization/templates/yaml.rs +0 -0
  66. {headson-0.6.3 → headson-0.6.5}/src/snapshots/headson__order__tests__order_empty_array_order.snap +0 -0
  67. {headson-0.6.3 → headson-0.6.5}/src/snapshots/headson__order__tests__order_single_string_array_order.snap +0 -0
  68. {headson-0.6.3 → headson-0.6.5}/src/utils/graph.rs +0 -0
  69. {headson-0.6.3 → headson-0.6.5}/src/utils/json.rs +0 -0
  70. {headson-0.6.3 → headson-0.6.5}/src/utils/search.rs +0 -0
  71. {headson-0.6.3 → headson-0.6.5}/src/utils/text.rs +0 -0
  72. {headson-0.6.3 → headson-0.6.5}/src/utils/tree_arena.rs +0 -0
  73. {headson-0.6.3 → headson-0.6.5}/tests/fixtures/json/JSONTestSuite/LICENSE +0 -0
  74. {headson-0.6.3 → headson-0.6.5}/tests/fixtures/json/JSONTestSuite/README.md +0 -0
@@ -298,7 +298,7 @@ dependencies = [
298
298
 
299
299
  [[package]]
300
300
  name = "headson"
301
- version = "0.6.3"
301
+ version = "0.6.5"
302
302
  dependencies = [
303
303
  "anyhow",
304
304
  "assert_cmd",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson"
3
- version = "0.6.3"
3
+ version = "0.6.5"
4
4
  edition = "2024"
5
5
  description = "Budget‑constrained JSON preview renderer"
6
6
  readme = "README.md"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: headson
3
- Version: 0.6.3
3
+ Version: 0.6.5
4
4
  Classifier: Programming Language :: Python
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Rust
@@ -20,12 +20,15 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
20
20
  <br/>
21
21
  </p>
22
22
 
23
- `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
23
+ `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict byte budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
24
24
 
25
25
  Available as:
26
26
  - CLI (see [Usage](#usage))
27
27
  - Python library (see [Python Bindings](#python-bindings))
28
28
 
29
+ ![Codecov](https://img.shields.io/codecov/c/github/kantord/headson?style=flat-square) ![Crates.io Version](https://img.shields.io/crates/v/headson?style=flat-square) ![PyPI - Version](https://img.shields.io/pypi/v/headson?style=flat-square)
30
+
31
+
29
32
  ## Install
30
33
 
31
34
  Using Cargo:
@@ -67,8 +70,8 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
67
70
 
68
71
  Common flags:
69
72
 
70
- - `-n, --budget <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
71
- - `-N, --global-budget <BYTES>`: total output budget across all inputs. With `--budget`, the effective total is the smaller of the two.
73
+ - `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
74
+ - `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
72
75
  - `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
73
76
  - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
74
77
  - `-t, --template <strict|default|detailed>`: output style (default: `default`).
@@ -89,7 +92,7 @@ Notes:
89
92
  - With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
90
93
  - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
91
94
  - Unknown extensions are treated as Text (raw lines) — safe for logs and `.txt` files.
92
- - `--global-budget` may truncate or omit entire files to respect the total budget.
95
+ - `--global-bytes` may truncate or omit entire files to respect the total budget.
93
96
  - The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
94
97
  - Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
95
98
  - Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
@@ -98,33 +101,33 @@ Quick one‑liners:
98
101
 
99
102
  - Peek a big JSON stream (keeps structure):
100
103
 
101
- zstdcat huge.json.zst | headson -n 800 -f json -t default
104
+ zstdcat huge.json.zst | headson -c 800 -f json -t default
102
105
 
103
106
  - Many files with a fixed overall size:
104
107
 
105
- headson -N 1200 -f json -t strict logs/*.json
108
+ headson -C 1200 -f json -t strict logs/*.json
106
109
 
107
110
  - Glance at a file, JavaScript‑style comments for omissions:
108
111
 
109
- headson -n 400 -f json -t detailed data.json
112
+ headson -c 400 -f json -t detailed data.json
110
113
 
111
114
  - YAML with detailed comments:
112
115
 
113
- headson -n 400 -f yaml -t detailed config.yaml
116
+ headson -c 400 -f yaml -t detailed config.yaml
114
117
 
115
118
  ### Text mode
116
119
 
117
120
  - Single file (auto):
118
121
 
119
- headson -n 200 notes.txt
122
+ headson -c 200 notes.txt
120
123
 
121
124
  - Force Text ingest/output (useful when mixing with other extensions):
122
125
 
123
- headson -n 200 -i text -f text notes.txt
126
+ headson -c 200 -i text -f text notes.txt
124
127
 
125
128
  - Many text files (fileset):
126
129
 
127
- headson -n 800 -i text -f text logs/*.txt
130
+ headson -c 800 -i text -f text logs/*.txt
128
131
 
129
132
  - Styles on Text:
130
133
  - default: omission as a standalone `…` line.
@@ -135,6 +138,8 @@ Show help:
135
138
 
136
139
  headson --help
137
140
 
141
+ Note: flags align with head/tail conventions (`-c/--bytes`, `-C/--global-bytes`).
142
+
138
143
  ## Examples: head vs headson
139
144
 
140
145
  Input:
@@ -153,7 +158,7 @@ jq -c . users.json | head -c 80
153
158
  Structured preview with headson (JSON family, default style → Pseudo):
154
159
 
155
160
  ```bash
156
- headson -n 120 -f json -t default users.json
161
+ headson -c 120 -f json -t default users.json
157
162
  # {
158
163
  # users: [
159
164
  # { id: 1, name: "Ana", roles: [ "admin", … ] },
@@ -166,7 +171,7 @@ headson -n 120 -f json -t default users.json
166
171
  Machine‑readable preview (JSON family, strict style → strict JSON):
167
172
 
168
173
  ```bash
169
- headson -n 120 -f json -t strict users.json
174
+ headson -c 120 -f json -t strict users.json
170
175
  # {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
171
176
  ```
172
177
 
@@ -185,11 +190,11 @@ A thin Python extension module is available on PyPI as `headson`.
185
190
 
186
191
  - Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
187
192
  - API:
188
- - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
193
+ - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", byte_budget: int | None = None, skew: str = "balanced") -> str`
189
194
  - `format`: `"auto" | "json" | "yaml"` (auto maps to JSON family for single inputs)
190
195
  - `style`: `"strict" | "default" | "detailed"`
191
196
  - `input_format`: `"json" | "yaml"` (ingestion)
192
- - `character_budget`: maximum output size in characters (default: 500)
197
+ - `byte_budget`: maximum output size in bytes (default: 500)
193
198
  - `skew`: `"balanced" | "head" | "tail"` (affects display styles; strict JSON remains unannotated)
194
199
 
195
200
  Examples:
@@ -199,7 +204,7 @@ import json
199
204
  import headson
200
205
 
201
206
  data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
202
- preview = headson.summarize(json.dumps(data), format="json", style="strict", character_budget=200)
207
+ preview = headson.summarize(json.dumps(data), format="json", style="strict", byte_budget=200)
203
208
  print(preview)
204
209
 
205
210
  # Prefer the tail of arrays (annotations show with style="default"/"detailed")
@@ -208,14 +213,14 @@ print(
208
213
  json.dumps(list(range(100))),
209
214
  format="json",
210
215
  style="detailed",
211
- character_budget=80,
216
+ byte_budget=80,
212
217
  skew="tail",
213
218
  )
214
219
  )
215
220
 
216
221
  # YAML support
217
222
  doc = "root:\n items: [1,2,3,4,5,6,7,8,9,10]\n"
218
- print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", character_budget=60))
223
+ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", byte_budget=60))
219
224
  ```
220
225
 
221
226
  # Algorithm
@@ -225,7 +230,7 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
225
230
  ## Footnotes
226
231
  - <sup><b>[1]</b></sup> <b>Optimized tree representation</b>: An arena‑style tree stored in flat, contiguous buffers. Each node records its kind and value plus index ranges into shared child and key arrays. Arrays are ingested in a single pass and may be deterministically pre‑sampled: the first element is always kept; additional elements are selected via a fixed per‑index inclusion test; for kept elements, original indices are stored and full lengths are counted. This enables accurate omission info and internal gap markers later, while minimizing pointer chasing.
227
232
  - <sup><b>[2]</b></sup> <b>Priority order</b>: Nodes are scored so previews surface representative structure and values first. Arrays can favor head/mid/tail coverage (default) or strictly the head; tail preference flips head/tail when configured. Object properties are ordered by key, and strings expand by grapheme with early characters prioritized over very deep expansions.
228
- - <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the character budget, looping between “choose N” and a render attempt to converge quickly.
233
+ - <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the byte budget, looping between “choose N” and a render attempt to converge quickly.
229
234
  - <sup><b>[4]</b></sup> <b>Render attempt</b>: Serializes the currently included nodes using the selected template. Omission summaries and per-file section headers appear in display templates (pseudo/js); json remains strict. For arrays, display templates may insert internal gap markers between non‑contiguous kept items using original indices.
230
235
  - <sup><b>[5]</b></sup> <b>Diagram source</b>: The Algorithm diagram is generated from `docs/diagrams/algorithm.mmd`. Regenerate the SVG with `cargo make diagrams` before releasing.
231
236
 
@@ -6,12 +6,15 @@
6
6
  <br/>
7
7
  </p>
8
8
 
9
- `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
9
+ `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict byte budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
10
10
 
11
11
  Available as:
12
12
  - CLI (see [Usage](#usage))
13
13
  - Python library (see [Python Bindings](#python-bindings))
14
14
 
15
+ ![Codecov](https://img.shields.io/codecov/c/github/kantord/headson?style=flat-square) ![Crates.io Version](https://img.shields.io/crates/v/headson?style=flat-square) ![PyPI - Version](https://img.shields.io/pypi/v/headson?style=flat-square)
16
+
17
+
15
18
  ## Install
16
19
 
17
20
  Using Cargo:
@@ -53,8 +56,8 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
53
56
 
54
57
  Common flags:
55
58
 
56
- - `-n, --budget <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
57
- - `-N, --global-budget <BYTES>`: total output budget across all inputs. With `--budget`, the effective total is the smaller of the two.
59
+ - `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
60
+ - `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
58
61
  - `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
59
62
  - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
60
63
  - `-t, --template <strict|default|detailed>`: output style (default: `default`).
@@ -75,7 +78,7 @@ Notes:
75
78
  - With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
76
79
  - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
77
80
  - Unknown extensions are treated as Text (raw lines) — safe for logs and `.txt` files.
78
- - `--global-budget` may truncate or omit entire files to respect the total budget.
81
+ - `--global-bytes` may truncate or omit entire files to respect the total budget.
79
82
  - The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
80
83
  - Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
81
84
  - Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
@@ -84,33 +87,33 @@ Quick one‑liners:
84
87
 
85
88
  - Peek a big JSON stream (keeps structure):
86
89
 
87
- zstdcat huge.json.zst | headson -n 800 -f json -t default
90
+ zstdcat huge.json.zst | headson -c 800 -f json -t default
88
91
 
89
92
  - Many files with a fixed overall size:
90
93
 
91
- headson -N 1200 -f json -t strict logs/*.json
94
+ headson -C 1200 -f json -t strict logs/*.json
92
95
 
93
96
  - Glance at a file, JavaScript‑style comments for omissions:
94
97
 
95
- headson -n 400 -f json -t detailed data.json
98
+ headson -c 400 -f json -t detailed data.json
96
99
 
97
100
  - YAML with detailed comments:
98
101
 
99
- headson -n 400 -f yaml -t detailed config.yaml
102
+ headson -c 400 -f yaml -t detailed config.yaml
100
103
 
101
104
  ### Text mode
102
105
 
103
106
  - Single file (auto):
104
107
 
105
- headson -n 200 notes.txt
108
+ headson -c 200 notes.txt
106
109
 
107
110
  - Force Text ingest/output (useful when mixing with other extensions):
108
111
 
109
- headson -n 200 -i text -f text notes.txt
112
+ headson -c 200 -i text -f text notes.txt
110
113
 
111
114
  - Many text files (fileset):
112
115
 
113
- headson -n 800 -i text -f text logs/*.txt
116
+ headson -c 800 -i text -f text logs/*.txt
114
117
 
115
118
  - Styles on Text:
116
119
  - default: omission as a standalone `…` line.
@@ -121,6 +124,8 @@ Show help:
121
124
 
122
125
  headson --help
123
126
 
127
+ Note: flags align with head/tail conventions (`-c/--bytes`, `-C/--global-bytes`).
128
+
124
129
  ## Examples: head vs headson
125
130
 
126
131
  Input:
@@ -139,7 +144,7 @@ jq -c . users.json | head -c 80
139
144
  Structured preview with headson (JSON family, default style → Pseudo):
140
145
 
141
146
  ```bash
142
- headson -n 120 -f json -t default users.json
147
+ headson -c 120 -f json -t default users.json
143
148
  # {
144
149
  # users: [
145
150
  # { id: 1, name: "Ana", roles: [ "admin", … ] },
@@ -152,7 +157,7 @@ headson -n 120 -f json -t default users.json
152
157
  Machine‑readable preview (JSON family, strict style → strict JSON):
153
158
 
154
159
  ```bash
155
- headson -n 120 -f json -t strict users.json
160
+ headson -c 120 -f json -t strict users.json
156
161
  # {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
157
162
  ```
158
163
 
@@ -171,11 +176,11 @@ A thin Python extension module is available on PyPI as `headson`.
171
176
 
172
177
  - Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
173
178
  - API:
174
- - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
179
+ - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", byte_budget: int | None = None, skew: str = "balanced") -> str`
175
180
  - `format`: `"auto" | "json" | "yaml"` (auto maps to JSON family for single inputs)
176
181
  - `style`: `"strict" | "default" | "detailed"`
177
182
  - `input_format`: `"json" | "yaml"` (ingestion)
178
- - `character_budget`: maximum output size in characters (default: 500)
183
+ - `byte_budget`: maximum output size in bytes (default: 500)
179
184
  - `skew`: `"balanced" | "head" | "tail"` (affects display styles; strict JSON remains unannotated)
180
185
 
181
186
  Examples:
@@ -185,7 +190,7 @@ import json
185
190
  import headson
186
191
 
187
192
  data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
188
- preview = headson.summarize(json.dumps(data), format="json", style="strict", character_budget=200)
193
+ preview = headson.summarize(json.dumps(data), format="json", style="strict", byte_budget=200)
189
194
  print(preview)
190
195
 
191
196
  # Prefer the tail of arrays (annotations show with style="default"/"detailed")
@@ -194,14 +199,14 @@ print(
194
199
  json.dumps(list(range(100))),
195
200
  format="json",
196
201
  style="detailed",
197
- character_budget=80,
202
+ byte_budget=80,
198
203
  skew="tail",
199
204
  )
200
205
  )
201
206
 
202
207
  # YAML support
203
208
  doc = "root:\n items: [1,2,3,4,5,6,7,8,9,10]\n"
204
- print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", character_budget=60))
209
+ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", byte_budget=60))
205
210
  ```
206
211
 
207
212
  # Algorithm
@@ -211,7 +216,7 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
211
216
  ## Footnotes
212
217
  - <sup><b>[1]</b></sup> <b>Optimized tree representation</b>: An arena‑style tree stored in flat, contiguous buffers. Each node records its kind and value plus index ranges into shared child and key arrays. Arrays are ingested in a single pass and may be deterministically pre‑sampled: the first element is always kept; additional elements are selected via a fixed per‑index inclusion test; for kept elements, original indices are stored and full lengths are counted. This enables accurate omission info and internal gap markers later, while minimizing pointer chasing.
213
218
  - <sup><b>[2]</b></sup> <b>Priority order</b>: Nodes are scored so previews surface representative structure and values first. Arrays can favor head/mid/tail coverage (default) or strictly the head; tail preference flips head/tail when configured. Object properties are ordered by key, and strings expand by grapheme with early characters prioritized over very deep expansions.
214
- - <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the character budget, looping between “choose N” and a render attempt to converge quickly.
219
+ - <sup><b>[3]</b></sup> <b>Choose top N nodes (binary search)</b>: Iteratively picks N so that the rendered preview fits within the byte budget, looping between “choose N” and a render attempt to converge quickly.
215
220
  - <sup><b>[4]</b></sup> <b>Render attempt</b>: Serializes the currently included nodes using the selected template. Omission summaries and per-file section headers appear in display templates (pseudo/js); json remains strict. For arrays, display templates may insert internal gap markers between non‑contiguous kept items using original indices.
216
221
  - <sup><b>[5]</b></sup> <b>Diagram source</b>: The Algorithm diagram is generated from `docs/diagrams/algorithm.mmd`. Regenerate the SVG with `cargo make diagrams` before releasing.
217
222
 
Binary file
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "headson"
7
- version = "0.6.3"
7
+ version = "0.6.5"
8
8
  description = "Budget‑constrained JSON preview renderer (Python bindings)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -214,7 +214,7 @@ dependencies = [
214
214
 
215
215
  [[package]]
216
216
  name = "headson"
217
- version = "0.6.3"
217
+ version = "0.6.5"
218
218
  dependencies = [
219
219
  "anyhow",
220
220
  "clap",
@@ -228,7 +228,7 @@ dependencies = [
228
228
 
229
229
  [[package]]
230
230
  name = "headson-python"
231
- version = "0.6.3"
231
+ version = "0.6.5"
232
232
  dependencies = [
233
233
  "anyhow",
234
234
  "headson",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson-python"
3
- version = "0.6.3"
3
+ version = "0.6.5"
4
4
  edition = "2021"
5
5
  publish = false
6
6
  readme = "README.md"
@@ -4,11 +4,11 @@ Minimal Python API for the `headson` preview renderer.
4
4
 
5
5
  API
6
6
 
7
- - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
7
+ - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", byte_budget: int | None = None, skew: str = "balanced") -> str`
8
8
  - `format`: output format — `"auto" | "json" | "yaml" | "text"`.
9
9
  - `style`: output style — `"strict" | "default" | "detailed"`.
10
10
  - `input_format`: ingestion format — `"json" | "yaml" | "text"`.
11
- - `character_budget`: maximum output size in characters (defaults to 500 if not set).
11
+ - `byte_budget`: maximum output size in bytes (defaults to 500 if not set).
12
12
  - `skew`: one of `"balanced" | "head" | "tail"`.
13
13
  - `balanced` (default), `head` keeps first N, `tail` keeps last N. Display styles place omission markers accordingly; strict JSON remains unannotated.
14
14
  - Notes:
@@ -20,26 +20,26 @@ Examples:
20
20
  import headson
21
21
 
22
22
  # Human-friendly JSON (Pseudo) with a small budget
23
- print(headson.summarize('{"a": 1, "b": [1,2,3]}', format="json", style="default", character_budget=80))
23
+ print(headson.summarize('{"a": 1, "b": [1,2,3]}', format="json", style="default", byte_budget=80))
24
24
 
25
25
  # Strict JSON stays valid JSON
26
- print(headson.summarize('{"a": 1, "b": {"c": 2}}', format="json", style="strict", character_budget=10_000))
26
+ print(headson.summarize('{"a": 1, "b": {"c": 2}}', format="json", style="strict", byte_budget=10_000))
27
27
 
28
28
  # Annotated JSON (JS) with tail skew: prefer the end of arrays when truncating
29
29
  arr = ','.join(str(i) for i in range(100))
30
- print(headson.summarize('{"arr": [' + arr + ']}', format="json", style="detailed", character_budget=60, skew="tail"))
30
+ print(headson.summarize('{"arr": [' + arr + ']}', format="json", style="detailed", byte_budget=60, skew="tail"))
31
31
 
32
32
  # YAML styles: strict (no comments), default (… comments), detailed (counts)
33
33
  doc = 'root:\n items: [1,2,3,4,5,6,7,8,9,10]\n'
34
- print(headson.summarize(doc, format="yaml", style="strict", input_format="yaml", character_budget=60))
35
- print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", character_budget=60))
36
- print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml", character_budget=60))
34
+ print(headson.summarize(doc, format="yaml", style="strict", input_format="yaml", byte_budget=60))
35
+ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml", byte_budget=60))
36
+ print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml", byte_budget=60))
37
37
 
38
38
  # Note: tail mode affects only display styles; strict JSON stays strict.
39
39
 
40
40
  # Text: render raw lines with omission markers depending on style
41
41
  text = "one\ntwo\nthree\n"
42
- print(headson.summarize(text, format="text", style="default", input_format="text", character_budget=10))
42
+ print(headson.summarize(text, format="text", style="default", input_format="text", byte_budget=10))
43
43
  ```
44
44
 
45
45
  Install for development:
@@ -59,6 +59,7 @@ fn render_config_with_sampler(
59
59
  color_mode: ColorMode::Auto,
60
60
  color_enabled: false,
61
61
  style: s,
62
+ string_free_prefix_graphemes: None,
62
63
  })
63
64
  }
64
65
 
@@ -85,6 +86,7 @@ fn priority_config(
85
86
  prefer_tail_arrays,
86
87
  array_bias: headson_core::ArrayBias::HeadMidTail,
87
88
  array_sampler: sampler,
89
+ line_budget_only: false,
88
90
  }
89
91
  }
90
92
 
@@ -93,19 +95,19 @@ fn to_pyerr(e: anyhow::Error) -> PyErr {
93
95
  }
94
96
 
95
97
  #[pyfunction]
96
- #[pyo3(signature = (text, *, format="auto", style="default", character_budget=None, skew="balanced", input_format="json"))]
98
+ #[pyo3(signature = (text, *, format="auto", style="default", byte_budget=None, skew="balanced", input_format="json"))]
97
99
  fn summarize(
98
100
  py: Python<'_>,
99
101
  text: &str,
100
102
  format: &str,
101
103
  style: &str,
102
- character_budget: Option<usize>,
104
+ byte_budget: Option<usize>,
103
105
  skew: &str,
104
106
  input_format: &str,
105
107
  ) -> PyResult<String> {
106
108
  let sampler = parse_skew(skew).map_err(to_pyerr)?;
107
109
  let cfg = render_config_with_sampler(format, style, sampler).map_err(to_pyerr)?;
108
- let budget = character_budget.unwrap_or(500);
110
+ let budget = byte_budget.unwrap_or(500);
109
111
  let per_file_for_priority = budget.max(1);
110
112
  let prio = priority_config(per_file_for_priority, sampler);
111
113
  let input = text.as_bytes().to_vec();
@@ -5,7 +5,7 @@ use std::cell::RefCell;
5
5
  use crate::order::NodeKind;
6
6
  use crate::utils::tree_arena::{JsonTreeArena, JsonTreeNode};
7
7
 
8
- use super::samplers::ArraySamplerKind;
8
+ use crate::ingest::sampling::ArraySamplerKind;
9
9
 
10
10
  #[derive(Default)]
11
11
  pub(crate) struct JsonTreeBuilder {
@@ -15,10 +15,7 @@ pub(crate) struct JsonTreeBuilder {
15
15
  }
16
16
 
17
17
  impl JsonTreeBuilder {
18
- pub(crate) fn new(
19
- array_cap: usize,
20
- sampler: super::samplers::ArraySamplerKind,
21
- ) -> Self {
18
+ pub(crate) fn new(array_cap: usize, sampler: ArraySamplerKind) -> Self {
22
19
  Self {
23
20
  arena: RefCell::new(JsonTreeArena::default()),
24
21
  array_cap,
@@ -1,24 +1,27 @@
1
1
  mod builder;
2
2
  mod samplers;
3
- use serde::de::DeserializeSeed;
4
3
 
5
- use crate::PriorityConfig;
6
- use crate::utils::tree_arena::JsonTreeArena;
7
4
  use anyhow::Result;
8
5
  use builder::JsonTreeBuilder;
6
+ use serde::de::DeserializeSeed;
7
+
8
+ use crate::PriorityConfig;
9
+ use crate::utils::tree_arena::JsonTreeArena as TreeArena;
10
+
11
+ use crate::ingest::Ingest;
9
12
 
10
13
  #[cfg(test)]
11
14
  pub fn build_json_tree_arena(
12
15
  input: &str,
13
16
  config: &PriorityConfig,
14
- ) -> Result<JsonTreeArena> {
17
+ ) -> Result<TreeArena> {
15
18
  build_json_tree_arena_from_bytes(input.as_bytes().to_vec(), config)
16
19
  }
17
20
 
18
21
  pub fn build_json_tree_arena_from_bytes(
19
22
  mut bytes: Vec<u8>,
20
23
  config: &PriorityConfig,
21
- ) -> Result<JsonTreeArena> {
24
+ ) -> Result<TreeArena> {
22
25
  let mut de = simd_json::Deserializer::from_slice(&mut bytes)?;
23
26
  let builder = JsonTreeBuilder::new(
24
27
  config.array_max_items,
@@ -36,7 +39,7 @@ pub fn build_json_tree_arena_from_bytes(
36
39
  pub fn build_json_tree_arena_from_many(
37
40
  mut inputs: Vec<(String, Vec<u8>)>,
38
41
  config: &PriorityConfig,
39
- ) -> Result<JsonTreeArena> {
42
+ ) -> Result<TreeArena> {
40
43
  let builder = JsonTreeBuilder::new(
41
44
  config.array_max_items,
42
45
  config.array_sampler.into(),
@@ -57,6 +60,38 @@ pub fn build_json_tree_arena_from_many(
57
60
  Ok(arena)
58
61
  }
59
62
 
63
+ /// JSON adapter for the ingest boundary. Delegates to the JSON builder to
64
+ /// produce the neutral `TreeArena`.
65
+ pub struct JsonIngest;
66
+
67
+ impl Ingest for JsonIngest {
68
+ fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena> {
69
+ build_json_tree_arena_from_bytes(bytes, cfg)
70
+ }
71
+
72
+ fn parse_many(
73
+ inputs: Vec<(String, Vec<u8>)>,
74
+ cfg: &PriorityConfig,
75
+ ) -> Result<TreeArena> {
76
+ build_json_tree_arena_from_many(inputs, cfg)
77
+ }
78
+ }
79
+
80
+ /// Convenience functions for the JSON ingest path.
81
+ pub fn parse_json_one(
82
+ bytes: Vec<u8>,
83
+ cfg: &PriorityConfig,
84
+ ) -> Result<TreeArena> {
85
+ JsonIngest::parse_one(bytes, cfg)
86
+ }
87
+
88
+ pub fn parse_json_many(
89
+ inputs: Vec<(String, Vec<u8>)>,
90
+ cfg: &PriorityConfig,
91
+ ) -> Result<TreeArena> {
92
+ JsonIngest::parse_many(inputs, cfg)
93
+ }
94
+
60
95
  #[cfg(test)]
61
96
  mod tests {
62
97
  use super::*;
@@ -1,14 +1,12 @@
1
1
  use serde::de::{IgnoredAny, SeqAccess};
2
2
 
3
- use super::{JsonTreeBuilder, SampledArray};
3
+ use super::JsonTreeBuilder;
4
+ use super::SampledArray;
4
5
 
5
- // Tunable sampling constants for the default strategy.
6
+ // Default strategy phases: keep-first, greedy, then index-hash acceptance (~50%).
6
7
  const RANDOM_ACCEPT_SEED: u64 = 0x9e37_79b9_7f4a_7c15;
7
- // ~50% acceptance to thin remaining elements in the random phase.
8
- const RANDOM_ACCEPT_THRESHOLD: u32 = 0x8000_0000;
9
- // Keep a small, fixed number of items from the head before greedy/random phases.
8
+ const RANDOM_ACCEPT_THRESHOLD: u32 = 0x8000_0000; // ~50%
10
9
  const KEEP_FIRST_COUNT: usize = 3;
11
- // Take roughly half of the remaining capacity greedily after the first items.
12
10
  const GREEDY_PORTION_DIVISOR: usize = 2;
13
11
 
14
12
  struct PhaseState {
@@ -1,6 +1,7 @@
1
1
  use serde::de::{IgnoredAny, SeqAccess};
2
2
 
3
- use super::{JsonTreeBuilder, SampledArray};
3
+ use super::JsonTreeBuilder;
4
+ use super::SampledArray;
4
5
 
5
6
  fn parse_keep<'de, A>(
6
7
  seq: &mut A,
@@ -1,7 +1,7 @@
1
1
  use serde::de::SeqAccess;
2
2
 
3
- use crate::ArraySamplerStrategy;
4
- use crate::json_ingest::builder::JsonTreeBuilder;
3
+ use crate::ingest::formats::json::builder::JsonTreeBuilder;
4
+ use crate::ingest::sampling::ArraySamplerKind;
5
5
 
6
6
  #[derive(Debug)]
7
7
  pub(crate) struct SampledArray {
@@ -10,14 +10,6 @@ pub(crate) struct SampledArray {
10
10
  pub total_len: usize,
11
11
  }
12
12
 
13
- #[derive(Copy, Clone, Debug, Default)]
14
- pub(crate) enum ArraySamplerKind {
15
- #[default]
16
- Default,
17
- Head,
18
- Tail,
19
- }
20
-
21
13
  impl ArraySamplerKind {
22
14
  pub(crate) fn sample_stream<'de, A>(
23
15
  self,
@@ -38,16 +30,6 @@ impl ArraySamplerKind {
38
30
  }
39
31
  }
40
32
 
41
- impl From<ArraySamplerStrategy> for ArraySamplerKind {
42
- fn from(strategy: ArraySamplerStrategy) -> Self {
43
- match strategy {
44
- ArraySamplerStrategy::Default => ArraySamplerKind::Default,
45
- ArraySamplerStrategy::Head => ArraySamplerKind::Head,
46
- ArraySamplerStrategy::Tail => ArraySamplerKind::Tail,
47
- }
48
- }
49
- }
50
-
51
33
  mod default;
52
34
  mod head;
53
35
  mod tail;
@@ -1,6 +1,7 @@
1
1
  use serde::de::{IgnoredAny, SeqAccess};
2
2
 
3
- use super::{JsonTreeBuilder, SampledArray};
3
+ use super::JsonTreeBuilder;
4
+ use super::SampledArray;
4
5
 
5
6
  pub(crate) fn sample_stream<'de, A>(
6
7
  seq: &mut A,
@@ -89,8 +90,10 @@ mod tests {
89
90
  let mut cfg = PriorityConfig::new(usize::MAX, 5);
90
91
  cfg.array_sampler = crate::ArraySamplerStrategy::Tail;
91
92
  let arena =
92
- crate::json_ingest::build_json_tree_arena_from_bytes(input, &cfg)
93
- .expect("arena");
93
+ crate::ingest::formats::json::build_json_tree_arena_from_bytes(
94
+ input, &cfg,
95
+ )
96
+ .expect("arena");
94
97
  let root = &arena.nodes[arena.root_id];
95
98
  assert_eq!(root.children_len, 5, "kept 5");
96
99
  let mut orig_indices = Vec::new();