headson 0.2.5__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of headson might be problematic. Click here for more details.

Files changed (49) hide show
  1. {headson-0.2.5 → headson-0.4.0}/Cargo.lock +1 -1
  2. {headson-0.2.5 → headson-0.4.0}/Cargo.toml +1 -1
  3. headson-0.4.0/PKG-INFO +169 -0
  4. headson-0.4.0/README.md +153 -0
  5. {headson-0.2.5 → headson-0.4.0}/pyproject.toml +3 -5
  6. {headson-0.2.5 → headson-0.4.0}/python/Cargo.lock +2 -2
  7. {headson-0.2.5 → headson-0.4.0}/python/Cargo.toml +2 -2
  8. {headson-0.2.5 → headson-0.4.0}/python/README.md +10 -2
  9. {headson-0.2.5 → headson-0.4.0}/python/src/lib.rs +8 -7
  10. {headson-0.2.5 → headson-0.4.0}/src/lib.rs +12 -10
  11. {headson-0.2.5 → headson-0.4.0}/src/main.rs +8 -6
  12. {headson-0.2.5 → headson-0.4.0}/src/order/build.rs +43 -35
  13. {headson-0.2.5 → headson-0.4.0}/src/order/types.rs +3 -1
  14. {headson-0.2.5 → headson-0.4.0}/src/serialization/mod.rs +202 -57
  15. {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/core.rs +6 -1
  16. {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/js.rs +7 -4
  17. {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/mod.rs +1 -0
  18. {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/pseudo.rs +3 -0
  19. {headson-0.2.5 → headson-0.4.0}/src/serialization/types.rs +2 -0
  20. headson-0.4.0/src/utils/graph.rs +61 -0
  21. {headson-0.2.5 → headson-0.4.0}/src/utils/tree_arena.rs +2 -3
  22. headson-0.2.5/PKG-INFO +0 -99
  23. headson-0.2.5/README.md +0 -83
  24. headson-0.2.5/src/utils/graph.rs +0 -54
  25. {headson-0.2.5 → headson-0.4.0}/JSONTestSuite/LICENSE +0 -0
  26. {headson-0.2.5 → headson-0.4.0}/JSONTestSuite/README.md +0 -0
  27. {headson-0.2.5 → headson-0.4.0}/LICENSE +0 -0
  28. {headson-0.2.5 → headson-0.4.0}/python/headson/__init__.py +0 -0
  29. {headson-0.2.5 → headson-0.4.0}/src/json_ingest/builder.rs +0 -0
  30. {headson-0.2.5 → headson-0.4.0}/src/json_ingest/mod.rs +0 -0
  31. {headson-0.2.5 → headson-0.4.0}/src/order/mod.rs +0 -0
  32. {headson-0.2.5 → headson-0.4.0}/src/order/scoring.rs +0 -0
  33. {headson-0.2.5 → headson-0.4.0}/src/order/snapshots/headson__order__build__tests__order_empty_array_order.snap +0 -0
  34. {headson-0.2.5 → headson-0.4.0}/src/order/snapshots/headson__order__build__tests__order_single_string_array_order.snap +0 -0
  35. {headson-0.2.5 → headson-0.4.0}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty.snap +0 -0
  36. {headson-0.2.5 → headson-0.4.0}/src/serialization/snapshots/headson__serialization__tests__arena_render_single.snap +0 -0
  37. {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/json.rs +0 -0
  38. {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__order__tests__order_empty_array_order.snap +0 -0
  39. {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__order__tests__order_single_string_array_order.snap +0 -0
  40. {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__order__tests__pq_empty_array_queue.snap +0 -0
  41. {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__order__tests__pq_single_string_array_queue.snap +0 -0
  42. {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__queue__tests__pq_empty_array_queue.snap +0 -0
  43. {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__queue__tests__pq_single_string_array_queue.snap +0 -0
  44. {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__tree__tests__build_tree_empty.snap +0 -0
  45. {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__tree__tests__build_tree_single.snap +0 -0
  46. {headson-0.2.5 → headson-0.4.0}/src/utils/json.rs +0 -0
  47. {headson-0.2.5 → headson-0.4.0}/src/utils/mod.rs +0 -0
  48. {headson-0.2.5 → headson-0.4.0}/src/utils/search.rs +0 -0
  49. {headson-0.2.5 → headson-0.4.0}/src/utils/text.rs +0 -0
@@ -266,7 +266,7 @@ dependencies = [
266
266
 
267
267
  [[package]]
268
268
  name = "headson"
269
- version = "0.2.5"
269
+ version = "0.4.0"
270
270
  dependencies = [
271
271
  "anyhow",
272
272
  "assert_cmd",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson"
3
- version = "0.2.5"
3
+ version = "0.4.0"
4
4
  edition = "2024"
5
5
  description = "Budget‑constrained JSON preview renderer"
6
6
  readme = "README.md"
headson-0.4.0/PKG-INFO ADDED
@@ -0,0 +1,169 @@
1
+ Metadata-Version: 2.4
2
+ Name: headson
3
+ Version: 0.4.0
4
+ Classifier: Programming Language :: Python
5
+ Classifier: Programming Language :: Python :: 3
6
+ Classifier: Programming Language :: Rust
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Dist: pytest>=8 ; extra == 'test'
9
+ Provides-Extra: test
10
+ License-File: LICENSE
11
+ Summary: Budget‑constrained JSON preview renderer (Python bindings)
12
+ Keywords: json,preview,summarize,cli,bindings
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
15
+
16
+ # headson
17
+
18
+ Head/tail for JSON — but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget.
19
+
20
+ Available as:
21
+ - CLI (see [Usage](#usage))
22
+ - Python library (see [Python Bindings](#python-bindings))
23
+
24
+ ## Install
25
+
26
+ Using Cargo:
27
+
28
+ cargo install headson
29
+
30
+ From source:
31
+
32
+ cargo build --release
33
+ target/release/headson --help
34
+
35
+
36
+ ## Features
37
+
38
+ - *Budgeted output*: specify exactly how much JSON you want to see
39
+ - *Multiple output formats*: `json` (machine‑readable), `pseudo` (human‑friendly), `js` (valid JavaScript, most detailed metadata).
40
+ - *Multiple inputs*: preview many files at once with a shared or per‑file budget.
41
+ - *Fast*: can process gigabyte-scale files in seconds (mostly disk-constrained)
42
+ - *Available as a CLI app and as a Python library*
43
+
44
+ ## Fits into command line workflows
45
+
46
+ If you’re comfortable with tools like `head` and `tail`, use `headson` when you want a quick, structured peek into a JSON file without dumping the entire thing.
47
+
48
+ - `head`/`tail` operate on bytes/lines - their output is not optimized for tree structures
49
+ - `jq` requires you to craft filters to preview large JSON files
50
+ - `headson` is like head/tail for trees: zero config but it keeps structure and represents content as much as possible
51
+
52
+ ## Usage
53
+
54
+ headson [FLAGS] [INPUT...]
55
+
56
+ - INPUT (optional, repeatable): file path(s). If omitted, reads JSON from stdin. Multiple input files are supported.
57
+ - Prints the preview to stdout. On parse errors, exits non‑zero and prints an error to stderr.
58
+
59
+ Common flags:
60
+
61
+ - `-n, --budget <BYTES>`: per‑file output budget. When multiple input files are provided, the total budget equals `<BYTES> * number_of_inputs`.
62
+ - `-N, --global-budget <BYTES>`: total output budget across all inputs. Useful when you want a fixed-size preview across many files (may omit entire files). Mutually exclusive with `--budget`.
63
+ - `-f, --template <json|pseudo|js>`: output style (default: `pseudo`)
64
+ - `-m, --compact`: no indentation, no spaces, no newlines
65
+ - `--no-newline`: single line output
66
+ - `--no-space`: no space after `:` in objects
67
+ - `--indent <STR>`: indentation unit (default: two spaces)
68
+ - `--string-cap <N>`: max graphemes to consider per string (default: 500)
69
+ - `--tail`: prefer the end of arrays when truncating. Strings are unaffected. In `pseudo`/`js` templates the omission marker appears at the start; `json` remains strict JSON with no annotations.
70
+
71
+ Notes:
72
+
73
+ - With multiple input files:
74
+ - JSON template outputs a single JSON object keyed by the input file paths.
75
+ - Pseudo and JS templates render file sections with human-readable headers when newlines are enabled.
76
+ - If you use `--compact` or `--no-newline` (both disable newlines), fileset output falls back to standard inline rendering (no per-file headers) to remain compact.
77
+ - Using `--global-budget` may truncate or omit entire files to respect the total budget.
78
+ - The tool finds the largest preview that fits the budget; if even the tiniest preview exceeds it, you still get a minimal, valid preview.
79
+ - When passing file paths, directories and binary files are ignored; a notice is printed to stderr for each (e.g., `Ignored binary file: ./path/to/file`). Stdin mode reads the stream as-is.
80
+
81
+ Quick one‑liners:
82
+
83
+ - Peek a big JSON stream (keeps structure):
84
+
85
+ zstdcat huge.json.zst | headson -n 800 -f pseudo
86
+
87
+ - Many files with a fixed overall size:
88
+
89
+ headson -N 1200 -f json logs/*.json
90
+
91
+ - Glance at a file, JavaScript‑style comments for omissions:
92
+
93
+ headson -n 400 -f js data.json
94
+
95
+ Show help:
96
+
97
+ headson --help
98
+
99
+ ## Examples: head vs headson
100
+
101
+ Input:
102
+
103
+ ```json
104
+ {"users":[{"id":1,"name":"Ana","roles":["admin","dev"]},{"id":2,"name":"Bo"}],"meta":{"count":2,"source":"db"}}
105
+ ```
106
+
107
+ Naive cut (can break mid‑token):
108
+
109
+ ```bash
110
+ jq -c . users.json | head -c 80
111
+ # {"users":[{"id":1,"name":"Ana","roles":["admin","dev"]},{"id":2,"name":"Bo"}],"me
112
+ ```
113
+
114
+ Structured preview with headson (pseudo):
115
+
116
+ ```bash
117
+ headson -n 120 -f pseudo users.json
118
+ # {
119
+ # users: [
120
+ # { id: 1, name: "Ana", roles: [ "admin", … ] },
121
+ # …
122
+ # ]
123
+ # meta: { count: 2, … }
124
+ # }
125
+ ```
126
+
127
+ Machine‑readable preview (json):
128
+
129
+ ```bash
130
+ headson -n 120 -f json users.json
131
+ # {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
132
+ ```
133
+
134
+ ## Python Bindings
135
+
136
+ A thin Python extension module is available on PyPI as `headson`.
137
+
138
+ - Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
139
+ - API:
140
+ - `headson.summarize(text: str, *, template: str = "pseudo", character_budget: int | None = None, tail: bool = False) -> str`
141
+ - `template`: one of `"json" | "pseudo" | "js"`
142
+ - `character_budget`: maximum output size in characters (default: 500)
143
+ - `tail`: prefer the end of arrays when truncating; strings unaffected. Affects only display templates (`pseudo`/`js`); `json` remains strict.
144
+
145
+ Example:
146
+
147
+ ```python
148
+ import json
149
+ import headson
150
+
151
+ data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
152
+ preview = headson.summarize(json.dumps(data), template="json", character_budget=200)
153
+ print(preview)
154
+
155
+ # Prefer the tail of arrays (annotations show in pseudo/js only)
156
+ print(
157
+ headson.summarize(
158
+ json.dumps(list(range(100))),
159
+ template="pseudo",
160
+ character_budget=80,
161
+ tail=True,
162
+ )
163
+ )
164
+ ```
165
+
166
+ ## License
167
+
168
+ MIT
169
+
@@ -0,0 +1,153 @@
1
+ # headson
2
+
3
+ Head/tail for JSON — but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget.
4
+
5
+ Available as:
6
+ - CLI (see [Usage](#usage))
7
+ - Python library (see [Python Bindings](#python-bindings))
8
+
9
+ ## Install
10
+
11
+ Using Cargo:
12
+
13
+ cargo install headson
14
+
15
+ From source:
16
+
17
+ cargo build --release
18
+ target/release/headson --help
19
+
20
+
21
+ ## Features
22
+
23
+ - *Budgeted output*: specify exactly how much JSON you want to see
24
+ - *Multiple output formats*: `json` (machine‑readable), `pseudo` (human‑friendly), `js` (valid JavaScript, most detailed metadata).
25
+ - *Multiple inputs*: preview many files at once with a shared or per‑file budget.
26
+ - *Fast*: can process gigabyte-scale files in seconds (mostly disk-constrained)
27
+ - *Available as a CLI app and as a Python library*
28
+
29
+ ## Fits into command line workflows
30
+
31
+ If you’re comfortable with tools like `head` and `tail`, use `headson` when you want a quick, structured peek into a JSON file without dumping the entire thing.
32
+
33
+ - `head`/`tail` operate on bytes/lines - their output is not optimized for tree structures
34
+ - `jq` requires you to craft filters to preview large JSON files
35
+ - `headson` is like head/tail for trees: zero config but it keeps structure and represents content as much as possible
36
+
37
+ ## Usage
38
+
39
+ headson [FLAGS] [INPUT...]
40
+
41
+ - INPUT (optional, repeatable): file path(s). If omitted, reads JSON from stdin. Multiple input files are supported.
42
+ - Prints the preview to stdout. On parse errors, exits non‑zero and prints an error to stderr.
43
+
44
+ Common flags:
45
+
46
+ - `-n, --budget <BYTES>`: per‑file output budget. When multiple input files are provided, the total budget equals `<BYTES> * number_of_inputs`.
47
+ - `-N, --global-budget <BYTES>`: total output budget across all inputs. Useful when you want a fixed-size preview across many files (may omit entire files). Mutually exclusive with `--budget`.
48
+ - `-f, --template <json|pseudo|js>`: output style (default: `pseudo`)
49
+ - `-m, --compact`: no indentation, no spaces, no newlines
50
+ - `--no-newline`: single line output
51
+ - `--no-space`: no space after `:` in objects
52
+ - `--indent <STR>`: indentation unit (default: two spaces)
53
+ - `--string-cap <N>`: max graphemes to consider per string (default: 500)
54
+ - `--tail`: prefer the end of arrays when truncating. Strings are unaffected. In `pseudo`/`js` templates the omission marker appears at the start; `json` remains strict JSON with no annotations.
55
+
56
+ Notes:
57
+
58
+ - With multiple input files:
59
+ - JSON template outputs a single JSON object keyed by the input file paths.
60
+ - Pseudo and JS templates render file sections with human-readable headers when newlines are enabled.
61
+ - If you use `--compact` or `--no-newline` (both disable newlines), fileset output falls back to standard inline rendering (no per-file headers) to remain compact.
62
+ - Using `--global-budget` may truncate or omit entire files to respect the total budget.
63
+ - The tool finds the largest preview that fits the budget; if even the tiniest preview exceeds it, you still get a minimal, valid preview.
64
+ - When passing file paths, directories and binary files are ignored; a notice is printed to stderr for each (e.g., `Ignored binary file: ./path/to/file`). Stdin mode reads the stream as-is.
65
+
66
+ Quick one‑liners:
67
+
68
+ - Peek a big JSON stream (keeps structure):
69
+
70
+ zstdcat huge.json.zst | headson -n 800 -f pseudo
71
+
72
+ - Many files with a fixed overall size:
73
+
74
+ headson -N 1200 -f json logs/*.json
75
+
76
+ - Glance at a file, JavaScript‑style comments for omissions:
77
+
78
+ headson -n 400 -f js data.json
79
+
80
+ Show help:
81
+
82
+ headson --help
83
+
84
+ ## Examples: head vs headson
85
+
86
+ Input:
87
+
88
+ ```json
89
+ {"users":[{"id":1,"name":"Ana","roles":["admin","dev"]},{"id":2,"name":"Bo"}],"meta":{"count":2,"source":"db"}}
90
+ ```
91
+
92
+ Naive cut (can break mid‑token):
93
+
94
+ ```bash
95
+ jq -c . users.json | head -c 80
96
+ # {"users":[{"id":1,"name":"Ana","roles":["admin","dev"]},{"id":2,"name":"Bo"}],"me
97
+ ```
98
+
99
+ Structured preview with headson (pseudo):
100
+
101
+ ```bash
102
+ headson -n 120 -f pseudo users.json
103
+ # {
104
+ # users: [
105
+ # { id: 1, name: "Ana", roles: [ "admin", … ] },
106
+ # …
107
+ # ]
108
+ # meta: { count: 2, … }
109
+ # }
110
+ ```
111
+
112
+ Machine‑readable preview (json):
113
+
114
+ ```bash
115
+ headson -n 120 -f json users.json
116
+ # {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
117
+ ```
118
+
119
+ ## Python Bindings
120
+
121
+ A thin Python extension module is available on PyPI as `headson`.
122
+
123
+ - Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
124
+ - API:
125
+ - `headson.summarize(text: str, *, template: str = "pseudo", character_budget: int | None = None, tail: bool = False) -> str`
126
+ - `template`: one of `"json" | "pseudo" | "js"`
127
+ - `character_budget`: maximum output size in characters (default: 500)
128
+ - `tail`: prefer the end of arrays when truncating; strings unaffected. Affects only display templates (`pseudo`/`js`); `json` remains strict.
129
+
130
+ Example:
131
+
132
+ ```python
133
+ import json
134
+ import headson
135
+
136
+ data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
137
+ preview = headson.summarize(json.dumps(data), template="json", character_budget=200)
138
+ print(preview)
139
+
140
+ # Prefer the tail of arrays (annotations show in pseudo/js only)
141
+ print(
142
+ headson.summarize(
143
+ json.dumps(list(range(100))),
144
+ template="pseudo",
145
+ character_budget=80,
146
+ tail=True,
147
+ )
148
+ )
149
+ ```
150
+
151
+ ## License
152
+
153
+ MIT
@@ -4,10 +4,10 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "headson"
7
- version = "0.2.5"
7
+ version = "0.4.0"
8
8
  description = "Budget‑constrained JSON preview renderer (Python bindings)"
9
9
  readme = "README.md"
10
- requires-python = ">=3.8"
10
+ requires-python = ">=3.10"
11
11
  classifiers = [
12
12
  "Programming Language :: Python",
13
13
  "Programming Language :: Python :: 3",
@@ -32,7 +32,7 @@ python-source = "python"
32
32
  dev = [
33
33
  "pytest>=8",
34
34
  "maturin>=1.7,<2",
35
- "ruff>=0.6",
35
+ "ruff==0.14.2",
36
36
  ]
37
37
 
38
38
  [tool.ruff]
@@ -42,5 +42,3 @@ src = ["python", "tests_py"]
42
42
 
43
43
  [tool.ruff.lint]
44
44
  select = ["E", "F"]
45
-
46
-
@@ -169,7 +169,7 @@ dependencies = [
169
169
 
170
170
  [[package]]
171
171
  name = "headson"
172
- version = "0.2.5"
172
+ version = "0.4.0"
173
173
  dependencies = [
174
174
  "anyhow",
175
175
  "clap",
@@ -182,7 +182,7 @@ dependencies = [
182
182
 
183
183
  [[package]]
184
184
  name = "headson-python"
185
- version = "0.2.5"
185
+ version = "0.4.0"
186
186
  dependencies = [
187
187
  "anyhow",
188
188
  "headson",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson-python"
3
- version = "0.2.5"
3
+ version = "0.4.0"
4
4
  edition = "2021"
5
5
  publish = false
6
6
  readme = "README.md"
@@ -11,5 +11,5 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  anyhow = "1"
14
- pyo3 = { version = "0.27", features = ["extension-module"] }
14
+ pyo3 = { version = "0.27", features = ["extension-module", "abi3-py310"] }
15
15
  headson_core = { package = "headson", path = ".." }
@@ -4,19 +4,27 @@ Minimal Python API for the `headson` JSON preview renderer.
4
4
 
5
5
  Currently exported function:
6
6
 
7
- - `headson.summarize(text: str, *, template: str = "pseudo", character_budget: int | None = None) -> str`
7
+ - `headson.summarize(text: str, *, template: str = "pseudo", character_budget: int | None = None, tail: bool = False) -> str`
8
8
  - `template`: one of `"json" | "pseudo" | "js"`.
9
9
  - `character_budget`: maximum output size in characters (defaults to 500 if not set).
10
+ - `tail`: prefer the end of arrays when truncating; strings are unaffected. Only affects display templates (`pseudo`/`js`); `json` remains strict JSON with no annotations.
10
11
 
11
12
  Examples:
12
13
 
13
14
  ```python
14
15
  import headson
15
16
 
17
+ # Pseudo template with a small budget (structure-aware preview)
16
18
  print(headson.summarize('{"a": 1, "b": [1,2,3]}', template="pseudo", character_budget=80))
19
+
20
+ # Strict JSON template preserves valid JSON output
17
21
  print(headson.summarize('{"a": 1, "b": {"c": 2}}', template="json", character_budget=10_000))
22
+
23
+ # JS template with tail preference: prefer the end of arrays when truncating
18
24
  arr = ','.join(str(i) for i in range(100))
19
- print(headson.summarize('{"arr": [' + arr + ']}', template="js", character_budget=60))
25
+ print(headson.summarize('{"arr": [' + arr + ']}', template="js", character_budget=60, tail=True))
26
+
27
+ # Note: tail mode affects only pseudo/js display templates; the json template stays strict.
20
28
  ```
21
29
 
22
30
  Install for development:
@@ -13,9 +13,7 @@ fn to_template(s: &str) -> Result<OutputTemplate> {
13
13
  }
14
14
  }
15
15
 
16
- fn render_config(
17
- template: &str,
18
- ) -> Result<RenderConfig> {
16
+ fn render_config(template: &str, prefer_tail_arrays: bool) -> Result<RenderConfig> {
19
17
  let t = to_template(template)?;
20
18
  let space = " ".to_string();
21
19
  let newline = "\n".to_string();
@@ -25,13 +23,15 @@ fn render_config(
25
23
  indent_unit,
26
24
  space,
27
25
  newline,
26
+ prefer_tail_arrays,
28
27
  })
29
28
  }
30
29
 
31
- fn priority_config(per_file_budget: usize) -> PriorityConfig {
30
+ fn priority_config(per_file_budget: usize, prefer_tail_arrays: bool) -> PriorityConfig {
32
31
  PriorityConfig {
33
32
  max_string_graphemes: 500,
34
33
  array_max_items: (per_file_budget / 2).max(1),
34
+ prefer_tail_arrays,
35
35
  }
36
36
  }
37
37
 
@@ -40,17 +40,18 @@ fn to_pyerr(e: anyhow::Error) -> PyErr {
40
40
  }
41
41
 
42
42
  #[pyfunction]
43
- #[pyo3(signature = (text, *, template="pseudo", character_budget=None))]
43
+ #[pyo3(signature = (text, *, template="pseudo", character_budget=None, tail=false))]
44
44
  fn summarize(
45
45
  py: Python<'_>,
46
46
  text: &str,
47
47
  template: &str,
48
48
  character_budget: Option<usize>,
49
+ tail: bool,
49
50
  ) -> PyResult<String> {
50
- let cfg = render_config(template).map_err(to_pyerr)?;
51
+ let cfg = render_config(template, tail).map_err(to_pyerr)?;
51
52
  let budget = character_budget.unwrap_or(500);
52
53
  let per_file_for_priority = budget.max(1);
53
- let prio = priority_config(per_file_for_priority);
54
+ let prio = priority_config(per_file_for_priority, tail);
54
55
  let input = text.as_bytes().to_vec();
55
56
  py.detach(|| headson_core::headson(input, &cfg, &prio, budget).map_err(to_pyerr))
56
57
  }
@@ -70,20 +70,22 @@ fn find_largest_render_under_budget(
70
70
  // Each included node contributes at least some output; cap hi by budget.
71
71
  let lo = 1usize;
72
72
  let hi = total.min(char_budget.max(1));
73
- // Reusable inclusion marks to avoid clearing per probe
74
- let mut marks: Vec<u32> = vec![0; total];
75
- let mut mark_gen: u32 = 1;
73
+ // Reuse render-inclusion flags across render attempts to avoid clearing the vector.
74
+ // A node participates in the current render attempt when inclusion_flags[id] == render_set_id.
75
+ let mut inclusion_flags: Vec<u32> = vec![0; total];
76
+ // Each render attempt bumps this non-zero identifier to create a fresh inclusion set.
77
+ let mut render_set_id: u32 = 1;
76
78
  let mut best_str: Option<String> = None;
77
79
 
78
80
  let _ = crate::utils::search::binary_search_max(lo, hi, |mid| {
79
- let s = crate::serialization::render_arena_with_marks(
81
+ let s = crate::serialization::render_top_k(
80
82
  order_build,
81
83
  mid,
82
- &mut marks,
83
- mark_gen,
84
+ &mut inclusion_flags,
85
+ render_set_id,
84
86
  config,
85
87
  );
86
- mark_gen = mark_gen.wrapping_add(1).max(1);
88
+ render_set_id = render_set_id.wrapping_add(1).max(1);
87
89
  if s.len() <= char_budget {
88
90
  best_str = Some(s);
89
91
  true
@@ -97,11 +99,11 @@ fn find_largest_render_under_budget(
97
99
  } else {
98
100
  // Fallback: always render a single node (k=1) to produce the
99
101
  // shortest possible preview, even if it exceeds the byte budget.
100
- crate::serialization::render_arena_with_marks(
102
+ crate::serialization::render_top_k(
101
103
  order_build,
102
104
  1,
103
- &mut marks,
104
- mark_gen,
105
+ &mut inclusion_flags,
106
+ render_set_id,
105
107
  config,
106
108
  )
107
109
  }
@@ -53,6 +53,12 @@ struct Cli {
53
53
  help = "Total output budget across all inputs; useful to keep multiple files within a fixed overall output size (may omit entire files)."
54
54
  )]
55
55
  global_budget: Option<usize>,
56
+ #[arg(
57
+ long = "tail",
58
+ default_value_t = false,
59
+ help = "Prefer the end of arrays when truncating. Strings unaffected; JSON stays strict."
60
+ )]
61
+ tail: bool,
56
62
  #[arg(
57
63
  value_name = "INPUT",
58
64
  value_hint = clap::ValueHint::FilePath,
@@ -225,6 +231,7 @@ fn get_render_config_from(cli: &Cli) -> headson::RenderConfig {
225
231
  indent_unit,
226
232
  space,
227
233
  newline,
234
+ prefer_tail_arrays: cli.tail,
228
235
  }
229
236
  }
230
237
 
@@ -232,14 +239,9 @@ fn get_priority_config(
232
239
  per_file_budget: usize,
233
240
  cli: &Cli,
234
241
  ) -> headson::PriorityConfig {
235
- // Optimization: derive a conservative per‑array expansion cap from the output
236
- // budget to avoid allocating/walking items that could never appear in the
237
- // final preview. As a simple lower bound, an array of N items needs ~2*N
238
- // bytes to render (item plus comma), so we cap per‑array expansion at
239
- // budget/2. This prunes unnecessary work on large inputs without changing
240
- // output semantics.
241
242
  headson::PriorityConfig {
242
243
  max_string_graphemes: cli.string_cap,
243
244
  array_max_items: (per_file_budget / 2).max(1),
245
+ prefer_tail_arrays: cli.tail,
244
246
  }
245
247
  }