headson 0.2.5__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of headson might be problematic. Click here for more details.
- {headson-0.2.5 → headson-0.4.0}/Cargo.lock +1 -1
- {headson-0.2.5 → headson-0.4.0}/Cargo.toml +1 -1
- headson-0.4.0/PKG-INFO +169 -0
- headson-0.4.0/README.md +153 -0
- {headson-0.2.5 → headson-0.4.0}/pyproject.toml +3 -5
- {headson-0.2.5 → headson-0.4.0}/python/Cargo.lock +2 -2
- {headson-0.2.5 → headson-0.4.0}/python/Cargo.toml +2 -2
- {headson-0.2.5 → headson-0.4.0}/python/README.md +10 -2
- {headson-0.2.5 → headson-0.4.0}/python/src/lib.rs +8 -7
- {headson-0.2.5 → headson-0.4.0}/src/lib.rs +12 -10
- {headson-0.2.5 → headson-0.4.0}/src/main.rs +8 -6
- {headson-0.2.5 → headson-0.4.0}/src/order/build.rs +43 -35
- {headson-0.2.5 → headson-0.4.0}/src/order/types.rs +3 -1
- {headson-0.2.5 → headson-0.4.0}/src/serialization/mod.rs +202 -57
- {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/core.rs +6 -1
- {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/js.rs +7 -4
- {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/mod.rs +1 -0
- {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/pseudo.rs +3 -0
- {headson-0.2.5 → headson-0.4.0}/src/serialization/types.rs +2 -0
- headson-0.4.0/src/utils/graph.rs +61 -0
- {headson-0.2.5 → headson-0.4.0}/src/utils/tree_arena.rs +2 -3
- headson-0.2.5/PKG-INFO +0 -99
- headson-0.2.5/README.md +0 -83
- headson-0.2.5/src/utils/graph.rs +0 -54
- {headson-0.2.5 → headson-0.4.0}/JSONTestSuite/LICENSE +0 -0
- {headson-0.2.5 → headson-0.4.0}/JSONTestSuite/README.md +0 -0
- {headson-0.2.5 → headson-0.4.0}/LICENSE +0 -0
- {headson-0.2.5 → headson-0.4.0}/python/headson/__init__.py +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/json_ingest/builder.rs +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/json_ingest/mod.rs +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/order/mod.rs +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/order/scoring.rs +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/order/snapshots/headson__order__build__tests__order_empty_array_order.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/order/snapshots/headson__order__build__tests__order_single_string_array_order.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/serialization/snapshots/headson__serialization__tests__arena_render_single.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/serialization/templates/json.rs +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__order__tests__order_empty_array_order.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__order__tests__order_single_string_array_order.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__order__tests__pq_empty_array_queue.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__order__tests__pq_single_string_array_queue.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__queue__tests__pq_empty_array_queue.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__queue__tests__pq_single_string_array_queue.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__tree__tests__build_tree_empty.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/snapshots/headson__tree__tests__build_tree_single.snap +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/utils/json.rs +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/utils/mod.rs +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/utils/search.rs +0 -0
- {headson-0.2.5 → headson-0.4.0}/src/utils/text.rs +0 -0
headson-0.4.0/PKG-INFO
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: headson
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Classifier: Programming Language :: Python
|
|
5
|
+
Classifier: Programming Language :: Python :: 3
|
|
6
|
+
Classifier: Programming Language :: Rust
|
|
7
|
+
Classifier: Operating System :: OS Independent
|
|
8
|
+
Requires-Dist: pytest>=8 ; extra == 'test'
|
|
9
|
+
Provides-Extra: test
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Summary: Budget‑constrained JSON preview renderer (Python bindings)
|
|
12
|
+
Keywords: json,preview,summarize,cli,bindings
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
15
|
+
|
|
16
|
+
# headson
|
|
17
|
+
|
|
18
|
+
Head/tail for JSON — but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget.
|
|
19
|
+
|
|
20
|
+
Available as:
|
|
21
|
+
- CLI (see [Usage](#usage))
|
|
22
|
+
- Python library (see [Python Bindings](#python-bindings))
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
Using Cargo:
|
|
27
|
+
|
|
28
|
+
cargo install headson
|
|
29
|
+
|
|
30
|
+
From source:
|
|
31
|
+
|
|
32
|
+
cargo build --release
|
|
33
|
+
target/release/headson --help
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
## Features
|
|
37
|
+
|
|
38
|
+
- *Budgeted output*: specify exactly how much JSON you want to see
|
|
39
|
+
- *Multiple output formats*: `json` (machine‑readable), `pseudo` (human‑friendly), `js` (valid JavaScript, most detailed metadata).
|
|
40
|
+
- *Multiple inputs*: preview many files at once with a shared or per‑file budget.
|
|
41
|
+
- *Fast*: can process gigabyte-scale files in seconds (mostly disk-constrained)
|
|
42
|
+
- *Available as a CLI app and as a Python library*
|
|
43
|
+
|
|
44
|
+
## Fits into command line workflows
|
|
45
|
+
|
|
46
|
+
If you’re comfortable with tools like `head` and `tail`, use `headson` when you want a quick, structured peek into a JSON file without dumping the entire thing.
|
|
47
|
+
|
|
48
|
+
- `head`/`tail` operate on bytes/lines - their output is not optimized for tree structures
|
|
49
|
+
- `jq` requires you to craft filters to preview large JSON files
|
|
50
|
+
- `headson` is like head/tail for trees: zero config but it keeps structure and represents content as much as possible
|
|
51
|
+
|
|
52
|
+
## Usage
|
|
53
|
+
|
|
54
|
+
headson [FLAGS] [INPUT...]
|
|
55
|
+
|
|
56
|
+
- INPUT (optional, repeatable): file path(s). If omitted, reads JSON from stdin. Multiple input files are supported.
|
|
57
|
+
- Prints the preview to stdout. On parse errors, exits non‑zero and prints an error to stderr.
|
|
58
|
+
|
|
59
|
+
Common flags:
|
|
60
|
+
|
|
61
|
+
- `-n, --budget <BYTES>`: per‑file output budget. When multiple input files are provided, the total budget equals `<BYTES> * number_of_inputs`.
|
|
62
|
+
- `-N, --global-budget <BYTES>`: total output budget across all inputs. Useful when you want a fixed-size preview across many files (may omit entire files). Mutually exclusive with `--budget`.
|
|
63
|
+
- `-f, --template <json|pseudo|js>`: output style (default: `pseudo`)
|
|
64
|
+
- `-m, --compact`: no indentation, no spaces, no newlines
|
|
65
|
+
- `--no-newline`: single line output
|
|
66
|
+
- `--no-space`: no space after `:` in objects
|
|
67
|
+
- `--indent <STR>`: indentation unit (default: two spaces)
|
|
68
|
+
- `--string-cap <N>`: max graphemes to consider per string (default: 500)
|
|
69
|
+
- `--tail`: prefer the end of arrays when truncating. Strings are unaffected. In `pseudo`/`js` templates the omission marker appears at the start; `json` remains strict JSON with no annotations.
|
|
70
|
+
|
|
71
|
+
Notes:
|
|
72
|
+
|
|
73
|
+
- With multiple input files:
|
|
74
|
+
- JSON template outputs a single JSON object keyed by the input file paths.
|
|
75
|
+
- Pseudo and JS templates render file sections with human-readable headers when newlines are enabled.
|
|
76
|
+
- If you use `--compact` or `--no-newline` (both disable newlines), fileset output falls back to standard inline rendering (no per-file headers) to remain compact.
|
|
77
|
+
- Using `--global-budget` may truncate or omit entire files to respect the total budget.
|
|
78
|
+
- The tool finds the largest preview that fits the budget; if even the tiniest preview exceeds it, you still get a minimal, valid preview.
|
|
79
|
+
- When passing file paths, directories and binary files are ignored; a notice is printed to stderr for each (e.g., `Ignored binary file: ./path/to/file`). Stdin mode reads the stream as-is.
|
|
80
|
+
|
|
81
|
+
Quick one‑liners:
|
|
82
|
+
|
|
83
|
+
- Peek a big JSON stream (keeps structure):
|
|
84
|
+
|
|
85
|
+
zstdcat huge.json.zst | headson -n 800 -f pseudo
|
|
86
|
+
|
|
87
|
+
- Many files with a fixed overall size:
|
|
88
|
+
|
|
89
|
+
headson -N 1200 -f json logs/*.json
|
|
90
|
+
|
|
91
|
+
- Glance at a file, JavaScript‑style comments for omissions:
|
|
92
|
+
|
|
93
|
+
headson -n 400 -f js data.json
|
|
94
|
+
|
|
95
|
+
Show help:
|
|
96
|
+
|
|
97
|
+
headson --help
|
|
98
|
+
|
|
99
|
+
## Examples: head vs headson
|
|
100
|
+
|
|
101
|
+
Input:
|
|
102
|
+
|
|
103
|
+
```json
|
|
104
|
+
{"users":[{"id":1,"name":"Ana","roles":["admin","dev"]},{"id":2,"name":"Bo"}],"meta":{"count":2,"source":"db"}}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Naive cut (can break mid‑token):
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
jq -c . users.json | head -c 80
|
|
111
|
+
# {"users":[{"id":1,"name":"Ana","roles":["admin","dev"]},{"id":2,"name":"Bo"}],"me
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Structured preview with headson (pseudo):
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
headson -n 120 -f pseudo users.json
|
|
118
|
+
# {
|
|
119
|
+
# users: [
|
|
120
|
+
# { id: 1, name: "Ana", roles: [ "admin", … ] },
|
|
121
|
+
# …
|
|
122
|
+
# ]
|
|
123
|
+
# meta: { count: 2, … }
|
|
124
|
+
# }
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Machine‑readable preview (json):
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
headson -n 120 -f json users.json
|
|
131
|
+
# {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Python Bindings
|
|
135
|
+
|
|
136
|
+
A thin Python extension module is available on PyPI as `headson`.
|
|
137
|
+
|
|
138
|
+
- Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
|
|
139
|
+
- API:
|
|
140
|
+
- `headson.summarize(text: str, *, template: str = "pseudo", character_budget: int | None = None, tail: bool = False) -> str`
|
|
141
|
+
- `template`: one of `"json" | "pseudo" | "js"`
|
|
142
|
+
- `character_budget`: maximum output size in characters (default: 500)
|
|
143
|
+
- `tail`: prefer the end of arrays when truncating; strings unaffected. Affects only display templates (`pseudo`/`js`); `json` remains strict.
|
|
144
|
+
|
|
145
|
+
Example:
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
import json
|
|
149
|
+
import headson
|
|
150
|
+
|
|
151
|
+
data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
|
|
152
|
+
preview = headson.summarize(json.dumps(data), template="json", character_budget=200)
|
|
153
|
+
print(preview)
|
|
154
|
+
|
|
155
|
+
# Prefer the tail of arrays (annotations show in pseudo/js only)
|
|
156
|
+
print(
|
|
157
|
+
headson.summarize(
|
|
158
|
+
json.dumps(list(range(100))),
|
|
159
|
+
template="pseudo",
|
|
160
|
+
character_budget=80,
|
|
161
|
+
tail=True,
|
|
162
|
+
)
|
|
163
|
+
)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## License
|
|
167
|
+
|
|
168
|
+
MIT
|
|
169
|
+
|
headson-0.4.0/README.md
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# headson
|
|
2
|
+
|
|
3
|
+
Head/tail for JSON — but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget.
|
|
4
|
+
|
|
5
|
+
Available as:
|
|
6
|
+
- CLI (see [Usage](#usage))
|
|
7
|
+
- Python library (see [Python Bindings](#python-bindings))
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
Using Cargo:
|
|
12
|
+
|
|
13
|
+
cargo install headson
|
|
14
|
+
|
|
15
|
+
From source:
|
|
16
|
+
|
|
17
|
+
cargo build --release
|
|
18
|
+
target/release/headson --help
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
## Features
|
|
22
|
+
|
|
23
|
+
- *Budgeted output*: specify exactly how much JSON you want to see
|
|
24
|
+
- *Multiple output formats*: `json` (machine‑readable), `pseudo` (human‑friendly), `js` (valid JavaScript, most detailed metadata).
|
|
25
|
+
- *Multiple inputs*: preview many files at once with a shared or per‑file budget.
|
|
26
|
+
- *Fast*: can process gigabyte-scale files in seconds (mostly disk-constrained)
|
|
27
|
+
- *Available as a CLI app and as a Python library*
|
|
28
|
+
|
|
29
|
+
## Fits into command line workflows
|
|
30
|
+
|
|
31
|
+
If you’re comfortable with tools like `head` and `tail`, use `headson` when you want a quick, structured peek into a JSON file without dumping the entire thing.
|
|
32
|
+
|
|
33
|
+
- `head`/`tail` operate on bytes/lines - their output is not optimized for tree structures
|
|
34
|
+
- `jq` requires you to craft filters to preview large JSON files
|
|
35
|
+
- `headson` is like head/tail for trees: zero config but it keeps structure and represents content as much as possible
|
|
36
|
+
|
|
37
|
+
## Usage
|
|
38
|
+
|
|
39
|
+
headson [FLAGS] [INPUT...]
|
|
40
|
+
|
|
41
|
+
- INPUT (optional, repeatable): file path(s). If omitted, reads JSON from stdin. Multiple input files are supported.
|
|
42
|
+
- Prints the preview to stdout. On parse errors, exits non‑zero and prints an error to stderr.
|
|
43
|
+
|
|
44
|
+
Common flags:
|
|
45
|
+
|
|
46
|
+
- `-n, --budget <BYTES>`: per‑file output budget. When multiple input files are provided, the total budget equals `<BYTES> * number_of_inputs`.
|
|
47
|
+
- `-N, --global-budget <BYTES>`: total output budget across all inputs. Useful when you want a fixed-size preview across many files (may omit entire files). Mutually exclusive with `--budget`.
|
|
48
|
+
- `-f, --template <json|pseudo|js>`: output style (default: `pseudo`)
|
|
49
|
+
- `-m, --compact`: no indentation, no spaces, no newlines
|
|
50
|
+
- `--no-newline`: single line output
|
|
51
|
+
- `--no-space`: no space after `:` in objects
|
|
52
|
+
- `--indent <STR>`: indentation unit (default: two spaces)
|
|
53
|
+
- `--string-cap <N>`: max graphemes to consider per string (default: 500)
|
|
54
|
+
- `--tail`: prefer the end of arrays when truncating. Strings are unaffected. In `pseudo`/`js` templates the omission marker appears at the start; `json` remains strict JSON with no annotations.
|
|
55
|
+
|
|
56
|
+
Notes:
|
|
57
|
+
|
|
58
|
+
- With multiple input files:
|
|
59
|
+
- JSON template outputs a single JSON object keyed by the input file paths.
|
|
60
|
+
- Pseudo and JS templates render file sections with human-readable headers when newlines are enabled.
|
|
61
|
+
- If you use `--compact` or `--no-newline` (both disable newlines), fileset output falls back to standard inline rendering (no per-file headers) to remain compact.
|
|
62
|
+
- Using `--global-budget` may truncate or omit entire files to respect the total budget.
|
|
63
|
+
- The tool finds the largest preview that fits the budget; if even the tiniest preview exceeds it, you still get a minimal, valid preview.
|
|
64
|
+
- When passing file paths, directories and binary files are ignored; a notice is printed to stderr for each (e.g., `Ignored binary file: ./path/to/file`). Stdin mode reads the stream as-is.
|
|
65
|
+
|
|
66
|
+
Quick one‑liners:
|
|
67
|
+
|
|
68
|
+
- Peek a big JSON stream (keeps structure):
|
|
69
|
+
|
|
70
|
+
zstdcat huge.json.zst | headson -n 800 -f pseudo
|
|
71
|
+
|
|
72
|
+
- Many files with a fixed overall size:
|
|
73
|
+
|
|
74
|
+
headson -N 1200 -f json logs/*.json
|
|
75
|
+
|
|
76
|
+
- Glance at a file, JavaScript‑style comments for omissions:
|
|
77
|
+
|
|
78
|
+
headson -n 400 -f js data.json
|
|
79
|
+
|
|
80
|
+
Show help:
|
|
81
|
+
|
|
82
|
+
headson --help
|
|
83
|
+
|
|
84
|
+
## Examples: head vs headson
|
|
85
|
+
|
|
86
|
+
Input:
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{"users":[{"id":1,"name":"Ana","roles":["admin","dev"]},{"id":2,"name":"Bo"}],"meta":{"count":2,"source":"db"}}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Naive cut (can break mid‑token):
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
jq -c . users.json | head -c 80
|
|
96
|
+
# {"users":[{"id":1,"name":"Ana","roles":["admin","dev"]},{"id":2,"name":"Bo"}],"me
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Structured preview with headson (pseudo):
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
headson -n 120 -f pseudo users.json
|
|
103
|
+
# {
|
|
104
|
+
# users: [
|
|
105
|
+
# { id: 1, name: "Ana", roles: [ "admin", … ] },
|
|
106
|
+
# …
|
|
107
|
+
# ]
|
|
108
|
+
# meta: { count: 2, … }
|
|
109
|
+
# }
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Machine‑readable preview (json):
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
headson -n 120 -f json users.json
|
|
116
|
+
# {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Python Bindings
|
|
120
|
+
|
|
121
|
+
A thin Python extension module is available on PyPI as `headson`.
|
|
122
|
+
|
|
123
|
+
- Install: `pip install headson` (ABI3 wheels for Python 3.10+ on Linux/macOS/Windows).
|
|
124
|
+
- API:
|
|
125
|
+
- `headson.summarize(text: str, *, template: str = "pseudo", character_budget: int | None = None, tail: bool = False) -> str`
|
|
126
|
+
- `template`: one of `"json" | "pseudo" | "js"`
|
|
127
|
+
- `character_budget`: maximum output size in characters (default: 500)
|
|
128
|
+
- `tail`: prefer the end of arrays when truncating; strings unaffected. Affects only display templates (`pseudo`/`js`); `json` remains strict.
|
|
129
|
+
|
|
130
|
+
Example:
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
import json
|
|
134
|
+
import headson
|
|
135
|
+
|
|
136
|
+
data = {"foo": [1, 2, 3], "bar": {"x": "y"}}
|
|
137
|
+
preview = headson.summarize(json.dumps(data), template="json", character_budget=200)
|
|
138
|
+
print(preview)
|
|
139
|
+
|
|
140
|
+
# Prefer the tail of arrays (annotations show in pseudo/js only)
|
|
141
|
+
print(
|
|
142
|
+
headson.summarize(
|
|
143
|
+
json.dumps(list(range(100))),
|
|
144
|
+
template="pseudo",
|
|
145
|
+
character_budget=80,
|
|
146
|
+
tail=True,
|
|
147
|
+
)
|
|
148
|
+
)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## License
|
|
152
|
+
|
|
153
|
+
MIT
|
|
@@ -4,10 +4,10 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "headson"
|
|
7
|
-
version = "0.2.5"
|
|
7
|
+
version = "0.4.0"
|
|
8
8
|
description = "Budget‑constrained JSON preview renderer (Python bindings)"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
11
|
classifiers = [
|
|
12
12
|
"Programming Language :: Python",
|
|
13
13
|
"Programming Language :: Python :: 3",
|
|
@@ -32,7 +32,7 @@ python-source = "python"
|
|
|
32
32
|
dev = [
|
|
33
33
|
"pytest>=8",
|
|
34
34
|
"maturin>=1.7,<2",
|
|
35
|
-
"ruff
|
|
35
|
+
"ruff==0.14.2",
|
|
36
36
|
]
|
|
37
37
|
|
|
38
38
|
[tool.ruff]
|
|
@@ -42,5 +42,3 @@ src = ["python", "tests_py"]
|
|
|
42
42
|
|
|
43
43
|
[tool.ruff.lint]
|
|
44
44
|
select = ["E", "F"]
|
|
45
|
-
|
|
46
|
-
|
|
@@ -169,7 +169,7 @@ dependencies = [
|
|
|
169
169
|
|
|
170
170
|
[[package]]
|
|
171
171
|
name = "headson"
|
|
172
|
-
version = "0.
|
|
172
|
+
version = "0.4.0"
|
|
173
173
|
dependencies = [
|
|
174
174
|
"anyhow",
|
|
175
175
|
"clap",
|
|
@@ -182,7 +182,7 @@ dependencies = [
|
|
|
182
182
|
|
|
183
183
|
[[package]]
|
|
184
184
|
name = "headson-python"
|
|
185
|
-
version = "0.
|
|
185
|
+
version = "0.4.0"
|
|
186
186
|
dependencies = [
|
|
187
187
|
"anyhow",
|
|
188
188
|
"headson",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "headson-python"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.4.0"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
publish = false
|
|
6
6
|
readme = "README.md"
|
|
@@ -11,5 +11,5 @@ crate-type = ["cdylib"]
|
|
|
11
11
|
|
|
12
12
|
[dependencies]
|
|
13
13
|
anyhow = "1"
|
|
14
|
-
pyo3 = { version = "0.27", features = ["extension-module"] }
|
|
14
|
+
pyo3 = { version = "0.27", features = ["extension-module", "abi3-py310"] }
|
|
15
15
|
headson_core = { package = "headson", path = ".." }
|
|
@@ -4,19 +4,27 @@ Minimal Python API for the `headson` JSON preview renderer.
|
|
|
4
4
|
|
|
5
5
|
Currently exported function:
|
|
6
6
|
|
|
7
|
-
- `headson.summarize(text: str, *, template: str = "pseudo", character_budget: int | None = None) -> str`
|
|
7
|
+
- `headson.summarize(text: str, *, template: str = "pseudo", character_budget: int | None = None, tail: bool = False) -> str`
|
|
8
8
|
- `template`: one of `"json" | "pseudo" | "js"`.
|
|
9
9
|
- `character_budget`: maximum output size in characters (defaults to 500 if not set).
|
|
10
|
+
- `tail`: prefer the end of arrays when truncating; strings are unaffected. Only affects display templates (`pseudo`/`js`); `json` remains strict JSON with no annotations.
|
|
10
11
|
|
|
11
12
|
Examples:
|
|
12
13
|
|
|
13
14
|
```python
|
|
14
15
|
import headson
|
|
15
16
|
|
|
17
|
+
# Pseudo template with a small budget (structure-aware preview)
|
|
16
18
|
print(headson.summarize('{"a": 1, "b": [1,2,3]}', template="pseudo", character_budget=80))
|
|
19
|
+
|
|
20
|
+
# Strict JSON template preserves valid JSON output
|
|
17
21
|
print(headson.summarize('{"a": 1, "b": {"c": 2}}', template="json", character_budget=10_000))
|
|
22
|
+
|
|
23
|
+
# JS template with tail preference: prefer the end of arrays when truncating
|
|
18
24
|
arr = ','.join(str(i) for i in range(100))
|
|
19
|
-
print(headson.summarize('{"arr": [' + arr + ']}', template="js", character_budget=60))
|
|
25
|
+
print(headson.summarize('{"arr": [' + arr + ']}', template="js", character_budget=60, tail=True))
|
|
26
|
+
|
|
27
|
+
# Note: tail mode affects only pseudo/js display templates; the json template stays strict.
|
|
20
28
|
```
|
|
21
29
|
|
|
22
30
|
Install for development:
|
|
@@ -13,9 +13,7 @@ fn to_template(s: &str) -> Result<OutputTemplate> {
|
|
|
13
13
|
}
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
-
fn render_config(
|
|
17
|
-
template: &str,
|
|
18
|
-
) -> Result<RenderConfig> {
|
|
16
|
+
fn render_config(template: &str, prefer_tail_arrays: bool) -> Result<RenderConfig> {
|
|
19
17
|
let t = to_template(template)?;
|
|
20
18
|
let space = " ".to_string();
|
|
21
19
|
let newline = "\n".to_string();
|
|
@@ -25,13 +23,15 @@ fn render_config(
|
|
|
25
23
|
indent_unit,
|
|
26
24
|
space,
|
|
27
25
|
newline,
|
|
26
|
+
prefer_tail_arrays,
|
|
28
27
|
})
|
|
29
28
|
}
|
|
30
29
|
|
|
31
|
-
fn priority_config(per_file_budget: usize) -> PriorityConfig {
|
|
30
|
+
fn priority_config(per_file_budget: usize, prefer_tail_arrays: bool) -> PriorityConfig {
|
|
32
31
|
PriorityConfig {
|
|
33
32
|
max_string_graphemes: 500,
|
|
34
33
|
array_max_items: (per_file_budget / 2).max(1),
|
|
34
|
+
prefer_tail_arrays,
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
|
|
@@ -40,17 +40,18 @@ fn to_pyerr(e: anyhow::Error) -> PyErr {
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
#[pyfunction]
|
|
43
|
-
#[pyo3(signature = (text, *, template="pseudo", character_budget=None))]
|
|
43
|
+
#[pyo3(signature = (text, *, template="pseudo", character_budget=None, tail=false))]
|
|
44
44
|
fn summarize(
|
|
45
45
|
py: Python<'_>,
|
|
46
46
|
text: &str,
|
|
47
47
|
template: &str,
|
|
48
48
|
character_budget: Option<usize>,
|
|
49
|
+
tail: bool,
|
|
49
50
|
) -> PyResult<String> {
|
|
50
|
-
let cfg = render_config(template).map_err(to_pyerr)?;
|
|
51
|
+
let cfg = render_config(template, tail).map_err(to_pyerr)?;
|
|
51
52
|
let budget = character_budget.unwrap_or(500);
|
|
52
53
|
let per_file_for_priority = budget.max(1);
|
|
53
|
-
let prio = priority_config(per_file_for_priority);
|
|
54
|
+
let prio = priority_config(per_file_for_priority, tail);
|
|
54
55
|
let input = text.as_bytes().to_vec();
|
|
55
56
|
py.detach(|| headson_core::headson(input, &cfg, &prio, budget).map_err(to_pyerr))
|
|
56
57
|
}
|
|
@@ -70,20 +70,22 @@ fn find_largest_render_under_budget(
|
|
|
70
70
|
// Each included node contributes at least some output; cap hi by budget.
|
|
71
71
|
let lo = 1usize;
|
|
72
72
|
let hi = total.min(char_budget.max(1));
|
|
73
|
-
//
|
|
74
|
-
|
|
75
|
-
let mut
|
|
73
|
+
// Reuse render-inclusion flags across render attempts to avoid clearing the vector.
|
|
74
|
+
// A node participates in the current render attempt when inclusion_flags[id] == render_set_id.
|
|
75
|
+
let mut inclusion_flags: Vec<u32> = vec![0; total];
|
|
76
|
+
// Each render attempt bumps this non-zero identifier to create a fresh inclusion set.
|
|
77
|
+
let mut render_set_id: u32 = 1;
|
|
76
78
|
let mut best_str: Option<String> = None;
|
|
77
79
|
|
|
78
80
|
let _ = crate::utils::search::binary_search_max(lo, hi, |mid| {
|
|
79
|
-
let s = crate::serialization::
|
|
81
|
+
let s = crate::serialization::render_top_k(
|
|
80
82
|
order_build,
|
|
81
83
|
mid,
|
|
82
|
-
&mut
|
|
83
|
-
|
|
84
|
+
&mut inclusion_flags,
|
|
85
|
+
render_set_id,
|
|
84
86
|
config,
|
|
85
87
|
);
|
|
86
|
-
|
|
88
|
+
render_set_id = render_set_id.wrapping_add(1).max(1);
|
|
87
89
|
if s.len() <= char_budget {
|
|
88
90
|
best_str = Some(s);
|
|
89
91
|
true
|
|
@@ -97,11 +99,11 @@ fn find_largest_render_under_budget(
|
|
|
97
99
|
} else {
|
|
98
100
|
// Fallback: always render a single node (k=1) to produce the
|
|
99
101
|
// shortest possible preview, even if it exceeds the byte budget.
|
|
100
|
-
crate::serialization::
|
|
102
|
+
crate::serialization::render_top_k(
|
|
101
103
|
order_build,
|
|
102
104
|
1,
|
|
103
|
-
&mut
|
|
104
|
-
|
|
105
|
+
&mut inclusion_flags,
|
|
106
|
+
render_set_id,
|
|
105
107
|
config,
|
|
106
108
|
)
|
|
107
109
|
}
|
|
@@ -53,6 +53,12 @@ struct Cli {
|
|
|
53
53
|
help = "Total output budget across all inputs; useful to keep multiple files within a fixed overall output size (may omit entire files)."
|
|
54
54
|
)]
|
|
55
55
|
global_budget: Option<usize>,
|
|
56
|
+
#[arg(
|
|
57
|
+
long = "tail",
|
|
58
|
+
default_value_t = false,
|
|
59
|
+
help = "Prefer the end of arrays when truncating. Strings unaffected; JSON stays strict."
|
|
60
|
+
)]
|
|
61
|
+
tail: bool,
|
|
56
62
|
#[arg(
|
|
57
63
|
value_name = "INPUT",
|
|
58
64
|
value_hint = clap::ValueHint::FilePath,
|
|
@@ -225,6 +231,7 @@ fn get_render_config_from(cli: &Cli) -> headson::RenderConfig {
|
|
|
225
231
|
indent_unit,
|
|
226
232
|
space,
|
|
227
233
|
newline,
|
|
234
|
+
prefer_tail_arrays: cli.tail,
|
|
228
235
|
}
|
|
229
236
|
}
|
|
230
237
|
|
|
@@ -232,14 +239,9 @@ fn get_priority_config(
|
|
|
232
239
|
per_file_budget: usize,
|
|
233
240
|
cli: &Cli,
|
|
234
241
|
) -> headson::PriorityConfig {
|
|
235
|
-
// Optimization: derive a conservative per‑array expansion cap from the output
|
|
236
|
-
// budget to avoid allocating/walking items that could never appear in the
|
|
237
|
-
// final preview. As a simple lower bound, an array of N items needs ~2*N
|
|
238
|
-
// bytes to render (item plus comma), so we cap per‑array expansion at
|
|
239
|
-
// budget/2. This prunes unnecessary work on large inputs without changing
|
|
240
|
-
// output semantics.
|
|
241
242
|
headson::PriorityConfig {
|
|
242
243
|
max_string_graphemes: cli.string_cap,
|
|
243
244
|
array_max_items: (per_file_budget / 2).max(1),
|
|
245
|
+
prefer_tail_arrays: cli.tail,
|
|
244
246
|
}
|
|
245
247
|
}
|