headson 0.6.2__tar.gz → 0.6.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {headson-0.6.2 → headson-0.6.4}/Cargo.lock +1 -1
- {headson-0.6.2 → headson-0.6.4}/Cargo.toml +1 -1
- {headson-0.6.2 → headson-0.6.4}/PKG-INFO +43 -16
- {headson-0.6.2 → headson-0.6.4}/README.md +42 -15
- headson-0.6.4/docs/assets/tapes/demo.gif +0 -0
- {headson-0.6.2 → headson-0.6.4}/pyproject.toml +1 -1
- {headson-0.6.2 → headson-0.6.4}/python/Cargo.lock +2 -2
- {headson-0.6.2 → headson-0.6.4}/python/Cargo.toml +1 -1
- {headson-0.6.2 → headson-0.6.4}/python/README.md +6 -2
- {headson-0.6.2 → headson-0.6.4}/python/src/lib.rs +9 -7
- {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/builder.rs +2 -5
- {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/mod.rs +41 -6
- {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/samplers/default.rs +4 -6
- {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/samplers/head.rs +2 -1
- {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/samplers/mod.rs +2 -20
- {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/samplers/tail.rs +6 -3
- headson-0.6.4/src/ingest/formats/mod.rs +9 -0
- headson-0.6.4/src/ingest/formats/text/mod.rs +274 -0
- {headson-0.6.2/src/yaml_ingest → headson-0.6.4/src/ingest/formats/yaml}/mod.rs +124 -78
- headson-0.6.4/src/ingest/mod.rs +70 -0
- headson-0.6.4/src/ingest/sampling/mod.rs +118 -0
- {headson-0.6.2 → headson-0.6.4}/src/lib.rs +26 -2
- {headson-0.6.2 → headson-0.6.4}/src/main.rs +41 -13
- {headson-0.6.2 → headson-0.6.4}/src/order/build.rs +2 -2
- {headson-0.6.2 → headson-0.6.4}/src/serialization/fileset.rs +2 -1
- {headson-0.6.2 → headson-0.6.4}/src/serialization/mod.rs +87 -24
- {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/mod.rs +3 -0
- headson-0.6.4/src/serialization/templates/text.rs +43 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/types.rs +1 -0
- headson-0.6.2/docs/assets/tapes/demo.gif +0 -0
- headson-0.6.2/src/ingest/mod.rs +0 -120
- {headson-0.6.2 → headson-0.6.4}/docs/assets/algorithm.svg +0 -0
- {headson-0.6.2 → headson-0.6.4}/docs/assets/logo.png +0 -0
- {headson-0.6.2 → headson-0.6.4}/docs/assets/logo.svg +0 -0
- {headson-0.6.2 → headson-0.6.4}/python/headson/__init__.py +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/format.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/order/mod.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/order/scoring.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/order/snapshots/headson__order__build__tests__order_empty_array_order.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/order/snapshots/headson__order__build__tests__order_single_string_array_order.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/order/types.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/color.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/output.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty_yaml.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__arena_render_single.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__arena_render_single_yaml.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_internal_gaps_yaml.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_head.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_tail.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_head.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_tail.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_head.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_tail.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_json.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_yaml.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/core.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/js.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/json.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/pseudo.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/yaml.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/snapshots/headson__order__tests__order_empty_array_order.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/snapshots/headson__order__tests__order_single_string_array_order.snap +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/utils/graph.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/utils/json.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/utils/mod.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/utils/search.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/utils/text.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/src/utils/tree_arena.rs +0 -0
- {headson-0.6.2 → headson-0.6.4}/tests/fixtures/json/JSONTestSuite/LICENSE +0 -0
- {headson-0.6.2 → headson-0.6.4}/tests/fixtures/json/JSONTestSuite/README.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: headson
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.6.4
|
|
4
4
|
Classifier: Programming Language :: Python
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Programming Language :: Rust
|
|
@@ -20,12 +20,15 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
|
20
20
|
<br/>
|
|
21
21
|
</p>
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
`head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
|
|
24
24
|
|
|
25
25
|
Available as:
|
|
26
26
|
- CLI (see [Usage](#usage))
|
|
27
27
|
- Python library (see [Python Bindings](#python-bindings))
|
|
28
28
|
|
|
29
|
+
  
|
|
30
|
+
|
|
31
|
+
|
|
29
32
|
## Install
|
|
30
33
|
|
|
31
34
|
Using Cargo:
|
|
@@ -41,10 +44,11 @@ From source:
|
|
|
41
44
|
## Features
|
|
42
45
|
|
|
43
46
|
- Budgeted output: specify exactly how much you want to see
|
|
44
|
-
- Output formats: `auto | json | yaml`
|
|
47
|
+
- Output formats: `auto | json | yaml | text`
|
|
45
48
|
- Styles: `strict | default | detailed`
|
|
46
49
|
- JSON family: `strict` → strict JSON, `default` → human‑friendly Pseudo, `detailed` → JS with inline comments
|
|
47
50
|
- YAML: always YAML; `strict` has no comments, `default` uses “# …”, `detailed` uses “# N more …”
|
|
51
|
+
- Text: prints raw lines. In `default` style, omissions are shown as a single line `…`; in `detailed`, as `… N more lines …`. `strict` omits array‑level summaries.
|
|
48
52
|
- Multiple inputs: preview many files at once with a shared or per‑file budget
|
|
49
53
|
- Fast: processes gigabyte‑scale files in seconds (mostly disk‑bound)
|
|
50
54
|
- Available as a CLI app and as a Python library
|
|
@@ -66,14 +70,14 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
|
|
|
66
70
|
|
|
67
71
|
Common flags:
|
|
68
72
|
|
|
69
|
-
- `-
|
|
70
|
-
- `-
|
|
71
|
-
- `-f, --format <auto|json|yaml>`: output format (default: `auto`).
|
|
72
|
-
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML).
|
|
73
|
+
- `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
|
|
74
|
+
- `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
|
|
75
|
+
- `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
|
|
76
|
+
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
|
|
73
77
|
- `-t, --template <strict|default|detailed>`: output style (default: `default`).
|
|
74
78
|
- JSON family: `strict` → strict JSON; `default` → Pseudo; `detailed` → JS with inline comments.
|
|
75
79
|
- YAML: always YAML; style only affects comments (`strict` none, `default` “# …”, `detailed` “# N more …”).
|
|
76
|
-
- `-i, --input-format <json|yaml>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
|
|
80
|
+
- `-i, --input-format <json|yaml|text>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
|
|
77
81
|
- `-m, --compact`: no indentation, no spaces, no newlines
|
|
78
82
|
- `--no-newline`: single line output
|
|
79
83
|
- `--no-space`: no space after `:` in objects
|
|
@@ -86,8 +90,9 @@ Notes:
|
|
|
86
90
|
|
|
87
91
|
- Multiple inputs:
|
|
88
92
|
- With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
|
|
89
|
-
|
|
90
|
-
-
|
|
93
|
+
- In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
|
|
94
|
+
- Unknown extensions are treated as Text (raw lines) — safe for logs and `.txt` files.
|
|
95
|
+
- `--global-bytes` may truncate or omit entire files to respect the total budget.
|
|
91
96
|
- The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
|
|
92
97
|
- Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
|
|
93
98
|
- Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
|
|
@@ -96,24 +101,45 @@ Quick one‑liners:
|
|
|
96
101
|
|
|
97
102
|
- Peek a big JSON stream (keeps structure):
|
|
98
103
|
|
|
99
|
-
zstdcat huge.json.zst | headson -
|
|
104
|
+
zstdcat huge.json.zst | headson -c 800 -f json -t default
|
|
100
105
|
|
|
101
106
|
- Many files with a fixed overall size:
|
|
102
107
|
|
|
103
|
-
headson -
|
|
108
|
+
headson -C 1200 -f json -t strict logs/*.json
|
|
104
109
|
|
|
105
110
|
- Glance at a file, JavaScript‑style comments for omissions:
|
|
106
111
|
|
|
107
|
-
headson -
|
|
112
|
+
headson -c 400 -f json -t detailed data.json
|
|
108
113
|
|
|
109
114
|
- YAML with detailed comments:
|
|
110
115
|
|
|
111
|
-
headson -
|
|
116
|
+
headson -c 400 -f yaml -t detailed config.yaml
|
|
117
|
+
|
|
118
|
+
### Text mode
|
|
119
|
+
|
|
120
|
+
- Single file (auto):
|
|
121
|
+
|
|
122
|
+
headson -c 200 notes.txt
|
|
123
|
+
|
|
124
|
+
- Force Text ingest/output (useful when mixing with other extensions):
|
|
125
|
+
|
|
126
|
+
headson -c 200 -i text -f text notes.txt
|
|
127
|
+
|
|
128
|
+
- Many text files (fileset):
|
|
129
|
+
|
|
130
|
+
headson -c 800 -i text -f text logs/*.txt
|
|
131
|
+
|
|
132
|
+
- Styles on Text:
|
|
133
|
+
- default: omission as a standalone `…` line.
|
|
134
|
+
- detailed: omission as `… N more lines …`.
|
|
135
|
+
- strict: no array‑level omission line (individual long lines may still truncate with `…`).
|
|
112
136
|
|
|
113
137
|
Show help:
|
|
114
138
|
|
|
115
139
|
headson --help
|
|
116
140
|
|
|
141
|
+
Note: flags align with head/tail conventions (`-c/--bytes`, `-C/--global-bytes`).
|
|
142
|
+
|
|
117
143
|
## Examples: head vs headson
|
|
118
144
|
|
|
119
145
|
Input:
|
|
@@ -132,7 +158,7 @@ jq -c . users.json | head -c 80
|
|
|
132
158
|
Structured preview with headson (JSON family, default style → Pseudo):
|
|
133
159
|
|
|
134
160
|
```bash
|
|
135
|
-
headson -n 120 -f json -t default users.json
|
|
161
|
+
headson -c 120 -f json -t default users.json
|
|
136
162
|
# {
|
|
137
163
|
# users: [
|
|
138
164
|
# { id: 1, name: "Ana", roles: [ "admin", … ] },
|
|
@@ -145,7 +171,7 @@ headson -n 120 -f json -t default users.json
|
|
|
145
171
|
Machine‑readable preview (JSON family, strict style → strict JSON):
|
|
146
172
|
|
|
147
173
|
```bash
|
|
148
|
-
headson -
|
|
174
|
+
headson -c 120 -f json -t strict users.json
|
|
149
175
|
# {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
|
|
150
176
|
```
|
|
151
177
|
|
|
@@ -157,6 +183,7 @@ Regenerate locally:
|
|
|
157
183
|
- Run: cargo make tapes
|
|
158
184
|
- Outputs are written to docs/assets/tapes
|
|
159
185
|
|
|
186
|
+
|
|
160
187
|
## Python Bindings
|
|
161
188
|
|
|
162
189
|
A thin Python extension module is available on PyPI as `headson`.
|
|
@@ -6,12 +6,15 @@
|
|
|
6
6
|
<br/>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
`head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
|
|
10
10
|
|
|
11
11
|
Available as:
|
|
12
12
|
- CLI (see [Usage](#usage))
|
|
13
13
|
- Python library (see [Python Bindings](#python-bindings))
|
|
14
14
|
|
|
15
|
+
  
|
|
16
|
+
|
|
17
|
+
|
|
15
18
|
## Install
|
|
16
19
|
|
|
17
20
|
Using Cargo:
|
|
@@ -27,10 +30,11 @@ From source:
|
|
|
27
30
|
## Features
|
|
28
31
|
|
|
29
32
|
- Budgeted output: specify exactly how much you want to see
|
|
30
|
-
- Output formats: `auto | json | yaml`
|
|
33
|
+
- Output formats: `auto | json | yaml | text`
|
|
31
34
|
- Styles: `strict | default | detailed`
|
|
32
35
|
- JSON family: `strict` → strict JSON, `default` → human‑friendly Pseudo, `detailed` → JS with inline comments
|
|
33
36
|
- YAML: always YAML; `strict` has no comments, `default` uses “# …”, `detailed` uses “# N more …”
|
|
37
|
+
- Text: prints raw lines. In `default` style, omissions are shown as a single line `…`; in `detailed`, as `… N more lines …`. `strict` omits array‑level summaries.
|
|
34
38
|
- Multiple inputs: preview many files at once with a shared or per‑file budget
|
|
35
39
|
- Fast: processes gigabyte‑scale files in seconds (mostly disk‑bound)
|
|
36
40
|
- Available as a CLI app and as a Python library
|
|
@@ -52,14 +56,14 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
|
|
|
52
56
|
|
|
53
57
|
Common flags:
|
|
54
58
|
|
|
55
|
-
- `-
|
|
56
|
-
- `-
|
|
57
|
-
- `-f, --format <auto|json|yaml>`: output format (default: `auto`).
|
|
58
|
-
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML).
|
|
59
|
+
- `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
|
|
60
|
+
- `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
|
|
61
|
+
- `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
|
|
62
|
+
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
|
|
59
63
|
- `-t, --template <strict|default|detailed>`: output style (default: `default`).
|
|
60
64
|
- JSON family: `strict` → strict JSON; `default` → Pseudo; `detailed` → JS with inline comments.
|
|
61
65
|
- YAML: always YAML; style only affects comments (`strict` none, `default` “# …”, `detailed` “# N more …”).
|
|
62
|
-
- `-i, --input-format <json|yaml>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
|
|
66
|
+
- `-i, --input-format <json|yaml|text>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
|
|
63
67
|
- `-m, --compact`: no indentation, no spaces, no newlines
|
|
64
68
|
- `--no-newline`: single line output
|
|
65
69
|
- `--no-space`: no space after `:` in objects
|
|
@@ -72,8 +76,9 @@ Notes:
|
|
|
72
76
|
|
|
73
77
|
- Multiple inputs:
|
|
74
78
|
- With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
|
|
75
|
-
|
|
76
|
-
-
|
|
79
|
+
- In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
|
|
80
|
+
- Unknown extensions are treated as Text (raw lines) — safe for logs and `.txt` files.
|
|
81
|
+
- `--global-bytes` may truncate or omit entire files to respect the total budget.
|
|
77
82
|
- The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
|
|
78
83
|
- Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
|
|
79
84
|
- Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
|
|
@@ -82,24 +87,45 @@ Quick one‑liners:
|
|
|
82
87
|
|
|
83
88
|
- Peek a big JSON stream (keeps structure):
|
|
84
89
|
|
|
85
|
-
zstdcat huge.json.zst | headson -
|
|
90
|
+
zstdcat huge.json.zst | headson -c 800 -f json -t default
|
|
86
91
|
|
|
87
92
|
- Many files with a fixed overall size:
|
|
88
93
|
|
|
89
|
-
headson -
|
|
94
|
+
headson -C 1200 -f json -t strict logs/*.json
|
|
90
95
|
|
|
91
96
|
- Glance at a file, JavaScript‑style comments for omissions:
|
|
92
97
|
|
|
93
|
-
headson -
|
|
98
|
+
headson -c 400 -f json -t detailed data.json
|
|
94
99
|
|
|
95
100
|
- YAML with detailed comments:
|
|
96
101
|
|
|
97
|
-
headson -
|
|
102
|
+
headson -c 400 -f yaml -t detailed config.yaml
|
|
103
|
+
|
|
104
|
+
### Text mode
|
|
105
|
+
|
|
106
|
+
- Single file (auto):
|
|
107
|
+
|
|
108
|
+
headson -c 200 notes.txt
|
|
109
|
+
|
|
110
|
+
- Force Text ingest/output (useful when mixing with other extensions):
|
|
111
|
+
|
|
112
|
+
headson -c 200 -i text -f text notes.txt
|
|
113
|
+
|
|
114
|
+
- Many text files (fileset):
|
|
115
|
+
|
|
116
|
+
headson -c 800 -i text -f text logs/*.txt
|
|
117
|
+
|
|
118
|
+
- Styles on Text:
|
|
119
|
+
- default: omission as a standalone `…` line.
|
|
120
|
+
- detailed: omission as `… N more lines …`.
|
|
121
|
+
- strict: no array‑level omission line (individual long lines may still truncate with `…`).
|
|
98
122
|
|
|
99
123
|
Show help:
|
|
100
124
|
|
|
101
125
|
headson --help
|
|
102
126
|
|
|
127
|
+
Note: flags align with head/tail conventions (`-c/--bytes`, `-C/--global-bytes`).
|
|
128
|
+
|
|
103
129
|
## Examples: head vs headson
|
|
104
130
|
|
|
105
131
|
Input:
|
|
@@ -118,7 +144,7 @@ jq -c . users.json | head -c 80
|
|
|
118
144
|
Structured preview with headson (JSON family, default style → Pseudo):
|
|
119
145
|
|
|
120
146
|
```bash
|
|
121
|
-
headson -n 120 -f json -t default users.json
|
|
147
|
+
headson -c 120 -f json -t default users.json
|
|
122
148
|
# {
|
|
123
149
|
# users: [
|
|
124
150
|
# { id: 1, name: "Ana", roles: [ "admin", … ] },
|
|
@@ -131,7 +157,7 @@ headson -n 120 -f json -t default users.json
|
|
|
131
157
|
Machine‑readable preview (JSON family, strict style → strict JSON):
|
|
132
158
|
|
|
133
159
|
```bash
|
|
134
|
-
headson -
|
|
160
|
+
headson -c 120 -f json -t strict users.json
|
|
135
161
|
# {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
|
|
136
162
|
```
|
|
137
163
|
|
|
@@ -143,6 +169,7 @@ Regenerate locally:
|
|
|
143
169
|
- Run: cargo make tapes
|
|
144
170
|
- Outputs are written to docs/assets/tapes
|
|
145
171
|
|
|
172
|
+
|
|
146
173
|
## Python Bindings
|
|
147
174
|
|
|
148
175
|
A thin Python extension module is available on PyPI as `headson`.
|
|
Binary file
|
|
@@ -214,7 +214,7 @@ dependencies = [
|
|
|
214
214
|
|
|
215
215
|
[[package]]
|
|
216
216
|
name = "headson"
|
|
217
|
-
version = "0.6.2"
|
|
217
|
+
version = "0.6.4"
|
|
218
218
|
dependencies = [
|
|
219
219
|
"anyhow",
|
|
220
220
|
"clap",
|
|
@@ -228,7 +228,7 @@ dependencies = [
|
|
|
228
228
|
|
|
229
229
|
[[package]]
|
|
230
230
|
name = "headson-python"
|
|
231
|
-
version = "0.6.2"
|
|
231
|
+
version = "0.6.4"
|
|
232
232
|
dependencies = [
|
|
233
233
|
"anyhow",
|
|
234
234
|
"headson",
|
|
@@ -5,9 +5,9 @@ Minimal Python API for the `headson` preview renderer.
|
|
|
5
5
|
API
|
|
6
6
|
|
|
7
7
|
- `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
|
|
8
|
-
- `format`: output format — `"auto" | "json" | "yaml"`.
|
|
8
|
+
- `format`: output format — `"auto" | "json" | "yaml" | "text"`.
|
|
9
9
|
- `style`: output style — `"strict" | "default" | "detailed"`.
|
|
10
|
-
- `input_format`: ingestion format — `"json" | "yaml"`.
|
|
10
|
+
- `input_format`: ingestion format — `"json" | "yaml" | "text"`.
|
|
11
11
|
- `character_budget`: maximum output size in characters (defaults to 500 if not set).
|
|
12
12
|
- `skew`: one of `"balanced" | "head" | "tail"`.
|
|
13
13
|
- `balanced` (default), `head` keeps first N, `tail` keeps last N. Display styles place omission markers accordingly; strict JSON remains unannotated.
|
|
@@ -36,6 +36,10 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
|
|
|
36
36
|
print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml", character_budget=60))
|
|
37
37
|
|
|
38
38
|
# Note: tail mode affects only display styles; strict JSON stays strict.
|
|
39
|
+
|
|
40
|
+
# Text: render raw lines with omission markers depending on style
|
|
41
|
+
text = "one\ntwo\nthree\n"
|
|
42
|
+
print(headson.summarize(text, format="text", style="default", input_format="text", character_budget=10))
|
|
39
43
|
```
|
|
40
44
|
|
|
41
45
|
Install for development:
|
|
@@ -31,7 +31,11 @@ fn map_output_template(format: &str, style: Style) -> Result<OutputTemplate> {
|
|
|
31
31
|
"auto" => Ok(map_json_template_for_style(style)), // stdin => JSON family
|
|
32
32
|
"json" => Ok(map_json_template_for_style(style)),
|
|
33
33
|
"yaml" | "yml" => Ok(OutputTemplate::Yaml),
|
|
34
|
-
|
|
34
|
+
"text" => Ok(OutputTemplate::Text),
|
|
35
|
+
other => bail!(
|
|
36
|
+
"unknown format: {} (expected 'auto' | 'json' | 'yaml' | 'text')",
|
|
37
|
+
other
|
|
38
|
+
),
|
|
35
39
|
}
|
|
36
40
|
}
|
|
37
41
|
|
|
@@ -107,14 +111,12 @@ fn summarize(
|
|
|
107
111
|
let input = text.as_bytes().to_vec();
|
|
108
112
|
py.detach(|| {
|
|
109
113
|
match input_format.to_ascii_lowercase().as_str() {
|
|
110
|
-
"json" => headson_core::headson(input, &cfg, &prio, budget)
|
|
114
|
+
"json" => headson_core::headson(input, &cfg, &prio, budget).map_err(to_pyerr),
|
|
115
|
+
"yaml" | "yml" => headson_core::headson_yaml(input, &cfg, &prio, budget)
|
|
111
116
|
.map_err(to_pyerr),
|
|
112
|
-
"
|
|
113
|
-
headson_core::headson_yaml(input, &cfg, &prio, budget)
|
|
114
|
-
.map_err(to_pyerr)
|
|
115
|
-
}
|
|
117
|
+
"text" => headson_core::headson_text(input, &cfg, &prio, budget).map_err(to_pyerr),
|
|
116
118
|
other => Err(to_pyerr(anyhow::anyhow!(
|
|
117
|
-
"unknown input_format: {} (expected 'json' | 'yaml')",
|
|
119
|
+
"unknown input_format: {} (expected 'json' | 'yaml' | 'text')",
|
|
118
120
|
other
|
|
119
121
|
))),
|
|
120
122
|
}
|
|
@@ -5,7 +5,7 @@ use std::cell::RefCell;
|
|
|
5
5
|
use crate::order::NodeKind;
|
|
6
6
|
use crate::utils::tree_arena::{JsonTreeArena, JsonTreeNode};
|
|
7
7
|
|
|
8
|
-
use
|
|
8
|
+
use crate::ingest::sampling::ArraySamplerKind;
|
|
9
9
|
|
|
10
10
|
#[derive(Default)]
|
|
11
11
|
pub(crate) struct JsonTreeBuilder {
|
|
@@ -15,10 +15,7 @@ pub(crate) struct JsonTreeBuilder {
|
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
impl JsonTreeBuilder {
|
|
18
|
-
pub(crate) fn new(
|
|
19
|
-
array_cap: usize,
|
|
20
|
-
sampler: super::samplers::ArraySamplerKind,
|
|
21
|
-
) -> Self {
|
|
18
|
+
pub(crate) fn new(array_cap: usize, sampler: ArraySamplerKind) -> Self {
|
|
22
19
|
Self {
|
|
23
20
|
arena: RefCell::new(JsonTreeArena::default()),
|
|
24
21
|
array_cap,
|
|
@@ -1,24 +1,27 @@
|
|
|
1
1
|
mod builder;
|
|
2
2
|
mod samplers;
|
|
3
|
-
use serde::de::DeserializeSeed;
|
|
4
3
|
|
|
5
|
-
use crate::PriorityConfig;
|
|
6
|
-
use crate::utils::tree_arena::JsonTreeArena;
|
|
7
4
|
use anyhow::Result;
|
|
8
5
|
use builder::JsonTreeBuilder;
|
|
6
|
+
use serde::de::DeserializeSeed;
|
|
7
|
+
|
|
8
|
+
use crate::PriorityConfig;
|
|
9
|
+
use crate::utils::tree_arena::JsonTreeArena as TreeArena;
|
|
10
|
+
|
|
11
|
+
use crate::ingest::Ingest;
|
|
9
12
|
|
|
10
13
|
#[cfg(test)]
|
|
11
14
|
pub fn build_json_tree_arena(
|
|
12
15
|
input: &str,
|
|
13
16
|
config: &PriorityConfig,
|
|
14
|
-
) -> Result<JsonTreeArena> {
|
|
17
|
+
) -> Result<TreeArena> {
|
|
15
18
|
build_json_tree_arena_from_bytes(input.as_bytes().to_vec(), config)
|
|
16
19
|
}
|
|
17
20
|
|
|
18
21
|
pub fn build_json_tree_arena_from_bytes(
|
|
19
22
|
mut bytes: Vec<u8>,
|
|
20
23
|
config: &PriorityConfig,
|
|
21
|
-
) -> Result<JsonTreeArena> {
|
|
24
|
+
) -> Result<TreeArena> {
|
|
22
25
|
let mut de = simd_json::Deserializer::from_slice(&mut bytes)?;
|
|
23
26
|
let builder = JsonTreeBuilder::new(
|
|
24
27
|
config.array_max_items,
|
|
@@ -36,7 +39,7 @@ pub fn build_json_tree_arena_from_bytes(
|
|
|
36
39
|
pub fn build_json_tree_arena_from_many(
|
|
37
40
|
mut inputs: Vec<(String, Vec<u8>)>,
|
|
38
41
|
config: &PriorityConfig,
|
|
39
|
-
) -> Result<JsonTreeArena> {
|
|
42
|
+
) -> Result<TreeArena> {
|
|
40
43
|
let builder = JsonTreeBuilder::new(
|
|
41
44
|
config.array_max_items,
|
|
42
45
|
config.array_sampler.into(),
|
|
@@ -57,6 +60,38 @@ pub fn build_json_tree_arena_from_many(
|
|
|
57
60
|
Ok(arena)
|
|
58
61
|
}
|
|
59
62
|
|
|
63
|
+
/// JSON adapter for the ingest boundary. Delegates to the JSON builder to
|
|
64
|
+
/// produce the neutral `TreeArena`.
|
|
65
|
+
pub struct JsonIngest;
|
|
66
|
+
|
|
67
|
+
impl Ingest for JsonIngest {
|
|
68
|
+
fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena> {
|
|
69
|
+
build_json_tree_arena_from_bytes(bytes, cfg)
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
fn parse_many(
|
|
73
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
74
|
+
cfg: &PriorityConfig,
|
|
75
|
+
) -> Result<TreeArena> {
|
|
76
|
+
build_json_tree_arena_from_many(inputs, cfg)
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/// Convenience functions for the JSON ingest path.
|
|
81
|
+
pub fn parse_json_one(
|
|
82
|
+
bytes: Vec<u8>,
|
|
83
|
+
cfg: &PriorityConfig,
|
|
84
|
+
) -> Result<TreeArena> {
|
|
85
|
+
JsonIngest::parse_one(bytes, cfg)
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
pub fn parse_json_many(
|
|
89
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
90
|
+
cfg: &PriorityConfig,
|
|
91
|
+
) -> Result<TreeArena> {
|
|
92
|
+
JsonIngest::parse_many(inputs, cfg)
|
|
93
|
+
}
|
|
94
|
+
|
|
60
95
|
#[cfg(test)]
|
|
61
96
|
mod tests {
|
|
62
97
|
use super::*;
|
|
@@ -1,14 +1,12 @@
|
|
|
1
1
|
use serde::de::{IgnoredAny, SeqAccess};
|
|
2
2
|
|
|
3
|
-
use super::
|
|
3
|
+
use super::JsonTreeBuilder;
|
|
4
|
+
use super::SampledArray;
|
|
4
5
|
|
|
5
|
-
//
|
|
6
|
+
// Default strategy phases: keep-first, greedy, then index-hash acceptance (~50%).
|
|
6
7
|
const RANDOM_ACCEPT_SEED: u64 = 0x9e37_79b9_7f4a_7c15;
|
|
7
|
-
|
|
8
|
-
const RANDOM_ACCEPT_THRESHOLD: u32 = 0x8000_0000;
|
|
9
|
-
// Keep a small, fixed number of items from the head before greedy/random phases.
|
|
8
|
+
const RANDOM_ACCEPT_THRESHOLD: u32 = 0x8000_0000; // ~50%
|
|
10
9
|
const KEEP_FIRST_COUNT: usize = 3;
|
|
11
|
-
// Take roughly half of the remaining capacity greedily after the first items.
|
|
12
10
|
const GREEDY_PORTION_DIVISOR: usize = 2;
|
|
13
11
|
|
|
14
12
|
struct PhaseState {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
use serde::de::SeqAccess;
|
|
2
2
|
|
|
3
|
-
use crate::
|
|
4
|
-
use crate::
|
|
3
|
+
use crate::ingest::formats::json::builder::JsonTreeBuilder;
|
|
4
|
+
use crate::ingest::sampling::ArraySamplerKind;
|
|
5
5
|
|
|
6
6
|
#[derive(Debug)]
|
|
7
7
|
pub(crate) struct SampledArray {
|
|
@@ -10,14 +10,6 @@ pub(crate) struct SampledArray {
|
|
|
10
10
|
pub total_len: usize,
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
#[derive(Copy, Clone, Debug, Default)]
|
|
14
|
-
pub(crate) enum ArraySamplerKind {
|
|
15
|
-
#[default]
|
|
16
|
-
Default,
|
|
17
|
-
Head,
|
|
18
|
-
Tail,
|
|
19
|
-
}
|
|
20
|
-
|
|
21
13
|
impl ArraySamplerKind {
|
|
22
14
|
pub(crate) fn sample_stream<'de, A>(
|
|
23
15
|
self,
|
|
@@ -38,16 +30,6 @@ impl ArraySamplerKind {
|
|
|
38
30
|
}
|
|
39
31
|
}
|
|
40
32
|
|
|
41
|
-
impl From<ArraySamplerStrategy> for ArraySamplerKind {
|
|
42
|
-
fn from(strategy: ArraySamplerStrategy) -> Self {
|
|
43
|
-
match strategy {
|
|
44
|
-
ArraySamplerStrategy::Default => ArraySamplerKind::Default,
|
|
45
|
-
ArraySamplerStrategy::Head => ArraySamplerKind::Head,
|
|
46
|
-
ArraySamplerStrategy::Tail => ArraySamplerKind::Tail,
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
|
|
51
33
|
mod default;
|
|
52
34
|
mod head;
|
|
53
35
|
mod tail;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
use serde::de::{IgnoredAny, SeqAccess};
|
|
2
2
|
|
|
3
|
-
use super::
|
|
3
|
+
use super::JsonTreeBuilder;
|
|
4
|
+
use super::SampledArray;
|
|
4
5
|
|
|
5
6
|
pub(crate) fn sample_stream<'de, A>(
|
|
6
7
|
seq: &mut A,
|
|
@@ -89,8 +90,10 @@ mod tests {
|
|
|
89
90
|
let mut cfg = PriorityConfig::new(usize::MAX, 5);
|
|
90
91
|
cfg.array_sampler = crate::ArraySamplerStrategy::Tail;
|
|
91
92
|
let arena =
|
|
92
|
-
crate::
|
|
93
|
-
|
|
93
|
+
crate::ingest::formats::json::build_json_tree_arena_from_bytes(
|
|
94
|
+
input, &cfg,
|
|
95
|
+
)
|
|
96
|
+
.expect("arena");
|
|
94
97
|
let root = &arena.nodes[arena.root_id];
|
|
95
98
|
assert_eq!(root.children_len, 5, "kept 5");
|
|
96
99
|
let mut orig_indices = Vec::new();
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
// File-format specific ingest adapters live under this module.
|
|
2
|
+
pub mod json;
|
|
3
|
+
pub mod text;
|
|
4
|
+
pub mod yaml;
|
|
5
|
+
|
|
6
|
+
// Re-export commonly used helpers for convenience
|
|
7
|
+
pub use json::{parse_json_many, parse_json_one};
|
|
8
|
+
pub use text::{parse_text_many, parse_text_one};
|
|
9
|
+
pub use yaml::{parse_yaml_many, parse_yaml_one};
|