headson 0.6.2__tar.gz → 0.6.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {headson-0.6.2 → headson-0.6.4}/Cargo.lock +1 -1
  2. {headson-0.6.2 → headson-0.6.4}/Cargo.toml +1 -1
  3. {headson-0.6.2 → headson-0.6.4}/PKG-INFO +43 -16
  4. {headson-0.6.2 → headson-0.6.4}/README.md +42 -15
  5. headson-0.6.4/docs/assets/tapes/demo.gif +0 -0
  6. {headson-0.6.2 → headson-0.6.4}/pyproject.toml +1 -1
  7. {headson-0.6.2 → headson-0.6.4}/python/Cargo.lock +2 -2
  8. {headson-0.6.2 → headson-0.6.4}/python/Cargo.toml +1 -1
  9. {headson-0.6.2 → headson-0.6.4}/python/README.md +6 -2
  10. {headson-0.6.2 → headson-0.6.4}/python/src/lib.rs +9 -7
  11. {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/builder.rs +2 -5
  12. {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/mod.rs +41 -6
  13. {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/samplers/default.rs +4 -6
  14. {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/samplers/head.rs +2 -1
  15. {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/samplers/mod.rs +2 -20
  16. {headson-0.6.2/src/json_ingest → headson-0.6.4/src/ingest/formats/json}/samplers/tail.rs +6 -3
  17. headson-0.6.4/src/ingest/formats/mod.rs +9 -0
  18. headson-0.6.4/src/ingest/formats/text/mod.rs +274 -0
  19. {headson-0.6.2/src/yaml_ingest → headson-0.6.4/src/ingest/formats/yaml}/mod.rs +124 -78
  20. headson-0.6.4/src/ingest/mod.rs +70 -0
  21. headson-0.6.4/src/ingest/sampling/mod.rs +118 -0
  22. {headson-0.6.2 → headson-0.6.4}/src/lib.rs +26 -2
  23. {headson-0.6.2 → headson-0.6.4}/src/main.rs +41 -13
  24. {headson-0.6.2 → headson-0.6.4}/src/order/build.rs +2 -2
  25. {headson-0.6.2 → headson-0.6.4}/src/serialization/fileset.rs +2 -1
  26. {headson-0.6.2 → headson-0.6.4}/src/serialization/mod.rs +87 -24
  27. {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/mod.rs +3 -0
  28. headson-0.6.4/src/serialization/templates/text.rs +43 -0
  29. {headson-0.6.2 → headson-0.6.4}/src/serialization/types.rs +1 -0
  30. headson-0.6.2/docs/assets/tapes/demo.gif +0 -0
  31. headson-0.6.2/src/ingest/mod.rs +0 -120
  32. {headson-0.6.2 → headson-0.6.4}/docs/assets/algorithm.svg +0 -0
  33. {headson-0.6.2 → headson-0.6.4}/docs/assets/logo.png +0 -0
  34. {headson-0.6.2 → headson-0.6.4}/docs/assets/logo.svg +0 -0
  35. {headson-0.6.2 → headson-0.6.4}/python/headson/__init__.py +0 -0
  36. {headson-0.6.2 → headson-0.6.4}/src/format.rs +0 -0
  37. {headson-0.6.2 → headson-0.6.4}/src/order/mod.rs +0 -0
  38. {headson-0.6.2 → headson-0.6.4}/src/order/scoring.rs +0 -0
  39. {headson-0.6.2 → headson-0.6.4}/src/order/snapshots/headson__order__build__tests__order_empty_array_order.snap +0 -0
  40. {headson-0.6.2 → headson-0.6.4}/src/order/snapshots/headson__order__build__tests__order_single_string_array_order.snap +0 -0
  41. {headson-0.6.2 → headson-0.6.4}/src/order/types.rs +0 -0
  42. {headson-0.6.2 → headson-0.6.4}/src/serialization/color.rs +0 -0
  43. {headson-0.6.2 → headson-0.6.4}/src/serialization/output.rs +0 -0
  44. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty.snap +0 -0
  45. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty_yaml.snap +0 -0
  46. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__arena_render_single.snap +0 -0
  47. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__arena_render_single_yaml.snap +0 -0
  48. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_internal_gaps_yaml.snap +0 -0
  49. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_head.snap +0 -0
  50. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_tail.snap +0 -0
  51. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_head.snap +0 -0
  52. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_tail.snap +0 -0
  53. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_head.snap +0 -0
  54. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_tail.snap +0 -0
  55. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_json.snap +0 -0
  56. {headson-0.6.2 → headson-0.6.4}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_yaml.snap +0 -0
  57. {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/core.rs +0 -0
  58. {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/js.rs +0 -0
  59. {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/json.rs +0 -0
  60. {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/pseudo.rs +0 -0
  61. {headson-0.6.2 → headson-0.6.4}/src/serialization/templates/yaml.rs +0 -0
  62. {headson-0.6.2 → headson-0.6.4}/src/snapshots/headson__order__tests__order_empty_array_order.snap +0 -0
  63. {headson-0.6.2 → headson-0.6.4}/src/snapshots/headson__order__tests__order_single_string_array_order.snap +0 -0
  64. {headson-0.6.2 → headson-0.6.4}/src/utils/graph.rs +0 -0
  65. {headson-0.6.2 → headson-0.6.4}/src/utils/json.rs +0 -0
  66. {headson-0.6.2 → headson-0.6.4}/src/utils/mod.rs +0 -0
  67. {headson-0.6.2 → headson-0.6.4}/src/utils/search.rs +0 -0
  68. {headson-0.6.2 → headson-0.6.4}/src/utils/text.rs +0 -0
  69. {headson-0.6.2 → headson-0.6.4}/src/utils/tree_arena.rs +0 -0
  70. {headson-0.6.2 → headson-0.6.4}/tests/fixtures/json/JSONTestSuite/LICENSE +0 -0
  71. {headson-0.6.2 → headson-0.6.4}/tests/fixtures/json/JSONTestSuite/README.md +0 -0
@@ -298,7 +298,7 @@ dependencies = [
298
298
 
299
299
  [[package]]
300
300
  name = "headson"
301
- version = "0.6.2"
301
+ version = "0.6.4"
302
302
  dependencies = [
303
303
  "anyhow",
304
304
  "assert_cmd",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson"
3
- version = "0.6.2"
3
+ version = "0.6.4"
4
4
  edition = "2024"
5
5
  description = "Budget‑constrained JSON preview renderer"
6
6
  readme = "README.md"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: headson
3
- Version: 0.6.2
3
+ Version: 0.6.4
4
4
  Classifier: Programming Language :: Python
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Rust
@@ -20,12 +20,15 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
20
20
  <br/>
21
21
  </p>
22
22
 
23
- Head/tail for JSON and YAML but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget.
23
+ `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
24
24
 
25
25
  Available as:
26
26
  - CLI (see [Usage](#usage))
27
27
  - Python library (see [Python Bindings](#python-bindings))
28
28
 
29
+ ![Codecov](https://img.shields.io/codecov/c/github/kantord/headson?style=flat-square) ![Crates.io Version](https://img.shields.io/crates/v/headson?style=flat-square) ![PyPI - Version](https://img.shields.io/pypi/v/headson?style=flat-square)
30
+
31
+
29
32
  ## Install
30
33
 
31
34
  Using Cargo:
@@ -41,10 +44,11 @@ From source:
41
44
  ## Features
42
45
 
43
46
  - Budgeted output: specify exactly how much you want to see
44
- - Output formats: `auto | json | yaml`
47
+ - Output formats: `auto | json | yaml | text`
45
48
  - Styles: `strict | default | detailed`
46
49
  - JSON family: `strict` → strict JSON, `default` → human‑friendly Pseudo, `detailed` → JS with inline comments
47
50
  - YAML: always YAML; `strict` has no comments, `default` uses “# …”, `detailed` uses “# N more …”
51
+ - Text: prints raw lines. In `default` style, omissions are shown as a single line `…`; in `detailed`, as `… N more lines …`. `strict` omits array‑level summaries.
48
52
  - Multiple inputs: preview many files at once with a shared or per‑file budget
49
53
  - Fast: processes gigabyte‑scale files in seconds (mostly disk‑bound)
50
54
  - Available as a CLI app and as a Python library
@@ -66,14 +70,14 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
66
70
 
67
71
  Common flags:
68
72
 
69
- - `-n, --budget <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
70
- - `-N, --global-budget <BYTES>`: total output budget across all inputs. With `--budget`, the effective total is the smaller of the two.
71
- - `-f, --format <auto|json|yaml>`: output format (default: `auto`).
72
- - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML).
73
+ - `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
74
+ - `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
75
+ - `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
76
+ - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
73
77
  - `-t, --template <strict|default|detailed>`: output style (default: `default`).
74
78
  - JSON family: `strict` → strict JSON; `default` → Pseudo; `detailed` → JS with inline comments.
75
79
  - YAML: always YAML; style only affects comments (`strict` none, `default` “# …”, `detailed` “# N more …”).
76
- - `-i, --input-format <json|yaml>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
80
+ - `-i, --input-format <json|yaml|text>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
77
81
  - `-m, --compact`: no indentation, no spaces, no newlines
78
82
  - `--no-newline`: single line output
79
83
  - `--no-space`: no space after `:` in objects
@@ -86,8 +90,9 @@ Notes:
86
90
 
87
91
  - Multiple inputs:
88
92
  - With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
89
- - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
90
- - `--global-budget` may truncate or omit entire files to respect the total budget.
93
+ - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
94
+ - Unknown extensions are treated as Text (raw lines), which is safe for logs and `.txt` files.
95
+ - `--global-bytes` may truncate or omit entire files to respect the total budget.
91
96
  - The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
92
97
  - Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
93
98
  - Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
@@ -96,24 +101,45 @@ Quick one‑liners:
96
101
 
97
102
  - Peek a big JSON stream (keeps structure):
98
103
 
99
- zstdcat huge.json.zst | headson -n 800 -f json -t default
104
+ zstdcat huge.json.zst | headson -c 800 -f json -t default
100
105
 
101
106
  - Many files with a fixed overall size:
102
107
 
103
- headson -N 1200 -f json -t strict logs/*.json
108
+ headson -C 1200 -f json -t strict logs/*.json
104
109
 
105
110
  - Glance at a file, JavaScript‑style comments for omissions:
106
111
 
107
- headson -n 400 -f json -t detailed data.json
112
+ headson -c 400 -f json -t detailed data.json
108
113
 
109
114
  - YAML with detailed comments:
110
115
 
111
- headson -n 400 -f yaml -t detailed config.yaml
116
+ headson -c 400 -f yaml -t detailed config.yaml
117
+
118
+ ### Text mode
119
+
120
+ - Single file (auto):
121
+
122
+ headson -c 200 notes.txt
123
+
124
+ - Force Text ingest/output (useful when mixing with other extensions):
125
+
126
+ headson -c 200 -i text -f text notes.txt
127
+
128
+ - Many text files (fileset):
129
+
130
+ headson -c 800 -i text -f text logs/*.txt
131
+
132
+ - Styles on Text:
133
+ - default: omission as a standalone `…` line.
134
+ - detailed: omission as `… N more lines …`.
135
+ - strict: no array‑level omission line (individual long lines may still truncate with `…`).
112
136
 
113
137
  Show help:
114
138
 
115
139
  headson --help
116
140
 
141
+ Note: flags align with head/tail conventions (`-c/--bytes`, `-C/--global-bytes`).
142
+
117
143
  ## Examples: head vs headson
118
144
 
119
145
  Input:
@@ -132,7 +158,7 @@ jq -c . users.json | head -c 80
132
158
  Structured preview with headson (JSON family, default style → Pseudo):
133
159
 
134
160
  ```bash
135
- headson -n 120 -f json -t default users.json
161
+ headson -c 120 -f json -t default users.json
136
162
  # {
137
163
  # users: [
138
164
  # { id: 1, name: "Ana", roles: [ "admin", … ] },
@@ -145,7 +171,7 @@ headson -n 120 -f json -t default users.json
145
171
  Machine‑readable preview (JSON family, strict style → strict JSON):
146
172
 
147
173
  ```bash
148
- headson -n 120 -f json -t strict users.json
174
+ headson -c 120 -f json -t strict users.json
149
175
  # {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
150
176
  ```
151
177
 
@@ -157,6 +183,7 @@ Regenerate locally:
157
183
  - Run: cargo make tapes
158
184
  - Outputs are written to docs/assets/tapes
159
185
 
186
+
160
187
  ## Python Bindings
161
188
 
162
189
  A thin Python extension module is available on PyPI as `headson`.
@@ -6,12 +6,15 @@
6
6
  <br/>
7
7
  </p>
8
8
 
9
- Head/tail for JSON and YAML but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget.
9
+ `head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
10
10
 
11
11
  Available as:
12
12
  - CLI (see [Usage](#usage))
13
13
  - Python library (see [Python Bindings](#python-bindings))
14
14
 
15
+ ![Codecov](https://img.shields.io/codecov/c/github/kantord/headson?style=flat-square) ![Crates.io Version](https://img.shields.io/crates/v/headson?style=flat-square) ![PyPI - Version](https://img.shields.io/pypi/v/headson?style=flat-square)
16
+
17
+
15
18
  ## Install
16
19
 
17
20
  Using Cargo:
@@ -27,10 +30,11 @@ From source:
27
30
  ## Features
28
31
 
29
32
  - Budgeted output: specify exactly how much you want to see
30
- - Output formats: `auto | json | yaml`
33
+ - Output formats: `auto | json | yaml | text`
31
34
  - Styles: `strict | default | detailed`
32
35
  - JSON family: `strict` → strict JSON, `default` → human‑friendly Pseudo, `detailed` → JS with inline comments
33
36
  - YAML: always YAML; `strict` has no comments, `default` uses “# …”, `detailed` uses “# N more …”
37
+ - Text: prints raw lines. In `default` style, omissions are shown as a single line `…`; in `detailed`, as `… N more lines …`. `strict` omits array‑level summaries.
34
38
  - Multiple inputs: preview many files at once with a shared or per‑file budget
35
39
  - Fast: processes gigabyte‑scale files in seconds (mostly disk‑bound)
36
40
  - Available as a CLI app and as a Python library
@@ -52,14 +56,14 @@ If you’re comfortable with tools like `head` and `tail`, use `headson` when yo
52
56
 
53
57
  Common flags:
54
58
 
55
- - `-n, --budget <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
56
- - `-N, --global-budget <BYTES>`: total output budget across all inputs. With `--budget`, the effective total is the smaller of the two.
57
- - `-f, --format <auto|json|yaml>`: output format (default: `auto`).
58
- - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML).
59
+ - `-c, --bytes <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
60
+ - `-C, --global-bytes <BYTES>`: total output budget across all inputs. With `--bytes`, the effective total is the smaller of the two.
61
+ - `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
62
+ - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
59
63
  - `-t, --template <strict|default|detailed>`: output style (default: `default`).
60
64
  - JSON family: `strict` → strict JSON; `default` → Pseudo; `detailed` → JS with inline comments.
61
65
  - YAML: always YAML; style only affects comments (`strict` none, `default` “# …”, `detailed` “# N more …”).
62
- - `-i, --input-format <json|yaml>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
66
+ - `-i, --input-format <json|yaml|text>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
63
67
  - `-m, --compact`: no indentation, no spaces, no newlines
64
68
  - `--no-newline`: single line output
65
69
  - `--no-space`: no space after `:` in objects
@@ -72,8 +76,9 @@ Notes:
72
76
 
73
77
  - Multiple inputs:
74
78
  - With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
75
- - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
76
- - `--global-budget` may truncate or omit entire files to respect the total budget.
79
+ - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
80
+ - Unknown extensions are treated as Text (raw lines), which is safe for logs and `.txt` files.
81
+ - `--global-bytes` may truncate or omit entire files to respect the total budget.
77
82
  - The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
78
83
  - Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
79
84
  - Head vs Tail sampling: these options bias which part of arrays are kept before rendering. Display styles may still insert internal gap markers to honor very small budgets; strict JSON stays unannotated.
@@ -82,24 +87,45 @@ Quick one‑liners:
82
87
 
83
88
  - Peek a big JSON stream (keeps structure):
84
89
 
85
- zstdcat huge.json.zst | headson -n 800 -f json -t default
90
+ zstdcat huge.json.zst | headson -c 800 -f json -t default
86
91
 
87
92
  - Many files with a fixed overall size:
88
93
 
89
- headson -N 1200 -f json -t strict logs/*.json
94
+ headson -C 1200 -f json -t strict logs/*.json
90
95
 
91
96
  - Glance at a file, JavaScript‑style comments for omissions:
92
97
 
93
- headson -n 400 -f json -t detailed data.json
98
+ headson -c 400 -f json -t detailed data.json
94
99
 
95
100
  - YAML with detailed comments:
96
101
 
97
- headson -n 400 -f yaml -t detailed config.yaml
102
+ headson -c 400 -f yaml -t detailed config.yaml
103
+
104
+ ### Text mode
105
+
106
+ - Single file (auto):
107
+
108
+ headson -c 200 notes.txt
109
+
110
+ - Force Text ingest/output (useful when mixing with other extensions):
111
+
112
+ headson -c 200 -i text -f text notes.txt
113
+
114
+ - Many text files (fileset):
115
+
116
+ headson -c 800 -i text -f text logs/*.txt
117
+
118
+ - Styles on Text:
119
+ - default: omission as a standalone `…` line.
120
+ - detailed: omission as `… N more lines …`.
121
+ - strict: no array‑level omission line (individual long lines may still truncate with `…`).
98
122
 
99
123
  Show help:
100
124
 
101
125
  headson --help
102
126
 
127
+ Note: flags align with head/tail conventions (`-c/--bytes`, `-C/--global-bytes`).
128
+
103
129
  ## Examples: head vs headson
104
130
 
105
131
  Input:
@@ -118,7 +144,7 @@ jq -c . users.json | head -c 80
118
144
  Structured preview with headson (JSON family, default style → Pseudo):
119
145
 
120
146
  ```bash
121
- headson -n 120 -f json -t default users.json
147
+ headson -c 120 -f json -t default users.json
122
148
  # {
123
149
  # users: [
124
150
  # { id: 1, name: "Ana", roles: [ "admin", … ] },
@@ -131,7 +157,7 @@ headson -n 120 -f json -t default users.json
131
157
  Machine‑readable preview (JSON family, strict style → strict JSON):
132
158
 
133
159
  ```bash
134
- headson -n 120 -f json -t strict users.json
160
+ headson -c 120 -f json -t strict users.json
135
161
  # {"users":[{"id":1,"name":"Ana","roles":["admin"]}],"meta":{"count":2}}
136
162
  ```
137
163
 
@@ -143,6 +169,7 @@ Regenerate locally:
143
169
  - Run: cargo make tapes
144
170
  - Outputs are written to docs/assets/tapes
145
171
 
172
+
146
173
  ## Python Bindings
147
174
 
148
175
  A thin Python extension module is available on PyPI as `headson`.
Binary file
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "headson"
7
- version = "0.6.2"
7
+ version = "0.6.4"
8
8
  description = "Budget‑constrained JSON preview renderer (Python bindings)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -214,7 +214,7 @@ dependencies = [
214
214
 
215
215
  [[package]]
216
216
  name = "headson"
217
- version = "0.6.2"
217
+ version = "0.6.4"
218
218
  dependencies = [
219
219
  "anyhow",
220
220
  "clap",
@@ -228,7 +228,7 @@ dependencies = [
228
228
 
229
229
  [[package]]
230
230
  name = "headson-python"
231
- version = "0.6.2"
231
+ version = "0.6.4"
232
232
  dependencies = [
233
233
  "anyhow",
234
234
  "headson",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson-python"
3
- version = "0.6.2"
3
+ version = "0.6.4"
4
4
  edition = "2021"
5
5
  publish = false
6
6
  readme = "README.md"
@@ -5,9 +5,9 @@ Minimal Python API for the `headson` preview renderer.
5
5
  API
6
6
 
7
7
  - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
8
- - `format`: output format — `"auto" | "json" | "yaml"`.
8
+ - `format`: output format — `"auto" | "json" | "yaml" | "text"`.
9
9
  - `style`: output style — `"strict" | "default" | "detailed"`.
10
- - `input_format`: ingestion format — `"json" | "yaml"`.
10
+ - `input_format`: ingestion format — `"json" | "yaml" | "text"`.
11
11
  - `character_budget`: maximum output size in characters (defaults to 500 if not set).
12
12
  - `skew`: one of `"balanced" | "head" | "tail"`.
13
13
  - `balanced` (default), `head` keeps first N, `tail` keeps last N. Display styles place omission markers accordingly; strict JSON remains unannotated.
@@ -36,6 +36,10 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
36
36
  print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml", character_budget=60))
37
37
 
38
38
  # Note: tail mode affects only display styles; strict JSON stays strict.
39
+
40
+ # Text: render raw lines with omission markers depending on style
41
+ text = "one\ntwo\nthree\n"
42
+ print(headson.summarize(text, format="text", style="default", input_format="text", character_budget=10))
39
43
  ```
40
44
 
41
45
  Install for development:
@@ -31,7 +31,11 @@ fn map_output_template(format: &str, style: Style) -> Result<OutputTemplate> {
31
31
  "auto" => Ok(map_json_template_for_style(style)), // stdin => JSON family
32
32
  "json" => Ok(map_json_template_for_style(style)),
33
33
  "yaml" | "yml" => Ok(OutputTemplate::Yaml),
34
- other => bail!("unknown format: {} (expected 'auto' | 'json' | 'yaml')", other),
34
+ "text" => Ok(OutputTemplate::Text),
35
+ other => bail!(
36
+ "unknown format: {} (expected 'auto' | 'json' | 'yaml' | 'text')",
37
+ other
38
+ ),
35
39
  }
36
40
  }
37
41
 
@@ -107,14 +111,12 @@ fn summarize(
107
111
  let input = text.as_bytes().to_vec();
108
112
  py.detach(|| {
109
113
  match input_format.to_ascii_lowercase().as_str() {
110
- "json" => headson_core::headson(input, &cfg, &prio, budget)
114
+ "json" => headson_core::headson(input, &cfg, &prio, budget).map_err(to_pyerr),
115
+ "yaml" | "yml" => headson_core::headson_yaml(input, &cfg, &prio, budget)
111
116
  .map_err(to_pyerr),
112
- "yaml" | "yml" => {
113
- headson_core::headson_yaml(input, &cfg, &prio, budget)
114
- .map_err(to_pyerr)
115
- }
117
+ "text" => headson_core::headson_text(input, &cfg, &prio, budget).map_err(to_pyerr),
116
118
  other => Err(to_pyerr(anyhow::anyhow!(
117
- "unknown input_format: {} (expected 'json' | 'yaml')",
119
+ "unknown input_format: {} (expected 'json' | 'yaml' | 'text')",
118
120
  other
119
121
  ))),
120
122
  }
@@ -5,7 +5,7 @@ use std::cell::RefCell;
5
5
  use crate::order::NodeKind;
6
6
  use crate::utils::tree_arena::{JsonTreeArena, JsonTreeNode};
7
7
 
8
- use super::samplers::ArraySamplerKind;
8
+ use crate::ingest::sampling::ArraySamplerKind;
9
9
 
10
10
  #[derive(Default)]
11
11
  pub(crate) struct JsonTreeBuilder {
@@ -15,10 +15,7 @@ pub(crate) struct JsonTreeBuilder {
15
15
  }
16
16
 
17
17
  impl JsonTreeBuilder {
18
- pub(crate) fn new(
19
- array_cap: usize,
20
- sampler: super::samplers::ArraySamplerKind,
21
- ) -> Self {
18
+ pub(crate) fn new(array_cap: usize, sampler: ArraySamplerKind) -> Self {
22
19
  Self {
23
20
  arena: RefCell::new(JsonTreeArena::default()),
24
21
  array_cap,
@@ -1,24 +1,27 @@
1
1
  mod builder;
2
2
  mod samplers;
3
- use serde::de::DeserializeSeed;
4
3
 
5
- use crate::PriorityConfig;
6
- use crate::utils::tree_arena::JsonTreeArena;
7
4
  use anyhow::Result;
8
5
  use builder::JsonTreeBuilder;
6
+ use serde::de::DeserializeSeed;
7
+
8
+ use crate::PriorityConfig;
9
+ use crate::utils::tree_arena::JsonTreeArena as TreeArena;
10
+
11
+ use crate::ingest::Ingest;
9
12
 
10
13
  #[cfg(test)]
11
14
  pub fn build_json_tree_arena(
12
15
  input: &str,
13
16
  config: &PriorityConfig,
14
- ) -> Result<JsonTreeArena> {
17
+ ) -> Result<TreeArena> {
15
18
  build_json_tree_arena_from_bytes(input.as_bytes().to_vec(), config)
16
19
  }
17
20
 
18
21
  pub fn build_json_tree_arena_from_bytes(
19
22
  mut bytes: Vec<u8>,
20
23
  config: &PriorityConfig,
21
- ) -> Result<JsonTreeArena> {
24
+ ) -> Result<TreeArena> {
22
25
  let mut de = simd_json::Deserializer::from_slice(&mut bytes)?;
23
26
  let builder = JsonTreeBuilder::new(
24
27
  config.array_max_items,
@@ -36,7 +39,7 @@ pub fn build_json_tree_arena_from_bytes(
36
39
  pub fn build_json_tree_arena_from_many(
37
40
  mut inputs: Vec<(String, Vec<u8>)>,
38
41
  config: &PriorityConfig,
39
- ) -> Result<JsonTreeArena> {
42
+ ) -> Result<TreeArena> {
40
43
  let builder = JsonTreeBuilder::new(
41
44
  config.array_max_items,
42
45
  config.array_sampler.into(),
@@ -57,6 +60,38 @@ pub fn build_json_tree_arena_from_many(
57
60
  Ok(arena)
58
61
  }
59
62
 
63
+ /// JSON adapter for the ingest boundary. Delegates to the JSON builder to
64
+ /// produce the neutral `TreeArena`.
65
+ pub struct JsonIngest;
66
+
67
+ impl Ingest for JsonIngest {
68
+ fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena> {
69
+ build_json_tree_arena_from_bytes(bytes, cfg)
70
+ }
71
+
72
+ fn parse_many(
73
+ inputs: Vec<(String, Vec<u8>)>,
74
+ cfg: &PriorityConfig,
75
+ ) -> Result<TreeArena> {
76
+ build_json_tree_arena_from_many(inputs, cfg)
77
+ }
78
+ }
79
+
80
+ /// Convenience functions for the JSON ingest path.
81
+ pub fn parse_json_one(
82
+ bytes: Vec<u8>,
83
+ cfg: &PriorityConfig,
84
+ ) -> Result<TreeArena> {
85
+ JsonIngest::parse_one(bytes, cfg)
86
+ }
87
+
88
+ pub fn parse_json_many(
89
+ inputs: Vec<(String, Vec<u8>)>,
90
+ cfg: &PriorityConfig,
91
+ ) -> Result<TreeArena> {
92
+ JsonIngest::parse_many(inputs, cfg)
93
+ }
94
+
60
95
  #[cfg(test)]
61
96
  mod tests {
62
97
  use super::*;
@@ -1,14 +1,12 @@
1
1
  use serde::de::{IgnoredAny, SeqAccess};
2
2
 
3
- use super::{JsonTreeBuilder, SampledArray};
3
+ use super::JsonTreeBuilder;
4
+ use super::SampledArray;
4
5
 
5
- // Tunable sampling constants for the default strategy.
6
+ // Default strategy phases: keep-first, greedy, then index-hash acceptance (~50%).
6
7
  const RANDOM_ACCEPT_SEED: u64 = 0x9e37_79b9_7f4a_7c15;
7
- // ~50% acceptance to thin remaining elements in the random phase.
8
- const RANDOM_ACCEPT_THRESHOLD: u32 = 0x8000_0000;
9
- // Keep a small, fixed number of items from the head before greedy/random phases.
8
+ const RANDOM_ACCEPT_THRESHOLD: u32 = 0x8000_0000; // ~50%
10
9
  const KEEP_FIRST_COUNT: usize = 3;
11
- // Take roughly half of the remaining capacity greedily after the first items.
12
10
  const GREEDY_PORTION_DIVISOR: usize = 2;
13
11
 
14
12
  struct PhaseState {
@@ -1,6 +1,7 @@
1
1
  use serde::de::{IgnoredAny, SeqAccess};
2
2
 
3
- use super::{JsonTreeBuilder, SampledArray};
3
+ use super::JsonTreeBuilder;
4
+ use super::SampledArray;
4
5
 
5
6
  fn parse_keep<'de, A>(
6
7
  seq: &mut A,
@@ -1,7 +1,7 @@
1
1
  use serde::de::SeqAccess;
2
2
 
3
- use crate::ArraySamplerStrategy;
4
- use crate::json_ingest::builder::JsonTreeBuilder;
3
+ use crate::ingest::formats::json::builder::JsonTreeBuilder;
4
+ use crate::ingest::sampling::ArraySamplerKind;
5
5
 
6
6
  #[derive(Debug)]
7
7
  pub(crate) struct SampledArray {
@@ -10,14 +10,6 @@ pub(crate) struct SampledArray {
10
10
  pub total_len: usize,
11
11
  }
12
12
 
13
- #[derive(Copy, Clone, Debug, Default)]
14
- pub(crate) enum ArraySamplerKind {
15
- #[default]
16
- Default,
17
- Head,
18
- Tail,
19
- }
20
-
21
13
  impl ArraySamplerKind {
22
14
  pub(crate) fn sample_stream<'de, A>(
23
15
  self,
@@ -38,16 +30,6 @@ impl ArraySamplerKind {
38
30
  }
39
31
  }
40
32
 
41
- impl From<ArraySamplerStrategy> for ArraySamplerKind {
42
- fn from(strategy: ArraySamplerStrategy) -> Self {
43
- match strategy {
44
- ArraySamplerStrategy::Default => ArraySamplerKind::Default,
45
- ArraySamplerStrategy::Head => ArraySamplerKind::Head,
46
- ArraySamplerStrategy::Tail => ArraySamplerKind::Tail,
47
- }
48
- }
49
- }
50
-
51
33
  mod default;
52
34
  mod head;
53
35
  mod tail;
@@ -1,6 +1,7 @@
1
1
  use serde::de::{IgnoredAny, SeqAccess};
2
2
 
3
- use super::{JsonTreeBuilder, SampledArray};
3
+ use super::JsonTreeBuilder;
4
+ use super::SampledArray;
4
5
 
5
6
  pub(crate) fn sample_stream<'de, A>(
6
7
  seq: &mut A,
@@ -89,8 +90,10 @@ mod tests {
89
90
  let mut cfg = PriorityConfig::new(usize::MAX, 5);
90
91
  cfg.array_sampler = crate::ArraySamplerStrategy::Tail;
91
92
  let arena =
92
- crate::json_ingest::build_json_tree_arena_from_bytes(input, &cfg)
93
- .expect("arena");
93
+ crate::ingest::formats::json::build_json_tree_arena_from_bytes(
94
+ input, &cfg,
95
+ )
96
+ .expect("arena");
94
97
  let root = &arena.nodes[arena.root_id];
95
98
  assert_eq!(root.children_len, 5, "kept 5");
96
99
  let mut orig_indices = Vec::new();
@@ -0,0 +1,9 @@
1
+ // File-format specific ingest adapters live under this module.
2
+ pub mod json;
3
+ pub mod text;
4
+ pub mod yaml;
5
+
6
+ // Re-export commonly used helpers for convenience
7
+ pub use json::{parse_json_many, parse_json_one};
8
+ pub use text::{parse_text_many, parse_text_one};
9
+ pub use yaml::{parse_yaml_many, parse_yaml_one};