pfc-jsonl 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 ImpossibleForge
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,236 @@
1
+ Metadata-Version: 2.4
2
+ Name: pfc-jsonl
3
+ Version: 0.1.0
4
+ Summary: Python interface for PFC-JSONL — high-performance log compression with block-level timestamp filtering
5
+ Project-URL: Homepage, https://github.com/ImpossibleForge/pfc-jsonl
6
+ Project-URL: Repository, https://github.com/ImpossibleForge/pfc-jsonl
7
+ Project-URL: Bug Tracker, https://github.com/ImpossibleForge/pfc-jsonl/issues
8
+ Project-URL: Changelog, https://github.com/ImpossibleForge/pfc-jsonl/releases
9
+ Author-email: ImpossibleForge <impossibleforge@gmail.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: compression,duckdb,fluent-bit,jsonl,log-compression,logs,structured-logs
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: System Administrators
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Operating System :: Microsoft :: Windows
19
+ Classifier: Operating System :: POSIX :: Linux
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
25
+ Classifier: Programming Language :: Python :: 3.13
26
+ Classifier: Topic :: System :: Archiving :: Compression
27
+ Classifier: Topic :: System :: Logging
28
+ Requires-Python: >=3.9
29
+ Description-Content-Type: text/markdown
30
+
31
+ # pfc-jsonl · Python Package
32
+
33
+ Python interface for **PFC-JSONL** — high-performance compression for structured log files (JSONL), with block-level timestamp filtering.
34
+
35
+ ```
36
+ pip install pfc-jsonl
37
+ ```
38
+
39
+ > **Requires the `pfc_jsonl` binary.** Install it separately — see [below](#install-the-binary).
40
+
41
+ ---
42
+
43
+ ## What is PFC-JSONL?
44
+
45
+ PFC-JSONL compresses JSONL log files into archives that are **26–34% smaller than gzip/zstd output** on typical log data. It stores a timestamp index alongside each file, enabling fast time-range queries without full decompression.
46
+
47
+ | Operation | Description |
48
+ |-----------|-------------|
49
+ | `compress` | JSONL → `.pfc` (with timestamp index) |
50
+ | `decompress` | `.pfc` → JSONL |
51
+ | `query` | Decompress only blocks matching a time range |
52
+ | `seek_blocks` | Decompress specific blocks by index (DuckDB primitive) |
53
+
54
+ ---
55
+
56
+ ## Quick Start
57
+
58
+ ```python
59
+ import pfc
60
+
61
+ # Compress
62
+ pfc.compress("logs/app.jsonl", "logs/app.pfc")
63
+
64
+ # Decompress
65
+ pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
66
+
67
+ # Query by time range — only decompresses matching blocks
68
+ pfc.query("logs/app.pfc",
69
+ from_ts="2026-01-15T08:00:00",
70
+ to_ts="2026-01-15T09:00:00",
71
+ output_path="logs/morning.jsonl")
72
+
73
+ # Check Community Mode usage
74
+ usage = pfc.community_usage()
75
+ print(f"Used {usage['used_gb']:.2f} GB of {usage['limit_gb']} GB today")
76
+ ```
77
+
78
+ ---
79
+
80
+ ## Install the Binary
81
+
82
+ The Python package is a thin wrapper — the compression engine is the `pfc_jsonl` binary.
83
+
84
+ **Linux (x64):**
85
+ ```bash
86
+ curl -L https://github.com/ImpossibleForge/pfc-jsonl/releases/latest/download/pfc_jsonl-linux-x64 \
87
+ -o pfc_jsonl && chmod +x pfc_jsonl && sudo mv pfc_jsonl /usr/local/bin/
88
+ ```
89
+
90
+ **macOS:** Coming soon.
91
+
92
+ **Windows:** Download `pfc_jsonl-windows-x64.exe` from the [releases page](https://github.com/ImpossibleForge/pfc-jsonl/releases), rename to `pfc_jsonl.exe`, add to PATH.
93
+
94
+ **Custom location:** Set the `PFC_BINARY` environment variable:
95
+ ```bash
96
+ export PFC_BINARY=/opt/tools/pfc_jsonl
97
+ ```
98
+
99
+ Verify:
100
+ ```bash
101
+ pfc_jsonl --help
102
+ ```
103
+
104
+ ---
105
+
106
+ ## API Reference
107
+
108
+ ### `pfc.compress(input_path, output_path, *, level="balanced", block_size_mb=None, workers=None, verbose=False)`
109
+
110
+ Compress a JSONL file to PFC format.
111
+
112
+ ```python
113
+ pfc.compress("logs/app.jsonl", "logs/app.pfc")
114
+ pfc.compress("big.jsonl", "big.pfc", level="max", workers=4)
115
+ ```
116
+
117
+ | Parameter | Default | Description |
118
+ |-----------|---------|-------------|
119
+ | `level` | `"balanced"` | `"fast"`, `"balanced"`, or `"max"` |
120
+ | `block_size_mb` | auto | Block size in MiB (power of 2, e.g. 16, 32) |
121
+ | `workers` | auto | Parallel compression workers |
122
+ | `verbose` | `False` | Print progress from binary |
123
+
124
+ ---
125
+
126
+ ### `pfc.decompress(input_path, output_path="-", *, verbose=False)`
127
+
128
+ Decompress a PFC file back to JSONL.
129
+
130
+ ```python
131
+ pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
132
+ ```
133
+
134
+ ---
135
+
136
+ ### `pfc.query(pfc_path, from_ts, to_ts, output_path="-", *, verbose=False)`
137
+
138
+ Decompress only the blocks matching a timestamp range.
139
+
140
+ ```python
141
+ pfc.query("logs/app.pfc",
142
+ from_ts="2026-01-15T08:00:00",
143
+ to_ts="2026-01-15T09:00:00",
144
+ output_path="logs/morning.jsonl")
145
+ ```
146
+
147
+ Timestamps can be ISO 8601 strings (e.g. `"2026-01-01T00:00:00"`) or Unix epoch timestamps passed as strings (e.g. `"1735689600"`).
148
+
149
+ ---
150
+
151
+ ### `pfc.seek_blocks(pfc_path, blocks, output_path="-", *, verbose=False)`
152
+
153
+ Decompress specific blocks by index. Used internally by the DuckDB extension.
154
+
155
+ ```python
156
+ pfc.seek_blocks("logs/app.pfc", [0, 3, 7], "logs/selected.jsonl")
157
+ ```
158
+
159
+ ---
160
+
161
+ ### `pfc.community_usage() -> dict`
162
+
163
+ Return today's Community Mode usage without invoking the binary.
164
+
165
+ ```python
166
+ usage = pfc.community_usage()
167
+ # {
168
+ # "date": "2026-04-04",
169
+ # "bytes_today": 1073741824,
170
+ # "bytes_remaining": 4294967296,
171
+ # "limit_gb": 5.0,
172
+ # "used_gb": 1.0
173
+ # }
174
+ ```
175
+
176
+ ---
177
+
178
+ ### `pfc.get_binary() -> str`
179
+
180
+ Return the path to the `pfc_jsonl` binary being used.
181
+
182
+ ```python
183
+ print(pfc.get_binary()) # /usr/local/bin/pfc_jsonl
184
+ ```
185
+
186
+ ---
187
+
188
+ ## Community Mode
189
+
190
+ Without a license key, PFC-JSONL runs in **Community Mode**:
191
+
192
+ - All operations (compress, decompress, query, seek-blocks) are **free up to 5 GB/day**
193
+ - Usage is tracked locally in `~/.pfc/usage.json` — **no network calls**
194
+ - Resets every calendar day
195
+
196
+ For production use exceeding 5 GB/day, contact: **impossibleforge@gmail.com**
197
+
198
+ ---
199
+
200
+ ## Error Handling
201
+
202
+ ```python
203
+ import pfc
204
+ from pfc import PFCError
205
+
206
+ try:
207
+ pfc.compress("missing.jsonl", "out.pfc")
208
+ except FileNotFoundError as e:
209
+ print(f"Binary not found: {e}")
210
+ except PFCError as e:
211
+ print(f"Compression failed (exit {e.returncode}): {e.stderr}")
212
+ ```
213
+
214
+ ---
215
+
216
+ ## Integration with Fluent Bit
217
+
218
+ Use [pfc-fluentbit](https://github.com/ImpossibleForge/pfc-fluentbit) to receive logs from Fluent Bit and compress them automatically.
219
+
220
+ ## Integration with DuckDB
221
+
222
+ Use the [pfc DuckDB extension](https://github.com/ImpossibleForge/pfc-duckdb) to query `.pfc` files directly with SQL:
223
+
224
+ ```sql
225
+ INSTALL pfc FROM community;
226
+ LOAD pfc;
227
+ SELECT * FROM read_pfc_jsonl('logs/app.pfc') WHERE level = 'ERROR';
228
+ ```
229
+
230
+ ---
231
+
232
+ ## License
233
+
234
+ MIT — see [LICENSE](LICENSE)
235
+
236
+ Binary releases are proprietary. See [pfc-jsonl releases](https://github.com/ImpossibleForge/pfc-jsonl/releases) for terms.
@@ -0,0 +1,206 @@
1
+ # pfc-jsonl · Python Package
2
+
3
+ Python interface for **PFC-JSONL** — high-performance compression for structured log files (JSONL), with block-level timestamp filtering.
4
+
5
+ ```
6
+ pip install pfc-jsonl
7
+ ```
8
+
9
+ > **Requires the `pfc_jsonl` binary.** Install it separately — see [below](#install-the-binary).
10
+
11
+ ---
12
+
13
+ ## What is PFC-JSONL?
14
+
15
+ PFC-JSONL compresses JSONL log files into archives that are **26–34% smaller than gzip/zstd output** on typical log data. It stores a timestamp index alongside each file, enabling fast time-range queries without full decompression.
16
+
17
+ | Operation | Description |
18
+ |-----------|-------------|
19
+ | `compress` | JSONL → `.pfc` (with timestamp index) |
20
+ | `decompress` | `.pfc` → JSONL |
21
+ | `query` | Decompress only blocks matching a time range |
22
+ | `seek_blocks` | Decompress specific blocks by index (DuckDB primitive) |
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ```python
29
+ import pfc
30
+
31
+ # Compress
32
+ pfc.compress("logs/app.jsonl", "logs/app.pfc")
33
+
34
+ # Decompress
35
+ pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
36
+
37
+ # Query by time range — only decompresses matching blocks
38
+ pfc.query("logs/app.pfc",
39
+ from_ts="2026-01-15T08:00:00",
40
+ to_ts="2026-01-15T09:00:00",
41
+ output_path="logs/morning.jsonl")
42
+
43
+ # Check Community Mode usage
44
+ usage = pfc.community_usage()
45
+ print(f"Used {usage['used_gb']:.2f} GB of {usage['limit_gb']} GB today")
46
+ ```
47
+
48
+ ---
49
+
50
+ ## Install the Binary
51
+
52
+ The Python package is a thin wrapper — the compression engine is the `pfc_jsonl` binary.
53
+
54
+ **Linux (x64):**
55
+ ```bash
56
+ curl -L https://github.com/ImpossibleForge/pfc-jsonl/releases/latest/download/pfc_jsonl-linux-x64 \
57
+ -o pfc_jsonl && chmod +x pfc_jsonl && sudo mv pfc_jsonl /usr/local/bin/
58
+ ```
59
+
60
+ **macOS:** Coming soon.
61
+
62
+ **Windows:** Download `pfc_jsonl-windows-x64.exe` from the [releases page](https://github.com/ImpossibleForge/pfc-jsonl/releases), rename to `pfc_jsonl.exe`, add to PATH.
63
+
64
+ **Custom location:** Set the `PFC_BINARY` environment variable:
65
+ ```bash
66
+ export PFC_BINARY=/opt/tools/pfc_jsonl
67
+ ```
68
+
69
+ Verify:
70
+ ```bash
71
+ pfc_jsonl --help
72
+ ```
73
+
74
+ ---
75
+
76
+ ## API Reference
77
+
78
+ ### `pfc.compress(input_path, output_path, *, level="balanced", block_size_mb=None, workers=None, verbose=False)`
79
+
80
+ Compress a JSONL file to PFC format.
81
+
82
+ ```python
83
+ pfc.compress("logs/app.jsonl", "logs/app.pfc")
84
+ pfc.compress("big.jsonl", "big.pfc", level="max", workers=4)
85
+ ```
86
+
87
+ | Parameter | Default | Description |
88
+ |-----------|---------|-------------|
89
+ | `level` | `"balanced"` | `"fast"`, `"balanced"`, or `"max"` |
90
+ | `block_size_mb` | auto | Block size in MiB (power of 2, e.g. 16, 32) |
91
+ | `workers` | auto | Parallel compression workers |
92
+ | `verbose` | `False` | Print progress from binary |
93
+
94
+ ---
95
+
96
+ ### `pfc.decompress(input_path, output_path="-", *, verbose=False)`
97
+
98
+ Decompress a PFC file back to JSONL.
99
+
100
+ ```python
101
+ pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
102
+ ```
103
+
104
+ ---
105
+
106
+ ### `pfc.query(pfc_path, from_ts, to_ts, output_path="-", *, verbose=False)`
107
+
108
+ Decompress only the blocks matching a timestamp range.
109
+
110
+ ```python
111
+ pfc.query("logs/app.pfc",
112
+ from_ts="2026-01-15T08:00:00",
113
+ to_ts="2026-01-15T09:00:00",
114
+ output_path="logs/morning.jsonl")
115
+ ```
116
+
117
+ Timestamps can be ISO 8601 strings (e.g. `"2026-01-01T00:00:00"`) or Unix epoch timestamps passed as strings (e.g. `"1735689600"`).
118
+
119
+ ---
120
+
121
+ ### `pfc.seek_blocks(pfc_path, blocks, output_path="-", *, verbose=False)`
122
+
123
+ Decompress specific blocks by index. Used internally by the DuckDB extension.
124
+
125
+ ```python
126
+ pfc.seek_blocks("logs/app.pfc", [0, 3, 7], "logs/selected.jsonl")
127
+ ```
128
+
129
+ ---
130
+
131
+ ### `pfc.community_usage() -> dict`
132
+
133
+ Return today's Community Mode usage without invoking the binary.
134
+
135
+ ```python
136
+ usage = pfc.community_usage()
137
+ # {
138
+ # "date": "2026-04-04",
139
+ # "bytes_today": 1073741824,
140
+ # "bytes_remaining": 4294967296,
141
+ # "limit_gb": 5.0,
142
+ # "used_gb": 1.0
143
+ # }
144
+ ```
145
+
146
+ ---
147
+
148
+ ### `pfc.get_binary() -> str`
149
+
150
+ Return the path to the `pfc_jsonl` binary being used.
151
+
152
+ ```python
153
+ print(pfc.get_binary()) # /usr/local/bin/pfc_jsonl
154
+ ```
155
+
156
+ ---
157
+
158
+ ## Community Mode
159
+
160
+ Without a license key, PFC-JSONL runs in **Community Mode**:
161
+
162
+ - All operations (compress, decompress, query, seek-blocks) are **free up to 5 GB/day**
163
+ - Usage is tracked locally in `~/.pfc/usage.json` — **no network calls**
164
+ - Resets every calendar day
165
+
166
+ For production use exceeding 5 GB/day, contact: **impossibleforge@gmail.com**
167
+
168
+ ---
169
+
170
+ ## Error Handling
171
+
172
+ ```python
173
+ import pfc
174
+ from pfc import PFCError
175
+
176
+ try:
177
+ pfc.compress("missing.jsonl", "out.pfc")
178
+ except FileNotFoundError as e:
179
+ print(f"Binary not found: {e}")
180
+ except PFCError as e:
181
+ print(f"Compression failed (exit {e.returncode}): {e.stderr}")
182
+ ```
183
+
184
+ ---
185
+
186
+ ## Integration with Fluent Bit
187
+
188
+ Use [pfc-fluentbit](https://github.com/ImpossibleForge/pfc-fluentbit) to receive logs from Fluent Bit and compress them automatically.
189
+
190
+ ## Integration with DuckDB
191
+
192
+ Use the [pfc DuckDB extension](https://github.com/ImpossibleForge/pfc-duckdb) to query `.pfc` files directly with SQL:
193
+
194
+ ```sql
195
+ INSTALL pfc FROM community;
196
+ LOAD pfc;
197
+ SELECT * FROM read_pfc_jsonl('logs/app.pfc') WHERE level = 'ERROR';
198
+ ```
199
+
200
+ ---
201
+
202
+ ## License
203
+
204
+ MIT — see [LICENSE](LICENSE)
205
+
206
+ Binary releases are proprietary. See [pfc-jsonl releases](https://github.com/ImpossibleForge/pfc-jsonl/releases) for terms.
@@ -0,0 +1,237 @@
1
+ """
2
+ pfc — Python interface for PFC-JSONL compression.
3
+
4
+ PFC-JSONL is a high-performance compressor for structured log files (JSONL).
5
+ This package provides a thin Python wrapper around the pfc_jsonl binary.
6
+
7
+ Community Mode (no license key):
8
+ All operations are free up to 5 GB per calendar day.
9
+ Usage is tracked locally in ~/.pfc/usage.json — no network calls.
10
+
11
+ License keys for production use (>5 GB/day):
12
+ https://github.com/ImpossibleForge/pfc-jsonl
13
+
14
+ Quick start:
15
+ >>> import pfc
16
+ >>> pfc.compress("app.jsonl", "app.pfc")
17
+ >>> pfc.decompress("app.pfc", "app_restored.jsonl")
18
+ >>> pfc.query("app.pfc", "2026-01-01T00:00:00", "2026-01-02T00:00:00", "app_day.jsonl")
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import json
24
+ from pathlib import Path
25
+ from typing import Optional
26
+
27
+ from ._core import PFCError, _find_binary, run
28
+
29
+ __version__ = "0.1.0"
30
+ __all__ = [
31
+ "compress",
32
+ "decompress",
33
+ "query",
34
+ "seek_blocks",
35
+ "community_usage",
36
+ "get_binary",
37
+ "PFCError",
38
+ ]
39
+
40
+
41
def get_binary() -> str:
    """Report which pfc_jsonl executable this package will invoke.

    Handy when debugging which binary gets picked up. The lookup is
    delegated entirely to ``_find_binary``.

    Returns:
        The binary path as a string.

    Raises:
        FileNotFoundError: if no binary can be located.
    """
    resolved = _find_binary()
    return resolved
53
+
54
+
55
def compress(
    input_path: str,
    output_path: str,
    *,
    level: str = "balanced",
    block_size_mb: Optional[int] = None,
    workers: Optional[int] = None,
    verbose: bool = False,
) -> None:
    """Compress a JSONL file into a PFC file by invoking the binary.

    Community Mode: counts input bytes toward the 5 GB/day limit.

    Args:
        input_path: Path to the input .jsonl file (or "-" for stdin).
        output_path: Path where the compressed .pfc file is written.
        level: Compression level: "fast", "balanced" (default), or "max".
        block_size_mb: Block size in MiB (must be a power of 2, e.g. 16, 32).
            When None, the flag is omitted and the binary uses its default.
        workers: Number of parallel compression workers.
            When None, the flag is omitted and the binary decides.
        verbose: When False (default), ``--quiet`` is passed to the binary.

    Raises:
        FileNotFoundError: if the pfc_jsonl binary is not found.
        PFCError: if the binary exits with a non-zero status.

    Example:
        >>> pfc.compress("logs/app.jsonl", "logs/app.pfc")
        >>> pfc.compress("big.jsonl", "big.pfc", level="max", workers=4)
    """
    command = ["compress", input_path, output_path, "--level", level]

    # Optional flags are emitted only when the caller supplied a value,
    # in the fixed order: block size first, then workers.
    optional_flags = (
        ("--block-size", block_size_mb),
        ("--workers", workers),
    )
    for flag, value in optional_flags:
        if value is not None:
            command.extend((flag, str(value)))

    if not verbose:
        command.append("--quiet")

    run(command)
94
+
95
+
96
def decompress(
    input_path: str,
    output_path: str = "-",
    *,
    verbose: bool = False,
) -> None:
    """Restore the original JSONL from a PFC file by invoking the binary.

    Community Mode: counts decompressed output bytes toward the 5 GB/day limit.

    Args:
        input_path: Path to the .pfc file (or "-" for stdin).
        output_path: Destination for the restored .jsonl data.
            "-" (the default) means stdout.
        verbose: When False (default), ``--quiet`` is passed to the binary.

    Raises:
        FileNotFoundError: if the pfc_jsonl binary is not found.
        PFCError: if the binary exits with a non-zero status.

    Example:
        >>> pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
    """
    quiet_flag = [] if verbose else ["--quiet"]
    run(["decompress", input_path, output_path] + quiet_flag)
123
+
124
+
125
def query(
    pfc_path: str,
    from_ts: str,
    to_ts: str,
    output_path: str = "-",
    *,
    verbose: bool = False,
) -> None:
    """Extract the records of a PFC file that fall within a time range.

    Filtering happens at block level: only blocks overlapping the given
    range are decompressed, which is much faster than decompressing the
    whole file. The result is written to ``output_path``; nothing is
    returned.

    Community Mode: counts decompressed output bytes toward the 5 GB/day limit.

    Args:
        pfc_path: Path to the .pfc file.
        from_ts: Start of the time range (ISO 8601 or Unix timestamp),
            e.g. "2026-01-01T00:00:00" or "1735689600".
        to_ts: End of the time range (inclusive).
        output_path: Destination for the results. "-" (the default) means stdout.
        verbose: When False (default), ``--quiet`` is passed to the binary.

    Raises:
        FileNotFoundError: if the pfc_jsonl binary is not found.
        PFCError: if the binary exits with a non-zero status.

    Example:
        >>> pfc.query("logs/app.pfc", "2026-01-15T08:00:00", "2026-01-15T09:00:00",
        ...           "logs/morning.jsonl")
    """
    command = ["query", pfc_path]
    command += ["--from", from_ts]
    command += ["--to", to_ts]
    command += ["--out", output_path]
    if not verbose:
        command.append("--quiet")
    run(command)
160
+
161
+
162
def seek_blocks(
    pfc_path: str,
    blocks: list[int],
    output_path: str = "-",
    *,
    verbose: bool = False,
) -> None:
    """Decompress only the listed blocks of a PFC file.

    This is the low-level primitive used by the DuckDB extension, and a
    building block for custom query layers on top of PFC files.

    Community Mode: counts decompressed output bytes toward the 5 GB/day limit.

    Args:
        pfc_path: Path to the .pfc file.
        blocks: 0-based block indices to decompress; must not be empty.
        output_path: Destination for the result. "-" (the default) means stdout.
        verbose: When False (default), ``--quiet`` is passed to the binary.

    Raises:
        ValueError: if ``blocks`` is empty (checked before the binary is run).
        FileNotFoundError: if the pfc_jsonl binary is not found.
        PFCError: if the binary exits with a non-zero status.

    Example:
        >>> pfc.seek_blocks("logs/app.pfc", [0, 3, 7], "logs/selected.jsonl")
    """
    if not blocks:
        raise ValueError("blocks list must not be empty")

    # Each index becomes its own argument following the single --blocks flag.
    command = ["seek-blocks", pfc_path, "--blocks"]
    command.extend(str(index) for index in blocks)
    command.extend(["--out", output_path])
    if not verbose:
        command.append("--quiet")
    run(command)
197
+
198
+
199
def community_usage() -> dict:
    """Return today's Community Mode usage.

    Reads ~/.pfc/usage.json directly; the binary is not invoked. The
    lookup is best-effort: a missing, unreadable, malformed, or stale
    (different date) usage file is reported as zero usage rather than
    raising.

    Returns:
        dict with keys:
            "date"            (str)   — today's local date, e.g. "2026-04-04"
            "bytes_today"     (int)   — bytes processed today (never negative)
            "bytes_remaining" (int)   — bytes remaining before the 5 GB limit
            "limit_gb"        (float) — daily limit in GB (5.0)
            "used_gb"         (float) — bytes_today converted to GB, 3 decimals

    Example:
        >>> usage = pfc.community_usage()
        >>> print(f"Used {usage['used_gb']:.2f} GB of {usage['limit_gb']} GB today")
    """
    import time

    gib = 1024 ** 3
    limit = 5 * gib
    usage_path = Path.home() / ".pfc" / "usage.json"
    # Local-time date; the stored "date" field must match it exactly to count.
    today = time.strftime("%Y-%m-%d")

    bytes_today = 0
    try:
        data = json.loads(usage_path.read_text(encoding="utf-8"))
        if isinstance(data, dict) and data.get("date") == today:
            # Clamp so a corrupt negative counter cannot report more
            # remaining quota than the daily limit.
            bytes_today = max(0, int(data.get("bytes_today", 0)))
    except (OSError, ValueError, TypeError, OverflowError):
        # Best-effort read: a missing/unreadable file (OSError), invalid
        # JSON or text encoding (ValueError), or a non-numeric counter
        # (ValueError/TypeError/OverflowError) all mean "no usable record".
        bytes_today = 0

    return {
        "date": today,
        "bytes_today": bytes_today,
        "bytes_remaining": max(0, limit - bytes_today),
        "limit_gb": limit / gib,
        "used_gb": round(bytes_today / gib, 3),
    }
@@ -0,0 +1,111 @@
1
+ """
2
+ pfc._core — Binary finder and subprocess runner.
3
+
4
+ Locates the pfc_jsonl binary and provides a thin wrapper
5
+ for calling it as a subprocess. No algorithm logic here.
6
+ """
7
+
8
+ import os
9
+ import shutil
10
+ import subprocess
11
+ import sys
12
+ from pathlib import Path
13
+
14
+
15
+ def _find_binary() -> str:
16
+ """Locate the pfc_jsonl binary.
17
+
18
+ Search order:
19
+ 1. PFC_BINARY environment variable
20
+ 2. PATH (shutil.which)
21
+ 3. Common install locations
22
+ 4. Same directory as this package
23
+
24
+ Raises:
25
+ FileNotFoundError: if the binary cannot be found.
26
+ """
27
+ # 1. Explicit override
28
+ env_path = os.environ.get("PFC_BINARY")
29
+ if env_path and Path(env_path).is_file():
30
+ return env_path
31
+
32
+ # 2. PATH
33
+ binary_name = "pfc_jsonl.exe" if sys.platform == "win32" else "pfc_jsonl"
34
+ found = shutil.which(binary_name)
35
+ if found:
36
+ return found
37
+
38
+ # 3. Common locations
39
+ candidates = [
40
+ Path("/usr/local/bin/pfc_jsonl"),
41
+ Path("/usr/bin/pfc_jsonl"),
42
+ Path.home() / ".local" / "bin" / "pfc_jsonl",
43
+ Path.home() / "bin" / "pfc_jsonl",
44
+ ]
45
+ for p in candidates:
46
+ if p.is_file():
47
+ return str(p)
48
+
49
+ # 4. Package directory (for bundled installs)
50
+ pkg_dir = Path(__file__).parent
51
+ local = pkg_dir / binary_name
52
+ if local.is_file():
53
+ return str(local)
54
+
55
+ raise FileNotFoundError(
56
+ "pfc_jsonl binary not found.\n\n"
57
+ "Install it first:\n"
58
+ " Linux: curl -L https://github.com/ImpossibleForge/pfc-jsonl/releases/"
59
+ "latest/download/pfc_jsonl-linux-x64 -o pfc_jsonl && chmod +x pfc_jsonl && "
60
+ "sudo mv pfc_jsonl /usr/local/bin/\n"
61
+ " macOS: coming soon\n"
62
+ " Windows: download pfc_jsonl-windows-x64.exe from the releases page\n\n"
63
+ "Or set the PFC_BINARY environment variable to the binary path."
64
+ )
65
+
66
+
67
def run(args: list, capture_stdout: bool = False) -> subprocess.CompletedProcess:
    """Invoke pfc_jsonl with ``args`` and wait for it to finish.

    stderr is always captured (as bytes) so it can be attached to the
    error on failure; stdout is captured only when requested, otherwise
    it is inherited from the parent process.

    Args:
        args: Argument list, e.g. ["compress", "in.jsonl", "out.pfc"].
        capture_stdout: If True, capture stdout instead of letting it through.

    Returns:
        The CompletedProcess of a successful (exit code 0) run.

    Raises:
        FileNotFoundError: if the binary is missing.
        PFCError: if the binary exits with a non-zero status; carries the
            exit code and the decoded, stripped stderr text.
    """
    stdout_target = subprocess.PIPE if capture_stdout else None
    # NOTE(review): stderr is piped even on success, so anything the binary
    # prints there (possibly verbose progress) is not shown — confirm intent.
    completed = subprocess.run(
        [_find_binary()] + args,
        stdout=stdout_target,
        stderr=subprocess.PIPE,
        text=False,
    )
    if completed.returncode == 0:
        return completed

    raw_err = completed.stderr
    # Undecodable bytes are replaced rather than raising a decode error.
    detail = raw_err.decode(errors="replace").strip() if raw_err else ""
    raise PFCError(
        f"pfc_jsonl exited with code {completed.returncode}",
        returncode=completed.returncode,
        stderr=detail,
    )
97
+
98
+
99
class PFCError(RuntimeError):
    """Error raised when pfc_jsonl exits with a non-zero status.

    Attributes:
        returncode: Exit code of the binary (-1 when not supplied).
        stderr: Text captured from the binary's stderr ("" when not supplied).
    """

    def __init__(self, message: str, returncode: int = -1, stderr: str = ""):
        super().__init__(message)
        self.returncode = returncode
        self.stderr = stderr

    def __str__(self):
        # Append the captured stderr on its own line only when there is some.
        headline = super().__str__()
        if not self.stderr:
            return headline
        return f"{headline}\n stderr: {self.stderr}"
@@ -0,0 +1,45 @@
1
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "pfc-jsonl"
version = "0.1.0"
description = "Python interface for PFC-JSONL — high-performance log compression with block-level timestamp filtering"
readme = "README.md"
license = { text = "MIT" }
authors = [
    { name = "ImpossibleForge", email = "impossibleforge@gmail.com" },
]
keywords = [
    "compression",
    "jsonl",
    "logs",
    "log-compression",
    "structured-logs",
    "fluent-bit",
    "duckdb",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: System Administrators",
    "License :: OSI Approved :: MIT License",
    "Operating System :: POSIX :: Linux",
    "Operating System :: MacOS",
    "Operating System :: Microsoft :: Windows",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: System :: Logging",
    "Topic :: System :: Archiving :: Compression",
]
requires-python = ">=3.9"
# No Python dependencies: the pfc_jsonl binary is installed separately (see README).
dependencies = []

[project.urls]
Homepage = "https://github.com/ImpossibleForge/pfc-jsonl"
Repository = "https://github.com/ImpossibleForge/pfc-jsonl"
"Bug Tracker" = "https://github.com/ImpossibleForge/pfc-jsonl/issues"
Changelog = "https://github.com/ImpossibleForge/pfc-jsonl/releases"

# The distribution is named "pfc-jsonl" but the import package is "pfc".
[tool.hatch.build.targets.wheel]
packages = ["pfc"]