pfc-jsonl 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pfc_jsonl-0.1.0/LICENSE +21 -0
- pfc_jsonl-0.1.0/PKG-INFO +236 -0
- pfc_jsonl-0.1.0/README.md +206 -0
- pfc_jsonl-0.1.0/pfc/__init__.py +237 -0
- pfc_jsonl-0.1.0/pfc/_core.py +111 -0
- pfc_jsonl-0.1.0/pyproject.toml +45 -0
pfc_jsonl-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ImpossibleForge
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pfc_jsonl-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pfc-jsonl
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python interface for PFC-JSONL — high-performance log compression with block-level timestamp filtering
|
|
5
|
+
Project-URL: Homepage, https://github.com/ImpossibleForge/pfc-jsonl
|
|
6
|
+
Project-URL: Repository, https://github.com/ImpossibleForge/pfc-jsonl
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/ImpossibleForge/pfc-jsonl/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/ImpossibleForge/pfc-jsonl/releases
|
|
9
|
+
Author-email: ImpossibleForge <impossibleforge@gmail.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: compression,duckdb,fluent-bit,jsonl,log-compression,logs,structured-logs
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: System Administrators
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
18
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
19
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
26
|
+
Classifier: Topic :: System :: Archiving :: Compression
|
|
27
|
+
Classifier: Topic :: System :: Logging
|
|
28
|
+
Requires-Python: >=3.9
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# pfc-jsonl · Python Package
|
|
32
|
+
|
|
33
|
+
Python interface for **PFC-JSONL** — high-performance compression for structured log files (JSONL), with block-level timestamp filtering.
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
pip install pfc-jsonl
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
> **Requires the `pfc_jsonl` binary.** Install it separately — see [below](#install-the-binary).
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## What is PFC-JSONL?
|
|
44
|
+
|
|
45
|
+
PFC-JSONL compresses JSONL log files **26–34% smaller than gzip/zstd** on typical log data. It stores a timestamp index alongside each file, enabling fast time-range queries without full decompression.
|
|
46
|
+
|
|
47
|
+
| Operation | Description |
|
|
48
|
+
|-----------|-------------|
|
|
49
|
+
| `compress` | JSONL → `.pfc` (with timestamp index) |
|
|
50
|
+
| `decompress` | `.pfc` → JSONL |
|
|
51
|
+
| `query` | Decompress only blocks matching a time range |
|
|
52
|
+
| `seek_blocks` | Decompress specific blocks by index (DuckDB primitive) |
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import pfc
|
|
60
|
+
|
|
61
|
+
# Compress
|
|
62
|
+
pfc.compress("logs/app.jsonl", "logs/app.pfc")
|
|
63
|
+
|
|
64
|
+
# Decompress
|
|
65
|
+
pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
|
|
66
|
+
|
|
67
|
+
# Query by time range — only decompresses matching blocks
|
|
68
|
+
pfc.query("logs/app.pfc",
|
|
69
|
+
from_ts="2026-01-15T08:00:00",
|
|
70
|
+
to_ts="2026-01-15T09:00:00",
|
|
71
|
+
output_path="logs/morning.jsonl")
|
|
72
|
+
|
|
73
|
+
# Check Community Mode usage
|
|
74
|
+
usage = pfc.community_usage()
|
|
75
|
+
print(f"Used {usage['used_gb']:.2f} GB of {usage['limit_gb']} GB today")
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Install the Binary
|
|
81
|
+
|
|
82
|
+
The Python package is a thin wrapper — the compression engine is the `pfc_jsonl` binary.
|
|
83
|
+
|
|
84
|
+
**Linux (x64):**
|
|
85
|
+
```bash
|
|
86
|
+
curl -L https://github.com/ImpossibleForge/pfc-jsonl/releases/latest/download/pfc_jsonl-linux-x64 \
|
|
87
|
+
-o pfc_jsonl && chmod +x pfc_jsonl && sudo mv pfc_jsonl /usr/local/bin/
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**macOS:** Coming soon.
|
|
91
|
+
|
|
92
|
+
**Windows:** Download `pfc_jsonl-windows-x64.exe` from the [releases page](https://github.com/ImpossibleForge/pfc-jsonl/releases), rename to `pfc_jsonl.exe`, add to PATH.
|
|
93
|
+
|
|
94
|
+
**Custom location:** Set the `PFC_BINARY` environment variable:
|
|
95
|
+
```bash
|
|
96
|
+
export PFC_BINARY=/opt/tools/pfc_jsonl
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Verify:
|
|
100
|
+
```bash
|
|
101
|
+
pfc_jsonl --help
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## API Reference
|
|
107
|
+
|
|
108
|
+
### `pfc.compress(input_path, output_path, *, level="balanced", block_size_mb=None, workers=None, verbose=False)`
|
|
109
|
+
|
|
110
|
+
Compress a JSONL file to PFC format.
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
pfc.compress("logs/app.jsonl", "logs/app.pfc")
|
|
114
|
+
pfc.compress("big.jsonl", "big.pfc", level="max", workers=4)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
| Parameter | Default | Description |
|
|
118
|
+
|-----------|---------|-------------|
|
|
119
|
+
| `level` | `"balanced"` | `"fast"`, `"balanced"`, or `"max"` |
|
|
120
|
+
| `block_size_mb` | auto | Block size in MiB (power of 2, e.g. 16, 32) |
|
|
121
|
+
| `workers` | auto | Parallel compression workers |
|
|
122
|
+
| `verbose` | `False` | Print progress from binary |
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
### `pfc.decompress(input_path, output_path="-", *, verbose=False)`
|
|
127
|
+
|
|
128
|
+
Decompress a PFC file back to JSONL.
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
### `pfc.query(pfc_path, from_ts, to_ts, output_path="-", *, verbose=False)`
|
|
137
|
+
|
|
138
|
+
Decompress only the blocks matching a timestamp range.
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
pfc.query("logs/app.pfc",
|
|
142
|
+
from_ts="2026-01-15T08:00:00",
|
|
143
|
+
to_ts="2026-01-15T09:00:00",
|
|
144
|
+
output_path="logs/morning.jsonl")
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Timestamps can be ISO 8601 strings or Unix epoch integers (as strings).
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
### `pfc.seek_blocks(pfc_path, blocks, output_path="-", *, verbose=False)`
|
|
152
|
+
|
|
153
|
+
Decompress specific blocks by index. Used internally by the DuckDB extension.
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
pfc.seek_blocks("logs/app.pfc", [0, 3, 7], "logs/selected.jsonl")
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
### `pfc.community_usage() -> dict`
|
|
162
|
+
|
|
163
|
+
Return today's Community Mode usage without invoking the binary.
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
usage = pfc.community_usage()
|
|
167
|
+
# {
|
|
168
|
+
# "date": "2026-04-04",
|
|
169
|
+
# "bytes_today": 1073741824,
|
|
170
|
+
# "bytes_remaining": 4294967296,
|
|
171
|
+
# "limit_gb": 5.0,
|
|
172
|
+
# "used_gb": 1.0
|
|
173
|
+
# }
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
### `pfc.get_binary() -> str`
|
|
179
|
+
|
|
180
|
+
Return the path to the `pfc_jsonl` binary being used.
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
print(pfc.get_binary()) # /usr/local/bin/pfc_jsonl
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Community Mode
|
|
189
|
+
|
|
190
|
+
Without a license key, PFC-JSONL runs in **Community Mode**:
|
|
191
|
+
|
|
192
|
+
- All operations (compress, decompress, query, seek-blocks) are **free up to 5 GB/day**
|
|
193
|
+
- Usage is tracked locally in `~/.pfc/usage.json` — **no network calls**
|
|
194
|
+
- Resets every calendar day
|
|
195
|
+
|
|
196
|
+
For production use exceeding 5 GB/day, contact: **impossibleforge@gmail.com**
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Error Handling
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
import pfc
|
|
204
|
+
from pfc import PFCError
|
|
205
|
+
|
|
206
|
+
try:
|
|
207
|
+
pfc.compress("missing.jsonl", "out.pfc")
|
|
208
|
+
except FileNotFoundError as e:
|
|
209
|
+
print(f"Binary not found: {e}")
|
|
210
|
+
except PFCError as e:
|
|
211
|
+
print(f"Compression failed (exit {e.returncode}): {e.stderr}")
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## Integration with Fluent Bit
|
|
217
|
+
|
|
218
|
+
Use [pfc-fluentbit](https://github.com/ImpossibleForge/pfc-fluentbit) to receive logs from Fluent Bit and compress them automatically.
|
|
219
|
+
|
|
220
|
+
## Integration with DuckDB
|
|
221
|
+
|
|
222
|
+
Use the [pfc DuckDB extension](https://github.com/ImpossibleForge/pfc-duckdb) to query `.pfc` files directly with SQL:
|
|
223
|
+
|
|
224
|
+
```sql
|
|
225
|
+
INSTALL pfc FROM community;
|
|
226
|
+
LOAD pfc;
|
|
227
|
+
SELECT * FROM read_pfc_jsonl('logs/app.pfc') WHERE level = 'ERROR';
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## License
|
|
233
|
+
|
|
234
|
+
MIT — see [LICENSE](LICENSE)
|
|
235
|
+
|
|
236
|
+
Binary releases are proprietary. See [pfc-jsonl releases](https://github.com/ImpossibleForge/pfc-jsonl/releases) for terms.
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# pfc-jsonl · Python Package
|
|
2
|
+
|
|
3
|
+
Python interface for **PFC-JSONL** — high-performance compression for structured log files (JSONL), with block-level timestamp filtering.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
pip install pfc-jsonl
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
> **Requires the `pfc_jsonl` binary.** Install it separately — see [below](#install-the-binary).
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## What is PFC-JSONL?
|
|
14
|
+
|
|
15
|
+
PFC-JSONL compresses JSONL log files **26–34% smaller than gzip/zstd** on typical log data. It stores a timestamp index alongside each file, enabling fast time-range queries without full decompression.
|
|
16
|
+
|
|
17
|
+
| Operation | Description |
|
|
18
|
+
|-----------|-------------|
|
|
19
|
+
| `compress` | JSONL → `.pfc` (with timestamp index) |
|
|
20
|
+
| `decompress` | `.pfc` → JSONL |
|
|
21
|
+
| `query` | Decompress only blocks matching a time range |
|
|
22
|
+
| `seek_blocks` | Decompress specific blocks by index (DuckDB primitive) |
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import pfc
|
|
30
|
+
|
|
31
|
+
# Compress
|
|
32
|
+
pfc.compress("logs/app.jsonl", "logs/app.pfc")
|
|
33
|
+
|
|
34
|
+
# Decompress
|
|
35
|
+
pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
|
|
36
|
+
|
|
37
|
+
# Query by time range — only decompresses matching blocks
|
|
38
|
+
pfc.query("logs/app.pfc",
|
|
39
|
+
from_ts="2026-01-15T08:00:00",
|
|
40
|
+
to_ts="2026-01-15T09:00:00",
|
|
41
|
+
output_path="logs/morning.jsonl")
|
|
42
|
+
|
|
43
|
+
# Check Community Mode usage
|
|
44
|
+
usage = pfc.community_usage()
|
|
45
|
+
print(f"Used {usage['used_gb']:.2f} GB of {usage['limit_gb']} GB today")
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Install the Binary
|
|
51
|
+
|
|
52
|
+
The Python package is a thin wrapper — the compression engine is the `pfc_jsonl` binary.
|
|
53
|
+
|
|
54
|
+
**Linux (x64):**
|
|
55
|
+
```bash
|
|
56
|
+
curl -L https://github.com/ImpossibleForge/pfc-jsonl/releases/latest/download/pfc_jsonl-linux-x64 \
|
|
57
|
+
-o pfc_jsonl && chmod +x pfc_jsonl && sudo mv pfc_jsonl /usr/local/bin/
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**macOS:** Coming soon.
|
|
61
|
+
|
|
62
|
+
**Windows:** Download `pfc_jsonl-windows-x64.exe` from the [releases page](https://github.com/ImpossibleForge/pfc-jsonl/releases), rename to `pfc_jsonl.exe`, add to PATH.
|
|
63
|
+
|
|
64
|
+
**Custom location:** Set the `PFC_BINARY` environment variable:
|
|
65
|
+
```bash
|
|
66
|
+
export PFC_BINARY=/opt/tools/pfc_jsonl
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Verify:
|
|
70
|
+
```bash
|
|
71
|
+
pfc_jsonl --help
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## API Reference
|
|
77
|
+
|
|
78
|
+
### `pfc.compress(input_path, output_path, *, level="balanced", block_size_mb=None, workers=None, verbose=False)`
|
|
79
|
+
|
|
80
|
+
Compress a JSONL file to PFC format.
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
pfc.compress("logs/app.jsonl", "logs/app.pfc")
|
|
84
|
+
pfc.compress("big.jsonl", "big.pfc", level="max", workers=4)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
| Parameter | Default | Description |
|
|
88
|
+
|-----------|---------|-------------|
|
|
89
|
+
| `level` | `"balanced"` | `"fast"`, `"balanced"`, or `"max"` |
|
|
90
|
+
| `block_size_mb` | auto | Block size in MiB (power of 2, e.g. 16, 32) |
|
|
91
|
+
| `workers` | auto | Parallel compression workers |
|
|
92
|
+
| `verbose` | `False` | Print progress from binary |
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
### `pfc.decompress(input_path, output_path="-", *, verbose=False)`
|
|
97
|
+
|
|
98
|
+
Decompress a PFC file back to JSONL.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
### `pfc.query(pfc_path, from_ts, to_ts, output_path="-", *, verbose=False)`
|
|
107
|
+
|
|
108
|
+
Decompress only the blocks matching a timestamp range.
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
pfc.query("logs/app.pfc",
|
|
112
|
+
from_ts="2026-01-15T08:00:00",
|
|
113
|
+
to_ts="2026-01-15T09:00:00",
|
|
114
|
+
output_path="logs/morning.jsonl")
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Timestamps can be ISO 8601 strings or Unix epoch integers (as strings).
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
### `pfc.seek_blocks(pfc_path, blocks, output_path="-", *, verbose=False)`
|
|
122
|
+
|
|
123
|
+
Decompress specific blocks by index. Used internally by the DuckDB extension.
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
pfc.seek_blocks("logs/app.pfc", [0, 3, 7], "logs/selected.jsonl")
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
### `pfc.community_usage() -> dict`
|
|
132
|
+
|
|
133
|
+
Return today's Community Mode usage without invoking the binary.
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
usage = pfc.community_usage()
|
|
137
|
+
# {
|
|
138
|
+
# "date": "2026-04-04",
|
|
139
|
+
# "bytes_today": 1073741824,
|
|
140
|
+
# "bytes_remaining": 4294967296,
|
|
141
|
+
# "limit_gb": 5.0,
|
|
142
|
+
# "used_gb": 1.0
|
|
143
|
+
# }
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
### `pfc.get_binary() -> str`
|
|
149
|
+
|
|
150
|
+
Return the path to the `pfc_jsonl` binary being used.
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
print(pfc.get_binary()) # /usr/local/bin/pfc_jsonl
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Community Mode
|
|
159
|
+
|
|
160
|
+
Without a license key, PFC-JSONL runs in **Community Mode**:
|
|
161
|
+
|
|
162
|
+
- All operations (compress, decompress, query, seek-blocks) are **free up to 5 GB/day**
|
|
163
|
+
- Usage is tracked locally in `~/.pfc/usage.json` — **no network calls**
|
|
164
|
+
- Resets every calendar day
|
|
165
|
+
|
|
166
|
+
For production use exceeding 5 GB/day, contact: **impossibleforge@gmail.com**
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Error Handling
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
import pfc
|
|
174
|
+
from pfc import PFCError
|
|
175
|
+
|
|
176
|
+
try:
|
|
177
|
+
pfc.compress("missing.jsonl", "out.pfc")
|
|
178
|
+
except FileNotFoundError as e:
|
|
179
|
+
print(f"Binary not found: {e}")
|
|
180
|
+
except PFCError as e:
|
|
181
|
+
print(f"Compression failed (exit {e.returncode}): {e.stderr}")
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## Integration with Fluent Bit
|
|
187
|
+
|
|
188
|
+
Use [pfc-fluentbit](https://github.com/ImpossibleForge/pfc-fluentbit) to receive logs from Fluent Bit and compress them automatically.
|
|
189
|
+
|
|
190
|
+
## Integration with DuckDB
|
|
191
|
+
|
|
192
|
+
Use the [pfc DuckDB extension](https://github.com/ImpossibleForge/pfc-duckdb) to query `.pfc` files directly with SQL:
|
|
193
|
+
|
|
194
|
+
```sql
|
|
195
|
+
INSTALL pfc FROM community;
|
|
196
|
+
LOAD pfc;
|
|
197
|
+
SELECT * FROM read_pfc_jsonl('logs/app.pfc') WHERE level = 'ERROR';
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
MIT — see [LICENSE](LICENSE)
|
|
205
|
+
|
|
206
|
+
Binary releases are proprietary. See [pfc-jsonl releases](https://github.com/ImpossibleForge/pfc-jsonl/releases) for terms.
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pfc — Python interface for PFC-JSONL compression.
|
|
3
|
+
|
|
4
|
+
PFC-JSONL is a high-performance compressor for structured log files (JSONL).
|
|
5
|
+
This package provides a thin Python wrapper around the pfc_jsonl binary.
|
|
6
|
+
|
|
7
|
+
Community Mode (no license key):
|
|
8
|
+
All operations are free up to 5 GB per calendar day.
|
|
9
|
+
Usage is tracked locally in ~/.pfc/usage.json — no network calls.
|
|
10
|
+
|
|
11
|
+
License keys for production use (>5 GB/day):
|
|
12
|
+
https://github.com/ImpossibleForge/pfc-jsonl
|
|
13
|
+
|
|
14
|
+
Quick start:
|
|
15
|
+
>>> import pfc
|
|
16
|
+
>>> pfc.compress("app.jsonl", "app.pfc")
|
|
17
|
+
>>> pfc.decompress("app.pfc", "app_restored.jsonl")
|
|
18
|
+
>>> pfc.query("app.pfc", "2026-01-01T00:00:00", "2026-01-02T00:00:00", "day.jsonl")
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import json
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Optional
|
|
26
|
+
|
|
27
|
+
from ._core import PFCError, _find_binary, run
|
|
28
|
+
|
|
29
|
+
__version__ = "0.1.0"
|
|
30
|
+
__all__ = [
|
|
31
|
+
"compress",
|
|
32
|
+
"decompress",
|
|
33
|
+
"query",
|
|
34
|
+
"seek_blocks",
|
|
35
|
+
"community_usage",
|
|
36
|
+
"get_binary",
|
|
37
|
+
"PFCError",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_binary() -> str:
    """Report which pfc_jsonl executable this package will invoke.

    Handy when diagnosing which binary gets picked up on a given machine.

    Returns:
        The binary path, exactly as produced by the package's binary lookup.

    Raises:
        FileNotFoundError: when the lookup cannot locate a binary.
    """
    located_binary = _find_binary()
    return located_binary
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def compress(
    input_path: str,
    output_path: str,
    *,
    level: str = "balanced",
    block_size_mb: Optional[int] = None,
    workers: Optional[int] = None,
    verbose: bool = False,
) -> None:
    """Turn a JSONL file into a PFC archive by invoking ``pfc_jsonl compress``.

    Community Mode: input bytes count toward the 5 GB/day limit.

    Args:
        input_path: Source .jsonl file, or "-" to read from stdin.
        output_path: Destination .pfc file.
        level: One of "fast", "balanced" (default) or "max"; forwarded
            to the binary as ``--level``.
        block_size_mb: Block size in MiB (a power of 2 such as 16 or 32).
            When None the flag is omitted and the binary picks its default.
        workers: Number of parallel compression workers. When None the flag
            is omitted and the binary auto-detects.
        verbose: When False (default) ``--quiet`` is passed to the binary.

    Raises:
        FileNotFoundError: if the pfc_jsonl binary is not found.
        PFCError: if compression fails.

    Example:
        >>> pfc.compress("logs/app.jsonl", "logs/app.pfc")
        >>> pfc.compress("big.jsonl", "big.pfc", level="max", workers=4)
    """
    # Optional tuning flags, in the order they are appended to the command.
    optional_flags = (
        ("--block-size", block_size_mb),
        ("--workers", workers),
    )

    command = ["compress", input_path, output_path, "--level", level]
    for flag, value in optional_flags:
        if value is not None:
            command.extend((flag, str(value)))
    if not verbose:
        command.append("--quiet")

    run(command)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def decompress(
    input_path: str,
    output_path: str = "-",
    *,
    verbose: bool = False,
) -> None:
    """Restore the original JSONL from a PFC archive via ``pfc_jsonl decompress``.

    Community Mode: decompressed output bytes count toward the 5 GB/day limit.

    Args:
        input_path: The .pfc file to read, or "-" for stdin.
        output_path: Where to write the restored .jsonl data. The default
            "-" sends it to stdout.
        verbose: When False (default) ``--quiet`` is passed to the binary.

    Raises:
        FileNotFoundError: if the pfc_jsonl binary is not found.
        PFCError: if decompression fails.

    Example:
        >>> pfc.decompress("logs/app.pfc", "logs/app_restored.jsonl")
    """
    quiet_flag = [] if verbose else ["--quiet"]
    run(["decompress", input_path, output_path, *quiet_flag])
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def query(
    pfc_path: str,
    from_ts: str,
    to_ts: str,
    output_path: str = "-",
    *,
    verbose: bool = False,
) -> None:
    """Decompress only the blocks matching a timestamp range.

    Block-level filtering: only blocks that overlap the given time range
    are decompressed, which avoids a full decompression of the archive.

    Community Mode: counts decompressed output bytes toward the 5 GB/day limit.

    Args:
        pfc_path: Path to the .pfc file.
        from_ts: Start of the time range (ISO 8601 or Unix timestamp).
            Example: "2026-01-01T00:00:00" or "1735689600". An integer
            epoch value such as 1735689600 is also accepted and is
            converted to its decimal string form.
        to_ts: End of the time range (inclusive); same formats as from_ts.
        output_path: Path to write the results. Use "-" for stdout (default).
        verbose: Print block selection info from the binary. When False
            (default) ``--quiet`` is passed.

    Returns:
        None. Matching records are written to output_path by the binary.

    Raises:
        FileNotFoundError: if pfc_jsonl binary is not found.
        PFCError: if the query fails.

    Example:
        >>> pfc.query("logs/app.pfc", "2026-01-15T08:00:00", "2026-01-15T09:00:00",
        ...           "logs/morning.jsonl")
    """
    # subprocess rejects non-string argv entries, so an epoch passed as an int
    # would otherwise surface as a TypeError; str() leaves strings unchanged.
    args = [
        "query", pfc_path,
        "--from", str(from_ts),
        "--to", str(to_ts),
        "--out", output_path,
    ]
    if not verbose:
        args += ["--quiet"]
    run(args)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def seek_blocks(
    pfc_path: str,
    blocks: list[int],
    output_path: str = "-",
    *,
    verbose: bool = False,
) -> None:
    """Extract selected blocks of a PFC archive via ``pfc_jsonl seek-blocks``.

    Low-level primitive used internally by the DuckDB extension; also
    suitable for building custom query layers on top of PFC files.

    Community Mode: decompressed output bytes count toward the 5 GB/day limit.

    Args:
        pfc_path: The .pfc file to read.
        blocks: 0-based indices of the blocks to decompress; must be non-empty.
        output_path: Destination for the result. The default "-" means stdout.
        verbose: When False (default) ``--quiet`` is passed to the binary.

    Raises:
        ValueError: if ``blocks`` is empty (checked before the binary is run).
        FileNotFoundError: if the pfc_jsonl binary is not found.
        PFCError: if decompression fails.

    Example:
        >>> pfc.seek_blocks("logs/app.pfc", [0, 3, 7], "logs/selected.jsonl")
    """
    if not blocks:
        raise ValueError("blocks list must not be empty")

    command = ["seek-blocks", pfc_path, "--blocks"]
    # Each index becomes its own argv entry following --blocks.
    command.extend(str(index) for index in blocks)
    command.extend(("--out", output_path))
    if not verbose:
        command.append("--quiet")

    run(command)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def community_usage() -> dict:
    """Summarize today's Community Mode consumption.

    The figures come straight from ~/.pfc/usage.json; the binary is never
    invoked. A missing, unreadable or malformed file, or one recorded for a
    different date, is reported as zero bytes used.

    Returns:
        dict with keys:
            "date"            (str)   — today's date, e.g. "2026-04-04"
            "bytes_today"     (int)   — bytes processed today
            "bytes_remaining" (int)   — bytes left before the 5 GB limit (never negative)
            "limit_gb"        (float) — daily limit in GB (always 5.0)
            "used_gb"         (float) — bytes_today converted to GB, rounded to 3 places

    Example:
        >>> usage = pfc.community_usage()
        >>> print(f"Used {usage['used_gb']:.2f} GB of {usage['limit_gb']} GB today")
    """
    import time

    gib = 1024 ** 3
    daily_limit_bytes = 5 * gib
    usage_file = Path.home() / ".pfc" / "usage.json"
    current_date = time.strftime("%Y-%m-%d")

    def _recorded_bytes() -> int:
        # Only a record stamped with today's date counts.
        if not usage_file.exists():
            return 0
        record = json.loads(usage_file.read_text(encoding="utf-8"))
        if record.get("date") != current_date:
            return 0
        return int(record.get("bytes_today", 0))

    try:
        consumed = _recorded_bytes()
    except Exception:
        # Best effort: any problem reading or parsing the file means "nothing used".
        consumed = 0

    return {
        "date": current_date,
        "bytes_today": consumed,
        "bytes_remaining": max(0, daily_limit_bytes - consumed),
        "limit_gb": 5.0,
        "used_gb": round(consumed / gib, 3),
    }
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pfc._core — Binary finder and subprocess runner.
|
|
3
|
+
|
|
4
|
+
Locates the pfc_jsonl binary and provides a thin wrapper
|
|
5
|
+
for calling it as a subprocess. No algorithm logic here.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import shutil
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _find_binary() -> str:
    """Locate the pfc_jsonl binary.

    Search order:
        1. PFC_BINARY environment variable (used only if it names an
           existing file; otherwise the search continues)
        2. PATH (shutil.which)
        3. Common install locations: /usr/local/bin, /usr/bin,
           ~/.local/bin, ~/bin
        4. Same directory as this package

    Returns:
        Path to the binary as a string. A PFC_BINARY value is returned
        exactly as given.

    Raises:
        FileNotFoundError: if the binary cannot be found.
    """
    # 1. Explicit override
    env_path = os.environ.get("PFC_BINARY")
    if env_path and Path(env_path).is_file():
        return env_path

    # 2. PATH
    binary_name = "pfc_jsonl.exe" if sys.platform == "win32" else "pfc_jsonl"
    found = shutil.which(binary_name)
    if found:
        return found

    # 3. Common locations
    # Per-user directories are probed with the platform-specific file name
    # first (so ~/bin/pfc_jsonl.exe is found on Windows) and then with the
    # bare name that earlier versions looked for. On POSIX both names are
    # identical, so dict.fromkeys() collapses them to one entry.
    per_user_names = list(dict.fromkeys((binary_name, "pfc_jsonl")))
    home = Path.home()
    candidates = [
        Path("/usr/local/bin/pfc_jsonl"),
        Path("/usr/bin/pfc_jsonl"),
    ]
    for directory in (home / ".local" / "bin", home / "bin"):
        candidates.extend(directory / name for name in per_user_names)
    for p in candidates:
        if p.is_file():
            return str(p)

    # 4. Package directory (for bundled installs)
    pkg_dir = Path(__file__).parent
    local = pkg_dir / binary_name
    if local.is_file():
        return str(local)

    raise FileNotFoundError(
        "pfc_jsonl binary not found.\n\n"
        "Install it first:\n"
        " Linux: curl -L https://github.com/ImpossibleForge/pfc-jsonl/releases/"
        "latest/download/pfc_jsonl-linux-x64 -o pfc_jsonl && chmod +x pfc_jsonl && "
        "sudo mv pfc_jsonl /usr/local/bin/\n"
        " macOS: coming soon\n"
        " Windows: download pfc_jsonl-windows-x64.exe from the releases page\n\n"
        "Or set the PFC_BINARY environment variable to the binary path."
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def run(args: list, capture_stdout: bool = False) -> subprocess.CompletedProcess:
    """Invoke the pfc_jsonl binary with ``args`` and wait for it to finish.

    stderr is always captured as bytes. stdout is captured only on request;
    otherwise it is inherited from the current process.

    Args:
        args: Arguments placed after the binary path,
            e.g. ["compress", "in.jsonl", "out.pfc"].
        capture_stdout: When True, stdout is collected on the returned object
            instead of being passed through.

    Returns:
        The CompletedProcess for a run that exited with status 0.

    Raises:
        FileNotFoundError: if the binary is missing.
        PFCError: if the binary exits with a non-zero status; carries the
            exit code and the decoded, stripped stderr text.
    """
    stdout_target = subprocess.PIPE if capture_stdout else None
    completed = subprocess.run(
        [_find_binary()] + args,
        stdout=stdout_target,
        stderr=subprocess.PIPE,
        text=False,
    )

    if completed.returncode == 0:
        # NOTE(review): captured stderr is not echoed on success — confirm the
        # binary does not emit its verbose progress output on stderr.
        return completed

    raw_stderr = completed.stderr
    detail = raw_stderr.decode(errors="replace").strip() if raw_stderr else ""
    raise PFCError(
        f"pfc_jsonl exited with code {completed.returncode}",
        returncode=completed.returncode,
        stderr=detail,
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class PFCError(RuntimeError):
    """Failure of a pfc_jsonl invocation (non-zero exit status).

    Attributes are documented here rather than inline:
        returncode: exit status of the binary (-1 when not supplied).
        stderr: text captured from the binary's stderr ("" when not supplied).
    """

    def __init__(self, message: str, returncode: int = -1, stderr: str = ""):
        self.returncode = returncode
        self.stderr = stderr
        super().__init__(message)

    def __str__(self):
        # Append the stderr text only when there is some to show.
        headline = super().__str__()
        if not self.stderr:
            return headline
        return f"{headline}\n stderr: {self.stderr}"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pfc-jsonl"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Python interface for PFC-JSONL — high-performance log compression with block-level timestamp filtering"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "ImpossibleForge", email = "impossibleforge@gmail.com" }
|
|
13
|
+
]
|
|
14
|
+
keywords = [
|
|
15
|
+
"compression", "jsonl", "logs", "log-compression",
|
|
16
|
+
"structured-logs", "fluent-bit", "duckdb"
|
|
17
|
+
]
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Development Status :: 4 - Beta",
|
|
20
|
+
"Intended Audience :: Developers",
|
|
21
|
+
"Intended Audience :: System Administrators",
|
|
22
|
+
"License :: OSI Approved :: MIT License",
|
|
23
|
+
"Operating System :: POSIX :: Linux",
|
|
24
|
+
"Operating System :: MacOS",
|
|
25
|
+
"Operating System :: Microsoft :: Windows",
|
|
26
|
+
"Programming Language :: Python :: 3",
|
|
27
|
+
"Programming Language :: Python :: 3.9",
|
|
28
|
+
"Programming Language :: Python :: 3.10",
|
|
29
|
+
"Programming Language :: Python :: 3.11",
|
|
30
|
+
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Programming Language :: Python :: 3.13",
|
|
32
|
+
"Topic :: System :: Logging",
|
|
33
|
+
"Topic :: System :: Archiving :: Compression",
|
|
34
|
+
]
|
|
35
|
+
requires-python = ">=3.9"
|
|
36
|
+
dependencies = []
|
|
37
|
+
|
|
38
|
+
[project.urls]
|
|
39
|
+
Homepage = "https://github.com/ImpossibleForge/pfc-jsonl"
|
|
40
|
+
Repository = "https://github.com/ImpossibleForge/pfc-jsonl"
|
|
41
|
+
"Bug Tracker" = "https://github.com/ImpossibleForge/pfc-jsonl/issues"
|
|
42
|
+
Changelog = "https://github.com/ImpossibleForge/pfc-jsonl/releases"
|
|
43
|
+
|
|
44
|
+
[tool.hatch.build.targets.wheel]
|
|
45
|
+
packages = ["pfc"]
|