purefzf 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- purefzf-0.1.0/LICENSE +21 -0
- purefzf-0.1.0/PKG-INFO +206 -0
- purefzf-0.1.0/README.md +179 -0
- purefzf-0.1.0/pyproject.toml +41 -0
- purefzf-0.1.0/setup.cfg +4 -0
- purefzf-0.1.0/src/purefzf/__init__.py +49 -0
- purefzf-0.1.0/src/purefzf/algo.py +1037 -0
- purefzf-0.1.0/src/purefzf/cli.py +241 -0
- purefzf-0.1.0/src/purefzf/core.py +167 -0
- purefzf-0.1.0/src/purefzf/normalize.py +517 -0
- purefzf-0.1.0/src/purefzf/pattern.py +278 -0
- purefzf-0.1.0/src/purefzf/result.py +126 -0
- purefzf-0.1.0/src/purefzf/tokenizer.py +245 -0
- purefzf-0.1.0/src/purefzf.egg-info/PKG-INFO +206 -0
- purefzf-0.1.0/src/purefzf.egg-info/SOURCES.txt +22 -0
- purefzf-0.1.0/src/purefzf.egg-info/dependency_links.txt +1 -0
- purefzf-0.1.0/src/purefzf.egg-info/entry_points.txt +2 -0
- purefzf-0.1.0/src/purefzf.egg-info/top_level.txt +1 -0
- purefzf-0.1.0/tests/test_algo.py +231 -0
- purefzf-0.1.0/tests/test_cli.py +106 -0
- purefzf-0.1.0/tests/test_fastpath.py +44 -0
- purefzf-0.1.0/tests/test_filter.py +155 -0
- purefzf-0.1.0/tests/test_pattern.py +122 -0
- purefzf-0.1.0/tests/test_tokenizer.py +74 -0
purefzf-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 adam2go
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
purefzf-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: purefzf
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A pure Python implementation of fzf's fuzzy matching (FuzzyMatchV2) and non-interactive filter mode
|
|
5
|
+
Author: adam2go
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/adam2go/purefzf
|
|
8
|
+
Keywords: fzf,fuzzy,fuzzy-search,filter,pure-python
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
20
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
22
|
+
Classifier: Topic :: Text Processing :: Filters
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# purefzf
|
|
29
|
+
|
|
30
|
+
[![CI](https://github.com/adam2go/purefzf/actions/workflows/ci.yml/badge.svg)](https://github.com/adam2go/purefzf/actions/workflows/ci.yml)
|
|
31
|
+
[![PyPI](https://img.shields.io/pypi/v/purefzf)](https://pypi.org/project/purefzf/)
|
|
32
|
+
|
|
33
|
+
A pure Python implementation of [fzf](https://github.com/junegunn/fzf)'s
|
|
34
|
+
matching engine: the FuzzyMatchV2 scoring algorithm, the extended-search
|
|
35
|
+
syntax, and the non-interactive `--filter` mode.
|
|
36
|
+
|
|
37
|
+
- **Zero dependencies, zero binaries.** `pip install purefzf` is the whole
|
|
38
|
+
setup. Works where the fzf binary cannot: Pyodide/WASM, AWS Lambda,
|
|
39
|
+
locked-down CI sandboxes, agent runtimes. Existing Python packages
|
|
40
|
+
(pyfzf, iterfzf) are wrappers that spawn the fzf binary at runtime.
|
|
41
|
+
- **Verified against fzf, not "inspired by" it.** The scoring model is
|
|
42
|
+
ported line-by-line from fzf v0.73.1 (commit `ce4bef75`) and checked
|
|
43
|
+
three ways (see [Verification](#verification)):
|
|
44
|
+
- fzf's own unit tests for the algorithm, pattern parser, and tokenizer,
|
|
45
|
+
ported to pytest — **179 tests, all passing**;
|
|
46
|
+
- differential testing against the real fzf binary — **4,978 of 4,981
|
|
47
|
+
cases (99.94%) byte-identical output**, the remaining 3 traced to an
|
|
48
|
+
fzf bug, not a porting gap ([details](#known-differences));
|
|
49
|
+
- 6,000 randomized cross-checks between the optimized ASCII fast path
|
|
50
|
+
and the direct port of the Go code.
|
|
51
|
+
- CPython 3.9–3.14 and PyPy, tested in CI.
|
|
52
|
+
|
|
53
|
+
## Usage
|
|
54
|
+
|
|
55
|
+
### CLI
|
|
56
|
+
|
|
57
|
+
`purefzf` is `fzf --filter`: it reads lines from stdin and prints the
|
|
58
|
+
matches in fzf's ranking order.
|
|
59
|
+
|
|
60
|
+
```console
|
|
61
|
+
$ git ls-files | purefzf -f 'tests py$'
|
|
62
|
+
tests/test_party.py
|
|
63
|
+
tests/test_display.py
|
|
64
|
+
$ history | purefzf -f '!sudo git' --tac | head -5
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Supported flags (all with fzf semantics, including exit codes 0/1/2):
|
|
68
|
+
`-f/--filter`, `-e/--exact`, `+e`, `-x/--extended`, `+x`,
|
|
69
|
+
`-i/--ignore-case`, `+i`, `--smart-case`, `--literal`, `--algo=v1|v2`,
|
|
70
|
+
`--scheme=default|path|history`, `-n/--nth`, `-d/--delimiter`,
|
|
71
|
+
`--tiebreak`, `+s/--no-sort`, `--tac`, `--read0`, `--print0`,
|
|
72
|
+
`--print-query`.
|
|
73
|
+
|
|
74
|
+
### Library
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import purefzf
|
|
78
|
+
|
|
79
|
+
# the equivalent of `fzf --filter='qry'`: matched lines in fzf order
|
|
80
|
+
purefzf.filter("qry", ["query.py", "quarry.rs", "manifest.json"])
|
|
81
|
+
# -> ['query.py', 'quarry.rs']
|
|
82
|
+
|
|
83
|
+
# same options as the CLI
|
|
84
|
+
purefzf.filter("conf", lines, nth="2..", delimiter=":", tiebreak="begin")
|
|
85
|
+
|
|
86
|
+
# Match objects with score, original index, and matched positions
|
|
87
|
+
for m in purefzf.matches("qry", lines, with_positions=True):
|
|
88
|
+
print(m.score, m.text, m.positions)
|
|
89
|
+
|
|
90
|
+
# low-level: score a single candidate
|
|
91
|
+
# (case_sensitive, normalize, forward, text, pattern, with_pos)
|
|
92
|
+
result, positions = purefzf.fuzzy_match_v2(
|
|
93
|
+
False, True, True, "src/QueryBuilder.py", "qry", True)
|
|
94
|
+
result # (start, end, score) -> (4, 9, 66)
|
|
95
|
+
positions # [8, 7, 4]
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
The extended-search syntax works exactly as in fzf: `term1 term2` (AND),
|
|
99
|
+
`a | b` (OR), `'exact`, `'boundary'`, `^prefix`, `suffix$`, `^equal$`,
|
|
100
|
+
`!negation`, smart-case, and latin-script normalization (`danco` matches
|
|
101
|
+
`Danço`; disable with `--literal`).
|
|
102
|
+
|
|
103
|
+
## Scope
|
|
104
|
+
|
|
105
|
+
purefzf implements the **matching engine and filter mode**, not the
|
|
106
|
+
interactive terminal UI. Not included (today): the TUI, previews,
|
|
107
|
+
key bindings, `--with-nth`/`--accept-nth` output transforms, ANSI color
|
|
108
|
+
processing, and multi-threaded matching. The algorithm layer is complete,
|
|
109
|
+
so a TUI built on top of it only needs a terminal frontend.
|
|
110
|
+
|
|
111
|
+
## Verification
|
|
112
|
+
|
|
113
|
+
All three layers run in CI on every commit; the differential layer runs on
|
|
114
|
+
any machine that has an fzf binary (development-time only — purefzf itself
|
|
115
|
+
never shells out).
|
|
116
|
+
|
|
117
|
+
1. **fzf's official unit tests, ported.** Every assertion from
|
|
118
|
+
`algo_test.go`, `pattern_test.go`, and `tokenizer_test.go` at v0.73.1
|
|
119
|
+
is reproduced in `tests/` with the same inputs and expected scores.
|
|
120
|
+
The chunk-cache tests are intentionally out of scope (the cache is an
|
|
121
|
+
interactive-typing optimization, not part of matching semantics).
|
|
122
|
+
|
|
123
|
+
```console
|
|
124
|
+
$ python -m pytest # 179 passed
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
2. **Differential testing against the fzf binary.** `tools/diff_fzf.py`
|
|
128
|
+
compares `purefzf --filter` with `fzf --filter` byte by byte across
|
|
129
|
+
8 corpora (dictionary words, file paths, source code, structured
|
|
130
|
+
fields, unicode, empty/whitespace/200KB lines, invalid UTF-8) ×
|
|
131
|
+
29 option sets × up to 37 queries:
|
|
132
|
+
|
|
133
|
+
```console
|
|
134
|
+
$ FZF_BIN=$(which fzf) python tools/diff_fzf.py
|
|
135
|
+
4978/4981 byte-identical (99.94%), 3 known divergences (fzf slab reuse, see README), 0 unexplained
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
3. **Fast-path cross-checking.** The optimized ASCII path must produce
|
|
139
|
+
bit-identical results (score, offsets, positions) to the direct port
|
|
140
|
+
of the Go code on 6,000 randomized inputs per run.
|
|
141
|
+
|
|
142
|
+
## Known differences
|
|
143
|
+
|
|
144
|
+
The complete list — anything not listed here that differs from
|
|
145
|
+
`fzf --filter` is a bug, please report it:
|
|
146
|
+
|
|
147
|
+
1. **fzf's position backtrace reads recycled memory; purefzf doesn't.**
|
|
148
|
+
fzf reuses per-worker score buffers ("slabs") without zeroing, and the
|
|
149
|
+
`preferMatch` heuristic in FuzzyMatchV2's backtrace can read cells the
|
|
150
|
+
current match never wrote. With `--tiebreak=chunk` or
|
|
151
|
+
`--scheme=path`/`--tiebreak=pathname` (the modes that depend on exact
|
|
152
|
+
match positions), the same line can therefore rank differently in fzf
|
|
153
|
+
depending on what the worker processed before it — feed fzf one stream
|
|
154
|
+
containing a duplicated line and the two copies can come out ranked
|
|
155
|
+
apart. purefzf behaves like fzf with a freshly zeroed slab: on isolated
|
|
156
|
+
input the two agree exactly (this is how the 3 differential failures
|
|
157
|
+
above were verified).
|
|
158
|
+
2. **Regex delimiters use Python `re`, not Go RE2.** `--delimiter` accepts
|
|
159
|
+
a regex when it isn't a literal string; the two dialects agree on
|
|
160
|
+
everything typical (`[0-9]+`, `\s+`, `\t`) but are not identical at
|
|
161
|
+
the edges (e.g. RE2 has no backreferences, `re` has no `\p{...}`).
|
|
162
|
+
3. **`--tiebreak=pathname` offsets count runes, not bytes.** fzf compares
|
|
163
|
+
a byte offset with a rune offset when locating the last path separator;
|
|
164
|
+
for non-ASCII paths purefzf uses rune offsets consistently.
|
|
165
|
+
|
|
166
|
+
## Performance
|
|
167
|
+
|
|
168
|
+
Honest numbers: the fzf binary is a multi-threaded Go program and remains
|
|
169
|
+
8–33× faster end-to-end. purefzf is for environments where that binary
|
|
170
|
+
cannot run, and it is fast enough to be practical — selective queries
|
|
171
|
+
scan **1.3–3.2M lines/s** in-process on CPython 3.12 (Apple M-series):
|
|
172
|
+
|
|
173
|
+
| workload | matches | purefzf lib | purefzf CLI | fzf binary |
|
|
174
|
+
|---------------------------------|--------:|------------:|------------:|-----------:|
|
|
175
|
+
| 235k words, `ion` | 16,443 | 145 ms | 170 ms | 21 ms |
|
|
176
|
+
| 235k words, `zsh` | 62 | 75 ms | 101 ms | 12 ms |
|
|
177
|
+
| 235k words, `'tion` | 7,422 | 180 ms | 205 ms | 16 ms |
|
|
178
|
+
| 235k words, `^ab cd$ \| ing$` | 5 | 405 ms | 434 ms | 13 ms |
|
|
179
|
+
| 108k paths, `test` | 104,331 | 1.11 s | 1.14 s | 75 ms |
|
|
180
|
+
| 108k paths, `pyini` | 60,566 | 610 ms | 650 ms | 48 ms |
|
|
181
|
+
| 108k paths, `lib/site` | 107,135 | 1.85 s | 1.88 s | 82 ms |
|
|
182
|
+
|
|
183
|
+
Median of 7 runs; every workload's output is verified byte-identical to
|
|
184
|
+
the fzf binary before it is timed. Reproduce with:
|
|
185
|
+
|
|
186
|
+
```console
|
|
187
|
+
$ FZF_BIN=$(which fzf) python tools/bench.py
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## How the port stays faithful
|
|
191
|
+
|
|
192
|
+
The matcher is ported function-by-function from `src/algo/algo.go`,
|
|
193
|
+
`pattern.go`, `tokenizer.go`, and `result.go` at fzf v0.73.1, including
|
|
194
|
+
the parts that are easy to get subtly wrong: the bonus matrix and scheme
|
|
195
|
+
tables, camelCase/number boundary bonuses, the consecutive-chunk bonus
|
|
196
|
+
rules, first-char multiplier, the `--no-sort --tac` sorting quirk, the
|
|
197
|
+
streaming vs. collected output paths, smart-case and per-term
|
|
198
|
+
normalization decisions, the exact V2→V1 fallback thresholds
|
|
199
|
+
(`N×M > 102400` or `M > 1000`), tiebreak rank encoding, and Go's
|
|
200
|
+
per-byte U+FFFD substitution for invalid UTF-8.
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
MIT. The algorithm and its test suite are ported from
|
|
205
|
+
[junegunn/fzf](https://github.com/junegunn/fzf) (MIT licensed) — all
|
|
206
|
+
credit for the matching algorithm's design belongs there.
|
purefzf-0.1.0/README.md
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# purefzf
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/adam2go/purefzf/actions/workflows/ci.yml/badge.svg)](https://github.com/adam2go/purefzf/actions/workflows/ci.yml)
|
|
4
|
+
[![PyPI](https://img.shields.io/pypi/v/purefzf)](https://pypi.org/project/purefzf/)
|
|
5
|
+
|
|
6
|
+
A pure Python implementation of [fzf](https://github.com/junegunn/fzf)'s
|
|
7
|
+
matching engine: the FuzzyMatchV2 scoring algorithm, the extended-search
|
|
8
|
+
syntax, and the non-interactive `--filter` mode.
|
|
9
|
+
|
|
10
|
+
- **Zero dependencies, zero binaries.** `pip install purefzf` is the whole
|
|
11
|
+
setup. Works where the fzf binary cannot: Pyodide/WASM, AWS Lambda,
|
|
12
|
+
locked-down CI sandboxes, agent runtimes. Existing Python packages
|
|
13
|
+
(pyfzf, iterfzf) are wrappers that spawn the fzf binary at runtime.
|
|
14
|
+
- **Verified against fzf, not "inspired by" it.** The scoring model is
|
|
15
|
+
ported line-by-line from fzf v0.73.1 (commit `ce4bef75`) and checked
|
|
16
|
+
three ways (see [Verification](#verification)):
|
|
17
|
+
- fzf's own unit tests for the algorithm, pattern parser, and tokenizer,
|
|
18
|
+
ported to pytest — **179 tests, all passing**;
|
|
19
|
+
- differential testing against the real fzf binary — **4,978 of 4,981
|
|
20
|
+
cases (99.94%) byte-identical output**, the remaining 3 traced to an
|
|
21
|
+
fzf bug, not a porting gap ([details](#known-differences));
|
|
22
|
+
- 6,000 randomized cross-checks between the optimized ASCII fast path
|
|
23
|
+
and the direct port of the Go code.
|
|
24
|
+
- CPython 3.9–3.14 and PyPy, tested in CI.
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
### CLI
|
|
29
|
+
|
|
30
|
+
`purefzf` is `fzf --filter`: it reads lines from stdin and prints the
|
|
31
|
+
matches in fzf's ranking order.
|
|
32
|
+
|
|
33
|
+
```console
|
|
34
|
+
$ git ls-files | purefzf -f 'tests py$'
|
|
35
|
+
tests/test_party.py
|
|
36
|
+
tests/test_display.py
|
|
37
|
+
$ history | purefzf -f '!sudo git' --tac | head -5
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Supported flags (all with fzf semantics, including exit codes 0/1/2):
|
|
41
|
+
`-f/--filter`, `-e/--exact`, `+e`, `-x/--extended`, `+x`,
|
|
42
|
+
`-i/--ignore-case`, `+i`, `--smart-case`, `--literal`, `--algo=v1|v2`,
|
|
43
|
+
`--scheme=default|path|history`, `-n/--nth`, `-d/--delimiter`,
|
|
44
|
+
`--tiebreak`, `+s/--no-sort`, `--tac`, `--read0`, `--print0`,
|
|
45
|
+
`--print-query`.
|
|
46
|
+
|
|
47
|
+
### Library
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
import purefzf
|
|
51
|
+
|
|
52
|
+
# the equivalent of `fzf --filter='qry'`: matched lines in fzf order
|
|
53
|
+
purefzf.filter("qry", ["query.py", "quarry.rs", "manifest.json"])
|
|
54
|
+
# -> ['query.py', 'quarry.rs']
|
|
55
|
+
|
|
56
|
+
# same options as the CLI
|
|
57
|
+
purefzf.filter("conf", lines, nth="2..", delimiter=":", tiebreak="begin")
|
|
58
|
+
|
|
59
|
+
# Match objects with score, original index, and matched positions
|
|
60
|
+
for m in purefzf.matches("qry", lines, with_positions=True):
|
|
61
|
+
print(m.score, m.text, m.positions)
|
|
62
|
+
|
|
63
|
+
# low-level: score a single candidate
|
|
64
|
+
# (case_sensitive, normalize, forward, text, pattern, with_pos)
|
|
65
|
+
result, positions = purefzf.fuzzy_match_v2(
|
|
66
|
+
False, True, True, "src/QueryBuilder.py", "qry", True)
|
|
67
|
+
result # (start, end, score) -> (4, 9, 66)
|
|
68
|
+
positions # [8, 7, 4]
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
The extended-search syntax works exactly as in fzf: `term1 term2` (AND),
|
|
72
|
+
`a | b` (OR), `'exact`, `'boundary'`, `^prefix`, `suffix$`, `^equal$`,
|
|
73
|
+
`!negation`, smart-case, and latin-script normalization (`danco` matches
|
|
74
|
+
`Danço`; disable with `--literal`).
|
|
75
|
+
|
|
76
|
+
## Scope
|
|
77
|
+
|
|
78
|
+
purefzf implements the **matching engine and filter mode**, not the
|
|
79
|
+
interactive terminal UI. Not included (today): the TUI, previews,
|
|
80
|
+
key bindings, `--with-nth`/`--accept-nth` output transforms, ANSI color
|
|
81
|
+
processing, and multi-threaded matching. The algorithm layer is complete,
|
|
82
|
+
so a TUI built on top of it only needs a terminal frontend.
|
|
83
|
+
|
|
84
|
+
## Verification
|
|
85
|
+
|
|
86
|
+
All three layers run in CI on every commit; the differential layer runs on
|
|
87
|
+
any machine that has an fzf binary (development-time only — purefzf itself
|
|
88
|
+
never shells out).
|
|
89
|
+
|
|
90
|
+
1. **fzf's official unit tests, ported.** Every assertion from
|
|
91
|
+
`algo_test.go`, `pattern_test.go`, and `tokenizer_test.go` at v0.73.1
|
|
92
|
+
is reproduced in `tests/` with the same inputs and expected scores.
|
|
93
|
+
The chunk-cache tests are intentionally out of scope (the cache is an
|
|
94
|
+
interactive-typing optimization, not part of matching semantics).
|
|
95
|
+
|
|
96
|
+
```console
|
|
97
|
+
$ python -m pytest # 179 passed
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
2. **Differential testing against the fzf binary.** `tools/diff_fzf.py`
|
|
101
|
+
compares `purefzf --filter` with `fzf --filter` byte by byte across
|
|
102
|
+
8 corpora (dictionary words, file paths, source code, structured
|
|
103
|
+
fields, unicode, empty/whitespace/200KB lines, invalid UTF-8) ×
|
|
104
|
+
29 option sets × up to 37 queries:
|
|
105
|
+
|
|
106
|
+
```console
|
|
107
|
+
$ FZF_BIN=$(which fzf) python tools/diff_fzf.py
|
|
108
|
+
4978/4981 byte-identical (99.94%), 3 known divergences (fzf slab reuse, see README), 0 unexplained
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
3. **Fast-path cross-checking.** The optimized ASCII path must produce
|
|
112
|
+
bit-identical results (score, offsets, positions) to the direct port
|
|
113
|
+
of the Go code on 6,000 randomized inputs per run.
|
|
114
|
+
|
|
115
|
+
## Known differences
|
|
116
|
+
|
|
117
|
+
The complete list — anything not listed here that differs from
|
|
118
|
+
`fzf --filter` is a bug, please report it:
|
|
119
|
+
|
|
120
|
+
1. **fzf's position backtrace reads recycled memory; purefzf doesn't.**
|
|
121
|
+
fzf reuses per-worker score buffers ("slabs") without zeroing, and the
|
|
122
|
+
`preferMatch` heuristic in FuzzyMatchV2's backtrace can read cells the
|
|
123
|
+
current match never wrote. With `--tiebreak=chunk` or
|
|
124
|
+
`--scheme=path`/`--tiebreak=pathname` (the modes that depend on exact
|
|
125
|
+
match positions), the same line can therefore rank differently in fzf
|
|
126
|
+
depending on what the worker processed before it — feed fzf one stream
|
|
127
|
+
containing a duplicated line and the two copies can come out ranked
|
|
128
|
+
apart. purefzf behaves like fzf with a freshly zeroed slab: on isolated
|
|
129
|
+
input the two agree exactly (this is how the 3 differential failures
|
|
130
|
+
above were verified).
|
|
131
|
+
2. **Regex delimiters use Python `re`, not Go RE2.** `--delimiter` accepts
|
|
132
|
+
a regex when it isn't a literal string; the two dialects agree on
|
|
133
|
+
everything typical (`[0-9]+`, `\s+`, `\t`) but are not identical at
|
|
134
|
+
the edges (e.g. RE2 has no backreferences, `re` has no `\p{...}`).
|
|
135
|
+
3. **`--tiebreak=pathname` offsets count runes, not bytes.** fzf compares
|
|
136
|
+
a byte offset with a rune offset when locating the last path separator;
|
|
137
|
+
for non-ASCII paths purefzf uses rune offsets consistently.
|
|
138
|
+
|
|
139
|
+
## Performance
|
|
140
|
+
|
|
141
|
+
Honest numbers: the fzf binary is a multi-threaded Go program and remains
|
|
142
|
+
8–33× faster end-to-end. purefzf is for environments where that binary
|
|
143
|
+
cannot run, and it is fast enough to be practical — selective queries
|
|
144
|
+
scan **1.3–3.2M lines/s** in-process on CPython 3.12 (Apple M-series):
|
|
145
|
+
|
|
146
|
+
| workload | matches | purefzf lib | purefzf CLI | fzf binary |
|
|
147
|
+
|---------------------------------|--------:|------------:|------------:|-----------:|
|
|
148
|
+
| 235k words, `ion` | 16,443 | 145 ms | 170 ms | 21 ms |
|
|
149
|
+
| 235k words, `zsh` | 62 | 75 ms | 101 ms | 12 ms |
|
|
150
|
+
| 235k words, `'tion` | 7,422 | 180 ms | 205 ms | 16 ms |
|
|
151
|
+
| 235k words, `^ab cd$ \| ing$` | 5 | 405 ms | 434 ms | 13 ms |
|
|
152
|
+
| 108k paths, `test` | 104,331 | 1.11 s | 1.14 s | 75 ms |
|
|
153
|
+
| 108k paths, `pyini` | 60,566 | 610 ms | 650 ms | 48 ms |
|
|
154
|
+
| 108k paths, `lib/site` | 107,135 | 1.85 s | 1.88 s | 82 ms |
|
|
155
|
+
|
|
156
|
+
Median of 7 runs; every workload's output is verified byte-identical to
|
|
157
|
+
the fzf binary before it is timed. Reproduce with:
|
|
158
|
+
|
|
159
|
+
```console
|
|
160
|
+
$ FZF_BIN=$(which fzf) python tools/bench.py
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## How the port stays faithful
|
|
164
|
+
|
|
165
|
+
The matcher is ported function-by-function from `src/algo/algo.go`,
|
|
166
|
+
`pattern.go`, `tokenizer.go`, and `result.go` at fzf v0.73.1, including
|
|
167
|
+
the parts that are easy to get subtly wrong: the bonus matrix and scheme
|
|
168
|
+
tables, camelCase/number boundary bonuses, the consecutive-chunk bonus
|
|
169
|
+
rules, first-char multiplier, the `--no-sort --tac` sorting quirk, the
|
|
170
|
+
streaming vs. collected output paths, smart-case and per-term
|
|
171
|
+
normalization decisions, the exact V2→V1 fallback thresholds
|
|
172
|
+
(`N×M > 102400` or `M > 1000`), tiebreak rank encoding, and Go's
|
|
173
|
+
per-byte U+FFFD substitution for invalid UTF-8.
|
|
174
|
+
|
|
175
|
+
## License
|
|
176
|
+
|
|
177
|
+
MIT. The algorithm and its test suite are ported from
|
|
178
|
+
[junegunn/fzf](https://github.com/junegunn/fzf) (MIT licensed) — all
|
|
179
|
+
credit for the matching algorithm's design belongs there.
|
|
purefzf-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "purefzf"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A pure Python implementation of fzf's fuzzy matching (FuzzyMatchV2) and non-interactive filter mode"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "adam2go" }]
|
|
13
|
+
keywords = ["fzf", "fuzzy", "fuzzy-search", "filter", "pure-python"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.9",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
"Programming Language :: Python :: 3.14",
|
|
25
|
+
"Programming Language :: Python :: Implementation :: CPython",
|
|
26
|
+
"Programming Language :: Python :: Implementation :: PyPy",
|
|
27
|
+
"Topic :: Software Development :: Libraries",
|
|
28
|
+
"Topic :: Text Processing :: Filters",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/adam2go/purefzf"
|
|
33
|
+
|
|
34
|
+
[project.scripts]
|
|
35
|
+
purefzf = "purefzf.cli:main"
|
|
36
|
+
|
|
37
|
+
[tool.setuptools.packages.find]
|
|
38
|
+
where = ["src"]
|
|
39
|
+
|
|
40
|
+
[tool.pytest.ini_options]
|
|
41
|
+
testpaths = ["tests"]
|
purefzf-0.1.0/setup.cfg
ADDED
|
purefzf-0.1.0/src/purefzf/__init__.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""purefzf: a pure Python implementation of fzf's matching engine.
|
|
2
|
+
|
|
3
|
+
The scoring model, extended-search syntax, and result ordering are ported
|
|
4
|
+
from fzf v0.73.1 and verified against both fzf's unit tests and the fzf
|
|
5
|
+
binary itself (byte-identical `--filter` output on the differential suite).
|
|
6
|
+
|
|
7
|
+
Quick start::
|
|
8
|
+
|
|
9
|
+
import purefzf
|
|
10
|
+
|
|
11
|
+
# The non-interactive equivalent of `fzf --filter='qry'`
|
|
12
|
+
purefzf.filter("qry", ["query.py", "quarry.rs", "manifest.json"])
|
|
13
|
+
|
|
14
|
+
# Match objects with score / index / (optional) matched positions
|
|
15
|
+
purefzf.matches("qry", lines, with_positions=True)
|
|
16
|
+
|
|
17
|
+
# Low-level: score a single candidate
|
|
18
|
+
purefzf.fuzzy_match_v2(False, False, True, "Quarry", "qry", True)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
__version__ = "0.1.0"
|
|
22
|
+
|
|
23
|
+
from .algo import ( # noqa: F401
|
|
24
|
+
Slab,
|
|
25
|
+
Scheme,
|
|
26
|
+
fuzzy_match_v1,
|
|
27
|
+
fuzzy_match_v2,
|
|
28
|
+
exact_match_naive,
|
|
29
|
+
exact_match_boundary,
|
|
30
|
+
prefix_match,
|
|
31
|
+
suffix_match,
|
|
32
|
+
equal_match,
|
|
33
|
+
)
|
|
34
|
+
from .core import Match, run_filter # noqa: F401
|
|
35
|
+
from .core import filter_lines as filter # noqa: F401
|
|
36
|
+
from .pattern import build_pattern # noqa: F401
|
|
37
|
+
from .tokenizer import Delimiter # noqa: F401
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def matches(query, lines, **options):
|
|
41
|
+
"""Match lines against the query; returns a list of Match objects in
|
|
42
|
+
fzf's output order. Accepts the same options as `fzf --filter`:
|
|
43
|
+
|
|
44
|
+
fuzzy=True, extended=True, case="smart" ("ignore"/"respect"),
|
|
45
|
+
normalize=True, algo="v2" ("v1"), scheme=None ("default"/"path"/"history"),
|
|
46
|
+
nth=None, delimiter=None, tiebreak=None, sort=True, tac=False,
|
|
47
|
+
with_positions=False
|
|
48
|
+
"""
|
|
49
|
+
return run_filter(lines, query, **options)
|