skimmatch 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skimmatch-0.1.0/.cargo/config.toml +2 -0
- skimmatch-0.1.0/.gitignore +47 -0
- skimmatch-0.1.0/Cargo.lock +158 -0
- skimmatch-0.1.0/Cargo.toml +13 -0
- skimmatch-0.1.0/PKG-INFO +327 -0
- skimmatch-0.1.0/README.md +314 -0
- skimmatch-0.1.0/agent.md +227 -0
- skimmatch-0.1.0/pyproject.toml +25 -0
- skimmatch-0.1.0/python/skimmatch/__init__.py +8 -0
- skimmatch-0.1.0/python/skimmatch/_skimmatch.pdb +0 -0
- skimmatch-0.1.0/src/lib.rs +454 -0
- skimmatch-0.1.0/tests/test_skimmatch.py +112 -0
- skimmatch-0.1.0/uv.lock +8 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# =========================================================
|
|
2
|
+
# Rust / Cargo
|
|
3
|
+
# =========================================================
|
|
4
|
+
# This is the main build directory where all compiled artifacts are stored.
|
|
5
|
+
# It's the most important entry for any Rust project.
|
|
6
|
+
/target/
|
|
7
|
+
|
|
8
|
+
# =========================================================
|
|
9
|
+
# Python
|
|
10
|
+
# =========================================================
|
|
11
|
+
# Bytecode and compiled files
|
|
12
|
+
__pycache__/
|
|
13
|
+
*.pyc
|
|
14
|
+
*.pyo
|
|
15
|
+
*.pyd
|
|
16
|
+
|
|
17
|
+
# Virtual environments - add others if you use different names
|
|
18
|
+
venv/
|
|
19
|
+
.venv/
|
|
20
|
+
env/
|
|
21
|
+
.env/
|
|
22
|
+
|
|
23
|
+
# Packaging and testing artifacts
|
|
24
|
+
.eggs/
|
|
25
|
+
*.egg-info/
|
|
26
|
+
dist/
|
|
27
|
+
build/
|
|
28
|
+
.pytest_cache/
|
|
29
|
+
.tox/
|
|
30
|
+
wheels/
|
|
31
|
+
|
|
32
|
+
# =========================================================
|
|
33
|
+
# IDEs and Editors
|
|
34
|
+
# =========================================================
|
|
35
|
+
.vscode/
|
|
36
|
+
.idea/
|
|
37
|
+
*.sublime-project
|
|
38
|
+
*.sublime-workspace
|
|
39
|
+
|
|
40
|
+
# =========================================================
|
|
41
|
+
# Operating System Files
|
|
42
|
+
# =========================================================
|
|
43
|
+
# macOS
|
|
44
|
+
.DS_Store
|
|
45
|
+
|
|
46
|
+
# Windows
|
|
47
|
+
Thumbs.db
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
|
2
|
+
# It is not intended for manual editing.
|
|
3
|
+
version = 4
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "cfg-if"
|
|
7
|
+
version = "1.0.4"
|
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
10
|
+
|
|
11
|
+
[[package]]
|
|
12
|
+
name = "fuzzy-matcher"
|
|
13
|
+
version = "0.3.7"
|
|
14
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
15
|
+
checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94"
|
|
16
|
+
dependencies = [
|
|
17
|
+
"thread_local",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[[package]]
|
|
21
|
+
name = "heck"
|
|
22
|
+
version = "0.5.0"
|
|
23
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
24
|
+
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
25
|
+
|
|
26
|
+
[[package]]
|
|
27
|
+
name = "libc"
|
|
28
|
+
version = "0.2.186"
|
|
29
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
30
|
+
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
|
|
31
|
+
|
|
32
|
+
[[package]]
|
|
33
|
+
name = "once_cell"
|
|
34
|
+
version = "1.21.4"
|
|
35
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
36
|
+
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
|
|
37
|
+
|
|
38
|
+
[[package]]
|
|
39
|
+
name = "portable-atomic"
|
|
40
|
+
version = "1.13.1"
|
|
41
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
42
|
+
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
|
43
|
+
|
|
44
|
+
[[package]]
|
|
45
|
+
name = "proc-macro2"
|
|
46
|
+
version = "1.0.106"
|
|
47
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
48
|
+
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
49
|
+
dependencies = [
|
|
50
|
+
"unicode-ident",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[[package]]
|
|
54
|
+
name = "pyo3"
|
|
55
|
+
version = "0.28.3"
|
|
56
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
57
|
+
checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12"
|
|
58
|
+
dependencies = [
|
|
59
|
+
"libc",
|
|
60
|
+
"once_cell",
|
|
61
|
+
"portable-atomic",
|
|
62
|
+
"pyo3-build-config",
|
|
63
|
+
"pyo3-ffi",
|
|
64
|
+
"pyo3-macros",
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
[[package]]
|
|
68
|
+
name = "pyo3-build-config"
|
|
69
|
+
version = "0.28.3"
|
|
70
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
71
|
+
checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e"
|
|
72
|
+
dependencies = [
|
|
73
|
+
"target-lexicon",
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
[[package]]
|
|
77
|
+
name = "pyo3-ffi"
|
|
78
|
+
version = "0.28.3"
|
|
79
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
80
|
+
checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e"
|
|
81
|
+
dependencies = [
|
|
82
|
+
"libc",
|
|
83
|
+
"pyo3-build-config",
|
|
84
|
+
]
|
|
85
|
+
|
|
86
|
+
[[package]]
|
|
87
|
+
name = "pyo3-macros"
|
|
88
|
+
version = "0.28.3"
|
|
89
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
90
|
+
checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813"
|
|
91
|
+
dependencies = [
|
|
92
|
+
"proc-macro2",
|
|
93
|
+
"pyo3-macros-backend",
|
|
94
|
+
"quote",
|
|
95
|
+
"syn",
|
|
96
|
+
]
|
|
97
|
+
|
|
98
|
+
[[package]]
|
|
99
|
+
name = "pyo3-macros-backend"
|
|
100
|
+
version = "0.28.3"
|
|
101
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
102
|
+
checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb"
|
|
103
|
+
dependencies = [
|
|
104
|
+
"heck",
|
|
105
|
+
"proc-macro2",
|
|
106
|
+
"pyo3-build-config",
|
|
107
|
+
"quote",
|
|
108
|
+
"syn",
|
|
109
|
+
]
|
|
110
|
+
|
|
111
|
+
[[package]]
|
|
112
|
+
name = "quote"
|
|
113
|
+
version = "1.0.45"
|
|
114
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
115
|
+
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
|
116
|
+
dependencies = [
|
|
117
|
+
"proc-macro2",
|
|
118
|
+
]
|
|
119
|
+
|
|
120
|
+
[[package]]
|
|
121
|
+
name = "skimmatch"
|
|
122
|
+
version = "0.1.0"
|
|
123
|
+
dependencies = [
|
|
124
|
+
"fuzzy-matcher",
|
|
125
|
+
"pyo3",
|
|
126
|
+
]
|
|
127
|
+
|
|
128
|
+
[[package]]
|
|
129
|
+
name = "syn"
|
|
130
|
+
version = "2.0.117"
|
|
131
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
132
|
+
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
|
133
|
+
dependencies = [
|
|
134
|
+
"proc-macro2",
|
|
135
|
+
"quote",
|
|
136
|
+
"unicode-ident",
|
|
137
|
+
]
|
|
138
|
+
|
|
139
|
+
[[package]]
|
|
140
|
+
name = "target-lexicon"
|
|
141
|
+
version = "0.13.5"
|
|
142
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
143
|
+
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
|
144
|
+
|
|
145
|
+
[[package]]
|
|
146
|
+
name = "thread_local"
|
|
147
|
+
version = "1.1.9"
|
|
148
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
149
|
+
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
|
|
150
|
+
dependencies = [
|
|
151
|
+
"cfg-if",
|
|
152
|
+
]
|
|
153
|
+
|
|
154
|
+
[[package]]
|
|
155
|
+
name = "unicode-ident"
|
|
156
|
+
version = "1.0.24"
|
|
157
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
158
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "skimmatch"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
|
|
7
|
+
[lib]
|
|
8
|
+
name = "_skimmatch"
|
|
9
|
+
crate-type = ["cdylib"]
|
|
10
|
+
|
|
11
|
+
[dependencies]
|
|
12
|
+
fuzzy-matcher = "0.3"
|
|
13
|
+
pyo3 = { version = "0.28", features = ["extension-module"] }
|
skimmatch-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skimmatch
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Classifier: Programming Language :: Python :: 3
|
|
5
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
6
|
+
Classifier: Programming Language :: Rust
|
|
7
|
+
Summary: In-process fzf/skim-style fuzzy finder for Python, implemented in Rust.
|
|
8
|
+
Author: TELOS
|
|
9
|
+
License: MIT
|
|
10
|
+
Requires-Python: >=3.13
|
|
11
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
12
|
+
|
|
13
|
+
# skimmatch
|
|
14
|
+
|
|
15
|
+
`skimmatch` is an in-process fzf/skim-style fuzzy finder for Python,
|
|
16
|
+
implemented in Rust.
|
|
17
|
+
|
|
18
|
+
It is designed for ranked abbreviation matching over a fixed list of candidate
|
|
19
|
+
strings. You give it strings such as filenames, references, titles, symbols, or
|
|
20
|
+
command labels; users type short abbreviation-style queries; `skimmatch`
|
|
21
|
+
returns the best candidates, scores, and optional highlight positions.
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from skimmatch import Matcher
|
|
25
|
+
|
|
26
|
+
candidates = [
|
|
27
|
+
"Follmer and Schied, Stochastic Finance, 2011",
|
|
28
|
+
"Mildenhall and Major, Pricing Insurance Risk",
|
|
29
|
+
"Wang distortion risk measures",
|
|
30
|
+
"Archive reference catalogue",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
matcher = Matcher(candidates)
|
|
34
|
+
|
|
35
|
+
for result in matcher.search("wang distortion", limit=3):
|
|
36
|
+
print(result)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Example result:
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
{
|
|
43
|
+
"index": 2,
|
|
44
|
+
"score": 260,
|
|
45
|
+
"text": "Wang distortion risk measures",
|
|
46
|
+
"matches": [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Scores are backend scores where higher is better. The exact numeric value should
|
|
51
|
+
be treated as ranking information, not as a stable cross-version metric.
|
|
52
|
+
|
|
53
|
+
## What This Is
|
|
54
|
+
|
|
55
|
+
`skimmatch` solves the same broad problem as interactive fuzzy finders such as
|
|
56
|
+
`fzf` and `skim`: finding good abbreviation matches quickly.
|
|
57
|
+
|
|
58
|
+
For example, a query like:
|
|
59
|
+
|
|
60
|
+
```text
|
|
61
|
+
fs sf 2011
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
can match:
|
|
65
|
+
|
|
66
|
+
```text
|
|
67
|
+
Follmer and Schied, Stochastic Finance, 2011
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
because the query characters and tokens appear in useful positions and in the
|
|
71
|
+
right order.
|
|
72
|
+
|
|
73
|
+
This is different from edit-distance fuzzy matching. Libraries such as
|
|
74
|
+
RapidFuzz, Levenshtein, or token-ratio matchers are excellent for typo
|
|
75
|
+
correction, deduplication, OCR cleanup, and record linkage. `skimmatch` is aimed
|
|
76
|
+
at fast candidate selection, interactive search, and highlightable abbreviation
|
|
77
|
+
matching.
|
|
78
|
+
|
|
79
|
+
## Features
|
|
80
|
+
|
|
81
|
+
- In-process Python extension: no external `fzf` executable required.
|
|
82
|
+
- Rust matching core using `SkimMatcherV2` from the `fuzzy-matcher` crate.
|
|
83
|
+
- Preloaded candidate lists for fast repeated queries.
|
|
84
|
+
- Single-token and multi-token search modes.
|
|
85
|
+
- Optional highlight indices for UI rendering.
|
|
86
|
+
- Legacy tuple-returning APIs for compatibility with the earlier `rustfuzz`
|
|
87
|
+
shape.
|
|
88
|
+
- Structured `Matcher.search(...)` API for new code.
|
|
89
|
+
- Backend argument already present, so future backends can be added without
|
|
90
|
+
changing the public matcher classes.
|
|
91
|
+
|
|
92
|
+
## Installation
|
|
93
|
+
|
|
94
|
+
From PyPI:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pip install skimmatch
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
From a local checkout:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
uv pip install -e .
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
or build with maturin:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
uv run maturin develop
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
The current package metadata targets Python 3.13 or newer.
|
|
113
|
+
|
|
114
|
+
## Quick Start
|
|
115
|
+
|
|
116
|
+
Use `Matcher` for new code.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from skimmatch import Matcher
|
|
120
|
+
|
|
121
|
+
candidates = [
|
|
122
|
+
"Buhlmann, Mathematical Methods in Risk Theory",
|
|
123
|
+
"Cramer, Collective Risk Theory",
|
|
124
|
+
"Mildenhall and Major, Pricing Insurance Risk",
|
|
125
|
+
"Kaas, Goovaerts, Dhaene, and Denuit, Modern Actuarial Risk Theory",
|
|
126
|
+
]
|
|
127
|
+
|
|
128
|
+
matcher = Matcher(candidates)
|
|
129
|
+
results = matcher.search("risk theory", limit=5)
|
|
130
|
+
|
|
131
|
+
for result in results:
|
|
132
|
+
print(result["index"], result["score"], result["text"])
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
By default, `search`:
|
|
136
|
+
|
|
137
|
+
- splits the query on whitespace;
|
|
138
|
+
- requires every query token to match;
|
|
139
|
+
- returns up to 20 results;
|
|
140
|
+
- includes candidate text;
|
|
141
|
+
- includes highlight positions.
|
|
142
|
+
|
|
143
|
+
## Structured API
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
matcher = Matcher(candidates, backend="skim")
|
|
147
|
+
results = matcher.search(
|
|
148
|
+
query,
|
|
149
|
+
limit=20,
|
|
150
|
+
highlights=True,
|
|
151
|
+
include_text=True,
|
|
152
|
+
multi=True,
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Each result is a dictionary containing:
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
{
|
|
160
|
+
"index": 0, # original candidate index
|
|
161
|
+
"score": 123, # backend score, higher is better
|
|
162
|
+
"text": "...", # included when include_text=True
|
|
163
|
+
"matches": [0, 3], # included when highlights=True
|
|
164
|
+
}
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Parameters
|
|
168
|
+
|
|
169
|
+
`query`
|
|
170
|
+
|
|
171
|
+
The search string. In multi-token mode, whitespace-separated tokens are matched
|
|
172
|
+
independently and every token must match the candidate.
|
|
173
|
+
|
|
174
|
+
`limit`
|
|
175
|
+
|
|
176
|
+
The maximum number of results to return. `limit=0` returns an empty list.
|
|
177
|
+
|
|
178
|
+
`highlights`
|
|
179
|
+
|
|
180
|
+
When true, results include `matches`, a sorted and deduplicated list of matched
|
|
181
|
+
positions. Turn this off when you only need ranking; score-only matching does
|
|
182
|
+
less work.
|
|
183
|
+
|
|
184
|
+
`include_text`
|
|
185
|
+
|
|
186
|
+
When true, each result includes the original candidate string. Turn this off if
|
|
187
|
+
you already have the candidate list and want smaller result objects.
|
|
188
|
+
|
|
189
|
+
`multi`
|
|
190
|
+
|
|
191
|
+
When true, the query is split on whitespace and all tokens are required. When
|
|
192
|
+
false, the whole query is sent to the matcher as one pattern.
|
|
193
|
+
|
|
194
|
+
## Legacy APIs
|
|
195
|
+
|
|
196
|
+
The package also exports compatibility classes with tuple return shapes:
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
from skimmatch import FuzzyMatcher, FuzzyMatcherMulti, FuzzyMatcherMultiHi
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### `FuzzyMatcher`
|
|
203
|
+
|
|
204
|
+
Treats the whole query as one pattern.
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
matcher = FuzzyMatcher(candidates)
|
|
208
|
+
indices, scores = matcher.query("sf", top_k=10)
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### `FuzzyMatcherMulti`
|
|
212
|
+
|
|
213
|
+
Splits the query on whitespace. Every token must match.
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
matcher = FuzzyMatcherMulti(candidates)
|
|
217
|
+
indices, scores = matcher.query("pricing insurance", top_k=10)
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### `FuzzyMatcherMultiHi`
|
|
221
|
+
|
|
222
|
+
Like `FuzzyMatcherMulti`, but also returns highlight positions.
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
matcher = FuzzyMatcherMultiHi(candidates)
|
|
226
|
+
indices, scores, highlights = matcher.query("pricing insurance", top_k=10)
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Matching Behavior
|
|
230
|
+
|
|
231
|
+
The current backend is:
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
backend="skim"
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
It uses `SkimMatcherV2` from the Rust `fuzzy-matcher` crate.
|
|
238
|
+
|
|
239
|
+
The scorer tends to reward:
|
|
240
|
+
|
|
241
|
+
- characters appearing in order;
|
|
242
|
+
- compact alignments;
|
|
243
|
+
- word-boundary matches;
|
|
244
|
+
- punctuation-separated and camel-case transitions;
|
|
245
|
+
- early matches;
|
|
246
|
+
- consecutive query-character matches;
|
|
247
|
+
- candidates that match every query token in multi-token mode.
|
|
248
|
+
|
|
249
|
+
`skimmatch` returns candidates sorted by descending score. Ties are ordered by
|
|
250
|
+
the original candidate index for deterministic output.
|
|
251
|
+
|
|
252
|
+
## When To Use It
|
|
253
|
+
|
|
254
|
+
`skimmatch` is a good fit for:
|
|
255
|
+
|
|
256
|
+
- command palettes;
|
|
257
|
+
- file pickers;
|
|
258
|
+
- bibliography and reference search;
|
|
259
|
+
- symbol search;
|
|
260
|
+
- autocomplete over known labels;
|
|
261
|
+
- terminal or web UI candidate selection;
|
|
262
|
+
- fast repeated queries over a preloaded list.
|
|
263
|
+
|
|
264
|
+
It is probably not the right tool for:
|
|
265
|
+
|
|
266
|
+
- typo correction;
|
|
267
|
+
- deduplication;
|
|
268
|
+
- record linkage;
|
|
269
|
+
- token-sort similarity;
|
|
270
|
+
- OCR cleanup;
|
|
271
|
+
- semantic search;
|
|
272
|
+
- embedding-based retrieval.
|
|
273
|
+
|
|
274
|
+
Those are useful problems, but they are different from fzf/skim-style
|
|
275
|
+
abbreviation matching.
|
|
276
|
+
|
|
277
|
+
## Performance Notes
|
|
278
|
+
|
|
279
|
+
Candidate strings are copied into Rust once when the matcher is constructed.
|
|
280
|
+
Repeated calls to `query` or `search` scan that Rust-owned list and return only
|
|
281
|
+
the final top results to Python.
|
|
282
|
+
|
|
283
|
+
For best performance:
|
|
284
|
+
|
|
285
|
+
- construct one matcher and reuse it across queries;
|
|
286
|
+
- set `highlights=False` when you only need indices and scores;
|
|
287
|
+
- set `include_text=False` when you already have the candidate strings;
|
|
288
|
+
- use `limit` to keep returned result objects small.
|
|
289
|
+
|
|
290
|
+
## Development
|
|
291
|
+
|
|
292
|
+
This project is a Python package with a Rust extension built by maturin.
|
|
293
|
+
|
|
294
|
+
Run the tests:
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
uv run pytest tests/test_skimmatch.py -q
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
Check Rust formatting:
|
|
301
|
+
|
|
302
|
+
```bash
|
|
303
|
+
cargo fmt --check
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
Important files:
|
|
307
|
+
|
|
308
|
+
- `src/lib.rs`: Rust/PyO3 extension implementation.
|
|
309
|
+
- `python/skimmatch/__init__.py`: Python re-exports.
|
|
310
|
+
- `tests/test_skimmatch.py`: API and behavior tests.
|
|
311
|
+
- `pyproject.toml`: Python packaging and maturin configuration.
|
|
312
|
+
- `Cargo.toml`: Rust crate configuration.
|
|
313
|
+
|
|
314
|
+
## Backend Roadmap
|
|
315
|
+
|
|
316
|
+
The public API already accepts a `backend` argument. Today only `"skim"` is
|
|
317
|
+
implemented. Future candidates include:
|
|
318
|
+
|
|
319
|
+
- `backend="nucleo"` for a modern fzf-like matcher;
|
|
320
|
+
- `backend="frizbee"` for an experimental fast and typo-tolerant matcher.
|
|
321
|
+
|
|
322
|
+
Unknown backend names currently raise `ValueError`.
|
|
323
|
+
|
|
324
|
+
## License
|
|
325
|
+
|
|
326
|
+
MIT.
|
|
327
|
+
|