skimmatch 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ [build]
2
+ target-dir = "C:/Users/steve/.cargo-target/skimmatch"
@@ -0,0 +1,47 @@
1
+ # =========================================================
2
+ # Rust / Cargo
3
+ # =========================================================
4
+ # This is the main build directory where all compiled artifacts are stored.
5
+ # It's the most important entry for any Rust project.
6
+ /target/
7
+
8
+ # =========================================================
9
+ # Python
10
+ # =========================================================
11
+ # Bytecode and compiled files
12
+ __pycache__/
13
+ *.pyc
14
+ *.pyo
15
+ *.pyd
16
+
17
+ # Virtual environments - add others if you use different names
18
+ venv/
19
+ .venv/
20
+ env/
21
+ .env/
22
+
23
+ # Packaging and testing artifacts
24
+ .eggs/
25
+ *.egg-info/
26
+ dist/
27
+ build/
28
+ .pytest_cache/
29
+ .tox/
30
+ wheels/
31
+
32
+ # =========================================================
33
+ # IDEs and Editors
34
+ # =========================================================
35
+ .vscode/
36
+ .idea/
37
+ *.sublime-project
38
+ *.sublime-workspace
39
+
40
+ # =========================================================
41
+ # Operating System Files
42
+ # =========================================================
43
+ # macOS
44
+ .DS_Store
45
+
46
+ # Windows
47
+ Thumbs.db
@@ -0,0 +1,158 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "cfg-if"
7
+ version = "1.0.4"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
10
+
11
+ [[package]]
12
+ name = "fuzzy-matcher"
13
+ version = "0.3.7"
14
+ source = "registry+https://github.com/rust-lang/crates.io-index"
15
+ checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94"
16
+ dependencies = [
17
+ "thread_local",
18
+ ]
19
+
20
+ [[package]]
21
+ name = "heck"
22
+ version = "0.5.0"
23
+ source = "registry+https://github.com/rust-lang/crates.io-index"
24
+ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
25
+
26
+ [[package]]
27
+ name = "libc"
28
+ version = "0.2.186"
29
+ source = "registry+https://github.com/rust-lang/crates.io-index"
30
+ checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
31
+
32
+ [[package]]
33
+ name = "once_cell"
34
+ version = "1.21.4"
35
+ source = "registry+https://github.com/rust-lang/crates.io-index"
36
+ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
37
+
38
+ [[package]]
39
+ name = "portable-atomic"
40
+ version = "1.13.1"
41
+ source = "registry+https://github.com/rust-lang/crates.io-index"
42
+ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
43
+
44
+ [[package]]
45
+ name = "proc-macro2"
46
+ version = "1.0.106"
47
+ source = "registry+https://github.com/rust-lang/crates.io-index"
48
+ checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
49
+ dependencies = [
50
+ "unicode-ident",
51
+ ]
52
+
53
+ [[package]]
54
+ name = "pyo3"
55
+ version = "0.28.3"
56
+ source = "registry+https://github.com/rust-lang/crates.io-index"
57
+ checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12"
58
+ dependencies = [
59
+ "libc",
60
+ "once_cell",
61
+ "portable-atomic",
62
+ "pyo3-build-config",
63
+ "pyo3-ffi",
64
+ "pyo3-macros",
65
+ ]
66
+
67
+ [[package]]
68
+ name = "pyo3-build-config"
69
+ version = "0.28.3"
70
+ source = "registry+https://github.com/rust-lang/crates.io-index"
71
+ checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e"
72
+ dependencies = [
73
+ "target-lexicon",
74
+ ]
75
+
76
+ [[package]]
77
+ name = "pyo3-ffi"
78
+ version = "0.28.3"
79
+ source = "registry+https://github.com/rust-lang/crates.io-index"
80
+ checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e"
81
+ dependencies = [
82
+ "libc",
83
+ "pyo3-build-config",
84
+ ]
85
+
86
+ [[package]]
87
+ name = "pyo3-macros"
88
+ version = "0.28.3"
89
+ source = "registry+https://github.com/rust-lang/crates.io-index"
90
+ checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813"
91
+ dependencies = [
92
+ "proc-macro2",
93
+ "pyo3-macros-backend",
94
+ "quote",
95
+ "syn",
96
+ ]
97
+
98
+ [[package]]
99
+ name = "pyo3-macros-backend"
100
+ version = "0.28.3"
101
+ source = "registry+https://github.com/rust-lang/crates.io-index"
102
+ checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb"
103
+ dependencies = [
104
+ "heck",
105
+ "proc-macro2",
106
+ "pyo3-build-config",
107
+ "quote",
108
+ "syn",
109
+ ]
110
+
111
+ [[package]]
112
+ name = "quote"
113
+ version = "1.0.45"
114
+ source = "registry+https://github.com/rust-lang/crates.io-index"
115
+ checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
116
+ dependencies = [
117
+ "proc-macro2",
118
+ ]
119
+
120
+ [[package]]
121
+ name = "skimmatch"
122
+ version = "0.1.0"
123
+ dependencies = [
124
+ "fuzzy-matcher",
125
+ "pyo3",
126
+ ]
127
+
128
+ [[package]]
129
+ name = "syn"
130
+ version = "2.0.117"
131
+ source = "registry+https://github.com/rust-lang/crates.io-index"
132
+ checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
133
+ dependencies = [
134
+ "proc-macro2",
135
+ "quote",
136
+ "unicode-ident",
137
+ ]
138
+
139
+ [[package]]
140
+ name = "target-lexicon"
141
+ version = "0.13.5"
142
+ source = "registry+https://github.com/rust-lang/crates.io-index"
143
+ checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
144
+
145
+ [[package]]
146
+ name = "thread_local"
147
+ version = "1.1.9"
148
+ source = "registry+https://github.com/rust-lang/crates.io-index"
149
+ checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
150
+ dependencies = [
151
+ "cfg-if",
152
+ ]
153
+
154
+ [[package]]
155
+ name = "unicode-ident"
156
+ version = "1.0.24"
157
+ source = "registry+https://github.com/rust-lang/crates.io-index"
158
+ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
@@ -0,0 +1,13 @@
1
+ [package]
2
+ name = "skimmatch"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ readme = "README.md"
6
+
7
+ [lib]
8
+ name = "_skimmatch"
9
+ crate-type = ["cdylib"]
10
+
11
+ [dependencies]
12
+ fuzzy-matcher = "0.3"
13
+ pyo3 = { version = "0.28", features = ["extension-module"] }
@@ -0,0 +1,327 @@
1
+ Metadata-Version: 2.4
2
+ Name: skimmatch
3
+ Version: 0.1.0
4
+ Classifier: Programming Language :: Python :: 3
5
+ Classifier: Programming Language :: Python :: 3.13
6
+ Classifier: Programming Language :: Rust
7
+ Summary: In-process fzf/skim-style fuzzy finder for Python, implemented in Rust.
8
+ Author: TELOS
9
+ License: MIT
10
+ Requires-Python: >=3.13
11
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
12
+
13
+ # skimmatch
14
+
15
+ `skimmatch` is an in-process fzf/skim-style fuzzy finder for Python,
16
+ implemented in Rust.
17
+
18
+ It is designed for ranked abbreviation matching over a fixed list of candidate
19
+ strings. You give it strings such as filenames, references, titles, symbols, or
20
+ command labels; users type short abbreviation-style queries; `skimmatch`
21
+ returns the best candidates, scores, and optional highlight positions.
22
+
23
+ ```python
24
+ from skimmatch import Matcher
25
+
26
+ candidates = [
27
+ "Follmer and Schied, Stochastic Finance, 2011",
28
+ "Mildenhall and Major, Pricing Insurance Risk",
29
+ "Wang distortion risk measures",
30
+ "Archive reference catalogue",
31
+ ]
32
+
33
+ matcher = Matcher(candidates)
34
+
35
+ for result in matcher.search("wang distortion", limit=3):
36
+ print(result)
37
+ ```
38
+
39
+ Example result:
40
+
41
+ ```python
42
+ {
43
+ "index": 2,
44
+ "score": 260,
45
+ "text": "Wang distortion risk measures",
46
+ "matches": [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
47
+ }
48
+ ```
49
+
50
+ Scores are backend scores where higher is better. The exact numeric value should
51
+ be treated as ranking information, not as a stable cross-version metric.
52
+
53
+ ## What This Is
54
+
55
+ `skimmatch` solves the same broad problem as interactive fuzzy finders such as
56
+ `fzf` and `skim`: finding good abbreviation matches quickly.
57
+
58
+ For example, a query like:
59
+
60
+ ```text
61
+ fs sf 2011
62
+ ```
63
+
64
+ can match:
65
+
66
+ ```text
67
+ Follmer and Schied, Stochastic Finance, 2011
68
+ ```
69
+
70
+ because each token's characters appear in order within the candidate, at useful
71
+ positions such as word starts; the tokens themselves are matched independently.
72
+
73
+ This is different from edit-distance fuzzy matching. Libraries such as
74
+ RapidFuzz, Levenshtein, or token-ratio matchers are excellent for typo
75
+ correction, deduplication, OCR cleanup, and record linkage. `skimmatch` is aimed
76
+ at fast candidate selection, interactive search, and highlightable abbreviation
77
+ matching.
78
+
79
+ ## Features
80
+
81
+ - In-process Python extension: no external `fzf` executable required.
82
+ - Rust matching core using `SkimMatcherV2` from the `fuzzy-matcher` crate.
83
+ - Preloaded candidate lists for fast repeated queries.
84
+ - Single-token and multi-token search modes.
85
+ - Optional highlight indices for UI rendering.
86
+ - Legacy tuple-returning APIs for compatibility with the earlier `rustfuzz`
87
+ shape.
88
+ - Structured `Matcher.search(...)` API for new code.
89
+ - Backend argument already present, so future backends can be added without
90
+ changing the public matcher classes.
91
+
92
+ ## Installation
93
+
94
+ When published on PyPI:
95
+
96
+ ```bash
97
+ pip install skimmatch
98
+ ```
99
+
100
+ From a local checkout:
101
+
102
+ ```bash
103
+ uv pip install -e .
104
+ ```
105
+
106
+ or build with maturin:
107
+
108
+ ```bash
109
+ uv run maturin develop
110
+ ```
111
+
112
+ The current package metadata targets Python 3.13 or newer.
113
+
114
+ ## Quick Start
115
+
116
+ Use `Matcher` for new code.
117
+
118
+ ```python
119
+ from skimmatch import Matcher
120
+
121
+ candidates = [
122
+ "Buhlmann, Mathematical Methods in Risk Theory",
123
+ "Cramer, Collective Risk Theory",
124
+ "Mildenhall and Major, Pricing Insurance Risk",
125
+ "Kaas, Goovaerts, Dhaene, and Denuit, Modern Actuarial Risk Theory",
126
+ ]
127
+
128
+ matcher = Matcher(candidates)
129
+ results = matcher.search("risk theory", limit=5)
130
+
131
+ for result in results:
132
+ print(result["index"], result["score"], result["text"])
133
+ ```
134
+
135
+ By default, `search`:
136
+
137
+ - splits the query on whitespace;
138
+ - requires every query token to match;
139
+ - returns up to 20 results;
140
+ - includes candidate text;
141
+ - includes highlight positions.
142
+
143
+ ## Structured API
144
+
145
+ ```python
146
+ matcher = Matcher(candidates, backend="skim")
147
+ results = matcher.search(
148
+ query,
149
+ limit=20,
150
+ highlights=True,
151
+ include_text=True,
152
+ multi=True,
153
+ )
154
+ ```
155
+
156
+ Each result is a dictionary containing:
157
+
158
+ ```python
159
+ {
160
+ "index": 0, # original candidate index
161
+ "score": 123, # backend score, higher is better
162
+ "text": "...", # included when include_text=True
163
+ "matches": [0, 3], # included when highlights=True
164
+ }
165
+ ```
166
+
167
+ ### Parameters
168
+
169
+ `query`
170
+
171
+ The search string. In multi-token mode, whitespace-separated tokens are matched
172
+ independently and every token must match the candidate.
173
+
174
+ `limit`
175
+
176
+ The maximum number of results to return. `limit=0` returns an empty list.
177
+
178
+ `highlights`
179
+
180
+ When true, results include `matches`, a sorted and deduplicated list of matched
181
+ positions. Turn this off when you only need ranking; score-only matching does
182
+ less work.
183
+
184
+ `include_text`
185
+
186
+ When true, each result includes the original candidate string. Turn this off if
187
+ you already have the candidate list and want smaller result objects.
188
+
189
+ `multi`
190
+
191
+ When true, the query is split on whitespace and all tokens are required. When
192
+ false, the whole query is sent to the matcher as one pattern.
193
+
194
+ ## Legacy APIs
195
+
196
+ The package also exports compatibility classes with tuple return shapes:
197
+
198
+ ```python
199
+ from skimmatch import FuzzyMatcher, FuzzyMatcherMulti, FuzzyMatcherMultiHi
200
+ ```
201
+
202
+ ### `FuzzyMatcher`
203
+
204
+ Treats the whole query as one pattern.
205
+
206
+ ```python
207
+ matcher = FuzzyMatcher(candidates)
208
+ indices, scores = matcher.query("sf", top_k=10)
209
+ ```
210
+
211
+ ### `FuzzyMatcherMulti`
212
+
213
+ Splits the query on whitespace. Every token must match.
214
+
215
+ ```python
216
+ matcher = FuzzyMatcherMulti(candidates)
217
+ indices, scores = matcher.query("pricing insurance", top_k=10)
218
+ ```
219
+
220
+ ### `FuzzyMatcherMultiHi`
221
+
222
+ Like `FuzzyMatcherMulti`, but also returns highlight positions.
223
+
224
+ ```python
225
+ matcher = FuzzyMatcherMultiHi(candidates)
226
+ indices, scores, highlights = matcher.query("pricing insurance", top_k=10)
227
+ ```
228
+
229
+ ## Matching Behavior
230
+
231
+ The current backend is:
232
+
233
+ ```python
234
+ backend="skim"
235
+ ```
236
+
237
+ It uses `SkimMatcherV2` from the Rust `fuzzy-matcher` crate.
238
+
239
+ The scoring tends to reward:
240
+
241
+ - characters appearing in order;
242
+ - compact alignments;
243
+ - word-boundary matches;
244
+ - punctuation-separated and camel-case transitions;
245
+ - early matches;
246
+ - consecutive query-character matches;
247
+ - candidates that match every query token in multi-token mode.
248
+
249
+ `skimmatch` returns candidates sorted by descending score. Ties are ordered by
250
+ the original candidate index for deterministic output.
251
+
252
+ ## When To Use It
253
+
254
+ `skimmatch` is a good fit for:
255
+
256
+ - command palettes;
257
+ - file pickers;
258
+ - bibliography and reference search;
259
+ - symbol search;
260
+ - autocomplete over known labels;
261
+ - terminal or web UI candidate selection;
262
+ - fast repeated queries over a preloaded list.
263
+
264
+ It is probably not the right tool for:
265
+
266
+ - typo correction;
267
+ - deduplication;
268
+ - record linkage;
269
+ - token-sort similarity;
270
+ - OCR cleanup;
271
+ - semantic search;
272
+ - embedding-based retrieval.
273
+
274
+ Those are useful problems, but they are different from fzf/skim-style
275
+ abbreviation matching.
276
+
277
+ ## Performance Notes
278
+
279
+ Candidate strings are copied into Rust once when the matcher is constructed.
280
+ Repeated calls to `query` or `search` scan that Rust-owned list and return only
281
+ the final top results to Python.
282
+
283
+ For best performance:
284
+
285
+ - construct one matcher and reuse it across queries;
286
+ - set `highlights=False` when you only need indices and scores;
287
+ - set `include_text=False` when you already have the candidate strings;
288
+ - use `limit` to cap the number of result objects returned.
289
+
290
+ ## Development
291
+
292
+ This project is a Python package with a Rust extension built by maturin.
293
+
294
+ Run the tests:
295
+
296
+ ```bash
297
+ uv run pytest tests/test_skimmatch.py -q
298
+ ```
299
+
300
+ Check Rust formatting:
301
+
302
+ ```bash
303
+ cargo fmt --check
304
+ ```
305
+
306
+ Important files:
307
+
308
+ - `src/lib.rs`: Rust/PyO3 extension implementation.
309
+ - `python/skimmatch/__init__.py`: Python re-exports.
310
+ - `tests/test_skimmatch.py`: API and behavior tests.
311
+ - `pyproject.toml`: Python packaging and maturin configuration.
312
+ - `Cargo.toml`: Rust crate configuration.
313
+
314
+ ## Backend Roadmap
315
+
316
+ The public API already accepts a `backend` argument. Today only `"skim"` is
317
+ implemented. Future candidates include:
318
+
319
+ - `backend="nucleo"` for a modern fzf-like matcher;
320
+ - `backend="frizbee"` for an experimental fast and typo-tolerant matcher.
321
+
322
+ Unknown backend names currently raise `ValueError`.
323
+
324
+ ## License
325
+
326
+ MIT.
327
+