fasttext-rs 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fasttext_rs-0.1.0/.cargo/config.toml +2 -0
- fasttext_rs-0.1.0/.github/workflows/ci.yml +33 -0
- fasttext_rs-0.1.0/.github/workflows/publish.yml +68 -0
- fasttext_rs-0.1.0/.gitignore +14 -0
- fasttext_rs-0.1.0/Cargo.lock +368 -0
- fasttext_rs-0.1.0/Cargo.toml +21 -0
- fasttext_rs-0.1.0/LICENSE +21 -0
- fasttext_rs-0.1.0/Makefile +57 -0
- fasttext_rs-0.1.0/PKG-INFO +59 -0
- fasttext_rs-0.1.0/README.md +50 -0
- fasttext_rs-0.1.0/pyproject.toml +24 -0
- fasttext_rs-0.1.0/python/fasttext_rs/__init__.py +15 -0
- fasttext_rs-0.1.0/src/args.rs +22 -0
- fasttext_rs-0.1.0/src/dictionary.rs +241 -0
- fasttext_rs-0.1.0/src/fasttext.rs +864 -0
- fasttext_rs-0.1.0/src/lib.rs +361 -0
- fasttext_rs-0.1.0/src/matrix.rs +86 -0
- fasttext_rs-0.1.0/src/model.rs +366 -0
- fasttext_rs-0.1.0/src/utils.rs +75 -0
- fasttext_rs-0.1.0/tests/bench.py +90 -0
- fasttext_rs-0.1.0/tests/test_api.py +257 -0
- fasttext_rs-0.1.0/tests/test_integration.py +325 -0
- fasttext_rs-0.1.0/uv.lock +500 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Continuous integration: lint and test on pushes and pull requests
# targeting master.
name: CI

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

# Neither job writes to the repository, so restrict the token to read-only
# instead of inheriting the repository default.
permissions:
  contents: read

jobs:
  # Formatting and static analysis for both the Rust and the Python sources.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: clippy, rustfmt
      - run: cargo fmt --check
      # -D warnings turns every clippy warning into a hard failure.
      - run: cargo clippy -- -D warnings
      - uses: astral-sh/setup-uv@v4
      - run: uv sync
      - run: uv run ruff format --check .
      - run: uv run ruff check .

  # Builds the extension in release mode, fetches the test data set and runs
  # the API and integration test suites.
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: astral-sh/setup-uv@v4
      - run: uv sync
      - run: PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1 uv run maturin develop --release
      # `make data` downloads the cooking.stackexchange corpus used by the tests.
      - run: make data
      - run: uv run pytest tests/test_api.py tests/test_integration.py -v
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Manually triggered release pipeline: build wheels for every OS/architecture
# pair in the matrix, build an sdist, then upload all artifacts to PyPI.
name: Publish to PyPI

on:
  workflow_dispatch:

# Default every job to a read-only token. The OIDC permission is granted only
# to the `publish` job below so the build jobs cannot mint identity tokens.
permissions:
  contents: read

jobs:
  build-wheels:
    name: Build wheels (${{ matrix.os }}, ${{ matrix.target }})
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        include:
          - os: ubuntu-latest
            target: x86_64
          - os: ubuntu-latest
            target: aarch64
          - os: macos-latest
            target: x86_64
          - os: macos-latest
            target: aarch64
          - os: windows-latest
            target: x86_64
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so the version is a string, not a float.
          python-version: "3.12"
      - uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.target }}
          args: --release --out dist
          manylinux: auto
        env:
          PYO3_USE_ABI3_FORWARD_COMPATIBILITY: "1"
      # One artifact per matrix entry; the names must be unique so the publish
      # job can download and merge them.
      - uses: actions/upload-artifact@v4
        with:
          name: wheels-${{ matrix.os }}-${{ matrix.target }}
          path: dist/*.whl

  build-sdist:
    name: Build sdist
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: PyO3/maturin-action@v1
        with:
          command: sdist
          args: --out dist
      - uses: actions/upload-artifact@v4
        with:
          name: sdist
          path: dist/*.tar.gz

  publish:
    name: Publish to PyPI
    needs: [build-wheels, build-sdist]
    runs-on: ubuntu-latest
    environment: pypi
    # No API token is passed to the publish action, so the upload presumably
    # relies on PyPI trusted publishing, which needs an OIDC token. Job-level
    # permissions replace the workflow default; this job performs no checkout,
    # so `contents` is not required here.
    permissions:
      id-token: write
    steps:
      # Collect every wheel artifact and the sdist into a single dist/ folder.
      - uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true
      # Log the files that are about to be uploaded.
      - run: ls -la dist/
      - uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
|
2
|
+
# It is not intended for manual editing.
|
|
3
|
+
version = 4
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "autocfg"
|
|
7
|
+
version = "1.5.0"
|
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
+
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
|
10
|
+
|
|
11
|
+
[[package]]
|
|
12
|
+
name = "byteorder"
|
|
13
|
+
version = "1.5.0"
|
|
14
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
15
|
+
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
|
16
|
+
|
|
17
|
+
[[package]]
|
|
18
|
+
name = "cfg-if"
|
|
19
|
+
version = "1.0.4"
|
|
20
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
21
|
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
22
|
+
|
|
23
|
+
[[package]]
|
|
24
|
+
name = "crossbeam-deque"
|
|
25
|
+
version = "0.8.6"
|
|
26
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
27
|
+
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
|
28
|
+
dependencies = [
|
|
29
|
+
"crossbeam-epoch",
|
|
30
|
+
"crossbeam-utils",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[[package]]
|
|
34
|
+
name = "crossbeam-epoch"
|
|
35
|
+
version = "0.9.18"
|
|
36
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
37
|
+
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
|
38
|
+
dependencies = [
|
|
39
|
+
"crossbeam-utils",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[[package]]
|
|
43
|
+
name = "crossbeam-utils"
|
|
44
|
+
version = "0.8.21"
|
|
45
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
46
|
+
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
|
47
|
+
|
|
48
|
+
[[package]]
|
|
49
|
+
name = "either"
|
|
50
|
+
version = "1.15.0"
|
|
51
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
52
|
+
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
53
|
+
|
|
54
|
+
[[package]]
|
|
55
|
+
name = "fasttext_rs"
|
|
56
|
+
version = "0.1.0"
|
|
57
|
+
dependencies = [
|
|
58
|
+
"byteorder",
|
|
59
|
+
"pyo3",
|
|
60
|
+
"rand",
|
|
61
|
+
"rand_distr",
|
|
62
|
+
"rayon",
|
|
63
|
+
"unicode-normalization",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[[package]]
|
|
67
|
+
name = "getrandom"
|
|
68
|
+
version = "0.2.17"
|
|
69
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
70
|
+
checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
|
|
71
|
+
dependencies = [
|
|
72
|
+
"cfg-if",
|
|
73
|
+
"libc",
|
|
74
|
+
"wasi",
|
|
75
|
+
]
|
|
76
|
+
|
|
77
|
+
[[package]]
|
|
78
|
+
name = "heck"
|
|
79
|
+
version = "0.5.0"
|
|
80
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
81
|
+
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
82
|
+
|
|
83
|
+
[[package]]
|
|
84
|
+
name = "indoc"
|
|
85
|
+
version = "2.0.7"
|
|
86
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
87
|
+
checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
|
|
88
|
+
dependencies = [
|
|
89
|
+
"rustversion",
|
|
90
|
+
]
|
|
91
|
+
|
|
92
|
+
[[package]]
|
|
93
|
+
name = "libc"
|
|
94
|
+
version = "0.2.182"
|
|
95
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
96
|
+
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
|
97
|
+
|
|
98
|
+
[[package]]
|
|
99
|
+
name = "libm"
|
|
100
|
+
version = "0.2.16"
|
|
101
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
102
|
+
checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
|
|
103
|
+
|
|
104
|
+
[[package]]
|
|
105
|
+
name = "memoffset"
|
|
106
|
+
version = "0.9.1"
|
|
107
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
108
|
+
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
|
|
109
|
+
dependencies = [
|
|
110
|
+
"autocfg",
|
|
111
|
+
]
|
|
112
|
+
|
|
113
|
+
[[package]]
|
|
114
|
+
name = "num-traits"
|
|
115
|
+
version = "0.2.19"
|
|
116
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
117
|
+
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
|
118
|
+
dependencies = [
|
|
119
|
+
"autocfg",
|
|
120
|
+
"libm",
|
|
121
|
+
]
|
|
122
|
+
|
|
123
|
+
[[package]]
|
|
124
|
+
name = "once_cell"
|
|
125
|
+
version = "1.21.3"
|
|
126
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
127
|
+
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|
128
|
+
|
|
129
|
+
[[package]]
|
|
130
|
+
name = "portable-atomic"
|
|
131
|
+
version = "1.13.1"
|
|
132
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
133
|
+
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
|
134
|
+
|
|
135
|
+
[[package]]
|
|
136
|
+
name = "ppv-lite86"
|
|
137
|
+
version = "0.2.21"
|
|
138
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
139
|
+
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
|
|
140
|
+
dependencies = [
|
|
141
|
+
"zerocopy",
|
|
142
|
+
]
|
|
143
|
+
|
|
144
|
+
[[package]]
|
|
145
|
+
name = "proc-macro2"
|
|
146
|
+
version = "1.0.106"
|
|
147
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
148
|
+
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
149
|
+
dependencies = [
|
|
150
|
+
"unicode-ident",
|
|
151
|
+
]
|
|
152
|
+
|
|
153
|
+
[[package]]
|
|
154
|
+
name = "pyo3"
|
|
155
|
+
version = "0.24.2"
|
|
156
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
157
|
+
checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219"
|
|
158
|
+
dependencies = [
|
|
159
|
+
"cfg-if",
|
|
160
|
+
"indoc",
|
|
161
|
+
"libc",
|
|
162
|
+
"memoffset",
|
|
163
|
+
"once_cell",
|
|
164
|
+
"portable-atomic",
|
|
165
|
+
"pyo3-build-config",
|
|
166
|
+
"pyo3-ffi",
|
|
167
|
+
"pyo3-macros",
|
|
168
|
+
"unindent",
|
|
169
|
+
]
|
|
170
|
+
|
|
171
|
+
[[package]]
|
|
172
|
+
name = "pyo3-build-config"
|
|
173
|
+
version = "0.24.2"
|
|
174
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
175
|
+
checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999"
|
|
176
|
+
dependencies = [
|
|
177
|
+
"once_cell",
|
|
178
|
+
"target-lexicon",
|
|
179
|
+
]
|
|
180
|
+
|
|
181
|
+
[[package]]
|
|
182
|
+
name = "pyo3-ffi"
|
|
183
|
+
version = "0.24.2"
|
|
184
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
185
|
+
checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33"
|
|
186
|
+
dependencies = [
|
|
187
|
+
"libc",
|
|
188
|
+
"pyo3-build-config",
|
|
189
|
+
]
|
|
190
|
+
|
|
191
|
+
[[package]]
|
|
192
|
+
name = "pyo3-macros"
|
|
193
|
+
version = "0.24.2"
|
|
194
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
195
|
+
checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9"
|
|
196
|
+
dependencies = [
|
|
197
|
+
"proc-macro2",
|
|
198
|
+
"pyo3-macros-backend",
|
|
199
|
+
"quote",
|
|
200
|
+
"syn",
|
|
201
|
+
]
|
|
202
|
+
|
|
203
|
+
[[package]]
|
|
204
|
+
name = "pyo3-macros-backend"
|
|
205
|
+
version = "0.24.2"
|
|
206
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
207
|
+
checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a"
|
|
208
|
+
dependencies = [
|
|
209
|
+
"heck",
|
|
210
|
+
"proc-macro2",
|
|
211
|
+
"pyo3-build-config",
|
|
212
|
+
"quote",
|
|
213
|
+
"syn",
|
|
214
|
+
]
|
|
215
|
+
|
|
216
|
+
[[package]]
|
|
217
|
+
name = "quote"
|
|
218
|
+
version = "1.0.45"
|
|
219
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
220
|
+
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
|
221
|
+
dependencies = [
|
|
222
|
+
"proc-macro2",
|
|
223
|
+
]
|
|
224
|
+
|
|
225
|
+
[[package]]
|
|
226
|
+
name = "rand"
|
|
227
|
+
version = "0.8.5"
|
|
228
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
229
|
+
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
|
230
|
+
dependencies = [
|
|
231
|
+
"libc",
|
|
232
|
+
"rand_chacha",
|
|
233
|
+
"rand_core",
|
|
234
|
+
]
|
|
235
|
+
|
|
236
|
+
[[package]]
|
|
237
|
+
name = "rand_chacha"
|
|
238
|
+
version = "0.3.1"
|
|
239
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
240
|
+
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
|
241
|
+
dependencies = [
|
|
242
|
+
"ppv-lite86",
|
|
243
|
+
"rand_core",
|
|
244
|
+
]
|
|
245
|
+
|
|
246
|
+
[[package]]
|
|
247
|
+
name = "rand_core"
|
|
248
|
+
version = "0.6.4"
|
|
249
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
250
|
+
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
|
251
|
+
dependencies = [
|
|
252
|
+
"getrandom",
|
|
253
|
+
]
|
|
254
|
+
|
|
255
|
+
[[package]]
|
|
256
|
+
name = "rand_distr"
|
|
257
|
+
version = "0.4.3"
|
|
258
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
259
|
+
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
|
|
260
|
+
dependencies = [
|
|
261
|
+
"num-traits",
|
|
262
|
+
"rand",
|
|
263
|
+
]
|
|
264
|
+
|
|
265
|
+
[[package]]
|
|
266
|
+
name = "rayon"
|
|
267
|
+
version = "1.11.0"
|
|
268
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
269
|
+
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
|
|
270
|
+
dependencies = [
|
|
271
|
+
"either",
|
|
272
|
+
"rayon-core",
|
|
273
|
+
]
|
|
274
|
+
|
|
275
|
+
[[package]]
|
|
276
|
+
name = "rayon-core"
|
|
277
|
+
version = "1.13.0"
|
|
278
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
279
|
+
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
|
|
280
|
+
dependencies = [
|
|
281
|
+
"crossbeam-deque",
|
|
282
|
+
"crossbeam-utils",
|
|
283
|
+
]
|
|
284
|
+
|
|
285
|
+
[[package]]
|
|
286
|
+
name = "rustversion"
|
|
287
|
+
version = "1.0.22"
|
|
288
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
289
|
+
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
|
290
|
+
|
|
291
|
+
[[package]]
|
|
292
|
+
name = "syn"
|
|
293
|
+
version = "2.0.117"
|
|
294
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
295
|
+
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
|
296
|
+
dependencies = [
|
|
297
|
+
"proc-macro2",
|
|
298
|
+
"quote",
|
|
299
|
+
"unicode-ident",
|
|
300
|
+
]
|
|
301
|
+
|
|
302
|
+
[[package]]
|
|
303
|
+
name = "target-lexicon"
|
|
304
|
+
version = "0.13.5"
|
|
305
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
306
|
+
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
|
307
|
+
|
|
308
|
+
[[package]]
|
|
309
|
+
name = "tinyvec"
|
|
310
|
+
version = "1.10.0"
|
|
311
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
312
|
+
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
|
|
313
|
+
dependencies = [
|
|
314
|
+
"tinyvec_macros",
|
|
315
|
+
]
|
|
316
|
+
|
|
317
|
+
[[package]]
|
|
318
|
+
name = "tinyvec_macros"
|
|
319
|
+
version = "0.1.1"
|
|
320
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
321
|
+
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
|
322
|
+
|
|
323
|
+
[[package]]
|
|
324
|
+
name = "unicode-ident"
|
|
325
|
+
version = "1.0.24"
|
|
326
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
327
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
328
|
+
|
|
329
|
+
[[package]]
|
|
330
|
+
name = "unicode-normalization"
|
|
331
|
+
version = "0.1.25"
|
|
332
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
333
|
+
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
|
|
334
|
+
dependencies = [
|
|
335
|
+
"tinyvec",
|
|
336
|
+
]
|
|
337
|
+
|
|
338
|
+
[[package]]
|
|
339
|
+
name = "unindent"
|
|
340
|
+
version = "0.2.4"
|
|
341
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
342
|
+
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
|
|
343
|
+
|
|
344
|
+
[[package]]
|
|
345
|
+
name = "wasi"
|
|
346
|
+
version = "0.11.1+wasi-snapshot-preview1"
|
|
347
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
348
|
+
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
|
349
|
+
|
|
350
|
+
[[package]]
|
|
351
|
+
name = "zerocopy"
|
|
352
|
+
version = "0.8.40"
|
|
353
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
354
|
+
checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5"
|
|
355
|
+
dependencies = [
|
|
356
|
+
"zerocopy-derive",
|
|
357
|
+
]
|
|
358
|
+
|
|
359
|
+
[[package]]
|
|
360
|
+
name = "zerocopy-derive"
|
|
361
|
+
version = "0.8.40"
|
|
362
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
363
|
+
checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953"
|
|
364
|
+
dependencies = [
|
|
365
|
+
"proc-macro2",
|
|
366
|
+
"quote",
|
|
367
|
+
"syn",
|
|
368
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[package]
name = "fasttext_rs"
version = "0.1.0"
edition = "2021"
description = "A Rust-backed Python implementation of Facebook's fastText"
readme = "README.md"
# SPDX identifier matching the MIT LICENSE file shipped with the package.
license = "MIT"

[lib]
name = "fasttext_rs"
# cdylib: shared library loaded by Python as an extension module.
# rlib: lets other Rust code link against the crate as a regular library.
crate-type = ["cdylib", "rlib"]

[profile.release]
opt-level = 3

[dependencies]
# abi3-py39: build against the stable ABI with Python 3.9 as the minimum.
pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] }
rand = "0.8"
rand_distr = "0.4"
rayon = "1.10"
byteorder = "1.5"
unicode-normalization = "0.1"
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Duarte OC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Developer task runner. A trailing `# text` comment on a target line is the
# description that `make help` prints for that target.
default: help

# Environment assignment prefixed to every maturin invocation.
ENV_FLAGS = PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1

.PHONY: help
help: # Show help for each of the Makefile recipes.
	@grep -E '^[a-zA-Z0-9 -]+:.*#' Makefile | sort | while read -r l; do printf "\033[1;32m$$(echo $$l | cut -f 1 -d':')\033[00m:$$(echo $$l | cut -f 2- -d'#')\n"; done

.PHONY: build
build: # Build the Rust extension in release mode
	$(ENV_FLAGS) uv run maturin develop --release

.PHONY: dev
dev: # Build the Rust extension in debug mode
	$(ENV_FLAGS) uv run maturin develop

.PHONY: test
test: build # Run all tests
	uv run pytest tests/ -v

.PHONY: check
check: # Run cargo check and clippy
	cargo check
	cargo clippy

.PHONY: format
format: # Format Rust and Python code
	cargo fmt
	uv run ruff format .
	uv run ruff check . --fix

.PHONY: lint
lint: # Lint Rust and Python code
	cargo clippy -- -D warnings
	uv run ruff format --check .
	uv run ruff check .

# The first 12404 lines of the corpus become the training split and the last
# 3000 lines the validation split. curl runs with -f so that an HTTP error
# aborts the recipe instead of saving the error page as the archive.
.PHONY: data
data: # Download test data (cooking.stackexchange)
	mkdir -p data
	cd data && curl -fL -O https://dl.fbaipublicfiles.com/fasttext/data/cooking.stackexchange.tar.gz
	cd data && tar -xzf cooking.stackexchange.tar.gz
	head -n 12404 data/cooking.stackexchange.txt > data/cooking.train
	tail -n 3000 data/cooking.stackexchange.txt > data/cooking.valid

.PHONY: bench
bench: build # Run performance benchmarks
	uv run python tests/bench.py

# Cache directories are removed with find because make runs recipes with
# /bin/sh, where `**/` does not recurse and only matches one directory level.
# .git and .venv are skipped (assumes the virtualenv lives in .venv).
.PHONY: clean
clean: # Clean build artifacts
	cargo clean
	@rm -rf target/ dist/ *.egg-info
	@find . \( -name .git -o -name .venv \) -prune -o -type d \
		\( -name .pytest_cache -o -name __pycache__ -o -name .ipynb_checkpoints \) \
		-prune -exec rm -rf {} +
	@rm -rf build
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fasttext-rs
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
License-File: LICENSE
|
|
5
|
+
Summary: A Rust-backed Python implementation of Facebook's fastText
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
8
|
+
|
|
9
|
+
> [!WARNING]
|
|
10
|
+
> This project was completely AI-generated, running on a Ralph Wiggum loop to see what was possible. I HAVE NOT checked the code.
|
|
11
|
+
|
|
12
|
+
# fasttext.rs
|
|
13
|
+
|
|
14
|
+
A Rust-backed Python implementation of Facebook's [fastText](https://github.com/facebookresearch/fastText).
|
|
15
|
+
|
|
16
|
+
## Performance
|
|
17
|
+
|
|
18
|
+
Benchmarked on the [cooking.stackexchange](https://fasttext.cc/docs/en/supervised-tutorial.html) dataset (12k training, 3k validation) with default parameters, single-threaded. Median of 5 runs, Apple M3 Pro.
|
|
19
|
+
|
|
20
|
+
| Task | fasttext (C++) | fasttext.rs (Rust) | Speedup |
|
|
21
|
+
|---|--:|--:|--:|
|
|
22
|
+
| Training (5 epochs) | 12.633s | 6.406s | **1.97x** |
|
|
23
|
+
| Inference (3000 samples) | 0.443s | 0.218s | **2.03x** |
|
|
24
|
+
| Precision@1 | 0.1363 | 0.1543 | — |
|
|
25
|
+
| Recall@1 | 0.0590 | 0.0703 | — |
|
|
26
|
+
|
|
27
|
+
Run `make bench` to reproduce.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
uv pip install -e .
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Quick start
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import fasttext_rs
|
|
39
|
+
|
|
40
|
+
# Train
|
|
41
|
+
model = fasttext_rs.train_supervised(input="data/cooking.train", epoch=25, lr=1.0, word_ngrams=2)
|
|
42
|
+
|
|
43
|
+
# Predict
|
|
44
|
+
labels, probs = model.predict("Which baking dish is best?")
|
|
45
|
+
|
|
46
|
+
# Vectors
|
|
47
|
+
vec = model.get_word_vector("hello")
|
|
48
|
+
|
|
49
|
+
# Save / Load
|
|
50
|
+
model.save_model("model.bin")
|
|
51
|
+
model = fasttext_rs.load_model("model.bin")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Development
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
make # Show all commands
|
|
58
|
+
```
|
|
59
|
+
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
> [!WARNING]
|
|
2
|
+
> This project was completely AI-generated, running on a Ralph Wiggum loop to see what was possible. I HAVE NOT checked the code.
|
|
3
|
+
|
|
4
|
+
# fasttext.rs
|
|
5
|
+
|
|
6
|
+
A Rust-backed Python implementation of Facebook's [fastText](https://github.com/facebookresearch/fastText).
|
|
7
|
+
|
|
8
|
+
## Performance
|
|
9
|
+
|
|
10
|
+
Benchmarked on the [cooking.stackexchange](https://fasttext.cc/docs/en/supervised-tutorial.html) dataset (12k training, 3k validation) with default parameters, single-threaded. Median of 5 runs, Apple M3 Pro.
|
|
11
|
+
|
|
12
|
+
| Task | fasttext (C++) | fasttext.rs (Rust) | Speedup |
|
|
13
|
+
|---|--:|--:|--:|
|
|
14
|
+
| Training (5 epochs) | 12.633s | 6.406s | **1.97x** |
|
|
15
|
+
| Inference (3000 samples) | 0.443s | 0.218s | **2.03x** |
|
|
16
|
+
| Precision@1 | 0.1363 | 0.1543 | — |
|
|
17
|
+
| Recall@1 | 0.0590 | 0.0703 | — |
|
|
18
|
+
|
|
19
|
+
Run `make bench` to reproduce.
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
uv pip install -e .
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick start
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import fasttext_rs
|
|
31
|
+
|
|
32
|
+
# Train
|
|
33
|
+
model = fasttext_rs.train_supervised(input="data/cooking.train", epoch=25, lr=1.0, word_ngrams=2)
|
|
34
|
+
|
|
35
|
+
# Predict
|
|
36
|
+
labels, probs = model.predict("Which baking dish is best?")
|
|
37
|
+
|
|
38
|
+
# Vectors
|
|
39
|
+
vec = model.get_word_vector("hello")
|
|
40
|
+
|
|
41
|
+
# Save / Load
|
|
42
|
+
model.save_model("model.bin")
|
|
43
|
+
model = fasttext_rs.load_model("model.bin")
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Development
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
make # Show all commands
|
|
50
|
+
```
|