rust-crate-pipeline 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rust_crate_pipeline/__init__.py +18 -27
- rust_crate_pipeline/__main__.py +1 -0
- rust_crate_pipeline/ai_processing.py +718 -596
- rust_crate_pipeline/analysis.py +330 -363
- rust_crate_pipeline/azure_ai_processing.py +462 -0
- rust_crate_pipeline/config.py +46 -28
- rust_crate_pipeline/core/__init__.py +19 -0
- rust_crate_pipeline/core/canon_registry.py +133 -0
- rust_crate_pipeline/core/irl_engine.py +256 -0
- rust_crate_pipeline/core/sacred_chain.py +117 -0
- rust_crate_pipeline/crate_analysis.py +54 -0
- rust_crate_pipeline/crate_list.txt +424 -0
- rust_crate_pipeline/github_token_checker.py +108 -112
- rust_crate_pipeline/main.py +329 -109
- rust_crate_pipeline/network.py +317 -308
- rust_crate_pipeline/pipeline.py +300 -375
- rust_crate_pipeline/production_config.py +24 -27
- rust_crate_pipeline/progress_monitor.py +334 -0
- rust_crate_pipeline/scraping/__init__.py +13 -0
- rust_crate_pipeline/scraping/unified_scraper.py +259 -0
- rust_crate_pipeline/unified_llm_processor.py +637 -0
- rust_crate_pipeline/unified_pipeline.py +548 -0
- rust_crate_pipeline/utils/file_utils.py +32 -5
- rust_crate_pipeline/utils/logging_utils.py +21 -16
- rust_crate_pipeline/version.py +76 -47
- rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
- rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
- rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
- rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,424 @@
|
|
1
|
+
actix-web
|
2
|
+
rocket
|
3
|
+
axum
|
4
|
+
warp
|
5
|
+
tower
|
6
|
+
tide
|
7
|
+
gotham
|
8
|
+
iron
|
9
|
+
nickel
|
10
|
+
rouille
|
11
|
+
thruster
|
12
|
+
poem
|
13
|
+
salvo
|
14
|
+
viz
|
15
|
+
ntex
|
16
|
+
may-minihttp
|
17
|
+
tiny_http
|
18
|
+
httptest
|
19
|
+
mockito
|
20
|
+
wiremock
|
21
|
+
tokio
|
22
|
+
tokio-stream
|
23
|
+
async-trait
|
24
|
+
futures
|
25
|
+
async-std
|
26
|
+
smol
|
27
|
+
embassy
|
28
|
+
embassy-executor
|
29
|
+
embassy-time
|
30
|
+
embassy-sync
|
31
|
+
async-channel
|
32
|
+
async-broadcast
|
33
|
+
async-lock
|
34
|
+
async-once
|
35
|
+
async-recursion
|
36
|
+
futures-util
|
37
|
+
futures-channel
|
38
|
+
futures-timer
|
39
|
+
futures-test
|
40
|
+
pin-project
|
41
|
+
pin-project-lite
|
42
|
+
serde
|
43
|
+
serde_json
|
44
|
+
serde_yaml
|
45
|
+
bincode
|
46
|
+
toml
|
47
|
+
ron
|
48
|
+
postcard
|
49
|
+
ciborium
|
50
|
+
rmp-serde
|
51
|
+
quick-xml
|
52
|
+
roxmltree
|
53
|
+
serde_cbor
|
54
|
+
serde_derive
|
55
|
+
serde_repr
|
56
|
+
serde_with
|
57
|
+
serde_bytes
|
58
|
+
flexbuffers
|
59
|
+
bson
|
60
|
+
avro-rs
|
61
|
+
anyhow
|
62
|
+
thiserror
|
63
|
+
eyre
|
64
|
+
color-eyre
|
65
|
+
miette
|
66
|
+
fehler
|
67
|
+
snafu
|
68
|
+
failure
|
69
|
+
quick-error
|
70
|
+
derive_more
|
71
|
+
displaydoc
|
72
|
+
backtrace
|
73
|
+
better-panic
|
74
|
+
clap
|
75
|
+
structopt
|
76
|
+
argh
|
77
|
+
gumdrop
|
78
|
+
docopt
|
79
|
+
getopts
|
80
|
+
pico-args
|
81
|
+
crossterm
|
82
|
+
termion
|
83
|
+
console
|
84
|
+
indicatif
|
85
|
+
dialoguer
|
86
|
+
termcolor
|
87
|
+
colored
|
88
|
+
yansi
|
89
|
+
owo-colors
|
90
|
+
nu-ansi-term
|
91
|
+
terminal_size
|
92
|
+
rand
|
93
|
+
uuid
|
94
|
+
itertools
|
95
|
+
num
|
96
|
+
cfg-if
|
97
|
+
bytes
|
98
|
+
mime
|
99
|
+
form_urlencoded
|
100
|
+
csv
|
101
|
+
once_cell
|
102
|
+
base64
|
103
|
+
flate2
|
104
|
+
tar
|
105
|
+
dirs
|
106
|
+
walkdir
|
107
|
+
glob
|
108
|
+
bitflags
|
109
|
+
indexmap
|
110
|
+
smallvec
|
111
|
+
arrayvec
|
112
|
+
tinyvec
|
113
|
+
ahash
|
114
|
+
fxhash
|
115
|
+
rustc-hash
|
116
|
+
seahash
|
117
|
+
siphasher
|
118
|
+
wyhash
|
119
|
+
xxhash-rust
|
120
|
+
getrandom
|
121
|
+
fastrand
|
122
|
+
nanorand
|
123
|
+
url
|
124
|
+
percent-encoding
|
125
|
+
unicode-segmentation
|
126
|
+
unicode-normalization
|
127
|
+
unicode-width
|
128
|
+
memchr
|
129
|
+
aho-corasick
|
130
|
+
bstr
|
131
|
+
reqwest
|
132
|
+
hyper
|
133
|
+
surf
|
134
|
+
ureq
|
135
|
+
attohttpc
|
136
|
+
isahc
|
137
|
+
curl
|
138
|
+
libcurl-sys
|
139
|
+
http
|
140
|
+
http-body
|
141
|
+
httparse
|
142
|
+
hyper-tls
|
143
|
+
hyper-rustls
|
144
|
+
native-tls
|
145
|
+
webpki
|
146
|
+
webpki-roots
|
147
|
+
sqlx
|
148
|
+
diesel
|
149
|
+
postgres
|
150
|
+
rusqlite
|
151
|
+
mysql
|
152
|
+
mongodb
|
153
|
+
redis
|
154
|
+
tokio-postgres
|
155
|
+
deadpool-postgres
|
156
|
+
bb8
|
157
|
+
r2d2
|
158
|
+
sea-orm
|
159
|
+
rbatis
|
160
|
+
sled
|
161
|
+
rocksdb
|
162
|
+
lmdb
|
163
|
+
redb
|
164
|
+
pickledb
|
165
|
+
persy
|
166
|
+
heed
|
167
|
+
fjall
|
168
|
+
rayon
|
169
|
+
crossbeam
|
170
|
+
crossbeam-channel
|
171
|
+
crossbeam-utils
|
172
|
+
crossbeam-epoch
|
173
|
+
crossbeam-deque
|
174
|
+
parking_lot
|
175
|
+
spin
|
176
|
+
atomic
|
177
|
+
arc-swap
|
178
|
+
dashmap
|
179
|
+
flume
|
180
|
+
kanal
|
181
|
+
tokio-util
|
182
|
+
futures-concurrency
|
183
|
+
prost
|
184
|
+
tonic
|
185
|
+
protobuf
|
186
|
+
grpcio
|
187
|
+
tarpc
|
188
|
+
capnp
|
189
|
+
rmp
|
190
|
+
zmq
|
191
|
+
nanomsg
|
192
|
+
nats
|
193
|
+
rdkafka
|
194
|
+
pulsar
|
195
|
+
lapin
|
196
|
+
amqp
|
197
|
+
rumqttc
|
198
|
+
syn
|
199
|
+
quote
|
200
|
+
proc-macro2
|
201
|
+
proc-macro-crate
|
202
|
+
proc-macro-error
|
203
|
+
darling
|
204
|
+
derive_builder
|
205
|
+
strum
|
206
|
+
strum_macros
|
207
|
+
enum-iterator
|
208
|
+
num-derive
|
209
|
+
num-traits
|
210
|
+
paste
|
211
|
+
lazy_static
|
212
|
+
ring
|
213
|
+
rustls
|
214
|
+
openssl
|
215
|
+
sha2
|
216
|
+
sha3
|
217
|
+
blake2
|
218
|
+
blake3
|
219
|
+
md5
|
220
|
+
hmac
|
221
|
+
pbkdf2
|
222
|
+
scrypt
|
223
|
+
argon2
|
224
|
+
bcrypt
|
225
|
+
chacha20poly1305
|
226
|
+
aes-gcm
|
227
|
+
rsa
|
228
|
+
ed25519-dalek
|
229
|
+
x25519-dalek
|
230
|
+
curve25519-dalek
|
231
|
+
secp256k1
|
232
|
+
k256
|
233
|
+
p256
|
234
|
+
ecdsa
|
235
|
+
signature
|
236
|
+
rand_core
|
237
|
+
bevy
|
238
|
+
macroquad
|
239
|
+
ggez
|
240
|
+
piston
|
241
|
+
winit
|
242
|
+
wgpu
|
243
|
+
vulkano
|
244
|
+
glium
|
245
|
+
three-d
|
246
|
+
kiss3d
|
247
|
+
nalgebra
|
248
|
+
cgmath
|
249
|
+
glam
|
250
|
+
ultraviolet
|
251
|
+
mint
|
252
|
+
image
|
253
|
+
imageproc
|
254
|
+
resvg
|
255
|
+
tiny-skia
|
256
|
+
lyon
|
257
|
+
femtovg
|
258
|
+
skulpin
|
259
|
+
socket2
|
260
|
+
mio
|
261
|
+
polling
|
262
|
+
async-io
|
263
|
+
calloop
|
264
|
+
quinn
|
265
|
+
rustls-pemfile
|
266
|
+
trust-dns
|
267
|
+
hickory-dns
|
268
|
+
async-h1
|
269
|
+
h2
|
270
|
+
h3
|
271
|
+
websocket
|
272
|
+
tokio-tungstenite
|
273
|
+
tungstenite
|
274
|
+
ws
|
275
|
+
warp-ws
|
276
|
+
regex
|
277
|
+
regex-syntax
|
278
|
+
pest
|
279
|
+
pest_derive
|
280
|
+
nom
|
281
|
+
combine
|
282
|
+
winnow
|
283
|
+
lalrpop
|
284
|
+
chumsky
|
285
|
+
logos
|
286
|
+
lex
|
287
|
+
yacc
|
288
|
+
tree-sitter
|
289
|
+
syntect
|
290
|
+
pulldown-cmark
|
291
|
+
comrak
|
292
|
+
markdown
|
293
|
+
ammonia
|
294
|
+
scraper
|
295
|
+
kuchiki
|
296
|
+
libc
|
297
|
+
winapi
|
298
|
+
windows
|
299
|
+
nix
|
300
|
+
users
|
301
|
+
sysinfo
|
302
|
+
procfs
|
303
|
+
psutil
|
304
|
+
notify
|
305
|
+
inotify
|
306
|
+
hotwatch
|
307
|
+
signal-hook
|
308
|
+
ctrlc
|
309
|
+
daemonize
|
310
|
+
fork
|
311
|
+
shared_memory
|
312
|
+
memmap2
|
313
|
+
mlock
|
314
|
+
caps
|
315
|
+
uzers
|
316
|
+
criterion
|
317
|
+
proptest
|
318
|
+
quickcheck
|
319
|
+
rstest
|
320
|
+
serial_test
|
321
|
+
mockall
|
322
|
+
httpmock
|
323
|
+
assert_cmd
|
324
|
+
assert_fs
|
325
|
+
predicates
|
326
|
+
tempfile
|
327
|
+
insta
|
328
|
+
goldenfile
|
329
|
+
similar
|
330
|
+
difference
|
331
|
+
pretty_assertions
|
332
|
+
config
|
333
|
+
figment
|
334
|
+
envy
|
335
|
+
dotenv
|
336
|
+
confy
|
337
|
+
directories
|
338
|
+
app_dirs
|
339
|
+
etcetera
|
340
|
+
platform-dirs
|
341
|
+
home
|
342
|
+
which
|
343
|
+
dunce
|
344
|
+
normpath
|
345
|
+
log
|
346
|
+
env_logger
|
347
|
+
tracing
|
348
|
+
tracing-subscriber
|
349
|
+
tracing-futures
|
350
|
+
tracing-actix-web
|
351
|
+
tracing-log
|
352
|
+
slog
|
353
|
+
fern
|
354
|
+
flexi_logger
|
355
|
+
log4rs
|
356
|
+
simplelog
|
357
|
+
stderrlog
|
358
|
+
pretty_env_logger
|
359
|
+
fast_log
|
360
|
+
chrono
|
361
|
+
time
|
362
|
+
humantime
|
363
|
+
chrono-tz
|
364
|
+
chrono-english
|
365
|
+
ical
|
366
|
+
cron
|
367
|
+
tokio-cron-scheduler
|
368
|
+
job_scheduler
|
369
|
+
delay_timer
|
370
|
+
tokenizers
|
371
|
+
safetensors
|
372
|
+
linfa
|
373
|
+
ndarray
|
374
|
+
smartcore
|
375
|
+
burn
|
376
|
+
tract-core
|
377
|
+
tract-onnx
|
378
|
+
tract-hir
|
379
|
+
tract-linalg
|
380
|
+
tract-data
|
381
|
+
tract-nnef
|
382
|
+
tract-onnx-opl
|
383
|
+
tract-pulse
|
384
|
+
tract-pulse-opl
|
385
|
+
tract-nnef-resources
|
386
|
+
tch
|
387
|
+
torch-sys
|
388
|
+
ort
|
389
|
+
ort-sys
|
390
|
+
candle-core
|
391
|
+
candle-nn
|
392
|
+
candle-transformers
|
393
|
+
candle-kernels
|
394
|
+
candle-onnx
|
395
|
+
candle-metal-kernels
|
396
|
+
tiktoken-rs
|
397
|
+
tensorflow
|
398
|
+
tensorflow-sys
|
399
|
+
onnxruntime
|
400
|
+
onnxruntime-sys
|
401
|
+
onnx-protobuf
|
402
|
+
llama-cpp-2
|
403
|
+
llama-cpp-sys-2
|
404
|
+
llm
|
405
|
+
llm-samplers
|
406
|
+
llm-chain
|
407
|
+
llm-chain-openai
|
408
|
+
llama-core
|
409
|
+
llamaedge
|
410
|
+
openai
|
411
|
+
openai-api-rs
|
412
|
+
openai_dive
|
413
|
+
genai
|
414
|
+
aleph-alpha-client
|
415
|
+
llm_api_access
|
416
|
+
ollama-rs
|
417
|
+
rust-bert
|
418
|
+
fastembed
|
419
|
+
hf-hub
|
420
|
+
whisper-rs-sys
|
421
|
+
toktrie
|
422
|
+
toktrie_hf_tokenizers
|
423
|
+
toktrie_hf_downloader
|
424
|
+
rust_tokenizers
|
@@ -1,112 +1,108 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
import
|
9
|
-
import
|
10
|
-
import
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
token
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
"
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
headers=headers,
|
33
|
-
|
34
|
-
|
35
|
-
if response.status_code == 200:
|
36
|
-
data = response.json()
|
37
|
-
remaining = data["resources"]["core"]["remaining"]
|
38
|
-
return True, f"Token valid, {remaining} API calls remaining"
|
39
|
-
elif response.status_code == 401:
|
40
|
-
return False, "GitHub token is invalid or expired"
|
41
|
-
else:
|
42
|
-
return
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
print("
|
55
|
-
print("
|
56
|
-
print("
|
57
|
-
print("
|
58
|
-
print("
|
59
|
-
print("
|
60
|
-
print("
|
61
|
-
print("
|
62
|
-
print("
|
63
|
-
print(
|
64
|
-
print("
|
65
|
-
print("
|
66
|
-
print("
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
if response in [
|
73
|
-
print("
|
74
|
-
print(" You may encounter rate limit warnings.")
|
75
|
-
return True
|
76
|
-
else:
|
77
|
-
print("\n
|
78
|
-
return False
|
79
|
-
|
80
|
-
|
81
|
-
def check_and_setup_github_token():
|
82
|
-
"""
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
#
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
print(f"Token check: {'✅' if is_valid else '❌'} {message}")
|
110
|
-
|
111
|
-
if not is_valid:
|
112
|
-
check_and_setup_github_token()
|
1
|
+
from typing import Dict, List, Tuple, Optional, Any
|
2
|
+
# github_token_checker.py
|
3
|
+
"""
|
4
|
+
GitHub Token Checker Module
|
5
|
+
Lightweight version of the token checker for integration into the main pipeline.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import os
|
9
|
+
import sys
|
10
|
+
import requests
|
11
|
+
import logging
|
12
|
+
|
13
|
+
|
14
|
+
def check_github_token_quick() -> tuple[bool, str]:
    """Verify the ``GITHUB_TOKEN`` environment variable with one API request.

    The token is read from the environment, rejected early when it is missing
    or shorter than 20 characters, and otherwise sent to GitHub's
    ``/rate_limit`` endpoint (10 second timeout).

    Returns:
        A ``(is_valid, message)`` pair. ``is_valid`` is True only when the
        endpoint answers with HTTP 200; ``message`` is a human-readable
        explanation of the outcome. Failures of the request or of response
        parsing are reported through the message rather than raised.
    """
    gh_token = os.getenv("GITHUB_TOKEN")

    # Cheap local checks first, so no network call is made for obvious misses.
    if not gh_token:
        return False, "GITHUB_TOKEN environment variable not set"
    if len(gh_token) < 20:
        return False, "GITHUB_TOKEN seems too short - may be invalid"

    try:
        reply = requests.get(
            "https://api.github.com/rate_limit",
            headers={
                "Accept": "application/vnd.github.v3+json",
                "Authorization": f"token {gh_token}",
            },
            timeout=10,
        )

        if reply.status_code == 200:
            calls_left = reply.json()["resources"]["core"]["remaining"]
            return True, f"Token valid, {calls_left} API calls remaining"
        if reply.status_code == 401:
            return False, "GitHub token is invalid or expired"
        return False, f"GitHub API returned status code: {reply.status_code}"
    except requests.RequestException as exc:
        # Transport-level problems (connection, timeout, invalid header, ...).
        return False, f"API request failed: {exc}"
    except Exception as exc:
        # Anything else, e.g. a response body without the expected keys.
        return False, f"Error checking token: {exc!s}"
|
50
|
+
|
51
|
+
|
52
|
+
def prompt_for_token_setup() -> bool:
    """Print GitHub token setup instructions and ask whether to continue.

    A short setup guide is written to stdout, then the user is asked whether
    the run should go ahead without a token.

    Returns:
        True if the answer is "y" or "yes" (case-insensitive, surrounding
        whitespace ignored). False for any other answer, including an empty
        line or end-of-input at the prompt.
    """
    guide_lines = (
        "\n" + "=" * 60,
        "[KEY] GitHub Token Required",
        "=" * 60,
        "\nThe Rust Crate Pipeline requires a GitHub Personal Access Token",
        "to access repository information and avoid rate limits.",
        "\n[GUIDE] Quick Setup:",
        "1. Get token: https://github.com/settings/tokens",
        "2. Required scopes: public_repo, read:user",
        "3. Set in environment:",
        ' export GITHUB_TOKEN="your_token_here"',
        "\n[TOOLS] Setup Scripts Available:",
        " ./setup_github_token.sh (Interactive setup)",
        " python3 check_github_token.py (Full verification)",
        "\n" + "=" * 60,
    )
    for line in guide_lines:
        print(line)

    # Ask if user wants to continue without token (limited functionality)
    try:
        response = input("\nContinue without GitHub token? (y/N): ").strip().lower()
    except EOFError:
        # Ctrl-D or a closed stdin gives no answer at all; fall back to the
        # prompt's advertised default (N) instead of crashing with a traceback.
        response = ""

    if response in ("y", "yes"):
        print("[WARNING] Running with limited GitHub API access (60 requests/hour)")
        print(" You may encounter rate limit warnings.")
        return True

    print("\n[STOP] Please set up your GitHub token and try again.")
    return False
|
79
|
+
|
80
|
+
|
81
|
+
def check_and_setup_github_token() -> bool:
    """Check the GitHub token and decide whether the run may proceed.

    Runs the quick token check first. When it fails, the outcome depends on
    whether stdin is an interactive terminal: without one the problem is
    logged and the function gives up; with one the user is shown the setup
    prompt and may choose to continue without a token.

    Returns:
        True if the token check succeeds or the user chooses to continue at
        the prompt. False if the token check fails and either stdin is not
        interactive or the user declines.
    """
    is_valid, message = check_github_token_quick()

    if is_valid:
        logging.debug(f"GitHub token check: {message}")
        return True

    # Token is missing or invalid
    logging.warning(f"GitHub token issue: {message}")

    # Check if we're in a non-interactive environment. sys.stdin can be None
    # when the process has no console attached, so guard before calling
    # isatty() and treat that case as non-interactive too.
    stdin = sys.stdin
    if stdin is None or not stdin.isatty():
        logging.error("GitHub token not configured and running in non-interactive mode")
        logging.error("Set GITHUB_TOKEN environment variable before running")
        return False

    # Interactive prompt
    return prompt_for_token_setup()
|
100
|
+
|
101
|
+
|
102
|
+
if __name__ == "__main__":
    # Manual smoke test: run the quick check, report the result, and fall
    # through to the full check/setup flow only when the token is unusable.
    is_valid, message = check_github_token_quick()
    status_tag = "[OK]" if is_valid else "[FAIL]"
    print(f"Token check: {status_tag} {message}")

    if not is_valid:
        check_and_setup_github_token()
|