rust-crate-pipeline 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. rust_crate_pipeline/__init__.py +18 -27
  2. rust_crate_pipeline/__main__.py +1 -0
  3. rust_crate_pipeline/ai_processing.py +718 -596
  4. rust_crate_pipeline/analysis.py +330 -363
  5. rust_crate_pipeline/azure_ai_processing.py +462 -0
  6. rust_crate_pipeline/config.py +46 -28
  7. rust_crate_pipeline/core/__init__.py +19 -0
  8. rust_crate_pipeline/core/canon_registry.py +133 -0
  9. rust_crate_pipeline/core/irl_engine.py +256 -0
  10. rust_crate_pipeline/core/sacred_chain.py +117 -0
  11. rust_crate_pipeline/crate_analysis.py +54 -0
  12. rust_crate_pipeline/crate_list.txt +424 -0
  13. rust_crate_pipeline/github_token_checker.py +108 -112
  14. rust_crate_pipeline/main.py +329 -109
  15. rust_crate_pipeline/network.py +317 -308
  16. rust_crate_pipeline/pipeline.py +300 -375
  17. rust_crate_pipeline/production_config.py +24 -27
  18. rust_crate_pipeline/progress_monitor.py +334 -0
  19. rust_crate_pipeline/scraping/__init__.py +13 -0
  20. rust_crate_pipeline/scraping/unified_scraper.py +259 -0
  21. rust_crate_pipeline/unified_llm_processor.py +637 -0
  22. rust_crate_pipeline/unified_pipeline.py +548 -0
  23. rust_crate_pipeline/utils/file_utils.py +32 -5
  24. rust_crate_pipeline/utils/logging_utils.py +21 -16
  25. rust_crate_pipeline/version.py +76 -47
  26. rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
  27. rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
  28. rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
  29. rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
  30. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
  31. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
  32. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
  33. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,424 @@
1
+ actix-web
2
+ rocket
3
+ axum
4
+ warp
5
+ tower
6
+ tide
7
+ gotham
8
+ iron
9
+ nickel
10
+ rouille
11
+ thruster
12
+ poem
13
+ salvo
14
+ viz
15
+ ntex
16
+ may-minihttp
17
+ tiny_http
18
+ httptest
19
+ mockito
20
+ wiremock
21
+ tokio
22
+ tokio-stream
23
+ async-trait
24
+ futures
25
+ async-std
26
+ smol
27
+ embassy
28
+ embassy-executor
29
+ embassy-time
30
+ embassy-sync
31
+ async-channel
32
+ async-broadcast
33
+ async-lock
34
+ async-once
35
+ async-recursion
36
+ futures-util
37
+ futures-channel
38
+ futures-timer
39
+ futures-test
40
+ pin-project
41
+ pin-project-lite
42
+ serde
43
+ serde_json
44
+ serde_yaml
45
+ bincode
46
+ toml
47
+ ron
48
+ postcard
49
+ ciborium
50
+ rmp-serde
51
+ quick-xml
52
+ roxmltree
53
+ serde_cbor
54
+ serde_derive
55
+ serde_repr
56
+ serde_with
57
+ serde_bytes
58
+ flexbuffers
59
+ bson
60
+ avro-rs
61
+ anyhow
62
+ thiserror
63
+ eyre
64
+ color-eyre
65
+ miette
66
+ fehler
67
+ snafu
68
+ failure
69
+ quick-error
70
+ derive_more
71
+ displaydoc
72
+ backtrace
73
+ better-panic
74
+ clap
75
+ structopt
76
+ argh
77
+ gumdrop
78
+ docopt
79
+ getopts
80
+ pico-args
81
+ crossterm
82
+ termion
83
+ console
84
+ indicatif
85
+ dialoguer
86
+ termcolor
87
+ colored
88
+ yansi
89
+ owo-colors
90
+ nu-ansi-term
91
+ terminal_size
92
+ rand
93
+ uuid
94
+ itertools
95
+ num
96
+ cfg-if
97
+ bytes
98
+ mime
99
+ form_urlencoded
100
+ csv
101
+ once_cell
102
+ base64
103
+ flate2
104
+ tar
105
+ dirs
106
+ walkdir
107
+ glob
108
+ bitflags
109
+ indexmap
110
+ smallvec
111
+ arrayvec
112
+ tinyvec
113
+ ahash
114
+ fxhash
115
+ rustc-hash
116
+ seahash
117
+ siphasher
118
+ wyhash
119
+ xxhash-rust
120
+ getrandom
121
+ fastrand
122
+ nanorand
123
+ url
124
+ percent-encoding
125
+ unicode-segmentation
126
+ unicode-normalization
127
+ unicode-width
128
+ memchr
129
+ aho-corasick
130
+ bstr
131
+ reqwest
132
+ hyper
133
+ surf
134
+ ureq
135
+ attohttpc
136
+ isahc
137
+ curl
138
+ libcurl-sys
139
+ http
140
+ http-body
141
+ httparse
142
+ hyper-tls
143
+ hyper-rustls
144
+ native-tls
145
+ webpki
146
+ webpki-roots
147
+ sqlx
148
+ diesel
149
+ postgres
150
+ rusqlite
151
+ mysql
152
+ mongodb
153
+ redis
154
+ tokio-postgres
155
+ deadpool-postgres
156
+ bb8
157
+ r2d2
158
+ sea-orm
159
+ rbatis
160
+ sled
161
+ rocksdb
162
+ lmdb
163
+ redb
164
+ pickledb
165
+ persy
166
+ heed
167
+ fjall
168
+ rayon
169
+ crossbeam
170
+ crossbeam-channel
171
+ crossbeam-utils
172
+ crossbeam-epoch
173
+ crossbeam-deque
174
+ parking_lot
175
+ spin
176
+ atomic
177
+ arc-swap
178
+ dashmap
179
+ flume
180
+ kanal
181
+ tokio-util
182
+ futures-concurrency
183
+ prost
184
+ tonic
185
+ protobuf
186
+ grpcio
187
+ tarpc
188
+ capnp
189
+ rmp
190
+ zmq
191
+ nanomsg
192
+ nats
193
+ rdkafka
194
+ pulsar
195
+ lapin
196
+ amqp
197
+ rumqttc
198
+ syn
199
+ quote
200
+ proc-macro2
201
+ proc-macro-crate
202
+ proc-macro-error
203
+ darling
204
+ derive_builder
205
+ strum
206
+ strum_macros
207
+ enum-iterator
208
+ num-derive
209
+ num-traits
210
+ paste
211
+ lazy_static
212
+ ring
213
+ rustls
214
+ openssl
215
+ sha2
216
+ sha3
217
+ blake2
218
+ blake3
219
+ md5
220
+ hmac
221
+ pbkdf2
222
+ scrypt
223
+ argon2
224
+ bcrypt
225
+ chacha20poly1305
226
+ aes-gcm
227
+ rsa
228
+ ed25519-dalek
229
+ x25519-dalek
230
+ curve25519-dalek
231
+ secp256k1
232
+ k256
233
+ p256
234
+ ecdsa
235
+ signature
236
+ rand_core
237
+ bevy
238
+ macroquad
239
+ ggez
240
+ piston
241
+ winit
242
+ wgpu
243
+ vulkano
244
+ glium
245
+ three-d
246
+ kiss3d
247
+ nalgebra
248
+ cgmath
249
+ glam
250
+ ultraviolet
251
+ mint
252
+ image
253
+ imageproc
254
+ resvg
255
+ tiny-skia
256
+ lyon
257
+ femtovg
258
+ skulpin
259
+ socket2
260
+ mio
261
+ polling
262
+ async-io
263
+ calloop
264
+ quinn
265
+ rustls-pemfile
266
+ trust-dns
267
+ hickory-dns
268
+ async-h1
269
+ h2
270
+ h3
271
+ websocket
272
+ tokio-tungstenite
273
+ tungstenite
274
+ ws
275
+ warp-ws
276
+ regex
277
+ regex-syntax
278
+ pest
279
+ pest_derive
280
+ nom
281
+ combine
282
+ winnow
283
+ lalrpop
284
+ chumsky
285
+ logos
286
+ lex
287
+ yacc
288
+ tree-sitter
289
+ syntect
290
+ pulldown-cmark
291
+ comrak
292
+ markdown
293
+ ammonia
294
+ scraper
295
+ kuchiki
296
+ libc
297
+ winapi
298
+ windows
299
+ nix
300
+ users
301
+ sysinfo
302
+ procfs
303
+ psutil
304
+ notify
305
+ inotify
306
+ hotwatch
307
+ signal-hook
308
+ ctrlc
309
+ daemonize
310
+ fork
311
+ shared_memory
312
+ memmap2
313
+ mlock
314
+ caps
315
+ uzers
316
+ criterion
317
+ proptest
318
+ quickcheck
319
+ rstest
320
+ serial_test
321
+ mockall
322
+ httpmock
323
+ assert_cmd
324
+ assert_fs
325
+ predicates
326
+ tempfile
327
+ insta
328
+ goldenfile
329
+ similar
330
+ difference
331
+ pretty_assertions
332
+ config
333
+ figment
334
+ envy
335
+ dotenv
336
+ confy
337
+ directories
338
+ app_dirs
339
+ etcetera
340
+ platform-dirs
341
+ home
342
+ which
343
+ dunce
344
+ normpath
345
+ log
346
+ env_logger
347
+ tracing
348
+ tracing-subscriber
349
+ tracing-futures
350
+ tracing-actix-web
351
+ tracing-log
352
+ slog
353
+ fern
354
+ flexi_logger
355
+ log4rs
356
+ simplelog
357
+ stderrlog
358
+ pretty_env_logger
359
+ fast_log
360
+ chrono
361
+ time
362
+ humantime
363
+ chrono-tz
364
+ chrono-english
365
+ ical
366
+ cron
367
+ tokio-cron-scheduler
368
+ job_scheduler
369
+ delay_timer
370
+ tokenizers
371
+ safetensors
372
+ linfa
373
+ ndarray
374
+ smartcore
375
+ burn
376
+ tract-core
377
+ tract-onnx
378
+ tract-hir
379
+ tract-linalg
380
+ tract-data
381
+ tract-nnef
382
+ tract-onnx-opl
383
+ tract-pulse
384
+ tract-pulse-opl
385
+ tract-nnef-resources
386
+ tch
387
+ torch-sys
388
+ ort
389
+ ort-sys
390
+ candle-core
391
+ candle-nn
392
+ candle-transformers
393
+ candle-kernels
394
+ candle-onnx
395
+ candle-metal-kernels
396
+ tiktoken-rs
397
+ tensorflow
398
+ tensorflow-sys
399
+ onnxruntime
400
+ onnxruntime-sys
401
+ onnx-protobuf
402
+ llama-cpp-2
403
+ llama-cpp-sys-2
404
+ llm
405
+ llm-samplers
406
+ llm-chain
407
+ llm-chain-openai
408
+ llama-core
409
+ llamaedge
410
+ openai
411
+ openai-api-rs
412
+ openai_dive
413
+ genai
414
+ aleph-alpha-client
415
+ llm_api_access
416
+ ollama-rs
417
+ rust-bert
418
+ fastembed
419
+ hf-hub
420
+ whisper-rs-sys
421
+ toktrie
422
+ toktrie_hf_tokenizers
423
+ toktrie_hf_downloader
424
+ rust_tokenizers
@@ -1,112 +1,108 @@
1
- # github_token_checker.py
2
- """
3
- GitHub Token Checker Module
4
- Lightweight version of the token checker for integration into the main pipeline.
5
- """
6
-
7
- import os
8
- import sys
9
- import requests
10
- import logging
11
-
12
-
13
- def check_github_token_quick():
14
- """Quick check if GitHub token is available and valid"""
15
- token = os.getenv("GITHUB_TOKEN")
16
-
17
- if not token:
18
- return False, "GITHUB_TOKEN environment variable not set"
19
-
20
- if len(token) < 20:
21
- return False, "GITHUB_TOKEN seems too short - may be invalid"
22
-
23
- try:
24
- # Quick API check
25
- headers = {
26
- "Accept": "application/vnd.github.v3+json",
27
- "Authorization": f"token {token}"
28
- }
29
-
30
- response = requests.get(
31
- "https://api.github.com/rate_limit",
32
- headers=headers,
33
- timeout=10)
34
-
35
- if response.status_code == 200:
36
- data = response.json()
37
- remaining = data["resources"]["core"]["remaining"]
38
- return True, f"Token valid, {remaining} API calls remaining"
39
- elif response.status_code == 401:
40
- return False, "GitHub token is invalid or expired"
41
- else:
42
- return False, f"GitHub API returned status code: {
43
- response.status_code}"
44
-
45
- except requests.exceptions.RequestException as e:
46
- return False, f"Network error checking token: {str(e)}"
47
- except Exception as e:
48
- return False, f"Error checking token: {str(e)}"
49
-
50
-
51
- def prompt_for_token_setup():
52
- """Prompt user to set up GitHub token"""
53
- print("\n" + "=" * 60)
54
- print("🔑 GitHub Token Required")
55
- print("=" * 60)
56
- print("\nThe Rust Crate Pipeline requires a GitHub Personal Access Token")
57
- print("to access repository information and avoid rate limits.")
58
- print("\n📋 Quick Setup:")
59
- print("1. Get token: https://github.com/settings/tokens")
60
- print("2. Required scopes: public_repo, read:user")
61
- print("3. Set in environment:")
62
- print(" export GITHUB_TOKEN=\"your_token_here\"")
63
- print("\n🔧 Setup Scripts Available:")
64
- print(" ./setup_github_token.sh (Interactive setup)")
65
- print(" python3 check_github_token.py (Full verification)")
66
- print("\n" + "=" * 60)
67
-
68
- # Ask if user wants to continue without token (limited functionality)
69
- response = input(
70
- "\nContinue without GitHub token? (y/N): ").strip().lower()
71
-
72
- if response in ['y', 'yes']:
73
- print("⚠️ Running with limited GitHub API access (60 requests/hour)")
74
- print(" You may encounter rate limit warnings.")
75
- return True
76
- else:
77
- print("\n🛑 Please set up your GitHub token and try again.")
78
- return False
79
-
80
-
81
- def check_and_setup_github_token():
82
- """
83
- Check GitHub token and prompt for setup if missing.
84
- Returns True if should continue, False if should exit.
85
- """
86
- is_valid, message = check_github_token_quick()
87
-
88
- if is_valid:
89
- logging.debug(f"GitHub token check: {message}")
90
- return True
91
-
92
- # Token is missing or invalid
93
- logging.warning(f"GitHub token issue: {message}")
94
-
95
- # Check if we're in a non-interactive environment
96
- if not sys.stdin.isatty():
97
- logging.error(
98
- "GitHub token not configured and running in non-interactive mode")
99
- logging.error("Set GITHUB_TOKEN environment variable before running")
100
- return False
101
-
102
- # Interactive prompt
103
- return prompt_for_token_setup()
104
-
105
-
106
- if __name__ == "__main__":
107
- # Allow running this module directly for testing
108
- is_valid, message = check_github_token_quick()
109
- print(f"Token check: {'✅' if is_valid else '❌'} {message}")
110
-
111
- if not is_valid:
112
- check_and_setup_github_token()
1
# github_token_checker.py
"""
GitHub Token Checker Module
Lightweight version of the token checker for integration into the main pipeline.
"""

import logging
import os
import sys
from typing import Dict, List, Tuple, Optional, Any

import requests
12
+
13
+
14
def check_github_token_quick() -> tuple[bool, str]:
    """Report whether ``GITHUB_TOKEN`` is set and accepted by the GitHub API.

    The token is read from the ``GITHUB_TOKEN`` environment variable. A
    missing/empty value or one shorter than 20 characters is rejected
    without any network traffic; otherwise a single GET request is sent to
    the ``/rate_limit`` endpoint with a 10 second timeout.

    Returns:
        A ``(ok, message)`` pair. ``ok`` is True only when the API answers
        with status 200; ``message`` is a human-readable explanation of the
        outcome. Request failures and any other exception are reported
        through the message instead of being raised.
    """
    github_token = os.getenv("GITHUB_TOKEN")
    if not github_token:
        return False, "GITHUB_TOKEN environment variable not set"
    if len(github_token) < 20:
        return False, "GITHUB_TOKEN seems too short - may be invalid"

    request_headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"token {github_token}",
    }

    try:
        reply = requests.get(
            "https://api.github.com/rate_limit",
            headers=request_headers,
            timeout=10,
        )
        status = reply.status_code

        if status == 401:
            return False, "GitHub token is invalid or expired"
        if status != 200:
            return False, f"GitHub API returned status code: {status}"

        # Only a 200 response carries the rate-limit payload read here.
        calls_left = reply.json()["resources"]["core"]["remaining"]
        return True, f"Token valid, {calls_left} API calls remaining"
    except requests.RequestException as exc:
        return False, f"API request failed: {exc}"
    except Exception as exc:
        # Best-effort check: malformed payloads etc. become a failure message.
        return False, f"Error checking token: {str(exc)}"
50
+
51
+
52
def prompt_for_token_setup() -> bool:
    """Print token setup instructions and ask whether to continue without one.

    Returns:
        True when the user answers ``y`` or ``yes`` (case-insensitive,
        surrounding whitespace ignored); False for any other answer.
        End-of-file on stdin (closed/redirected input, Ctrl-D) is treated
        as the default answer "N" instead of raising ``EOFError``.
    """
    print("\n" + "=" * 60)
    print("[KEY] GitHub Token Required")
    print("=" * 60)
    print("\nThe Rust Crate Pipeline requires a GitHub Personal Access Token")
    print("to access repository information and avoid rate limits.")
    print("\n[GUIDE] Quick Setup:")
    print("1. Get token: https://github.com/settings/tokens")
    print("2. Required scopes: public_repo, read:user")
    print("3. Set in environment:")
    print('   export GITHUB_TOKEN="your_token_here"')
    print("\n[TOOLS] Setup Scripts Available:")
    print("   ./setup_github_token.sh    (Interactive setup)")
    print("   python3 check_github_token.py  (Full verification)")
    print("\n" + "=" * 60)

    # Ask if user wants to continue without token (limited functionality)
    try:
        response = input("\nContinue without GitHub token? (y/N): ").strip().lower()
    except EOFError:
        # No answer can be read; fall back to the advertised default ("N").
        response = ""

    if response in ("y", "yes"):
        print("[WARNING] Running with limited GitHub API access (60 requests/hour)")
        print("   You may encounter rate limit warnings.")
        return True

    print("\n[STOP] Please set up your GitHub token and try again.")
    return False
79
+
80
+
81
def check_and_setup_github_token() -> bool:
    """Decide whether the caller should proceed, based on the token check.

    Runs ``check_github_token_quick`` and logs its message. When the check
    fails, the user is prompted via ``prompt_for_token_setup`` if stdin is
    a TTY; otherwise two error lines are logged.

    Returns:
        True if the token check succeeded or the interactive prompt
        returned True; False if the prompt returned False or stdin is not
        interactive.
    """
    token_ok, detail = check_github_token_quick()

    if not token_ok:
        # Token is missing or invalid
        logging.warning(f"GitHub token issue: {detail}")

        if sys.stdin.isatty():
            # A terminal is attached, so let the user decide.
            return prompt_for_token_setup()

        # Non-interactive run: nobody can answer a prompt.
        logging.error("GitHub token not configured and running in non-interactive mode")
        logging.error("Set GITHUB_TOKEN environment variable before running")
        return False

    logging.debug(f"GitHub token check: {detail}")
    return True
100
+
101
+
102
if __name__ == "__main__":
    # Manual smoke test when the module is executed directly: print the
    # result of the quick check, then run the full check/prompt flow if
    # the token was not accepted.
    is_valid, message = check_github_token_quick()
    status_tag = "[OK]" if is_valid else "[FAIL]"
    print(f"Token check: {status_tag} {message}")

    if not is_valid:
        check_and_setup_github_token()