rust-crate-pipeline 1.2.3__py3-none-any.whl → 1.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rust_crate_pipeline/main.py +44 -7
- rust_crate_pipeline/pipeline.py +81 -20
- rust_crate_pipeline/production_config.py +2 -8
- rust_crate_pipeline/version.py +1 -1
- {rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/METADATA +1 -1
- {rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/RECORD +10 -10
- {rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/top_level.txt +0 -0
rust_crate_pipeline/main.py
CHANGED
@@ -111,15 +111,52 @@ Examples:
|
|
111
111
|
return parser.parse_args()
|
112
112
|
|
113
113
|
def configure_logging(log_level: str = 'INFO'):
|
114
|
+
"""Configure logging with both console and file output"""
|
114
115
|
level = getattr(logging, log_level.upper())
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
116
|
+
|
117
|
+
# Clear any existing handlers to avoid conflicts
|
118
|
+
root_logger = logging.getLogger()
|
119
|
+
for handler in root_logger.handlers[:]:
|
120
|
+
root_logger.removeHandler(handler)
|
121
|
+
|
122
|
+
# Set root logger level
|
123
|
+
root_logger.setLevel(level)
|
124
|
+
|
125
|
+
# Create formatters
|
126
|
+
detailed_formatter = logging.Formatter(
|
127
|
+
"%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
128
|
+
datefmt='%Y-%m-%d %H:%M:%S'
|
122
129
|
)
|
130
|
+
simple_formatter = logging.Formatter(
|
131
|
+
"%(asctime)s [%(levelname)s] %(message)s"
|
132
|
+
)
|
133
|
+
|
134
|
+
# Console handler
|
135
|
+
console_handler = logging.StreamHandler()
|
136
|
+
console_handler.setLevel(level)
|
137
|
+
console_handler.setFormatter(simple_formatter)
|
138
|
+
root_logger.addHandler(console_handler)
|
139
|
+
|
140
|
+
# File handler with unique timestamp
|
141
|
+
log_filename = f"crate_enrichment_{time.strftime('%Y%m%d-%H%M%S')}.log"
|
142
|
+
try:
|
143
|
+
file_handler = logging.FileHandler(log_filename, mode='w', encoding='utf-8')
|
144
|
+
file_handler.setLevel(logging.DEBUG) # Always capture DEBUG+ to file
|
145
|
+
file_handler.setFormatter(detailed_formatter)
|
146
|
+
root_logger.addHandler(file_handler)
|
147
|
+
|
148
|
+
# Log a test message to verify file handler works
|
149
|
+
logging.info(f"Logging initialized - file: {log_filename}")
|
150
|
+
|
151
|
+
except Exception as e:
|
152
|
+
logging.error(f"Failed to create log file {log_filename}: {e}")
|
153
|
+
print(f"Warning: Could not create log file: {e}")
|
154
|
+
|
155
|
+
# Set library loggers to less verbose levels
|
156
|
+
logging.getLogger('requests').setLevel(logging.WARNING)
|
157
|
+
logging.getLogger('urllib3').setLevel(logging.WARNING)
|
158
|
+
logging.getLogger('requests_cache').setLevel(logging.WARNING)
|
159
|
+
logging.getLogger('llama_cpp').setLevel(logging.WARNING)
|
123
160
|
|
124
161
|
def check_disk_space():
|
125
162
|
if shutil.disk_usage(".").free < 1_000_000_000: # 1GB
|
rust_crate_pipeline/pipeline.py
CHANGED
@@ -29,37 +29,98 @@ class CrateDataPipeline:
|
|
29
29
|
def get_crate_list(self, limit: Optional[int] = None) -> List[str]:
|
30
30
|
"""Return a comprehensive list of all high-value crates to process"""
|
31
31
|
crates = [
|
32
|
-
# Web frameworks
|
33
|
-
"actix-web", "rocket", "axum", "warp", "tower",
|
32
|
+
# Web frameworks and servers
|
33
|
+
"actix-web", "rocket", "axum", "warp", "tower", "tide", "gotham", "iron",
|
34
|
+
"nickel", "rouille", "thruster", "poem", "salvo", "viz", "ntex", "may-minihttp",
|
35
|
+
"tiny_http", "httptest", "mockito", "wiremock",
|
34
36
|
|
35
37
|
# Async runtimes and utilities
|
36
|
-
"tokio", "tokio-stream", "async-trait", "futures",
|
38
|
+
"tokio", "tokio-stream", "async-trait", "futures", "async-std", "smol",
|
39
|
+
"embassy", "embassy-executor", "embassy-time", "embassy-sync", "async-channel",
|
40
|
+
"async-broadcast", "async-lock", "async-once", "async-recursion", "futures-util",
|
41
|
+
"futures-channel", "futures-timer", "futures-test", "pin-project", "pin-project-lite",
|
37
42
|
|
38
43
|
# Serialization/deserialization
|
39
|
-
"serde", "serde_json", "serde_yaml", "bincode",
|
44
|
+
"serde", "serde_json", "serde_yaml", "bincode", "toml", "ron", "postcard",
|
45
|
+
"ciborium", "rmp-serde", "quick-xml", "roxmltree", "serde_cbor", "serde_derive",
|
46
|
+
"serde_repr", "serde_with", "serde_bytes", "flexbuffers", "bson", "avro-rs",
|
40
47
|
|
41
|
-
# Error handling
|
42
|
-
"anyhow", "thiserror",
|
48
|
+
# Error handling and debugging
|
49
|
+
"anyhow", "thiserror", "eyre", "color-eyre", "miette", "fehler", "snafu",
|
50
|
+
"failure", "quick-error", "derive_more", "displaydoc", "backtrace", "better-panic",
|
51
|
+
# Command line and terminal
|
52
|
+
"clap", "structopt", "argh", "gumdrop", "docopt", "getopts", "pico-args",
|
53
|
+
"crossterm", "termion", "console", "indicatif", "dialoguer", "termcolor",
|
54
|
+
"colored", "yansi", "owo-colors", "nu-ansi-term", "terminal_size",
|
55
|
+
# Utilities and general purpose
|
56
|
+
"rand", "uuid", "itertools", "num", "cfg-if", "bytes", "mime",
|
57
|
+
"form_urlencoded", "csv", "once_cell", "base64", "flate2", "tar", "dirs",
|
58
|
+
"walkdir", "glob", "bitflags", "indexmap", "smallvec", "arrayvec", "tinyvec",
|
59
|
+
"ahash", "fxhash", "rustc-hash", "seahash", "siphasher", "wyhash", "xxhash-rust",
|
60
|
+
"getrandom", "fastrand", "nanorand", "url", "percent-encoding", "unicode-segmentation",
|
61
|
+
"unicode-normalization", "unicode-width", "memchr", "aho-corasick", "bstr",
|
62
|
+
# HTTP clients and servers
|
63
|
+
"reqwest", "hyper", "surf", "ureq", "attohttpc", "isahc", "curl", "libcurl-sys",
|
64
|
+
"http", "http-body", "httparse", "hyper-tls", "hyper-rustls", "native-tls",
|
65
|
+
"webpki", "webpki-roots",
|
43
66
|
|
44
|
-
#
|
45
|
-
"
|
46
|
-
"
|
47
|
-
"
|
67
|
+
# Database and storage
|
68
|
+
"sqlx", "diesel", "postgres", "rusqlite", "mysql", "mongodb", "redis",
|
69
|
+
"tokio-postgres", "deadpool-postgres", "bb8", "r2d2", "sea-orm", "rbatis",
|
70
|
+
"sled", "rocksdb", "lmdb", "redb", "pickledb", "persy", "heed", "fjall",
|
71
|
+
# Concurrency and parallelism
|
72
|
+
"rayon", "crossbeam", "crossbeam-channel", "crossbeam-utils", "crossbeam-epoch",
|
73
|
+
"crossbeam-deque", "parking_lot", "spin", "atomic", "arc-swap", "dashmap",
|
74
|
+
"flume", "kanal", "tokio-util", "futures-concurrency",
|
75
|
+
# Protocol buffers, gRPC, and messaging
|
76
|
+
"prost", "tonic", "protobuf", "grpcio", "tarpc", "capnp", "rmp",
|
77
|
+
"zmq", "nanomsg", "nats", "rdkafka", "pulsar", "lapin", "amqp", "rumqttc",
|
78
|
+
# Procedural macros and metaprogramming
|
79
|
+
"syn", "quote", "proc-macro2", "proc-macro-crate", "proc-macro-error",
|
80
|
+
"darling", "derive_builder", "strum", "strum_macros",
|
81
|
+
"enum-iterator", "num-derive", "num-traits", "paste", "lazy_static",
|
48
82
|
|
49
|
-
#
|
50
|
-
"
|
83
|
+
# Cryptography and security
|
84
|
+
"ring", "rustls", "openssl", "sha2", "sha3", "blake2", "blake3", "md5",
|
85
|
+
"hmac", "pbkdf2", "scrypt", "argon2", "bcrypt", "chacha20poly1305",
|
86
|
+
"aes-gcm", "rsa", "ed25519-dalek", "x25519-dalek", "curve25519-dalek",
|
87
|
+
"secp256k1", "k256", "p256", "ecdsa", "signature", "rand_core",
|
51
88
|
|
52
|
-
#
|
53
|
-
"
|
89
|
+
# Game development and graphics
|
90
|
+
"bevy", "macroquad", "ggez", "piston", "winit", "wgpu", "vulkano", "glium",
|
91
|
+
"three-d", "kiss3d", "nalgebra", "cgmath", "glam", "ultraviolet", "mint",
|
92
|
+
"image", "imageproc", "resvg", "tiny-skia", "lyon", "femtovg", "skulpin",
|
93
|
+
# Networking and protocols
|
94
|
+
"socket2", "mio", "polling", "async-io", "calloop", "quinn",
|
95
|
+
"rustls-pemfile", "trust-dns", "hickory-dns", "async-h1", "h2", "h3",
|
96
|
+
"websocket", "tokio-tungstenite", "tungstenite", "ws", "warp-ws",
|
54
97
|
|
55
|
-
#
|
56
|
-
"
|
98
|
+
# Text processing and parsing
|
99
|
+
"regex", "regex-syntax", "pest", "pest_derive", "nom", "combine", "winnow",
|
100
|
+
"lalrpop", "chumsky", "logos", "lex", "yacc", "tree-sitter", "syntect",
|
101
|
+
"pulldown-cmark", "comrak", "markdown", "ammonia", "scraper", "kuchiki",
|
57
102
|
|
58
|
-
#
|
59
|
-
"
|
103
|
+
# System programming and OS interfaces
|
104
|
+
"libc", "winapi", "windows", "nix", "users", "sysinfo", "procfs", "psutil",
|
105
|
+
"notify", "inotify", "hotwatch", "signal-hook", "ctrlc", "daemonize",
|
106
|
+
"fork", "shared_memory", "memmap2", "mlock", "caps", "uzers",
|
107
|
+
# Testing and development tools
|
108
|
+
"criterion", "proptest", "quickcheck", "rstest", "serial_test", "mockall",
|
109
|
+
"httpmock", "assert_cmd", "assert_fs", "predicates", "tempfile",
|
110
|
+
"insta", "goldenfile", "similar", "difference", "pretty_assertions",
|
60
111
|
|
61
|
-
#
|
62
|
-
"
|
112
|
+
# Configuration and environment
|
113
|
+
"config", "figment", "envy", "dotenv", "confy", "directories", "app_dirs",
|
114
|
+
"etcetera", "platform-dirs", "home", "which", "dunce", "normpath",
|
115
|
+
|
116
|
+
# Logging and observability
|
117
|
+
"log", "env_logger", "tracing", "tracing-subscriber", "tracing-futures",
|
118
|
+
"tracing-actix-web", "tracing-log", "slog", "fern", "flexi_logger",
|
119
|
+
"log4rs", "simplelog", "stderrlog", "pretty_env_logger", "fast_log",
|
120
|
+
|
121
|
+
# Time and date
|
122
|
+
"chrono", "time", "humantime", "chrono-tz", "chrono-english", "ical",
|
123
|
+
"cron", "tokio-cron-scheduler", "job_scheduler", "delay_timer",
|
63
124
|
|
64
125
|
# Machine Learning & AI
|
65
126
|
"tokenizers", "safetensors", "linfa", "ndarray", "smartcore", "burn",
|
rust_crate_pipeline/production_config.py
CHANGED
@@ -12,14 +12,8 @@ import os
|
|
12
12
|
def configure_production_logging():
|
13
13
|
"""Configure logging for production to reduce verbose warnings"""
|
14
14
|
|
15
|
-
#
|
16
|
-
|
17
|
-
level=logging.INFO, # Default to INFO level
|
18
|
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
19
|
-
datefmt='%Y-%m-%d %H:%M:%S'
|
20
|
-
)
|
21
|
-
|
22
|
-
# Set specific loggers to less verbose levels
|
15
|
+
# Don't use basicConfig here - let main.py handle it
|
16
|
+
# Just set specific loggers to less verbose levels
|
23
17
|
logging.getLogger('requests').setLevel(logging.WARNING)
|
24
18
|
logging.getLogger('urllib3').setLevel(logging.WARNING)
|
25
19
|
logging.getLogger('requests_cache').setLevel(logging.WARNING)
|
rust_crate_pipeline/version.py
CHANGED
{rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: rust-crate-pipeline
|
3
|
-
Version: 1.2.3
|
3
|
+
Version: 1.2.5
|
4
4
|
Summary: A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights
|
5
5
|
Home-page: https://github.com/DaveTmire85/SigilDERG-Data_Production
|
6
6
|
Author: SuperUser666-Sigil
|
{rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/RECORD
CHANGED
@@ -4,16 +4,16 @@ rust_crate_pipeline/ai_processing.py,sha256=B93rCDdxE-UkYMjmT0UotQTahx9-Lgzec7_b
|
|
4
4
|
rust_crate_pipeline/analysis.py,sha256=ijP4zp3cFnN09nZkeCluyAvbyAtAW_M2YSxALpQX8LY,18615
|
5
5
|
rust_crate_pipeline/config.py,sha256=r4Y_5SD-lfrM1112edk9T0S0MiVxaNSSHk4q2yDrM88,1528
|
6
6
|
rust_crate_pipeline/github_token_checker.py,sha256=MJqHP8J84NEZ6nzdutpC7iRnsP0kyqscjLUosvmI4MI,3768
|
7
|
-
rust_crate_pipeline/main.py,sha256=
|
7
|
+
rust_crate_pipeline/main.py,sha256=Wz4Q4TX-G7qvLNMyYT6cHbgRCeMJoWILCvXcJr1FYAc,7876
|
8
8
|
rust_crate_pipeline/network.py,sha256=t_G8eh_WHNugm_laMftcWVbHsmP0bOlTPnVW9DqF6SU,13375
|
9
|
-
rust_crate_pipeline/pipeline.py,sha256=
|
10
|
-
rust_crate_pipeline/production_config.py,sha256=
|
11
|
-
rust_crate_pipeline/version.py,sha256=
|
9
|
+
rust_crate_pipeline/pipeline.py,sha256=fcWgqKC0teGeVyNbwayFwngoZLJGWwWZAlWtMqwtdyY,17074
|
10
|
+
rust_crate_pipeline/production_config.py,sha256=TdvmO1SIRpex1xZ0AymTKXpLfkkvOG44Jyy7S5M-u7k,2304
|
11
|
+
rust_crate_pipeline/version.py,sha256=coYkryx3UFGAYS5obh7aBC1n8hhQIMhIeFhlnGfggeg,1022
|
12
12
|
rust_crate_pipeline/utils/file_utils.py,sha256=lnHeLrt1JYaQhRDKtA1TWR2HIyRO8zwOyWb-KmAmWgk,2126
|
13
13
|
rust_crate_pipeline/utils/logging_utils.py,sha256=O4Jnr_k9dBchrVqXf-vqtDKgizDtL_ljh8g7G2VCX_c,2241
|
14
|
-
rust_crate_pipeline-1.2.
|
15
|
-
rust_crate_pipeline-1.2.
|
16
|
-
rust_crate_pipeline-1.2.
|
17
|
-
rust_crate_pipeline-1.2.
|
18
|
-
rust_crate_pipeline-1.2.
|
19
|
-
rust_crate_pipeline-1.2.
|
14
|
+
rust_crate_pipeline-1.2.5.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
|
15
|
+
rust_crate_pipeline-1.2.5.dist-info/METADATA,sha256=tCIAwMZ41r7K7BesgCHH27f8S_5UMdqsujdu9MPDePk,16741
|
16
|
+
rust_crate_pipeline-1.2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
17
|
+
rust_crate_pipeline-1.2.5.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
|
18
|
+
rust_crate_pipeline-1.2.5.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
|
19
|
+
rust_crate_pipeline-1.2.5.dist-info/RECORD,,
|
{rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/WHEEL
RENAMED
File without changes
|
{rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/entry_points.txt
RENAMED
File without changes
|
{rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
{rust_crate_pipeline-1.2.3.dist-info → rust_crate_pipeline-1.2.5.dist-info}/top_level.txt
RENAMED
File without changes
|