rust-crate-pipeline 1.2.3__tar.gz → 1.2.5__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
Files changed (29)
  1. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/CHANGELOG.md +37 -0
  2. {rust_crate_pipeline-1.2.3/rust_crate_pipeline.egg-info → rust_crate_pipeline-1.2.5}/PKG-INFO +1 -1
  3. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/pyproject.toml +1 -1
  4. rust_crate_pipeline-1.2.5/requirements-dev.txt +64 -0
  5. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/requirements.txt +2 -7
  6. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/main.py +44 -7
  7. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/pipeline.py +81 -20
  8. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/production_config.py +2 -8
  9. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/version.py +1 -1
  10. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5/rust_crate_pipeline.egg-info}/PKG-INFO +1 -1
  11. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline.egg-info/SOURCES.txt +1 -0
  12. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/setup.py +1 -1
  13. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/LICENSE +0 -0
  14. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/MANIFEST.in +0 -0
  15. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/README.md +0 -0
  16. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/__init__.py +0 -0
  17. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/__main__.py +0 -0
  18. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/ai_processing.py +0 -0
  19. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/analysis.py +0 -0
  20. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/config.py +0 -0
  21. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/github_token_checker.py +0 -0
  22. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/network.py +0 -0
  23. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/utils/file_utils.py +0 -0
  24. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline/utils/logging_utils.py +0 -0
  25. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline.egg-info/dependency_links.txt +0 -0
  26. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline.egg-info/entry_points.txt +0 -0
  27. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline.egg-info/requires.txt +0 -0
  28. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/rust_crate_pipeline.egg-info/top_level.txt +0 -0
  29. {rust_crate_pipeline-1.2.3 → rust_crate_pipeline-1.2.5}/setup.cfg +0 -0
@@ -2,6 +2,43 @@
2
2
 
3
3
  All notable changes to the Rust Crate Pipeline project.
4
4
 
5
+ ## [1.2.5] - 2025-06-18
6
+
7
+ ### 🎯 Balanced Dataset & Clean Build
8
+
9
+ #### ✨ Enhanced
10
+ - **Balanced Dataset**: Expanded crate list from 105 to ~425 crates with balanced category distribution
11
+ - **Reduced ML/AI Bias**: Decreased from 52% ML/AI crates to ~13% for more representative ecosystem analysis
12
+ - **Comprehensive Coverage**: Added 19 well-distributed categories covering the full Rust ecosystem
13
+ - **Category Expansion**: Significantly expanded web frameworks, async runtimes, databases, cryptography, gaming, and system programming categories
14
+ - **Clean Build Environment**: Cleaned up build artifacts and temporary scripts
15
+
16
+ #### 🔧 Technical
17
+ - **Duplicate Removal**: Eliminated duplicate crates across categories
18
+ - **Build Process**: Clean package build and validation
19
+ - **Version Alignment**: Updated all version references across all files
20
+
21
+ ## [1.2.4] - 2025-06-18
22
+
23
+ ### 🐛 Critical Logging Fix
24
+
25
+ #### ✨ Fixed
26
+ - **Critical Logging Issue**: Fixed 0-byte log file problem caused by conflicting `logging.basicConfig()` calls
27
+ - **Enhanced File Logging**: Improved logging setup with proper handler management and UTF-8 encoding
28
+ - **Better Error Tracking**: Now properly logs all processing steps, errors, and skipped crates to file
29
+ - **Console + File Output**: Maintains both console output and detailed file logging
30
+
31
+ #### 🔧 Improved
32
+ - **Logging Conflicts**: Resolved production config vs main config logging conflicts
33
+ - **File Handler**: Added proper error handling for log file creation
34
+ - **Encoding Issues**: Fixed Unicode handling in log files
35
+ - **Debug Information**: Always captures DEBUG+ level info to log files while respecting console log level
36
+
37
+ #### 📊 Monitoring
38
+ - **Better Tracking**: Now you can properly see which crates were skipped and why
39
+ - **Detailed Logs**: Each processing step is properly logged with timestamps
40
+ - **Error Analysis**: Failed crates and reasons are now captured in log files
41
+
5
42
  ## [1.2.3] - 2025-06-18
6
43
 
7
44
  ### 🚀 L4 GPU Optimization Release
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rust-crate-pipeline
3
- Version: 1.2.3
3
+ Version: 1.2.5
4
4
  Summary: A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights
5
5
  Home-page: https://github.com/DaveTmire85/SigilDERG-Data_Production
6
6
  Author: SuperUser666-Sigil
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rust-crate-pipeline"
7
- version = "1.2.3"
7
+ version = "1.2.5"
8
8
  authors = [
9
9
  {name = "SuperUser666-Sigil", email = "miragemodularframework@gmail.com"},
10
10
  ]
@@ -0,0 +1,64 @@
1
+ # Development Dependencies for Rust Crate Pipeline
2
+ # Install with: pip install -r requirements-dev.txt
3
+
4
+ # Testing frameworks and utilities
5
+ pytest>=7.4.0
6
+ pytest-cov>=4.1.0
7
+ pytest-mock>=3.11.0
8
+ pytest-asyncio>=0.21.0
9
+ pytest-timeout>=2.1.0
10
+ pytest-xdist>=3.3.0 # Parallel test execution
11
+ coverage>=7.2.0
12
+
13
+ # Code formatting and linting
14
+ black>=23.0.0
15
+ isort>=5.12.0
16
+ flake8>=6.0.0
17
+ mypy>=1.5.0
18
+ pylint>=2.17.0
19
+ bandit>=1.7.5 # Security linting
20
+
21
+ # Documentation
22
+ sphinx>=7.0.0
23
+ sphinx-rtd-theme>=1.3.0
24
+ sphinx-autodoc-typehints>=1.24.0
25
+
26
+ # Type checking and development tools
27
+ pre-commit>=3.3.0
28
+ twine>=4.0.0 # For PyPI publishing
29
+ build>=0.10.0 # Modern Python build tool
30
+ wheel>=0.40.0
31
+
32
+ # Jupyter notebook support (for data analysis)
33
+ jupyter>=1.0.0
34
+ ipykernel>=6.25.0
35
+ matplotlib>=3.7.0
36
+ pandas>=2.0.0
37
+ seaborn>=0.12.0
38
+
39
+ # Database development and debugging
40
+ sqlite-utils>=3.34.0
41
+
42
+ # Performance profiling
43
+ py-spy>=0.3.14
44
+ memory-profiler>=0.61.0
45
+
46
+ # Mock and testing utilities
47
+ responses>=0.23.0 # HTTP request mocking
48
+ factory-boy>=3.3.0 # Test data generation
49
+ faker>=19.0.0 # Fake data generation
50
+
51
+ # API development and testing
52
+ httpx>=0.24.0 # Modern HTTP client for testing
53
+ respx>=0.20.0 # HTTPX request mocking
54
+
55
+ # Environment and configuration management
56
+ python-dotenv>=1.0.0
57
+ pydantic>=2.0.0 # Data validation
58
+
59
+ # Git hooks and workflow
60
+ gitpython>=3.1.0
61
+
62
+ # Optional: Static analysis tools
63
+ vulture>=2.9.0 # Dead code detection
64
+ radon>=6.0.0 # Code complexity analysis
@@ -1,4 +1,4 @@
1
- # Core dependencies
1
+ # Core dependencies for Rust Crate Pipeline
2
2
  requests>=2.28.0
3
3
  requests-cache>=1.0.0
4
4
  beautifulsoup4>=4.11.0
@@ -8,11 +8,6 @@ tiktoken>=0.5.0
8
8
  psutil>=5.9.0
9
9
  python-dateutil>=2.8.0
10
10
 
11
- # Optional dependencies for advanced features
11
+ # Analysis and data processing
12
12
  radon>=6.0.0
13
13
  rustworkx>=0.13.0
14
-
15
- # Development dependencies (optional)
16
- pytest>=7.0.0
17
- black>=22.0.0
18
- isort>=5.10.0
@@ -111,15 +111,52 @@ Examples:
111
111
  return parser.parse_args()
112
112
 
113
113
  def configure_logging(log_level: str = 'INFO'):
114
+ """Configure logging with both console and file output"""
114
115
  level = getattr(logging, log_level.upper())
115
- logging.basicConfig(
116
- level=level,
117
- format="%(asctime)s [%(levelname)s] %(message)s",
118
- handlers=[
119
- logging.StreamHandler(),
120
- logging.FileHandler(f"crate_enrichment_{time.strftime('%Y%m%d-%H%M%S')}.log")
121
- ]
116
+
117
+ # Clear any existing handlers to avoid conflicts
118
+ root_logger = logging.getLogger()
119
+ for handler in root_logger.handlers[:]:
120
+ root_logger.removeHandler(handler)
121
+
122
+ # Set root logger level
123
+ root_logger.setLevel(level)
124
+
125
+ # Create formatters
126
+ detailed_formatter = logging.Formatter(
127
+ "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
128
+ datefmt='%Y-%m-%d %H:%M:%S'
122
129
  )
130
+ simple_formatter = logging.Formatter(
131
+ "%(asctime)s [%(levelname)s] %(message)s"
132
+ )
133
+
134
+ # Console handler
135
+ console_handler = logging.StreamHandler()
136
+ console_handler.setLevel(level)
137
+ console_handler.setFormatter(simple_formatter)
138
+ root_logger.addHandler(console_handler)
139
+
140
+ # File handler with unique timestamp
141
+ log_filename = f"crate_enrichment_{time.strftime('%Y%m%d-%H%M%S')}.log"
142
+ try:
143
+ file_handler = logging.FileHandler(log_filename, mode='w', encoding='utf-8')
144
+ file_handler.setLevel(logging.DEBUG) # Always capture DEBUG+ to file
145
+ file_handler.setFormatter(detailed_formatter)
146
+ root_logger.addHandler(file_handler)
147
+
148
+ # Log a test message to verify file handler works
149
+ logging.info(f"Logging initialized - file: {log_filename}")
150
+
151
+ except Exception as e:
152
+ logging.error(f"Failed to create log file {log_filename}: {e}")
153
+ print(f"Warning: Could not create log file: {e}")
154
+
155
+ # Set library loggers to less verbose levels
156
+ logging.getLogger('requests').setLevel(logging.WARNING)
157
+ logging.getLogger('urllib3').setLevel(logging.WARNING)
158
+ logging.getLogger('requests_cache').setLevel(logging.WARNING)
159
+ logging.getLogger('llama_cpp').setLevel(logging.WARNING)
123
160
 
124
161
  def check_disk_space():
125
162
  if shutil.disk_usage(".").free < 1_000_000_000: # 1GB
@@ -29,37 +29,98 @@ class CrateDataPipeline:
29
29
  def get_crate_list(self, limit: Optional[int] = None) -> List[str]:
30
30
  """Return a comprehensive list of all high-value crates to process"""
31
31
  crates = [
32
- # Web frameworks
33
- "actix-web", "rocket", "axum", "warp", "tower",
32
+ # Web frameworks and servers
33
+ "actix-web", "rocket", "axum", "warp", "tower", "tide", "gotham", "iron",
34
+ "nickel", "rouille", "thruster", "poem", "salvo", "viz", "ntex", "may-minihttp",
35
+ "tiny_http", "httptest", "mockito", "wiremock",
34
36
 
35
37
  # Async runtimes and utilities
36
- "tokio", "tokio-stream", "async-trait", "futures",
38
+ "tokio", "tokio-stream", "async-trait", "futures", "async-std", "smol",
39
+ "embassy", "embassy-executor", "embassy-time", "embassy-sync", "async-channel",
40
+ "async-broadcast", "async-lock", "async-once", "async-recursion", "futures-util",
41
+ "futures-channel", "futures-timer", "futures-test", "pin-project", "pin-project-lite",
37
42
 
38
43
  # Serialization/deserialization
39
- "serde", "serde_json", "serde_yaml", "bincode",
44
+ "serde", "serde_json", "serde_yaml", "bincode", "toml", "ron", "postcard",
45
+ "ciborium", "rmp-serde", "quick-xml", "roxmltree", "serde_cbor", "serde_derive",
46
+ "serde_repr", "serde_with", "serde_bytes", "flexbuffers", "bson", "avro-rs",
40
47
 
41
- # Error handling
42
- "anyhow", "thiserror",
48
+ # Error handling and debugging
49
+ "anyhow", "thiserror", "eyre", "color-eyre", "miette", "fehler", "snafu",
50
+ "failure", "quick-error", "derive_more", "displaydoc", "backtrace", "better-panic",
51
+ # Command line and terminal
52
+ "clap", "structopt", "argh", "gumdrop", "docopt", "getopts", "pico-args",
53
+ "crossterm", "termion", "console", "indicatif", "dialoguer", "termcolor",
54
+ "colored", "yansi", "owo-colors", "nu-ansi-term", "terminal_size",
55
+ # Utilities and general purpose
56
+ "rand", "uuid", "itertools", "num", "cfg-if", "bytes", "mime",
57
+ "form_urlencoded", "csv", "once_cell", "base64", "flate2", "tar", "dirs",
58
+ "walkdir", "glob", "bitflags", "indexmap", "smallvec", "arrayvec", "tinyvec",
59
+ "ahash", "fxhash", "rustc-hash", "seahash", "siphasher", "wyhash", "xxhash-rust",
60
+ "getrandom", "fastrand", "nanorand", "url", "percent-encoding", "unicode-segmentation",
61
+ "unicode-normalization", "unicode-width", "memchr", "aho-corasick", "bstr",
62
+ # HTTP clients and servers
63
+ "reqwest", "hyper", "surf", "ureq", "attohttpc", "isahc", "curl", "libcurl-sys",
64
+ "http", "http-body", "httparse", "hyper-tls", "hyper-rustls", "native-tls",
65
+ "webpki", "webpki-roots",
43
66
 
44
- # Utilities
45
- "rand", "uuid", "chrono", "regex", "log", "env_logger", "clap", "crossterm",
46
- "itertools", "num", "cfg-if", "bytes", "mime", "form_urlencoded", "parking_lot",
47
- "csv", "lazy_static", "once_cell", "tracing", "base64", "sha2", "flate2", "tar",
67
+ # Database and storage
68
+ "sqlx", "diesel", "postgres", "rusqlite", "mysql", "mongodb", "redis",
69
+ "tokio-postgres", "deadpool-postgres", "bb8", "r2d2", "sea-orm", "rbatis",
70
+ "sled", "rocksdb", "lmdb", "redb", "pickledb", "persy", "heed", "fjall",
71
+ # Concurrency and parallelism
72
+ "rayon", "crossbeam", "crossbeam-channel", "crossbeam-utils", "crossbeam-epoch",
73
+ "crossbeam-deque", "parking_lot", "spin", "atomic", "arc-swap", "dashmap",
74
+ "flume", "kanal", "tokio-util", "futures-concurrency",
75
+ # Protocol buffers, gRPC, and messaging
76
+ "prost", "tonic", "protobuf", "grpcio", "tarpc", "capnp", "rmp",
77
+ "zmq", "nanomsg", "nats", "rdkafka", "pulsar", "lapin", "amqp", "rumqttc",
78
+ # Procedural macros and metaprogramming
79
+ "syn", "quote", "proc-macro2", "proc-macro-crate", "proc-macro-error",
80
+ "darling", "derive_builder", "strum", "strum_macros",
81
+ "enum-iterator", "num-derive", "num-traits", "paste", "lazy_static",
48
82
 
49
- # HTTP clients and servers
50
- "reqwest", "hyper",
83
+ # Cryptography and security
84
+ "ring", "rustls", "openssl", "sha2", "sha3", "blake2", "blake3", "md5",
85
+ "hmac", "pbkdf2", "scrypt", "argon2", "bcrypt", "chacha20poly1305",
86
+ "aes-gcm", "rsa", "ed25519-dalek", "x25519-dalek", "curve25519-dalek",
87
+ "secp256k1", "k256", "p256", "ecdsa", "signature", "rand_core",
51
88
 
52
- # Database
53
- "sqlx", "diesel", "postgres", "rusqlite",
89
+ # Game development and graphics
90
+ "bevy", "macroquad", "ggez", "piston", "winit", "wgpu", "vulkano", "glium",
91
+ "three-d", "kiss3d", "nalgebra", "cgmath", "glam", "ultraviolet", "mint",
92
+ "image", "imageproc", "resvg", "tiny-skia", "lyon", "femtovg", "skulpin",
93
+ # Networking and protocols
94
+ "socket2", "mio", "polling", "async-io", "calloop", "quinn",
95
+ "rustls-pemfile", "trust-dns", "hickory-dns", "async-h1", "h2", "h3",
96
+ "websocket", "tokio-tungstenite", "tungstenite", "ws", "warp-ws",
54
97
 
55
- # Concurrency
56
- "rayon",
98
+ # Text processing and parsing
99
+ "regex", "regex-syntax", "pest", "pest_derive", "nom", "combine", "winnow",
100
+ "lalrpop", "chumsky", "logos", "lex", "yacc", "tree-sitter", "syntect",
101
+ "pulldown-cmark", "comrak", "markdown", "ammonia", "scraper", "kuchiki",
57
102
 
58
- # Protocol buffers and gRPC
59
- "prost", "tonic",
103
+ # System programming and OS interfaces
104
+ "libc", "winapi", "windows", "nix", "users", "sysinfo", "procfs", "psutil",
105
+ "notify", "inotify", "hotwatch", "signal-hook", "ctrlc", "daemonize",
106
+ "fork", "shared_memory", "memmap2", "mlock", "caps", "uzers",
107
+ # Testing and development tools
108
+ "criterion", "proptest", "quickcheck", "rstest", "serial_test", "mockall",
109
+ "httpmock", "assert_cmd", "assert_fs", "predicates", "tempfile",
110
+ "insta", "goldenfile", "similar", "difference", "pretty_assertions",
60
111
 
61
- # Procedural macros
62
- "syn", "quote", "proc-macro2",
112
+ # Configuration and environment
113
+ "config", "figment", "envy", "dotenv", "confy", "directories", "app_dirs",
114
+ "etcetera", "platform-dirs", "home", "which", "dunce", "normpath",
115
+
116
+ # Logging and observability
117
+ "log", "env_logger", "tracing", "tracing-subscriber", "tracing-futures",
118
+ "tracing-actix-web", "tracing-log", "slog", "fern", "flexi_logger",
119
+ "log4rs", "simplelog", "stderrlog", "pretty_env_logger", "fast_log",
120
+
121
+ # Time and date
122
+ "chrono", "time", "humantime", "chrono-tz", "chrono-english", "ical",
123
+ "cron", "tokio-cron-scheduler", "job_scheduler", "delay_timer",
63
124
 
64
125
  # Machine Learning & AI
65
126
  "tokenizers", "safetensors", "linfa", "ndarray", "smartcore", "burn",
@@ -12,14 +12,8 @@ import os
12
12
  def configure_production_logging():
13
13
  """Configure logging for production to reduce verbose warnings"""
14
14
 
15
- # Set up logging format
16
- logging.basicConfig(
17
- level=logging.INFO, # Default to INFO level
18
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
19
- datefmt='%Y-%m-%d %H:%M:%S'
20
- )
21
-
22
- # Set specific loggers to less verbose levels
15
+ # Don't use basicConfig here - let main.py handle it
16
+ # Just set specific loggers to less verbose levels
23
17
  logging.getLogger('requests').setLevel(logging.WARNING)
24
18
  logging.getLogger('urllib3').setLevel(logging.WARNING)
25
19
  logging.getLogger('requests_cache').setLevel(logging.WARNING)
@@ -1,6 +1,6 @@
1
1
  """Version information for rust-crate-pipeline."""
2
2
 
3
- __version__ = "1.2.3"
3
+ __version__ = "1.2.5"
4
4
  __version_info__ = tuple(int(x) for x in __version__.split("."))
5
5
 
6
6
  # Version history
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rust-crate-pipeline
3
- Version: 1.2.3
3
+ Version: 1.2.5
4
4
  Summary: A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights
5
5
  Home-page: https://github.com/DaveTmire85/SigilDERG-Data_Production
6
6
  Author: SuperUser666-Sigil
@@ -3,6 +3,7 @@ LICENSE
3
3
  MANIFEST.in
4
4
  README.md
5
5
  pyproject.toml
6
+ requirements-dev.txt
6
7
  requirements.txt
7
8
  setup.py
8
9
  rust_crate_pipeline/__init__.py
@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
8
8
 
9
9
  setup(
10
10
  name="rust-crate-pipeline",
11
- version="1.2.3",
11
+ version="1.2.5",
12
12
  author="SuperUser666-Sigil",
13
13
  author_email="miragemodularframework@gmail.com",
14
14
  description="A comprehensive system for gathering, enriching, and analyzing metadata for Rust crates using AI-powered insights",