rust-crate-pipeline 1.3.5__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,70 +1,112 @@
1
1
  # analysis.py
2
+ from __future__ import annotations
3
+
2
4
  import io
3
5
  import re
4
6
  import tarfile
5
7
  import requests
6
8
  import logging
7
9
  import tempfile
8
- from typing import Any
10
+ from typing import Any, Dict, List, Optional, Union
9
11
  import os
10
12
  import sys
11
13
  import time
12
14
  import subprocess
15
+ from dataclasses import dataclass
13
16
 
14
17
  from .config import EnrichedCrate
15
18
 
16
- # Add the project root to the path to ensure utils can be imported
17
- # This is a common pattern in scripts to handle execution from different directories
18
- project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
19
- if project_root not in sys.path:
20
- sys.path.insert(0, project_root)
21
-
22
- try:
23
- from utils.rust_code_analyzer import RustCodeAnalyzer # type: ignore
24
- except ImportError as e:
25
- logging.error(
26
- f"Failed to import RustCodeAnalyzer: {e}. "
27
- f"Ensure the utils directory is in the Python path."
28
- )
29
- # Provide a non-functional fallback to avoid crashing the entire application
30
- # if the import fails, but ensure it logs the error.
31
-
32
- class RustCodeAnalyzer: # type: ignore
33
- def __init__(self, code_content: str) -> None:
34
- logging.error(
35
- "Using fallback RustCodeAnalyzer. Analysis will be incomplete."
36
- )
37
- self.code_content = code_content
38
-
39
- def analyze(self) -> dict[str, Any]:
40
- return {
41
- "functions": [],
42
- "structs": [],
43
- "enums": [],
44
- "traits": [],
45
- "complexity": 0,
46
- "lines_of_code": len(self.code_content.split("\n")),
47
- }
48
-
49
- @staticmethod
50
- def create_empty_metrics() -> dict[str, Any]:
51
- return {}
52
-
53
- @staticmethod
54
- def detect_project_structure(files: list[str]) -> dict[str, bool]:
55
- return {}
56
-
57
- @staticmethod
58
- def analyze_rust_content(content: str) -> dict[str, Any]:
59
- return {}
60
-
61
- @staticmethod
62
- def aggregate_metrics(
63
- metrics: dict[str, Any],
64
- content_analysis: dict[str, Any],
65
- structure: dict[str, bool],
66
- ) -> dict[str, Any]:
67
- return metrics
19
+ # Create a fallback RustCodeAnalyzer that doesn't depend on external utils
20
+ class RustCodeAnalyzer:
21
+ """Fallback Rust code analyzer for when the full analyzer is not available."""
22
+
23
+ def __init__(self, code_content: str) -> None:
24
+ self.code_content = code_content
25
+
26
+ def analyze(self) -> dict[str, Any]:
27
+ """Basic analysis of Rust code content."""
28
+ lines = self.code_content.split('\n')
29
+ return {
30
+ "functions": self._count_functions(),
31
+ "structs": self._count_structs(),
32
+ "enums": self._count_enums(),
33
+ "traits": self._count_traits(),
34
+ "complexity": self._calculate_complexity(),
35
+ "lines_of_code": len(lines),
36
+ }
37
+
38
+ def _count_functions(self) -> int:
39
+ """Count function definitions."""
40
+ return len(re.findall(r'fn\s+\w+\s*\(', self.code_content))
41
+
42
+ def _count_structs(self) -> int:
43
+ """Count struct definitions."""
44
+ return len(re.findall(r'struct\s+\w+', self.code_content))
45
+
46
+ def _count_enums(self) -> int:
47
+ """Count enum definitions."""
48
+ return len(re.findall(r'enum\s+\w+', self.code_content))
49
+
50
+ def _count_traits(self) -> int:
51
+ """Count trait definitions."""
52
+ return len(re.findall(r'trait\s+\w+', self.code_content))
53
+
54
+ def _calculate_complexity(self) -> int:
55
+ """Calculate basic cyclomatic complexity."""
56
+ complexity = 0
57
+ complexity += len(re.findall(r'\bif\b', self.code_content))
58
+ complexity += len(re.findall(r'\bfor\b', self.code_content))
59
+ complexity += len(re.findall(r'\bwhile\b', self.code_content))
60
+ complexity += len(re.findall(r'\bmatch\b', self.code_content))
61
+ return complexity
62
+
63
+ @staticmethod
64
+ def create_empty_metrics() -> dict[str, Any]:
65
+ """Create empty metrics structure."""
66
+ return {
67
+ "functions": 0,
68
+ "structs": 0,
69
+ "enums": 0,
70
+ "traits": 0,
71
+ "complexity": 0,
72
+ "lines_of_code": 0,
73
+ "file_count": 0,
74
+ }
75
+
76
+ @staticmethod
77
+ def detect_project_structure(files: list[str]) -> dict[str, bool]:
78
+ """Detect basic project structure."""
79
+ return {
80
+ "has_cargo_toml": any("Cargo.toml" in f for f in files),
81
+ "has_src": any("/src/" in f for f in files),
82
+ "has_tests": any("/tests/" in f for f in files),
83
+ "has_examples": any("/examples/" in f for f in files),
84
+ }
85
+
86
+ @staticmethod
87
+ def analyze_rust_content(content: str) -> dict[str, Any]:
88
+ """Analyze Rust content."""
89
+ analyzer = RustCodeAnalyzer(content)
90
+ return analyzer.analyze()
91
+
92
+ @staticmethod
93
+ def aggregate_metrics(
94
+ metrics: dict[str, Any],
95
+ content_analysis: dict[str, Any],
96
+ structure: dict[str, bool],
97
+ ) -> dict[str, Any]:
98
+ """Aggregate metrics from multiple sources."""
99
+ for key, value in content_analysis.items():
100
+ if isinstance(value, (int, float)):
101
+ metrics[key] = metrics.get(key, 0) + value
102
+ elif isinstance(value, list):
103
+ if key not in metrics:
104
+ metrics[key] = []
105
+ metrics[key].extend(value)
106
+
107
+ # Add structure information
108
+ metrics.update(structure)
109
+ return metrics
68
110
 
69
111
 
70
112
  # Constants for URLs and paths
@@ -1,18 +1,14 @@
1
1
  # network.py
2
2
  import os
3
- import sys
4
3
  import re
4
+ import sys
5
5
  import time
6
6
  import logging
7
7
  import requests
8
+ from typing import Any, Dict, List, Optional, Union
8
9
  from bs4 import BeautifulSoup, Tag
9
- from typing import Any, Union
10
10
  from .config import PipelineConfig
11
11
 
12
- # Import utilities
13
- # Add the parent directory to the path to import utils
14
- sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
15
-
16
12
 
17
13
  class GitHubBatchClient:
18
14
  def __init__(self, config: PipelineConfig) -> None:
@@ -1,7 +1,7 @@
1
1
  from typing import Dict, List, Tuple, Optional, Any
2
2
  """Version information for rust-crate-pipeline."""
3
3
 
4
- __version__ = "1.3.5"
4
+ __version__ = "1.3.6"
5
5
  __version_info__ = tuple(int(x) for x in __version__.split("-")[0].split("."))
6
6
  __author__ = "SigilDERG Team"
7
7
  __email__ = "sigilderg@example.com"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rust-crate-pipeline
3
- Version: 1.3.5
3
+ Version: 1.3.6
4
4
  Summary: A comprehensive pipeline for analyzing Rust crates with AI enrichment and enhanced scraping
5
5
  Home-page: https://github.com/SigilDERG/rust-crate-pipeline
6
6
  Author: SigilDERG Team
@@ -15,24 +15,24 @@ Classifier: Development Status :: 4 - Beta
15
15
  Classifier: Intended Audience :: Developers
16
16
  Classifier: License :: OSI Approved :: MIT License
17
17
  Classifier: Programming Language :: Python :: 3
18
- Classifier: Programming Language :: Python :: 3.8
19
- Classifier: Programming Language :: Python :: 3.9
20
- Classifier: Programming Language :: Python :: 3.10
21
- Classifier: Programming Language :: Python :: 3.11
22
18
  Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
23
20
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
21
  Classifier: Topic :: Software Development :: Quality Assurance
25
- Requires-Python: >=3.8
22
+ Requires-Python: >=3.12
26
23
  Description-Content-Type: text/markdown
27
24
  License-File: LICENSE
28
25
  Requires-Dist: requests>=2.28.0
29
26
  Requires-Dist: requests-cache>=1.0.0
30
27
  Requires-Dist: beautifulsoup4>=4.11.0
28
+ Requires-Dist: crawl4ai>=0.6.0
29
+ Requires-Dist: playwright>=1.49.0
31
30
  Requires-Dist: tqdm>=4.64.0
32
31
  Requires-Dist: llama-cpp-python>=0.2.0
33
32
  Requires-Dist: tiktoken>=0.5.0
34
33
  Requires-Dist: psutil>=5.9.0
35
34
  Requires-Dist: python-dateutil>=2.8.0
35
+ Requires-Dist: litellm>=1.0.0
36
36
  Provides-Extra: dev
37
37
  Requires-Dist: pytest>=7.0.0; extra == "dev"
38
38
  Requires-Dist: black>=22.0.0; extra == "dev"
@@ -55,7 +55,7 @@ The Rust Crate Pipeline is designed to collect, process, and enrich metadata fro
55
55
 
56
56
  ## Features
57
57
 
58
- - **Web Scraping**: Automated collection of crate metadata from crates.io using Crawl4AI
58
+ - **Enhanced Web Scraping**: Automated collection of crate metadata from crates.io using Crawl4AI with Playwright
59
59
  - **AI Enrichment**: Local and Azure OpenAI-powered analysis of crate descriptions, features, and documentation
60
60
  - **Multi-Provider LLM Support**: Unified LLM processor supporting OpenAI, Azure OpenAI, Ollama, LM Studio, and LiteLLM
61
61
  - **Cargo Testing**: Automated cargo build, test, and audit execution for comprehensive crate analysis
@@ -64,6 +64,15 @@ The Rust Crate Pipeline is designed to collect, process, and enrich metadata fro
64
64
  - **Data Export**: Structured output in JSON format for further analysis
65
65
  - **RAG Cache**: Intelligent caching with Rule Zero policies and architectural patterns
66
66
  - **Docker Support**: Containerized deployment with optimized Docker configurations
67
+ - **Real-time Progress Monitoring**: CLI-based progress tracking with ASCII status indicators
68
+ - **Cross-platform Compatibility**: Unicode symbols replaced with ASCII equivalents for better encoding support
69
+
70
+ ## Requirements
71
+
72
+ - **Python 3.12+**: Required for modern type annotations and language features
73
+ - **Git**: For cloning repositories during analysis
74
+ - **Cargo**: For Rust crate testing and analysis
75
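+ - **Playwright**: The Python package is installed automatically as a dependency; the browser binaries must still be installed with `playwright install` (see Installation)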
67
76
 
68
77
  ## Installation
69
78
 
@@ -72,13 +81,22 @@ The Rust Crate Pipeline is designed to collect, process, and enrich metadata fro
72
81
  git clone https://github.com/Superuser666-Sigil/SigilDERG-Data_Production.git
73
82
  cd SigilDERG-Data_Production
74
83
 
75
- # Install in development mode
84
+ # Install in development mode (includes all dependencies)
76
85
  pip install -e .
77
86
 
78
- # Install additional dependencies for AI processing
79
- pip install -r requirements-crawl4ai.txt
87
+ # Install Playwright browsers for enhanced scraping
88
+ playwright install
80
89
  ```
81
90
 
91
+ ### Automatic Dependency Installation
92
+
93
+ The package automatically installs all required dependencies including:
94
+ - `crawl4ai` for web scraping
95
+ - `playwright` for enhanced browser automation
96
+ - `requests` for HTTP requests
97
+ - `aiohttp` for async operations
98
+ - And all other required packages
99
+
82
100
  ## Configuration
83
101
 
84
102
  ### Environment Variables
@@ -158,6 +176,27 @@ python -m rust_crate_pipeline --checkpoint-interval 5
158
176
 
159
177
  # Enable verbose logging
160
178
  python -m rust_crate_pipeline --log-level DEBUG
179
+
180
+ # Enable enhanced scraping with Playwright
181
+ python -m rust_crate_pipeline --enable-enhanced-scraping
182
+
183
+ # Set output directory for results
184
+ python -m rust_crate_pipeline --output-path ./results
185
+ ```
186
+
187
+ #### Enhanced Scraping
188
+
189
+ The pipeline now supports enhanced web scraping using Playwright for better data extraction:
190
+
191
+ ```bash
192
+ # Enable enhanced scraping (default)
193
+ python -m rust_crate_pipeline --enable-enhanced-scraping
194
+
195
+ # Use basic scraping only
196
+ python -m rust_crate_pipeline --disable-enhanced-scraping
197
+
198
+ # Configure scraping options
199
+ python -m rust_crate_pipeline --scraping-config '{"max_pages": 10, "concurrency": 3}'
161
200
  ```
162
201
 
163
202
  #### Multi-Provider LLM Support
@@ -278,6 +317,12 @@ clap
278
317
 
279
318
  ## Development
280
319
 
320
+ ### Prerequisites
321
+
322
+ - Python 3.12+ (required for modern type annotations)
323
+ - Git for version control
324
+ - Cargo for Rust crate testing
325
+
281
326
  ### Running Tests
282
327
 
283
328
  ```bash
@@ -289,6 +334,12 @@ pytest tests/test_main_integration.py
289
334
 
290
335
  # Run with coverage
291
336
  pytest --cov=rust_crate_pipeline tests/
337
+
338
+ # Run type checking
339
+ pyright rust_crate_pipeline/
340
+
341
+ # Run linting
342
+ flake8 rust_crate_pipeline/
292
343
  ```
293
344
 
294
345
  ### Code Quality
@@ -302,14 +353,64 @@ isort rust_crate_pipeline/
302
353
 
303
354
  # Type checking
304
355
  pyright rust_crate_pipeline/
356
+
357
+ # Lint code
358
+ flake8 rust_crate_pipeline/
305
359
  ```
306
360
 
307
- ## Requirements
361
+ ### Building and Publishing
362
+
363
+ ```bash
364
+ # Build package
365
+ python -m build
366
+
367
+ # Upload to PyPI (requires PYPI_API_TOKEN)
368
+ python -m twine upload dist/*
369
+
370
+ # Create release
371
+ python scripts/create_release.py
372
+ ```
373
+
374
+ ### Docker Development
375
+
376
+ ```bash
377
+ # Build Docker image
378
+ docker build -t rust-crate-pipeline .
379
+
380
+ # Run in Docker
381
+ docker run -it rust-crate-pipeline
382
+
383
+ # Run with volume mount for development
384
+ docker run -it -v $(pwd):/app rust-crate-pipeline
385
+ ```
308
386
 
309
- - Python 3.12+
310
- - Rust toolchain (for cargo testing)
311
- - Git (for GitHub API access)
312
- - Internet connection (for web scraping and API calls)
387
+ ## Recent Improvements
388
+
389
+ ### Version 1.3.6
390
+ - **Python 3.12+ Requirement**: Updated to use modern type annotations and language features
391
+ - **Type Safety**: Enhanced type annotations throughout the codebase with modern syntax
392
+ - **Build System**: Updated pyproject.toml and setup.py for better compatibility
393
+
394
+ ### Version 1.3.5
395
+ - **Enhanced Web Scraping**: Added Playwright-based scraping for better data extraction
396
+ - **Unicode Compatibility**: Replaced all Unicode symbols with ASCII equivalents for better cross-platform support
397
+ - **Automatic Dependencies**: All required packages are now automatically installed
398
+ - **Real-time Progress**: Added CLI-based progress monitoring with ASCII status indicators
399
+ - **Docker Optimization**: Updated Dockerfile to include Playwright browser installation
400
+
401
+ ### Version 1.3.4
402
+ - **PEP8 Compliance**: Fixed all Unicode emoji and symbols for better encoding support
403
+ - **Cross-platform Compatibility**: Improved compatibility across different operating systems
404
+ - **Type Safety**: Enhanced type annotations throughout the codebase
405
+
406
+ ### Version 1.3.3
407
+ - **Real-time Progress Monitoring**: Added CLI-only progress tracking feature
408
+ - **Enhanced Logging**: Improved status reporting and error handling
409
+
410
+ ### Version 1.3.2
411
+ - **Multi-Provider LLM Support**: Added support for OpenAI, Azure OpenAI, Ollama, LM Studio, and LiteLLM
412
+ - **Unified LLM Processor**: Centralized LLM processing with provider abstraction
413
+ - **Enhanced Error Handling**: Better error recovery and retry mechanisms
313
414
 
314
415
  ## License
315
416
 
@@ -1,20 +1,20 @@
1
1
  rust_crate_pipeline/__init__.py,sha256=ZJCApGu8h2Rn5-dkoBLXOpdoeD6b36w76--o0fEismQ,1749
2
2
  rust_crate_pipeline/__main__.py,sha256=PexSWQYtbFQg5P36WEnJ0X-oAtT8WDej3bIJoSAcCCQ,157
3
3
  rust_crate_pipeline/ai_processing.py,sha256=MP6VcvV3Jw2Pjof3NrewjTmO8ruVyJKcJGa9zhS_2eY,24140
4
- rust_crate_pipeline/analysis.py,sha256=9-WpGCOwto7mQre_AEYB6MA6bELekUMlGJzs-C5jg5g,15359
4
+ rust_crate_pipeline/analysis.py,sha256=_cmjynLWaQbGIdLQHU3P3rfqHB3gcNNgCdzStbsKrdw,17021
5
5
  rust_crate_pipeline/azure_ai_processing.py,sha256=kxbHGNSRSD_5KNkL2ihqCASJq8kdnb_N9u1-ogXbneE,16449
6
6
  rust_crate_pipeline/config.py,sha256=Fw3fRKCZawKaLQi7YqsmNNku4whZi89mWzr8BVRNS5E,3009
7
7
  rust_crate_pipeline/crate_analysis.py,sha256=GsoXemJ9VFyAbb4Sm5gY5ToTqNtOA4pI38AtngAQONk,2090
8
8
  rust_crate_pipeline/crate_list.txt,sha256=W3NxDtxvihyKp9SN85FYXX6p8Hh49IFih1M4-c-CynM,4334
9
9
  rust_crate_pipeline/github_token_checker.py,sha256=COXXS9uoLV9WYIcT02C-bV5uH3fa9D9HJImc07vMjLs,3766
10
10
  rust_crate_pipeline/main.py,sha256=iGYEAYvXkoFFvaA6DIVGiUL3wLhiCzatB6Fvf-Yrj2A,18858
11
- rust_crate_pipeline/network.py,sha256=khyjfOplaDvMxLWGB-JbPQnc27ZfozKGYBFw2b3BScM,12834
11
+ rust_crate_pipeline/network.py,sha256=mWjiRvOX31piBZ2QiJ-F75DBD4l6cqzTXcQdJvHxe90,12718
12
12
  rust_crate_pipeline/pipeline.py,sha256=CqPHLLRvMOpy-3ONL6hnPahV6Vh6S4M8oDsHd_lDrPc,16203
13
13
  rust_crate_pipeline/production_config.py,sha256=uWylP9AIZZx7-9aT4sFmAKEEW9miJDxaiek8VE6WP-0,2372
14
14
  rust_crate_pipeline/progress_monitor.py,sha256=5K9KP-Xggi1JEINfRmq2W-wGUHtNIBTcocpDtB1t8iM,13743
15
15
  rust_crate_pipeline/unified_llm_processor.py,sha256=eo7KotNuqwc7_hgpFm18QLokFoufFslnvi8TnDsSYEg,25064
16
16
  rust_crate_pipeline/unified_pipeline.py,sha256=2yglmXVlQfSkVq0HVTPonDee6VxWaQWZw0X2l4lLBGw,23704
17
- rust_crate_pipeline/version.py,sha256=1OVfgKIllkCMfu2zCazTqINgUwkozqsQSa2z_MDz5yY,4481
17
+ rust_crate_pipeline/version.py,sha256=izXdwKOkBxecVcCuMmOVbZnu5y-hHZZkEg39LmBPnis,4481
18
18
  rust_crate_pipeline/core/__init__.py,sha256=Sq4HWdANGqoYln7JdCog7m3BsGeR3tHdseeflvNetoQ,509
19
19
  rust_crate_pipeline/core/canon_registry.py,sha256=36tmt_wU6-kSyZnGfh53N64C7E3G-QR7GFbr9epj4zg,4700
20
20
  rust_crate_pipeline/core/irl_engine.py,sha256=QRZUdkN24W9XutLkj8JDplEz6FmnquUrwKsl0s2zRr4,10491
@@ -23,9 +23,9 @@ rust_crate_pipeline/scraping/__init__.py,sha256=ySkTRg7nIxgcbHJQ3L1XzcrOo281NZu0
23
23
  rust_crate_pipeline/scraping/unified_scraper.py,sha256=ZE2gkc0vQ3BOLdSX_IV-kMe8QAm2Av4M7VqpkxEKyT4,9965
24
24
  rust_crate_pipeline/utils/file_utils.py,sha256=tMaCPy7ghs9x4Hxu_sviX8MXU2sBjNvohUrvt4MejoM,2853
25
25
  rust_crate_pipeline/utils/logging_utils.py,sha256=e5jG0Yd6k3exgAdbVca46kWADJ_Qz8UJ3yEJzwTqPyI,2452
26
- rust_crate_pipeline-1.3.5.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
27
- rust_crate_pipeline-1.3.5.dist-info/METADATA,sha256=CXaKKIGRNDIkeaJvDcsslH7aM9Bu0zFzsNKwG_P2i10,11048
28
- rust_crate_pipeline-1.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
- rust_crate_pipeline-1.3.5.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
30
- rust_crate_pipeline-1.3.5.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
31
- rust_crate_pipeline-1.3.5.dist-info/RECORD,,
26
+ rust_crate_pipeline-1.3.6.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
27
+ rust_crate_pipeline-1.3.6.dist-info/METADATA,sha256=BWUkQKtJCbXt1KZbmdofzy0eC4LpoeMBLKVXu3H3hD0,14539
28
+ rust_crate_pipeline-1.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ rust_crate_pipeline-1.3.6.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
30
+ rust_crate_pipeline-1.3.6.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
31
+ rust_crate_pipeline-1.3.6.dist-info/RECORD,,