rust-crate-pipeline 1.3.4__py3-none-any.whl → 1.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rust_crate_pipeline/pipeline.py +14 -21
- rust_crate_pipeline/version.py +3 -1
- {rust_crate_pipeline-1.3.4.dist-info → rust_crate_pipeline-1.3.5.dist-info}/METADATA +8 -10
- {rust_crate_pipeline-1.3.4.dist-info → rust_crate_pipeline-1.3.5.dist-info}/RECORD +8 -8
- {rust_crate_pipeline-1.3.4.dist-info → rust_crate_pipeline-1.3.5.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.3.4.dist-info → rust_crate_pipeline-1.3.5.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.3.4.dist-info → rust_crate_pipeline-1.3.5.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.3.4.dist-info → rust_crate_pipeline-1.3.5.dist-info}/top_level.txt +0 -0
rust_crate_pipeline/pipeline.py
CHANGED
@@ -25,19 +25,12 @@ except ImportError:
|
|
25
25
|
|
26
26
|
# Import enhanced scraping capabilities
|
27
27
|
try:
|
28
|
-
import
|
29
|
-
|
30
|
-
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
31
|
-
from enhanced_scraping import (
|
32
|
-
CrateDocumentationScraper,
|
33
|
-
EnhancedScrapingResult,
|
34
|
-
)
|
35
|
-
|
28
|
+
from .scraping.unified_scraper import UnifiedScraper, ScrapingResult
|
36
29
|
ENHANCED_SCRAPING_AVAILABLE = True
|
37
30
|
except ImportError:
|
38
31
|
ENHANCED_SCRAPING_AVAILABLE = False
|
39
|
-
|
40
|
-
|
32
|
+
UnifiedScraper = None # type: ignore[assignment,misc]
|
33
|
+
ScrapingResult = None # type: ignore[assignment,misc]
|
41
34
|
logging.warning("Enhanced scraping not available - using basic methods")
|
42
35
|
|
43
36
|
|
@@ -53,16 +46,16 @@ class CrateDataPipeline:
|
|
53
46
|
if config.use_azure_openai and AZURE_OPENAI_AVAILABLE and AzureOpenAIEnricher is not None:
|
54
47
|
try:
|
55
48
|
self.enricher = AzureOpenAIEnricher(config)
|
56
|
-
logging.info("
|
49
|
+
logging.info("[OK] Using Azure OpenAI enricher")
|
57
50
|
except Exception as e:
|
58
|
-
logging.warning(f"
|
59
|
-
logging.info("
|
51
|
+
logging.warning(f"[WARN] Failed to initialize Azure OpenAI enricher: {e}")
|
52
|
+
logging.info("[INFO] Falling back to local LLM enricher")
|
60
53
|
self.enricher = LLMEnricher(config)
|
61
54
|
else:
|
62
55
|
if config.use_azure_openai and not AZURE_OPENAI_AVAILABLE:
|
63
|
-
logging.warning("
|
56
|
+
logging.warning("[WARN] Azure OpenAI requested but not available")
|
64
57
|
self.enricher = LLMEnricher(config)
|
65
|
-
logging.info("
|
58
|
+
logging.info("[OK] Using local LLM enricher")
|
66
59
|
|
67
60
|
# Initialize cargo analyzer
|
68
61
|
self.cargo_analyzer = CrateAnalyzer(".")
|
@@ -78,15 +71,15 @@ class CrateDataPipeline:
|
|
78
71
|
if (
|
79
72
|
not ENHANCED_SCRAPING_AVAILABLE
|
80
73
|
or not self.config.enable_crawl4ai
|
81
|
-
or
|
74
|
+
or UnifiedScraper is None
|
82
75
|
):
|
83
76
|
return None
|
84
77
|
try:
|
85
|
-
scraper =
|
86
|
-
logging.info("
|
78
|
+
scraper = UnifiedScraper()
|
79
|
+
logging.info("[OK] Enhanced scraping with Crawl4AI enabled")
|
87
80
|
return scraper
|
88
81
|
except Exception as e:
|
89
|
-
logging.warning(f"
|
82
|
+
logging.warning(f"[ERROR] Failed to initialize enhanced scraping: {e}")
|
90
83
|
return None
|
91
84
|
|
92
85
|
def _create_output_dir(self) -> str:
|
@@ -223,7 +216,7 @@ class CrateDataPipeline:
|
|
223
216
|
return
|
224
217
|
|
225
218
|
try:
|
226
|
-
scraping_results = await self.enhanced_scraper.
|
219
|
+
scraping_results = await self.enhanced_scraper.scrape_crate_documentation(crate.name)
|
227
220
|
if scraping_results:
|
228
221
|
self._integrate_scraping_results(crate, scraping_results)
|
229
222
|
logging.info(
|
@@ -393,5 +386,5 @@ class CrateDataPipeline:
|
|
393
386
|
self.save_final_output(all_enriched, dependency_analysis)
|
394
387
|
|
395
388
|
duration = time.time() - start_time
|
396
|
-
logging.info(f"
|
389
|
+
logging.info(f"[OK] Done. Enriched {len(all_enriched)} crates in {duration:.2f}s")
|
397
390
|
return all_enriched, dependency_analysis
|
rust_crate_pipeline/version.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
from typing import Dict, List, Tuple, Optional, Any
|
2
2
|
"""Version information for rust-crate-pipeline."""
|
3
3
|
|
4
|
-
__version__ = "1.3.
|
4
|
+
__version__ = "1.3.5"
|
5
5
|
__version_info__ = tuple(int(x) for x in __version__.split("-")[0].split("."))
|
6
|
+
__author__ = "SigilDERG Team"
|
7
|
+
__email__ = "sigilderg@example.com"
|
6
8
|
|
7
9
|
# Version history
|
8
10
|
# 1.2.5-dev.20250621 - Dev branch: experimental, not a formal
|
@@ -1,19 +1,19 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: rust-crate-pipeline
|
3
|
-
Version: 1.3.
|
4
|
-
Summary: A comprehensive
|
5
|
-
Home-page: https://github.com/
|
6
|
-
Author:
|
7
|
-
Author-email:
|
8
|
-
License
|
3
|
+
Version: 1.3.5
|
4
|
+
Summary: A comprehensive pipeline for analyzing Rust crates with AI enrichment and enhanced scraping
|
5
|
+
Home-page: https://github.com/SigilDERG/rust-crate-pipeline
|
6
|
+
Author: SigilDERG Team
|
7
|
+
Author-email: SigilDERG Team <sigilderg@example.com>
|
8
|
+
License: MIT
|
9
9
|
Project-URL: Homepage, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production
|
10
10
|
Project-URL: Documentation, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production#readme
|
11
11
|
Project-URL: Repository, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production
|
12
12
|
Project-URL: Bug Tracker, https://github.com/Superuser666-Sigil/SigilDERG-Data_Production/issues
|
13
|
-
Keywords: rust,crates,
|
13
|
+
Keywords: rust,crates,analysis,ai,pipeline,scraping
|
14
14
|
Classifier: Development Status :: 4 - Beta
|
15
15
|
Classifier: Intended Audience :: Developers
|
16
|
-
Classifier:
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
17
17
|
Classifier: Programming Language :: Python :: 3
|
18
18
|
Classifier: Programming Language :: Python :: 3.8
|
19
19
|
Classifier: Programming Language :: Python :: 3.9
|
@@ -21,9 +21,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.11
|
22
22
|
Classifier: Programming Language :: Python :: 3.12
|
23
23
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
24
|
-
Classifier: Topic :: Software Development :: Build Tools
|
25
24
|
Classifier: Topic :: Software Development :: Quality Assurance
|
26
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
27
25
|
Requires-Python: >=3.8
|
28
26
|
Description-Content-Type: text/markdown
|
29
27
|
License-File: LICENSE
|
@@ -9,12 +9,12 @@ rust_crate_pipeline/crate_list.txt,sha256=W3NxDtxvihyKp9SN85FYXX6p8Hh49IFih1M4-c
|
|
9
9
|
rust_crate_pipeline/github_token_checker.py,sha256=COXXS9uoLV9WYIcT02C-bV5uH3fa9D9HJImc07vMjLs,3766
|
10
10
|
rust_crate_pipeline/main.py,sha256=iGYEAYvXkoFFvaA6DIVGiUL3wLhiCzatB6Fvf-Yrj2A,18858
|
11
11
|
rust_crate_pipeline/network.py,sha256=khyjfOplaDvMxLWGB-JbPQnc27ZfozKGYBFw2b3BScM,12834
|
12
|
-
rust_crate_pipeline/pipeline.py,sha256=
|
12
|
+
rust_crate_pipeline/pipeline.py,sha256=CqPHLLRvMOpy-3ONL6hnPahV6Vh6S4M8oDsHd_lDrPc,16203
|
13
13
|
rust_crate_pipeline/production_config.py,sha256=uWylP9AIZZx7-9aT4sFmAKEEW9miJDxaiek8VE6WP-0,2372
|
14
14
|
rust_crate_pipeline/progress_monitor.py,sha256=5K9KP-Xggi1JEINfRmq2W-wGUHtNIBTcocpDtB1t8iM,13743
|
15
15
|
rust_crate_pipeline/unified_llm_processor.py,sha256=eo7KotNuqwc7_hgpFm18QLokFoufFslnvi8TnDsSYEg,25064
|
16
16
|
rust_crate_pipeline/unified_pipeline.py,sha256=2yglmXVlQfSkVq0HVTPonDee6VxWaQWZw0X2l4lLBGw,23704
|
17
|
-
rust_crate_pipeline/version.py,sha256=
|
17
|
+
rust_crate_pipeline/version.py,sha256=1OVfgKIllkCMfu2zCazTqINgUwkozqsQSa2z_MDz5yY,4481
|
18
18
|
rust_crate_pipeline/core/__init__.py,sha256=Sq4HWdANGqoYln7JdCog7m3BsGeR3tHdseeflvNetoQ,509
|
19
19
|
rust_crate_pipeline/core/canon_registry.py,sha256=36tmt_wU6-kSyZnGfh53N64C7E3G-QR7GFbr9epj4zg,4700
|
20
20
|
rust_crate_pipeline/core/irl_engine.py,sha256=QRZUdkN24W9XutLkj8JDplEz6FmnquUrwKsl0s2zRr4,10491
|
@@ -23,9 +23,9 @@ rust_crate_pipeline/scraping/__init__.py,sha256=ySkTRg7nIxgcbHJQ3L1XzcrOo281NZu0
|
|
23
23
|
rust_crate_pipeline/scraping/unified_scraper.py,sha256=ZE2gkc0vQ3BOLdSX_IV-kMe8QAm2Av4M7VqpkxEKyT4,9965
|
24
24
|
rust_crate_pipeline/utils/file_utils.py,sha256=tMaCPy7ghs9x4Hxu_sviX8MXU2sBjNvohUrvt4MejoM,2853
|
25
25
|
rust_crate_pipeline/utils/logging_utils.py,sha256=e5jG0Yd6k3exgAdbVca46kWADJ_Qz8UJ3yEJzwTqPyI,2452
|
26
|
-
rust_crate_pipeline-1.3.
|
27
|
-
rust_crate_pipeline-1.3.
|
28
|
-
rust_crate_pipeline-1.3.
|
29
|
-
rust_crate_pipeline-1.3.
|
30
|
-
rust_crate_pipeline-1.3.
|
31
|
-
rust_crate_pipeline-1.3.
|
26
|
+
rust_crate_pipeline-1.3.5.dist-info/licenses/LICENSE,sha256=tpd4XNpbssrSx9-iErATOLrOh0ivNPfO2I5MAPUpats,1088
|
27
|
+
rust_crate_pipeline-1.3.5.dist-info/METADATA,sha256=CXaKKIGRNDIkeaJvDcsslH7aM9Bu0zFzsNKwG_P2i10,11048
|
28
|
+
rust_crate_pipeline-1.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
29
|
+
rust_crate_pipeline-1.3.5.dist-info/entry_points.txt,sha256=9Rr_IRuFRIridXxUSdEJbB3ba0NnpEfKmknZXFdYRC0,70
|
30
|
+
rust_crate_pipeline-1.3.5.dist-info/top_level.txt,sha256=GUdB7RyxHLhijQxui_KTy3B8p_L2APui9C6RYa0FuaE,20
|
31
|
+
rust_crate_pipeline-1.3.5.dist-info/RECORD,,
|
File without changes
|
{rust_crate_pipeline-1.3.4.dist-info → rust_crate_pipeline-1.3.5.dist-info}/entry_points.txt
RENAMED
File without changes
|
{rust_crate_pipeline-1.3.4.dist-info → rust_crate_pipeline-1.3.5.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|