rust-crate-pipeline 1.2.6-py3-none-any.whl → 1.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rust_crate_pipeline/__init__.py +25 -25
- rust_crate_pipeline/__main__.py +1 -0
- rust_crate_pipeline/ai_processing.py +309 -200
- rust_crate_pipeline/analysis.py +304 -368
- rust_crate_pipeline/azure_ai_processing.py +453 -0
- rust_crate_pipeline/config.py +57 -19
- rust_crate_pipeline/core/__init__.py +19 -0
- rust_crate_pipeline/core/canon_registry.py +133 -0
- rust_crate_pipeline/core/irl_engine.py +256 -0
- rust_crate_pipeline/core/sacred_chain.py +117 -0
- rust_crate_pipeline/crate_analysis.py +54 -0
- rust_crate_pipeline/crate_list.txt +424 -0
- rust_crate_pipeline/github_token_checker.py +42 -36
- rust_crate_pipeline/main.py +386 -102
- rust_crate_pipeline/network.py +153 -133
- rust_crate_pipeline/pipeline.py +340 -264
- rust_crate_pipeline/production_config.py +35 -32
- rust_crate_pipeline/scraping/__init__.py +13 -0
- rust_crate_pipeline/scraping/unified_scraper.py +259 -0
- rust_crate_pipeline/unified_llm_processor.py +637 -0
- rust_crate_pipeline/unified_pipeline.py +548 -0
- rust_crate_pipeline/utils/file_utils.py +45 -14
- rust_crate_pipeline/utils/logging_utils.py +34 -17
- rust_crate_pipeline/version.py +47 -2
- rust_crate_pipeline-1.3.0.dist-info/METADATA +331 -0
- rust_crate_pipeline-1.3.0.dist-info/RECORD +30 -0
- rust_crate_pipeline-1.2.6.dist-info/METADATA +0 -573
- rust_crate_pipeline-1.2.6.dist-info/RECORD +0 -19
- {rust_crate_pipeline-1.2.6.dist-info → rust_crate_pipeline-1.3.0.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.2.6.dist-info → rust_crate_pipeline-1.3.0.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.2.6.dist-info → rust_crate_pipeline-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.2.6.dist-info → rust_crate_pipeline-1.3.0.dist-info}/top_level.txt +0 -0
rust_crate_pipeline/analysis.py
CHANGED
@@ -1,436 +1,372 @@
 # analysis.py
-import os
-import re
 import io
-import
-import time
+import re
 import tarfile
+import requests
+import logging
 import tempfile
+from typing import Any
+import os
+import sys
+import time
 import subprocess
-
-from datetime import datetime
-from dateutil.relativedelta import relativedelta
-from bs4 import BeautifulSoup
-from typing import Dict, Optional, List
+
 from .config import EnrichedCrate
 
+# Add the project root to the path to ensure utils can be imported
+# This is a common pattern in scripts to handle execution from different directories
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if project_root not in sys.path:
+    sys.path.insert(0, project_root)
+
+try:
+    from utils.rust_code_analyzer import RustCodeAnalyzer  # type: ignore
+except ImportError as e:
+    logging.error(
+        f"Failed to import RustCodeAnalyzer: {e}. "
+        f"Ensure the utils directory is in the Python path."
+    )
+    # Provide a non-functional fallback to avoid crashing the entire application
+    # if the import fails, but ensure it logs the error.
+
+    class RustCodeAnalyzer:  # type: ignore
+        def __init__(self, code_content: str) -> None:
+            logging.error(
+                "Using fallback RustCodeAnalyzer. Analysis will be incomplete."
+            )
+            self.code_content = code_content
+
+        def analyze(self) -> dict[str, Any]:
+            return {
+                "functions": [],
+                "structs": [],
+                "enums": [],
+                "traits": [],
+                "complexity": 0,
+                "lines_of_code": len(self.code_content.split("\n")),
+            }
+
+        @staticmethod
+        def create_empty_metrics() -> dict[str, Any]:
+            return {}
+
+        @staticmethod
+        def detect_project_structure(files: list[str]) -> dict[str, bool]:
+            return {}
+
+        @staticmethod
+        def analyze_rust_content(content: str) -> dict[str, Any]:
+            return {}
+
+        @staticmethod
+        def aggregate_metrics(
+            metrics: dict[str, Any],
+            content_analysis: dict[str, Any],
+            structure: dict[str, bool],
+        ) -> dict[str, Any]:
+            return metrics
+
+
+# Constants for URLs and paths
+CRATES_IO_API_URL = "https://crates.io/api/v1/crates"
+GITHUB_API_URL = "https://api.github.com/repos"
+LIB_RS_URL = "https://lib.rs/crates"
+
+
 class SourceAnalyzer:
     @staticmethod
-    def analyze_crate_source(crate: EnrichedCrate) ->
-        """Orchestrate source analysis from multiple sources"""
-        crate_name = crate.name
-        version = crate.version
+    def analyze_crate_source(crate: EnrichedCrate) -> dict[str, Any]:
+        """Orchestrate source analysis from multiple sources."""
         repo_url = crate.repository
-
+
         # Method 1: Try to download from crates.io
         try:
-            url = f"
-            response = requests.get(url, stream=True)
-
-
-
-
-
-
-
+            url = f"{CRATES_IO_API_URL}/{crate.name}/{crate.version}/download"
+            response = requests.get(url, stream=True, timeout=30)
+            response.raise_for_status()
+            logging.info(f"Successfully downloaded {crate.name} from crates.io")
+            return SourceAnalyzer.analyze_crate_tarball(response.content)
+        except requests.RequestException as e:
+            logging.warning(f"Failed to download from crates.io: {e}")
+
         # Method 2: Try GitHub if we have a GitHub URL
-        if "github.com" in repo_url:
+        if repo_url and "github.com" in repo_url:
+            match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
+            if match:
+                owner, repo_name = match.groups()
+                repo_name = repo_name.replace(".git", "")
+                try:
+                    github_url = f"{GITHUB_API_URL}/{owner}/{repo_name}/tarball"
+                    response = requests.get(github_url, timeout=30)
+                    response.raise_for_status()
+                    logging.info(f"Successfully downloaded {crate.name} from GitHub")
+                    return SourceAnalyzer.analyze_github_tarball(response.content)
+                except requests.RequestException as e:
+                    logging.warning(f"Failed to analyze from GitHub: {e}")
+
+        # Method 3: Fallback to cloning from the repository directly
+        if repo_url:
             try:
-
-
-                if match:
-                    owner, repo_name = match.groups()
-                    repo_name = repo_name.split('.')[0]  # Remove .git extension
-
-                    # Try to download tarball from GitHub
-                    github_url = f"https://api.github.com/repos/{owner}/{repo_name}/tarball"
-                    response = requests.get(github_url)
-
-                    if response.ok:
-                        return SourceAnalyzer.analyze_github_tarball(response.content)
+                logging.info(f"Attempting to clone repository for {crate.name}")
+                return SourceAnalyzer.analyze_crate_source_from_repo(repo_url)
             except Exception as e:
-
-
-        # Method 3: Try lib.rs
-        try:
-            # lib.rs doesn't have a direct download API, but redirects to crates.io or GitHub
-            url = f"https://lib.rs/crates/{crate_name}"
-            response = requests.get(url)
-
-            if response.ok:
-                soup = BeautifulSoup(response.text, 'html.parser')
-
-                # Look for repository links
-                repo_links = soup.select('a[href*="github.com"]')
-                if repo_links:
-                    repo_url = repo_links[0]['href']
-
-                    # We found a GitHub link, now analyze it
-                    return SourceAnalyzer.analyze_crate_source_from_repo(crate_name, version, repo_url)
-        except Exception as e:
-            print(f"Failed to analyze from lib.rs: {str(e)}")
-
-        # If we get here, we failed to analyze from any source
+                logging.error(f"Failed to clone and analyze repository {repo_url}: {e}")
+
         return {
-            "error": "Could not analyze crate from any source",
-            "attempted_sources": ["crates.io", "github", "
+            "error": "Could not analyze crate from any available source.",
+            "attempted_sources": ["crates.io", "github", "git_clone"],
             "file_count": 0,
-            "loc": 0
+            "loc": 0,
         }
 
     @staticmethod
-    def
-        """
-        metrics =
-            "file_count": 0,
-            "loc": 0,
-            "complexity": [],
-            "types": [],
-            "traits": [],
-            "functions": [],
-            "has_tests": False,
-            "has_examples": False,
-            "has_benchmarks": False
-        }
-
+    def _analyze_tarball_content(content: bytes) -> dict[str, Any]:
+        """Shared logic to analyze tarball content from any source."""
+        metrics = RustCodeAnalyzer.create_empty_metrics()
         try:
-
-
-
-
-                rust_files = [f for f in tar.getnames() if f.endswith('.rs')]
+            with io.BytesIO(content) as tar_content, tarfile.open(
+                fileobj=tar_content, mode="r:gz"
+            ) as tar:
+                rust_files = [f for f in tar.getnames() if f.endswith(".rs")]
                 metrics["file_count"] = len(rust_files)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                struct_matches = re.findall(r'struct\s+([a-zA-Z0-9_]+)', content_str)
-                                trait_matches = re.findall(r'trait\s+([a-zA-Z0-9_]+)', content_str)
-
-                                metrics["functions"].extend(fn_matches)
-                                metrics["types"].extend(struct_matches)
-                                metrics["traits"].extend(trait_matches)
-                    except Exception as e:
-                        print(f"Error analyzing file {filename}: {str(e)}")
-
-        except Exception as e:
-            metrics["error"] = str(e)
-
+                structure = RustCodeAnalyzer.detect_project_structure(tar.getnames())
+
+                for member in tar.getmembers():
+                    if member.isfile() and member.name.endswith(".rs"):
+                        file_content = tar.extractfile(member)
+                        if file_content:
+                            try:
+                                content_str = file_content.read().decode("utf-8")
+                                analysis = RustCodeAnalyzer.analyze_rust_content(
+                                    content_str
+                                )
+                                metrics = RustCodeAnalyzer.aggregate_metrics(
+                                    metrics, analysis, structure
+                                )
+                            except UnicodeDecodeError:
+                                logging.warning(
+                                    f"Skipping non-UTF-8 file: {member.name}"
+                                )
+        except tarfile.TarError as e:
+            metrics["error"] = f"Failed to read tarball: {e}"
+            logging.error(metrics["error"])
         return metrics
 
     @staticmethod
-    def
-        """Analyze a
-
-            "file_count": 0,
-            "loc": 0,
-            "complexity": [],
-            "types": [],
-            "traits": [],
-            "functions": [],
-            "has_tests": False,
-            "has_examples": False,
-            "has_benchmarks": False
-        }
-
-        try:
-            # GitHub tarballs are typically gzipped tar files
-            tar_content = io.BytesIO(content)
-            with tarfile.open(fileobj=tar_content, mode='r:gz') as tar:
-                # GitHub tarballs include the repo name and commit as the top dir
-                # So we need to handle the different structure
-                rust_files = [f for f in tar.getnames() if f.endswith('.rs')]
-                metrics["file_count"] = len(rust_files)
-
-                # Check for test/example/bench directories
-                all_files = tar.getnames()
-                metrics["has_tests"] = any('test' in f.lower() for f in all_files)
-                metrics["has_examples"] = any('example' in f.lower() for f in all_files)
-                metrics["has_benchmarks"] = any('bench' in f.lower() for f in all_files)
-
-                # Analyze each Rust file (same as crate tarball)
-                for filename in rust_files:
-                    try:
-                        member = tar.getmember(filename)
-                        if member.isfile():
-                            file_content = tar.extractfile(member)
-                            if file_content:
-                                content_str = file_content.read().decode('utf-8', errors='ignore')
-
-                                # Count lines of code
-                                metrics["loc"] += len(content_str.splitlines())
-
-                                # Extract code elements
-                                fn_matches = re.findall(r'fn\s+([a-zA-Z0-9_]+)', content_str)
-                                struct_matches = re.findall(r'struct\s+([a-zA-Z0-9_]+)', content_str)
-                                trait_matches = re.findall(r'trait\s+([a-zA-Z0-9_]+)', content_str)
-
-                                metrics["functions"].extend(fn_matches)
-                                metrics["types"].extend(struct_matches)
-                                metrics["traits"].extend(trait_matches)
-                    except Exception as e:
-                        print(f"Error analyzing file {filename}: {str(e)}")
-
-        except Exception as e:
-            metrics["error"] = str(e)
-
-        return metrics
+    def analyze_crate_tarball(content: bytes) -> dict[str, Any]:
+        """Analyze a .crate tarball from crates.io."""
+        return SourceAnalyzer._analyze_tarball_content(content)
 
     @staticmethod
-    def
-        """Analyze
-
-
-
-
-
-
-            "functions": [],
-            "has_tests": False,
-            "has_examples": False,
-            "has_benchmarks": False
-        }
-
+    def analyze_github_tarball(content: bytes) -> dict[str, Any]:
+        """Analyze a GitHub tarball."""
+        return SourceAnalyzer._analyze_tarball_content(content)
+
+    @staticmethod
+    def analyze_local_directory(directory: str) -> dict[str, Any]:
+        """Analyze source code from a local directory."""
+        metrics = RustCodeAnalyzer.create_empty_metrics()
        try:
-
-
-        for root,
-
-
-
-
+            rust_files: list[str] = []
+            all_paths: list[str] = []
+            for root, dirs, files in os.walk(directory):
+                # Exclude target and .git directories
+                dirs[:] = [d for d in dirs if d not in ["target", ".git"]]
+                for file in files:
+                    full_path = os.path.join(root, file)
+                    all_paths.append(full_path)
+                    if file.endswith(".rs"):
+                        rust_files.append(full_path)
+
            metrics["file_count"] = len(rust_files)
-
-
-            metrics["has_tests"] = any(os.path.exists(os.path.join(directory, d))
-                                       for d in ["tests", "test"])
-            metrics["has_examples"] = os.path.exists(os.path.join(directory, "examples"))
-            metrics["has_benchmarks"] = os.path.exists(os.path.join(directory, "benches"))
-
-            # Analyze each Rust file
+            structure = RustCodeAnalyzer.detect_project_structure(all_paths)
+
            for file_path in rust_files:
                try:
-                    with open(file_path,
+                    with open(file_path, encoding="utf-8", errors="ignore") as f:
                        content = f.read()
-
-
-
-
-                    # Extract code elements
-                    fn_matches = re.findall(r'fn\s+([a-zA-Z0-9_]+)', content)
-                    struct_matches = re.findall(r'struct\s+([a-zA-Z0-9_]+)', content)
-                    trait_matches = re.findall(r'trait\s+([a-zA-Z0-9_]+)', content)
-
-                    metrics["functions"].extend(fn_matches)
-                    metrics["types"].extend(struct_matches)
-                    metrics["traits"].extend(trait_matches)
-
+                    analysis = RustCodeAnalyzer.analyze_rust_content(content)
+                    metrics = RustCodeAnalyzer.aggregate_metrics(
+                        metrics, analysis, structure
+                    )
                except Exception as e:
-
-
+                    logging.warning(f"Error analyzing file {file_path}: {e}")
        except Exception as e:
-            metrics["error"] =
-
+            metrics["error"] = f"Failed to analyze local directory {directory}: {e}"
+            logging.error(metrics["error"])
        return metrics
 
     @staticmethod
-    def analyze_crate_source_from_repo(
-        """Clone and analyze a crate's source code from repository"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            "
-
-
-
-
-
+    def analyze_crate_source_from_repo(repo_url: str) -> dict[str, Any]:
+        """Clone and analyze a crate's source code from a repository."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            try:
+                logging.info(f"Cloning {repo_url} into {temp_dir}")
+                subprocess.run(
+                    ["git", "clone", "--depth=1", repo_url, temp_dir],
+                    capture_output=True,
+                    text=True,
+                    check=True,
+                    timeout=120,
+                )
+                return SourceAnalyzer.analyze_local_directory(temp_dir)
+            except (
+                subprocess.CalledProcessError,
+                subprocess.TimeoutExpired,
+            ) as e:
+                error_output = ""
+                if hasattr(e, "stderr") and e.stderr:
+                    error_output = e.stderr.decode("utf-8", "ignore")
+                else:
+                    error_output = str(e)
+                logging.error(f"Failed to clone repository {repo_url}: {error_output}")
+                return {
+                    "error": f"Failed to clone repository: {error_output}",
+                    "file_count": 0,
+                    "loc": 0,
+                }
+
 
 class SecurityAnalyzer:
     @staticmethod
-    def check_security_metrics(crate: EnrichedCrate) ->
-        """Check security metrics for a crate"""
-        security_data = {
+    def check_security_metrics(crate: EnrichedCrate) -> dict[str, Any]:
+        """Check security metrics for a crate (placeholder)."""
+        security_data: dict[str, Any] = {
            "advisories": [],
            "vulnerability_count": 0,
            "cargo_audit": None,
-            "
-            "test_coverage": None
+            "unsafe_blocks": 0,
        }
-
-
-
-
-        # Check RustSec Advisory Database
-        try:
-            # This would require the RustSec advisory database
-            # For now, just return placeholder data
-            advisories_url = f"https://rustsec.org/advisories/{crate_name}.json"
-            response = requests.get(advisories_url)
-            if response.ok:
-                advisories = response.json()
-                security_data["advisories"] = advisories
-                security_data["vulnerability_count"] = len(advisories)
-        except Exception:
-            pass
-
-        # Check for common security patterns in code
-        try:
-            # This would analyze the source code for unsafe blocks, etc.
-            # Placeholder for now
-            security_data["unsafe_blocks"] = 0
-            security_data["security_patterns"] = []
-        except Exception:
-            pass
-
+        # In a real implementation, this would run tools like `cargo-audit`
+        # and parse the output. For now, it remains a placeholder.
+        logging.info(f"Running placeholder security check for {crate.name}")
        return security_data
 
+
 class UserBehaviorAnalyzer:
     @staticmethod
-    def
-        """
-
+    def _get_github_headers() -> dict[str, str]:
+        """Get headers for GitHub API requests, including auth if available."""
+        headers = {"Accept": "application/vnd.github.v3+json"}
+        if token := os.environ.get("GITHUB_TOKEN"):
+            headers["Authorization"] = f"token {token}"
+        return headers
+
+    @staticmethod
+    def fetch_user_behavior_data(crate: EnrichedCrate) -> dict[str, Any]:
+        """Fetch user behavior data from GitHub and crates.io."""
+        result: dict[str, Any] = {
            "issues": [],
            "pull_requests": [],
            "version_adoption": {},
-            "community_metrics": {}
+            "community_metrics": {},
        }
-
-        crate_name = crate.name
        repo_url = crate.repository
-
-        # Extract owner/repo from URL
        if not repo_url or "github.com" not in repo_url:
            return result
-
-
-        if
+
+        match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
+        if not match:
            return result
-        owner, repo =
-
-
-        headers =
-
-
-
-
-
-
-
-
-
-
-
-            # Process issue data
-            for issue in issues_data:
-                if "pull_request" in issue:
-                    # This is a PR, not an issue
-                    result["pull_requests"].append({
-                        "number": issue["number"],
-                        "title": issue["title"],
-                        "state": issue["state"],
-                        "created_at": issue["created_at"],
-                        "closed_at": issue["closed_at"],
-                        "url": issue["html_url"]
-                    })
-                else:
-                    # Regular issue
-                    result["issues"].append({
-                        "number": issue["number"],
-                        "title": issue["title"],
-                        "state": issue["state"],
-                        "created_at": issue["created_at"],
-                        "closed_at": issue["closed_at"],
-                        "url": issue["html_url"]
-                    })
-
-            # Fetch commit activity for the past year
-            commits_url = f"https://api.github.com/repos/{owner}/{repo}/stats/commit_activity"
-            commits_resp = requests.get(commits_url, headers=headers)
-            if commits_resp.ok:
-                result["community_metrics"]["commit_activity"] = commits_resp.json()
-
-            # Rate limiting - be nice to GitHub API
-            time.sleep(1)
-        except Exception as e:
-            print(f"Error fetching GitHub data: {str(e)}")
-
-        # Get version adoption data from crates.io
+        owner, repo = match.groups()
+        repo = repo.replace(".git", "")
+
+        headers = UserBehaviorAnalyzer._get_github_headers()
+        UserBehaviorAnalyzer._fetch_github_activity(owner, repo, headers, result)
+        UserBehaviorAnalyzer._fetch_crates_io_versions(crate.name, result)
+
+        return result
+
+    @staticmethod
+    def _fetch_github_activity(
+        owner: str, repo: str, headers: dict[str, str], result: dict[str, Any]
+    ) -> None:
+        """Fetch issues, PRs, and commit activity from GitHub."""
        try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-            "
-            "
+            issues_url = f"{GITHUB_API_URL}/{owner}/{repo}/issues?state=all&per_page=30"
+            issues_resp = requests.get(issues_url, headers=headers, timeout=30)
+            issues_resp.raise_for_status()
+
+            for item in issues_resp.json():
+                is_pr = "pull_request" in item
+                data_list = result["pull_requests"] if is_pr else result["issues"]
+                data_list.append(
+                    {
+                        "number": item["number"],
+                        "title": item["title"],
+                        "state": item["state"],
+                        "created_at": item["created_at"],
+                        "closed_at": item["closed_at"],
+                        "url": item["html_url"],
                    }
-
-
-
-
+                )
+
+            # Fetch commit activity (retries on 202)
+            activity_url = f"{GITHUB_API_URL}/{owner}/{repo}/stats/commit_activity"
+            for _ in range(3):  # Retry up to 3 times
+                activity_resp = requests.get(activity_url, headers=headers, timeout=60)
+                if activity_resp.status_code == 200:
+                    result["community_metrics"][
+                        "commit_activity"
+                    ] = activity_resp.json()
+                    break
+                elif activity_resp.status_code == 202:
+                    logging.info(
+                        f"GitHub is calculating stats for {owner}/{repo}, waiting..."
+                    )
+                    time.sleep(2)
+                else:
+                    activity_resp.raise_for_status()
+
+        except requests.RequestException as e:
+            logging.warning(f"Error fetching GitHub data for {owner}/{repo}: {e}")
+
+    @staticmethod
+    def _fetch_crates_io_versions(crate_name: str, result: dict[str, Any]) -> None:
+        """Fetch version adoption data from crates.io."""
+        try:
+            versions_url = f"{CRATES_IO_API_URL}/{crate_name}/versions"
+            versions_resp = requests.get(versions_url, timeout=30)
+            versions_resp.raise_for_status()
+            versions_data = versions_resp.json().get("versions", [])
+
+            for version in versions_data[:10]:  # Top 10 versions
+                result["version_adoption"][version["num"]] = {
+                    "downloads": version["downloads"],
+                    "created_at": version["created_at"],
+                }
+        except requests.RequestException as e:
+            logging.warning(
+                f"Error fetching crates.io version data for {crate_name}: {e}"
+            )
+
 
 class DependencyAnalyzer:
     @staticmethod
-    def analyze_dependencies(crates:
-        """Analyze dependencies
-        dependency_graph = {}
+    def analyze_dependencies(crates: list[EnrichedCrate]) -> dict[str, Any]:
+        """Analyze dependencies within a given list of crates."""
        crate_names = {crate.name for crate in crates}
-
-
-
-
-                if dep.get("crate_id") in crate_names
-
-
-
-
-        reverse_deps = {}
+        dependency_graph: dict[str, list[str]] = {
+            crate.name: [
+                dep_id
+                for dep in crate.dependencies
+                if (dep_id := dep.get("crate_id")) and dep_id in crate_names
+            ]
+            for crate in crates
+        }
+
+        reverse_deps: dict[str, list[str]] = {}
        for crate_name, deps in dependency_graph.items():
            for dep in deps:
-                if dep not
-                    reverse_deps
-
-
+                if dep:  # Ensure dep is not None
+                    reverse_deps.setdefault(dep, []).append(crate_name)
+
+        most_depended = sorted(
+            reverse_deps.items(), key=lambda item: len(item[1]), reverse=True
+        )[:10]
+
        return {
            "dependency_graph": dependency_graph,
            "reverse_dependencies": reverse_deps,
-            "most_depended":
+            "most_depended": most_depended,
        }