rust-crate-pipeline 1.4.0-py3-none-any.whl → 1.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rust_crate_pipeline/__init__.py +18 -27
- rust_crate_pipeline/__main__.py +1 -0
- rust_crate_pipeline/ai_processing.py +718 -596
- rust_crate_pipeline/analysis.py +330 -363
- rust_crate_pipeline/azure_ai_processing.py +462 -0
- rust_crate_pipeline/config.py +46 -28
- rust_crate_pipeline/core/__init__.py +19 -0
- rust_crate_pipeline/core/canon_registry.py +133 -0
- rust_crate_pipeline/core/irl_engine.py +256 -0
- rust_crate_pipeline/core/sacred_chain.py +117 -0
- rust_crate_pipeline/crate_analysis.py +54 -0
- rust_crate_pipeline/crate_list.txt +424 -0
- rust_crate_pipeline/github_token_checker.py +108 -112
- rust_crate_pipeline/main.py +329 -109
- rust_crate_pipeline/network.py +317 -308
- rust_crate_pipeline/pipeline.py +300 -375
- rust_crate_pipeline/production_config.py +24 -27
- rust_crate_pipeline/progress_monitor.py +334 -0
- rust_crate_pipeline/scraping/__init__.py +13 -0
- rust_crate_pipeline/scraping/unified_scraper.py +259 -0
- rust_crate_pipeline/unified_llm_processor.py +637 -0
- rust_crate_pipeline/unified_pipeline.py +548 -0
- rust_crate_pipeline/utils/file_utils.py +32 -5
- rust_crate_pipeline/utils/logging_utils.py +21 -16
- rust_crate_pipeline/version.py +76 -47
- rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
- rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
- rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
- rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
- {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
rust_crate_pipeline/analysis.py
CHANGED
@@ -1,447 +1,414 @@
 # analysis.py
+from __future__ import annotations
+
+import io
+import re
+import tarfile
+import requests
+import logging
+import tempfile
+from typing import Any, Dict, List, Optional, Union
 import os
 import sys
-import re
-import io
 import time
-import tarfile
 import subprocess
-import
-
-
-# Import utilities with fallback
-try:
-    # Add the parent directory to the path to import utils
-    sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
-    from utils.rust_code_analyzer import RustCodeAnalyzer
-except ImportError:
-    # Fallback implementation for when utils are not available
-    class RustCodeAnalyzer:
-        def __init__(self, code_content):
-            self.code_content = code_content
-
-        def analyze(self):
-            return {
-                "functions": [],
-                "structs": [],
-                "enums": [],
-                "traits": [],
-                "complexity": 0,
-                "lines_of_code": len(self.code_content.split('\n'))
-            }
-from typing import Dict, List
+from dataclasses import dataclass
+
 from .config import EnrichedCrate

-#
-
-
+# Create a fallback RustCodeAnalyzer that doesn't depend on external utils
+class RustCodeAnalyzer:
+    """Fallback Rust code analyzer for when the full analyzer is not available."""
+
+    def __init__(self, code_content: str) -> None:
+        self.code_content = code_content

+    def analyze(self) -> dict[str, Any]:
+        """Basic analysis of Rust code content."""
+        lines = self.code_content.split('\n')
+        return {
+            "functions": self._count_functions(),
+            "structs": self._count_structs(),
+            "enums": self._count_enums(),
+            "traits": self._count_traits(),
+            "complexity": self._calculate_complexity(),
+            "lines_of_code": len(lines),
+        }

-
-
-
-    """Orchestrate source analysis from multiple sources"""
-    crate_name = crate.name
-    version = crate.version
-    repo_url = crate.repository
+    def _count_functions(self) -> int:
+        """Count function definitions."""
+        return len(re.findall(r'fn\s+\w+\s*\(', self.code_content))

-
-
-
-        response = requests.get(url, stream=True)
+    def _count_structs(self) -> int:
+        """Count struct definitions."""
+        return len(re.findall(r'struct\s+\w+', self.code_content))

-
-
-
-    except Exception as e:
-        print(f"Failed to download from crates.io: {str(e)}")
+    def _count_enums(self) -> int:
+        """Count enum definitions."""
+        return len(re.findall(r'enum\s+\w+', self.code_content))

-
-
-
-        # Extract owner/repo from URL
-        match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
-        if match:
-            owner, repo_name = match.groups()
-            repo_name = repo_name.split(
-                '.')[0]  # Remove .git extension
-
-            # Try to download tarball from GitHub
-            github_url = f"https://api.github.com/repos/{owner}/{repo_name}/tarball"
-            response = requests.get(github_url)
-
-            if response.ok:
-                return SourceAnalyzer.analyze_github_tarball(
-                    response.content)
-    except Exception as e:
-        print(f"Failed to analyze from GitHub: {str(e)}")
+    def _count_traits(self) -> int:
+        """Count trait definitions."""
+        return len(re.findall(r'trait\s+\w+', self.code_content))

-
-
-
-
-
-
-
-
-        soup = BeautifulSoup(response.text, 'html.parser')
-
-        # Look for repository links
-        repo_links = soup.select('a[href*="github.com"]')
-        if repo_links:
-            repo_url = repo_links[0]['href']
-
-            # We found a GitHub link, now analyze it
-            return SourceAnalyzer.analyze_crate_source_from_repo(
-                crate_name, version, repo_url)
-    except Exception as e:
-        print(f"Failed to analyze from lib.rs: {str(e)}")
+    def _calculate_complexity(self) -> int:
+        """Calculate basic cyclomatic complexity."""
+        complexity = 0
+        complexity += len(re.findall(r'\bif\b', self.code_content))
+        complexity += len(re.findall(r'\bfor\b', self.code_content))
+        complexity += len(re.findall(r'\bwhile\b', self.code_content))
+        complexity += len(re.findall(r'\bmatch\b', self.code_content))
+        return complexity

-
+    @staticmethod
+    def create_empty_metrics() -> dict[str, Any]:
+        """Create empty metrics structure."""
         return {
-            "
-            "
+            "functions": 0,
+            "structs": 0,
+            "enums": 0,
+            "traits": 0,
+            "complexity": 0,
+            "lines_of_code": 0,
             "file_count": 0,
-
-        } @ staticmethod
+        }

-
-
-
+    @staticmethod
+    def detect_project_structure(files: list[str]) -> dict[str, bool]:
+        """Detect basic project structure."""
+        return {
+            "has_cargo_toml": any("Cargo.toml" in f for f in files),
+            "has_src": any("/src/" in f for f in files),
+            "has_tests": any("/tests/" in f for f in files),
+            "has_examples": any("/examples/" in f for f in files),
+        }

-
-
-
-
-
-            rust_files = [f for f in tar.getnames() if f.endswith('.rs')]
-            metrics["file_count"] = len(rust_files)
+    @staticmethod
+    def analyze_rust_content(content: str) -> dict[str, Any]:
+        """Analyze Rust content."""
+        analyzer = RustCodeAnalyzer(content)
+        return analyzer.analyze()

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                metrics, content_analysis, structure)
+    @staticmethod
+    def aggregate_metrics(
+        metrics: dict[str, Any],
+        content_analysis: dict[str, Any],
+        structure: dict[str, bool],
+    ) -> dict[str, Any]:
+        """Aggregate metrics from multiple sources."""
+        for key, value in content_analysis.items():
+            if isinstance(value, (int, float)):
+                metrics[key] = metrics.get(key, 0) + value
+            elif isinstance(value, list):
+                if key not in metrics:
+                    metrics[key] = []
+                metrics[key].extend(value)
+
+        # Add structure information
+        metrics.update(structure)
+        return metrics

-    except Exception as e:
-        print(f"Error analyzing file {filename}: {str(e)}")

-
-
+# Constants for URLs and paths
+CRATES_IO_API_URL = "https://crates.io/api/v1/crates"
+GITHUB_API_URL = "https://api.github.com/repos"
+LIB_RS_URL = "https://lib.rs/crates"

-    return metrics @ staticmethod

-
-
-
+class SourceAnalyzer:
+    @staticmethod
+    def analyze_crate_source(crate: EnrichedCrate) -> dict[str, Any]:
+        """Orchestrate source analysis from multiple sources."""
+        repo_url = crate.repository

+        # Method 1: Try to download from crates.io
         try:
-
-
-
-
-
-
-
+            url = f"{CRATES_IO_API_URL}/{crate.name}/{crate.version}/download"
+            response = requests.get(url, stream=True, timeout=30)
+            response.raise_for_status()
+            logging.info(f"Successfully downloaded {crate.name} from crates.io")
+            return SourceAnalyzer.analyze_crate_tarball(response.content)
+        except requests.RequestException as e:
+            logging.warning(f"Failed to download from crates.io: {e}")

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Method 2: Try GitHub if we have a GitHub URL
+        if repo_url and "github.com" in repo_url:
+            match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
+            if match:
+                owner, repo_name = match.groups()
+                repo_name = repo_name.replace(".git", "")
+                try:
+                    github_url = f"{GITHUB_API_URL}/{owner}/{repo_name}/tarball"
+                    response = requests.get(github_url, timeout=30)
+                    response.raise_for_status()
+                    logging.info(f"Successfully downloaded {crate.name} from GitHub")
+                    return SourceAnalyzer.analyze_github_tarball(response.content)
+                except requests.RequestException as e:
+                    logging.warning(f"Failed to analyze from GitHub: {e}")
+
+        # Method 3: Fallback to cloning from the repository directly
+        if repo_url:
+            try:
+                logging.info(f"Attempting to clone repository for {crate.name}")
+                return SourceAnalyzer.analyze_crate_source_from_repo(repo_url)
+            except Exception as e:
+                logging.error(f"Failed to clone and analyze repository {repo_url}: {e}")

-
-
+        return {
+            "error": "Could not analyze crate from any available source.",
+            "attempted_sources": ["crates.io", "github", "git_clone"],
+            "file_count": 0,
+            "loc": 0,
+        }

-
-
+    @staticmethod
+    def _analyze_tarball_content(content: bytes) -> dict[str, Any]:
+        """Shared logic to analyze tarball content from any source."""
+        metrics = RustCodeAnalyzer.create_empty_metrics()
+        try:
+            with io.BytesIO(content) as tar_content, tarfile.open(
+                fileobj=tar_content, mode="r:gz"
+            ) as tar:
+                rust_files = [f for f in tar.getnames() if f.endswith(".rs")]
+                metrics["file_count"] = len(rust_files)
+                structure = RustCodeAnalyzer.detect_project_structure(tar.getnames())
+
+                for member in tar.getmembers():
+                    if member.isfile() and member.name.endswith(".rs"):
+                        file_content = tar.extractfile(member)
+                        if file_content:
+                            try:
+                                content_str = file_content.read().decode("utf-8")
+                                analysis = RustCodeAnalyzer.analyze_rust_content(
+                                    content_str
+                                )
+                                metrics = RustCodeAnalyzer.aggregate_metrics(
+                                    metrics, analysis, structure
+                                )
+                            except UnicodeDecodeError:
+                                logging.warning(
+                                    f"Skipping non-UTF-8 file: {member.name}"
+                                )
+        except tarfile.TarError as e:
+            metrics["error"] = f"Failed to read tarball: {e}"
+            logging.error(metrics["error"])
+        return metrics
+
+    @staticmethod
+    def analyze_crate_tarball(content: bytes) -> dict[str, Any]:
+        """Analyze a .crate tarball from crates.io."""
+        return SourceAnalyzer._analyze_tarball_content(content)

-
+    @staticmethod
+    def analyze_github_tarball(content: bytes) -> dict[str, Any]:
+        """Analyze a GitHub tarball."""
+        return SourceAnalyzer._analyze_tarball_content(content)

-
-
+    @staticmethod
+    def analyze_local_directory(directory: str) -> dict[str, Any]:
+        """Analyze source code from a local directory."""
         metrics = RustCodeAnalyzer.create_empty_metrics()
-
         try:
-
-
-            for root,
-
-
-
-
+            rust_files: list[str] = []
+            all_paths: list[str] = []
+            for root, dirs, files in os.walk(directory):
+                # Exclude target and .git directories
+                dirs[:] = [d for d in dirs if d not in ["target", ".git"]]
+                for file in files:
+                    full_path = os.path.join(root, file)
+                    all_paths.append(full_path)
+                    if file.endswith(".rs"):
+                        rust_files.append(full_path)

             metrics["file_count"] = len(rust_files)
+            structure = RustCodeAnalyzer.detect_project_structure(all_paths)

-            # Check if the crate has tests/examples/benchmarks using atomic
-            # utility
-            project_dirs = [
-                d for d in os.listdir(directory) if os.path.isdir(
-                    os.path.join(
-                        directory, d))]
-            structure = RustCodeAnalyzer.detect_project_structure(
-                project_dirs + ["tests", "examples", "benches"])
-
-            # Override with actual directory checks
-            structure["has_tests"] = any(
-                os.path.exists(
-                    os.path.join(
-                        directory,
-                        d)) for d in [
-                    "tests",
-                    "test"])
-            structure["has_examples"] = os.path.exists(
-                os.path.join(directory, "examples"))
-            structure["has_benchmarks"] = os.path.exists(
-                os.path.join(directory, "benches"))
-
-            # Analyze each Rust file using atomic utility
             for file_path in rust_files:
                 try:
-                    with open(file_path,
+                    with open(file_path, encoding="utf-8", errors="ignore") as f:
                         content = f.read()
-
-                    # Use atomic content analysis
-                    content_analysis = RustCodeAnalyzer.analyze_rust_content(
-                        content)
+                    analysis = RustCodeAnalyzer.analyze_rust_content(content)
                     metrics = RustCodeAnalyzer.aggregate_metrics(
-                        metrics,
-
+                        metrics, analysis, structure
+                    )
                 except Exception as e:
-
-
+                    logging.warning(f"Error analyzing file {file_path}: {e}")
         except Exception as e:
-            metrics["error"] =
-
+            metrics["error"] = f"Failed to analyze local directory {directory}: {e}"
+            logging.error(metrics["error"])
         return metrics

     @staticmethod
-    def analyze_crate_source_from_repo(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        finally:
-            # Clean up (optional)
-            # subprocess.run(["rm", "-r", temp_dir], capture_output=True)
-            pass
+    def analyze_crate_source_from_repo(repo_url: str) -> dict[str, Any]:
+        """Clone and analyze a crate's source code from a repository."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            try:
+                logging.info(f"Cloning {repo_url} into {temp_dir}")
+                subprocess.run(
+                    ["git", "clone", "--depth=1", repo_url, temp_dir],
+                    capture_output=True,
+                    text=True,
+                    check=True,
+                    timeout=120,
+                )
+                return SourceAnalyzer.analyze_local_directory(temp_dir)
+            except (
+                subprocess.CalledProcessError,
+                subprocess.TimeoutExpired,
+            ) as e:
+                error_output = ""
+                if hasattr(e, "stderr") and e.stderr:
+                    error_output = e.stderr.decode("utf-8", "ignore")
+                else:
+                    error_output = str(e)
+                logging.error(f"Failed to clone repository {repo_url}: {error_output}")
+                return {
+                    "error": f"Failed to clone repository: {error_output}",
+                    "file_count": 0,
+                    "loc": 0,
+                }


 class SecurityAnalyzer:
     @staticmethod
-    def check_security_metrics(crate: EnrichedCrate) ->
-        """Check security metrics for a crate"""
-        security_data = {
+    def check_security_metrics(crate: EnrichedCrate) -> dict[str, Any]:
+        """Check security metrics for a crate (placeholder)."""
+        security_data: dict[str, Any] = {
             "advisories": [],
             "vulnerability_count": 0,
             "cargo_audit": None,
-            "
-            "test_coverage": None
+            "unsafe_blocks": 0,
         }
-
-
-
-
-        # Check RustSec Advisory Database
-        try:
-            # This would require the RustSec advisory database
-            # For now, just return placeholder data
-            advisories_url = f"https://rustsec.org/advisories/{crate_name}.json"
-            response = requests.get(advisories_url)
-            if response.ok:
-                advisories = response.json()
-                security_data["advisories"] = advisories
-                security_data["vulnerability_count"] = len(advisories)
-        except Exception:
-            pass
-
-        # Check for common security patterns in code
-        try:
-            # This would analyze the source code for unsafe blocks, etc.
-            # Placeholder for now
-            security_data["unsafe_blocks"] = 0
-            security_data["security_patterns"] = []
-        except Exception:
-            pass
-
+        # In a real implementation, this would run tools like `cargo-audit`
+        # and parse the output. For now, it remains a placeholder.
+        logging.info(f"Running placeholder security check for {crate.name}")
         return security_data


 class UserBehaviorAnalyzer:
     @staticmethod
-    def
-    """
-
+    def _get_github_headers() -> dict[str, str]:
+        """Get headers for GitHub API requests, including auth if available."""
+        headers = {"Accept": "application/vnd.github.v3+json"}
+        if token := os.environ.get("GITHUB_TOKEN"):
+            headers["Authorization"] = f"token {token}"
+        return headers
+
+    @staticmethod
+    def fetch_user_behavior_data(crate: EnrichedCrate) -> dict[str, Any]:
+        """Fetch user behavior data from GitHub and crates.io."""
+        result: dict[str, Any] = {
             "issues": [],
             "pull_requests": [],
             "version_adoption": {},
-            "community_metrics": {}
+            "community_metrics": {},
         }
-
-        crate_name = crate.name
         repo_url = crate.repository
-
-        # Extract owner/repo from URL
         if not repo_url or "github.com" not in repo_url:
             return result

-
-        if
+        match = re.search(r"github\.com/([^/]+)/([^/]+)", repo_url)
+        if not match:
             return result
-        owner, repo =
+        owner, repo = match.groups()
+        repo = repo.replace(".git", "")

-
-
-
-            headers["Authorization"] = f"token {
-                os.environ.get('GITHUB_TOKEN')}"
+        headers = UserBehaviorAnalyzer._get_github_headers()
+        UserBehaviorAnalyzer._fetch_github_activity(owner, repo, headers, result)
+        UserBehaviorAnalyzer._fetch_crates_io_versions(crate.name, result)

-
-        try:
-            # Get issues (last 30)
-            issues_url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=all&per_page=30"
-            issues_resp = requests.get(issues_url, headers=headers)
-            if issues_resp.ok:
-                issues_data = issues_resp.json()
-
-                # Process issue data
-                for issue in issues_data:
-                    if "pull_request" in issue:
-                        # This is a PR, not an issue
-                        result["pull_requests"].append({
-                            "number": issue["number"],
-                            "title": issue["title"],
-                            "state": issue["state"],
-                            "created_at": issue["created_at"],
-                            "closed_at": issue["closed_at"],
-                            "url": issue["html_url"]
-                        })
-                    else:
-                        # Regular issue
-                        result["issues"].append({
-                            "number": issue["number"],
-                            "title": issue["title"],
-                            "state": issue["state"],
-                            "created_at": issue["created_at"],
-                            "closed_at": issue["closed_at"],
-                            "url": issue["html_url"]
-                        })
-
-            # Fetch commit activity for the past year
-            commits_url = f"https://api.github.com/repos/{owner}/{repo}/stats/commit_activity"
-            commits_resp = requests.get(commits_url, headers=headers)
-            if commits_resp.ok:
-                result["community_metrics"]["commit_activity"] = commits_resp.json()
-
-            # Rate limiting - be nice to GitHub API
-            time.sleep(1)
-        except Exception as e:
-            print(f"Error fetching GitHub data: {str(e)}")
+        return result

-
+    @staticmethod
+    def _fetch_github_activity(
+        owner: str, repo: str, headers: dict[str, str], result: dict[str, Any]
+    ) -> None:
+        """Fetch issues, PRs, and commit activity from GitHub."""
         try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-            "
-            "
+            issues_url = f"{GITHUB_API_URL}/{owner}/{repo}/issues?state=all&per_page=30"
+            issues_resp = requests.get(issues_url, headers=headers, timeout=30)
+            issues_resp.raise_for_status()
+
+            for item in issues_resp.json():
+                is_pr = "pull_request" in item
+                data_list = result["pull_requests"] if is_pr else result["issues"]
+                data_list.append(
+                    {
+                        "number": item["number"],
+                        "title": item["title"],
+                        "state": item["state"],
+                        "created_at": item["created_at"],
+                        "closed_at": item["closed_at"],
+                        "url": item["html_url"],
                     }
-
-
+                )
+
+            # Fetch commit activity (retries on 202)
+            activity_url = f"{GITHUB_API_URL}/{owner}/{repo}/stats/commit_activity"
+            for _ in range(3):  # Retry up to 3 times
+                activity_resp = requests.get(activity_url, headers=headers, timeout=60)
+                if activity_resp.status_code == 200:
+                    result["community_metrics"][
+                        "commit_activity"
+                    ] = activity_resp.json()
+                    break
+                elif activity_resp.status_code == 202:
+                    logging.info(
+                        f"GitHub is calculating stats for {owner}/{repo}, waiting..."
+                    )
+                    time.sleep(2)
+                else:
+                    activity_resp.raise_for_status()
+
+        except requests.RequestException as e:
+            logging.warning(f"Error fetching GitHub data for {owner}/{repo}: {e}")

-
+    @staticmethod
+    def _fetch_crates_io_versions(crate_name: str, result: dict[str, Any]) -> None:
+        """Fetch version adoption data from crates.io."""
+        try:
+            versions_url = f"{CRATES_IO_API_URL}/{crate_name}/versions"
+            versions_resp = requests.get(versions_url, timeout=30)
+            versions_resp.raise_for_status()
+            versions_data = versions_resp.json().get("versions", [])
+
+            for version in versions_data[:10]:  # Top 10 versions
+                result["version_adoption"][version["num"]] = {
+                    "downloads": version["downloads"],
+                    "created_at": version["created_at"],
+                }
+        except requests.RequestException as e:
+            logging.warning(
+                f"Error fetching crates.io version data for {crate_name}: {e}"
+            )


 class DependencyAnalyzer:
     @staticmethod
-    def analyze_dependencies(crates:
-        """Analyze dependencies
-        dependency_graph = {}
+    def analyze_dependencies(crates: list[EnrichedCrate]) -> dict[str, Any]:
+        """Analyze dependencies within a given list of crates."""
         crate_names = {crate.name for crate in crates}
+        dependency_graph: dict[str, list[str]] = {
+            crate.name: [
+                dep_id
+                for dep in crate.dependencies
+                if (dep_id := dep.get("crate_id")) and dep_id in crate_names
+            ]
+            for crate in crates
+        }

-
-            deps = []
-            for dep in crate.dependencies:
-                if dep.get("crate_id") in crate_names:
-                    deps.append(dep.get("crate_id"))
-            dependency_graph[crate.name] = deps
-
-        # Find most depended-upon crates
-        reverse_deps = {}
+        reverse_deps: dict[str, list[str]] = {}
         for crate_name, deps in dependency_graph.items():
             for dep in deps:
-                if dep not
-                    reverse_deps
-
+                if dep:  # Ensure dep is not None
+                    reverse_deps.setdefault(dep, []).append(crate_name)
+
+        most_depended = sorted(
+            reverse_deps.items(), key=lambda item: len(item[1]), reverse=True
+        )[:10]

         return {
             "dependency_graph": dependency_graph,
             "reverse_dependencies": reverse_deps,
-            "most_depended":
-
-                key=lambda x: len(
-                    x[1]),
-                reverse=True)[
-                :10]}
+            "most_depended": most_depended,
+        }