packagepurge 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +125 -0
- package/core/Cargo.lock +1093 -0
- package/core/Cargo.toml +22 -0
- package/core/src/arc_lfu.rs +91 -0
- package/core/src/cache.rs +205 -0
- package/core/src/lockfiles.rs +112 -0
- package/core/src/main.rs +125 -0
- package/core/src/ml.rs +188 -0
- package/core/src/optimization.rs +314 -0
- package/core/src/safety.rs +103 -0
- package/core/src/scanner.rs +136 -0
- package/core/src/symlink.rs +223 -0
- package/core/src/types.rs +87 -0
- package/core/src/usage_tracker.rs +107 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +249 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core/bindings.d.ts +33 -0
- package/dist/core/bindings.d.ts.map +1 -0
- package/dist/core/bindings.js +172 -0
- package/dist/core/bindings.js.map +1 -0
- package/dist/managers/base-manager.d.ts +33 -0
- package/dist/managers/base-manager.d.ts.map +1 -0
- package/dist/managers/base-manager.js +122 -0
- package/dist/managers/base-manager.js.map +1 -0
- package/dist/managers/index.d.ts +12 -0
- package/dist/managers/index.d.ts.map +1 -0
- package/dist/managers/index.js +37 -0
- package/dist/managers/index.js.map +1 -0
- package/dist/managers/npm-manager.d.ts +14 -0
- package/dist/managers/npm-manager.d.ts.map +1 -0
- package/dist/managers/npm-manager.js +128 -0
- package/dist/managers/npm-manager.js.map +1 -0
- package/dist/managers/pnpm-manager.d.ts +14 -0
- package/dist/managers/pnpm-manager.d.ts.map +1 -0
- package/dist/managers/pnpm-manager.js +137 -0
- package/dist/managers/pnpm-manager.js.map +1 -0
- package/dist/managers/yarn-manager.d.ts +14 -0
- package/dist/managers/yarn-manager.d.ts.map +1 -0
- package/dist/managers/yarn-manager.js +141 -0
- package/dist/managers/yarn-manager.js.map +1 -0
- package/dist/types/index.d.ts +85 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +13 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/logger.d.ts +18 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +50 -0
- package/dist/utils/logger.js.map +1 -0
- package/package.json +64 -0
- package/src/cli/index.ts +212 -0
- package/src/core/bindings.ts +157 -0
- package/src/managers/base-manager.ts +117 -0
- package/src/managers/index.ts +32 -0
- package/src/managers/npm-manager.ts +96 -0
- package/src/managers/pnpm-manager.ts +107 -0
- package/src/managers/yarn-manager.ts +112 -0
- package/src/types/index.ts +97 -0
- package/src/utils/logger.ts +50 -0
- package/tsconfig.json +22 -0
package/core/src/ml.rs
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
use chrono::Utc;
|
|
2
|
+
use crate::types::{PackageUsageMetrics, ProjectMetadata, DeveloperBehavior};
|
|
3
|
+
|
|
4
|
+
/// Strategy interface for ML-backed package-eviction recommendations.
#[allow(dead_code)]
pub trait MlRecommender {
    /// Returns `Some(true/false)` when the recommender has a direct opinion
    /// on evicting `package_id`, or `None` to defer to other strategies.
    fn is_safe_to_evict(&self, package_id: &str) -> Option<bool>;
    /// Returns true when the package should be kept, judged from its usage
    /// metrics, its owning project's metadata, and developer behavior signals.
    fn should_keep(&self, package_id: &str, metrics: &PackageUsageMetrics, project: &ProjectMetadata, behavior: &DeveloperBehavior) -> bool;
}
|
|
9
|
+
|
|
10
|
+
#[allow(dead_code)]
|
|
11
|
+
pub struct NoopRecommender;
|
|
12
|
+
impl MlRecommender for NoopRecommender {
|
|
13
|
+
fn is_safe_to_evict(&self, _package_id: &str) -> Option<bool> { None }
|
|
14
|
+
fn should_keep(&self, _package_id: &str, _metrics: &PackageUsageMetrics, _project: &ProjectMetadata, _behavior: &DeveloperBehavior) -> bool {
|
|
15
|
+
true // Conservative: keep by default
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/// Predictive Optimizer using rule-based ML (can be extended with actual ML models)
#[allow(dead_code)]
pub struct PredictiveOptimizer {
    /// Keep threshold in days (packages used within this period are likely needed)
    // NOTE(review): this field is stored at construction but the rule windows
    // in predict_keep are hard-coded (7/14/30 days) and never read it —
    // confirm whether the rules were meant to scale with this value.
    prediction_window_days: i64,
}
|
|
25
|
+
|
|
26
|
+
impl PredictiveOptimizer {
|
|
27
|
+
pub fn new(prediction_window_days: i64) -> Self {
|
|
28
|
+
Self { prediction_window_days }
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/// Extract features from package metadata for ML prediction
|
|
32
|
+
fn extract_features(
|
|
33
|
+
&self,
|
|
34
|
+
metrics: &PackageUsageMetrics,
|
|
35
|
+
project: &ProjectMetadata,
|
|
36
|
+
behavior: &DeveloperBehavior,
|
|
37
|
+
) -> Vec<f64> {
|
|
38
|
+
let now = Utc::now();
|
|
39
|
+
|
|
40
|
+
// Feature 1: Days since last access
|
|
41
|
+
let days_since_access = (now - metrics.last_access_time).num_days() as f64;
|
|
42
|
+
|
|
43
|
+
// Feature 2: Days since last script execution
|
|
44
|
+
let days_since_script = metrics.last_script_execution
|
|
45
|
+
.map(|t| (now - t).num_days() as f64)
|
|
46
|
+
.unwrap_or(365.0); // High value if never executed
|
|
47
|
+
|
|
48
|
+
// Feature 3: Days since last successful build
|
|
49
|
+
let days_since_build = metrics.last_successful_build
|
|
50
|
+
.map(|t| (now - t).num_days() as f64)
|
|
51
|
+
.unwrap_or(365.0);
|
|
52
|
+
|
|
53
|
+
// Feature 4: Access frequency (normalized)
|
|
54
|
+
let access_frequency = metrics.access_count as f64 / 100.0; // Normalize
|
|
55
|
+
|
|
56
|
+
// Feature 5: Script execution frequency
|
|
57
|
+
let script_frequency = metrics.script_execution_count as f64 / 10.0;
|
|
58
|
+
|
|
59
|
+
// Feature 6: Project activity (days since last commit)
|
|
60
|
+
let days_since_commit = project.last_commit_date
|
|
61
|
+
.map(|t| (now - t).num_days() as f64)
|
|
62
|
+
.unwrap_or(365.0);
|
|
63
|
+
|
|
64
|
+
// Feature 7: Project type score (higher for active project types)
|
|
65
|
+
let project_type_score = match project.project_type.as_str() {
|
|
66
|
+
"react" | "typescript" | "nextjs" => 1.0,
|
|
67
|
+
"node" => 0.8,
|
|
68
|
+
_ => 0.5,
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// Feature 8: Dependency count (more deps = more likely to need packages)
|
|
72
|
+
let dep_score = (project.dependency_count as f64 / 100.0).min(1.0);
|
|
73
|
+
|
|
74
|
+
// Feature 9: Days since last build (from behavior)
|
|
75
|
+
let behavior_days_since_build = behavior.days_since_last_build
|
|
76
|
+
.map(|d| d as f64)
|
|
77
|
+
.unwrap_or(365.0);
|
|
78
|
+
|
|
79
|
+
// Feature 10: File access frequency
|
|
80
|
+
let file_access_score = (behavior.file_access_frequency as f64 / 1000.0).min(1.0);
|
|
81
|
+
|
|
82
|
+
vec![
|
|
83
|
+
days_since_access,
|
|
84
|
+
days_since_script,
|
|
85
|
+
days_since_build,
|
|
86
|
+
access_frequency,
|
|
87
|
+
script_frequency,
|
|
88
|
+
days_since_commit,
|
|
89
|
+
project_type_score,
|
|
90
|
+
dep_score,
|
|
91
|
+
behavior_days_since_build,
|
|
92
|
+
file_access_score,
|
|
93
|
+
]
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/// Predict whether package should be kept (binary classification)
|
|
97
|
+
/// Returns true if package is likely needed in the next prediction_window_days
|
|
98
|
+
pub fn predict_keep(
|
|
99
|
+
&self,
|
|
100
|
+
metrics: &PackageUsageMetrics,
|
|
101
|
+
project: &ProjectMetadata,
|
|
102
|
+
behavior: &DeveloperBehavior,
|
|
103
|
+
) -> bool {
|
|
104
|
+
let features = self.extract_features(metrics, project, behavior);
|
|
105
|
+
|
|
106
|
+
// Simple rule-based classifier (can be replaced with actual ML model)
|
|
107
|
+
// This implements a heuristic that mimics what a trained model would do
|
|
108
|
+
|
|
109
|
+
// Rule 1: Recently accessed packages are likely needed
|
|
110
|
+
let days_since_access = features[0];
|
|
111
|
+
if days_since_access < 7.0 {
|
|
112
|
+
return true; // Keep if accessed in last week
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Rule 2: Recently used in scripts
|
|
116
|
+
let days_since_script = features[1];
|
|
117
|
+
if days_since_script < 14.0 {
|
|
118
|
+
return true; // Keep if used in script in last 2 weeks
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// Rule 3: Recently built successfully
|
|
122
|
+
let days_since_build = features[2];
|
|
123
|
+
if days_since_build < 30.0 {
|
|
124
|
+
return true; // Keep if built in last month
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// Rule 4: High access frequency
|
|
128
|
+
let access_frequency = features[3];
|
|
129
|
+
if access_frequency > 0.5 {
|
|
130
|
+
return true; // Keep if frequently accessed
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// Rule 5: Active project with recent commits
|
|
134
|
+
let days_since_commit = features[5];
|
|
135
|
+
let project_type_score = features[6];
|
|
136
|
+
if days_since_commit < 30.0 && project_type_score > 0.7 {
|
|
137
|
+
return true; // Keep if project is active
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Rule 6: Weighted score combining all features
|
|
141
|
+
// This is a simplified logistic regression-like decision
|
|
142
|
+
let score = self.compute_keep_score(&features);
|
|
143
|
+
score > 0.5
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/// Compute a keep score (0.0 to 1.0) based on features
|
|
147
|
+
/// This mimics a logistic regression output
|
|
148
|
+
fn compute_keep_score(&self, features: &[f64]) -> f64 {
|
|
149
|
+
// Weighted combination of features (weights learned from training data in real ML)
|
|
150
|
+
// For now, use heuristic weights
|
|
151
|
+
let weights = vec![
|
|
152
|
+
-0.1, // days_since_access (negative: more days = lower score)
|
|
153
|
+
-0.05, // days_since_script
|
|
154
|
+
-0.03, // days_since_build
|
|
155
|
+
0.3, // access_frequency (positive: more access = higher score)
|
|
156
|
+
0.2, // script_frequency
|
|
157
|
+
-0.02, // days_since_commit
|
|
158
|
+
0.15, // project_type_score
|
|
159
|
+
0.1, // dep_score
|
|
160
|
+
-0.03, // behavior_days_since_build
|
|
161
|
+
0.1, // file_access_score
|
|
162
|
+
];
|
|
163
|
+
|
|
164
|
+
let mut score = 0.5; // Base score
|
|
165
|
+
for (feature, weight) in features.iter().zip(weights.iter()) {
|
|
166
|
+
score += feature * weight;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
// Apply sigmoid-like function to bound between 0 and 1
|
|
170
|
+
1.0 / (1.0 + (-score).exp())
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
impl MlRecommender for PredictiveOptimizer {
    /// Abstains from the direct eviction question — callers of this
    /// implementation are expected to rely on `should_keep` instead.
    fn is_safe_to_evict(&self, _package_id: &str) -> Option<bool> {
        None // Use should_keep instead
    }

    /// Delegates straight to the rule-based predictor; the package id itself
    /// is not used by the prediction.
    fn should_keep(
        &self,
        _package_id: &str,
        metrics: &PackageUsageMetrics,
        project: &ProjectMetadata,
        behavior: &DeveloperBehavior,
    ) -> bool {
        self.predict_keep(metrics, project, behavior)
    }
}
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use chrono::{Duration, Utc};
|
|
3
|
+
use std::collections::{HashMap, HashSet};
|
|
4
|
+
use std::path::PathBuf;
|
|
5
|
+
|
|
6
|
+
use crate::types::{DryRunReport, PlanItem, ScanOutput, PackageUsageMetrics, ProjectMetadata, DeveloperBehavior};
|
|
7
|
+
use crate::symlink::SemanticDeduplication;
|
|
8
|
+
use crate::cache::PackageLruCache;
|
|
9
|
+
use crate::ml::{MlRecommender, PredictiveOptimizer};
|
|
10
|
+
|
|
11
|
+
/// Order in which eviction strategies are consulted when planning cleanup.
#[allow(dead_code)]
pub enum EvictionPolicy {
    /// Consult the ML predictor first, then ARC, then plain LRU recency.
    MlThenArcThenLru,
    /// Use only LRU recency.
    LruOnly,
}
|
|
16
|
+
|
|
17
|
+
/// Tunable knobs for the cleanup planner and optimization engine.
///
/// The struct-level `#[allow(dead_code)]` already applies to the fields, so
/// the per-field copies in the original were redundant and are dropped.
#[allow(dead_code)]
pub struct RulesConfig {
    /// Packages touched within this many days are preserved.
    pub preserve_days: i64,
    /// Deduplicate identical packages via symlinks.
    pub enable_symlinking: bool,
    /// Consult the ML predictor before evicting old packages.
    pub enable_ml_prediction: bool,
    /// LRU cache capacity in number of packages.
    pub lru_max_packages: usize,
    /// LRU cache capacity in total bytes.
    pub lru_max_size_bytes: u64,
}
|
|
29
|
+
|
|
30
|
+
/// Plans a conservative cleanup over a scan: marks packages that are either
/// orphaned (not referenced by any scanned project) or older than the
/// configured preservation window, then flags extra copies of duplicated
/// packages. Pure planning — nothing is deleted here.
pub fn plan_basic_cleanup(scan: &ScanOutput, cfg: &RulesConfig) -> Result<DryRunReport> {
    // Anything with an mtime before this instant counts as "old".
    let cutoff = Utc::now() - Duration::days(cfg.preserve_days);

    // (name, version) pairs referenced by at least one project.
    let mut used: HashSet<(String, String)> = HashSet::new();
    for proj in &scan.projects {
        for (n, v) in &proj.dependencies {
            used.insert((n.clone(), v.clone()));
        }
    }

    // Every filesystem location observed for each (name, version) pair;
    // used below to detect duplicates.
    let mut seen_locations: HashMap<(String, String), Vec<PathBuf>> = HashMap::new();

    let mut items: Vec<PlanItem> = Vec::new();
    for pkg in &scan.packages {
        let key = (pkg.name.clone(), pkg.version.clone());
        seen_locations.entry(key.clone()).or_default().push(PathBuf::from(&pkg.path));

        let is_orphan = !used.contains(&key);
        let is_old = pkg.mtime < cutoff;

        if is_orphan || is_old {
            items.push(PlanItem {
                target_path: pkg.path.clone(),
                estimated_size_bytes: pkg.size_bytes,
                // "orphaned" takes precedence when a package is both.
                reason: if is_orphan { "orphaned".into() } else { "old".into() },
            });
        }
    }

    // Every copy after the first of a duplicated (name, version) is flagged.
    // NOTE(review): a duplicate path may ALSO already be in `items` as
    // orphaned/old, so the same path can appear twice; and duplicates carry
    // size 0, so `total` understates reclaimable space — confirm both are
    // intended before changing.
    for (_key, paths) in seen_locations.into_iter() {
        if paths.len() > 1 {
            for p in paths.into_iter().skip(1) {
                items.push(PlanItem { target_path: p.to_string_lossy().to_string(), estimated_size_bytes: 0, reason: "duplicate".into() });
            }
        }
    }

    let total = items.iter().map(|i| i.estimated_size_bytes).sum();
    Ok(DryRunReport { items, total_estimated_bytes: total })
}
|
|
70
|
+
|
|
71
|
+
/// Optimization engine with symlinking and ML/LRU strategies
#[allow(dead_code)]
pub struct OptimizationEngine {
    /// Present only when symlink deduplication is enabled in config.
    deduplication: Option<SemanticDeduplication>,
    /// Always `Some` as constructed by `new`, despite the Option type;
    /// callers still guard on `None` defensively.
    lru_cache: Option<PackageLruCache>,
    /// Present only when ML prediction is enabled in config.
    ml_predictor: Option<PredictiveOptimizer>,
    /// Configuration the engine was built from.
    config: RulesConfig,
}
|
|
79
|
+
|
|
80
|
+
#[allow(dead_code)]
impl OptimizationEngine {
    /// Builds the engine, instantiating only the strategies enabled in `config`.
    ///
    /// # Errors
    /// Propagates failures from `SemanticDeduplication::new`.
    pub fn new(config: RulesConfig) -> Result<Self> {
        let deduplication = if config.enable_symlinking {
            Some(SemanticDeduplication::new()?)
        } else {
            None
        };

        // The LRU cache is unconditionally created even though the field is
        // an Option — downstream code still checks for None defensively.
        let lru_cache = Some(PackageLruCache::new(
            config.lru_max_packages,
            config.lru_max_size_bytes,
        ));

        let ml_predictor = if config.enable_ml_prediction {
            // The preserve window doubles as the ML prediction window.
            Some(PredictiveOptimizer::new(config.preserve_days))
        } else {
            None
        };

        Ok(Self {
            deduplication,
            lru_cache,
            ml_predictor,
            config,
        })
    }

    /// Plan cleanup with symlinking and ML/LRU optimization
    ///
    /// A package is marked for removal when it is orphaned, or when it is old
    /// AND both the ML predictor and the LRU strategy agree it can go.
    /// Duplicate copies are additionally emitted as zero-size
    /// "duplicate_symlink_candidate" items. Pure planning — nothing deleted.
    pub fn plan_optimized_cleanup(
        &mut self,
        scan: &ScanOutput,
    ) -> Result<DryRunReport> {
        let cutoff = Utc::now() - Duration::days(self.config.preserve_days);

        // Build usage metrics map from scan, keyed "name@version".
        let mut usage_map: HashMap<String, PackageUsageMetrics> = HashMap::new();
        for pkg in &scan.packages {
            let key = format!("{}@{}", pkg.name, pkg.version);
            let metrics = PackageUsageMetrics {
                package_key: key.clone(),
                last_access_time: pkg.atime,
                last_script_execution: None, // Would be populated from execution tracking
                access_count: 1, // Would be tracked over time
                script_execution_count: 0,
                last_successful_build: None,
            };
            usage_map.insert(key, metrics);
        }

        // Build project metadata map, keyed by project path.
        let mut project_map: HashMap<String, ProjectMetadata> = HashMap::new();
        for proj in &scan.projects {
            let metadata = ProjectMetadata {
                path: proj.path.clone(),
                project_type: detect_project_type(&proj.path),
                last_commit_date: None, // Would be populated from git
                dependency_count: proj.dependencies.len(),
                last_modified: proj.mtime,
            };
            project_map.insert(proj.path.clone(), metadata);
        }

        // (name, version) pairs referenced by at least one project.
        let mut used: HashSet<(String, String)> = HashSet::new();
        for proj in &scan.projects {
            for (n, v) in &proj.dependencies {
                used.insert((n.clone(), v.clone()));
            }
        }

        let mut seen_locations: HashMap<(String, String), Vec<PathBuf>> = HashMap::new();
        let mut items: Vec<PlanItem> = Vec::new();
        let mut symlink_candidates: Vec<(PathBuf, String, String)> = Vec::new();

        for pkg in &scan.packages {
            let key = (pkg.name.clone(), pkg.version.clone());
            seen_locations.entry(key.clone()).or_default().push(PathBuf::from(&pkg.path));

            let package_key = format!("{}@{}", pkg.name, pkg.version);
            let is_orphan = !used.contains(&key);
            let is_old = pkg.mtime < cutoff;

            // Record access in LRU cache
            if let Some(ref mut cache) = self.lru_cache {
                cache.record_access(&package_key, pkg.size_bytes);
            }

            // Check ML prediction; defaults to "keep" whenever the predictor
            // is disabled or supporting metadata is missing (conservative).
            let should_keep_ml = if let Some(ref predictor) = self.ml_predictor {
                if let (Some(metrics), Some(proj_path)) = (usage_map.get(&package_key), pkg.project_paths.first()) {
                    if let Some(project_meta) = project_map.get(proj_path) {
                        let behavior = DeveloperBehavior {
                            npm_commands_executed: Vec::new(), // Would be populated from tracking
                            file_access_frequency: 0,
                            days_since_last_build: None,
                        };
                        predictor.should_keep(&package_key, metrics, project_meta, &behavior)
                    } else {
                        true // Conservative: keep if no project metadata
                    }
                } else {
                    true
                }
            } else {
                true
            };

            // Check LRU strategy (defaults to "keep" without a cache).
            let should_keep_lru = if let Some(ref mut cache) = self.lru_cache {
                cache.should_keep_lru(&package_key, self.config.preserve_days)
            } else {
                true
            };

            // Removal requires orphaned, or old with BOTH strategies agreeing.
            // NOTE(review): when ML is disabled, should_keep_ml is always
            // true, so "old" alone never triggers removal here; and because a
            // non-orphan removal requires !should_keep_ml, the "old" reason
            // branch below is unreachable — confirm both are intended.
            if is_orphan || (is_old && !should_keep_ml && !should_keep_lru) {
                items.push(PlanItem {
                    target_path: pkg.path.clone(),
                    estimated_size_bytes: pkg.size_bytes,
                    reason: if is_orphan {
                        "orphaned".into()
                    } else if !should_keep_ml {
                        "ml_predicted_unused".into()
                    } else {
                        "old".into()
                    },
                });
            }

            // Collect symlink candidates (duplicates). The current package
            // was already inserted into seen_locations above, so only the
            // second and later copies of a (name, version) are flagged —
            // the first occurrence stays canonical.
            if let Some(ref _dedup) = self.deduplication {
                if seen_locations.get(&key).map(|v| v.len()).unwrap_or(0) > 1 {
                    symlink_candidates.push((PathBuf::from(&pkg.path), pkg.name.clone(), pkg.version.clone()));
                }
            }
        }

        // Process symlink candidates (in dry run, just mark them)
        for (path, _name, _version) in symlink_candidates {
            items.push(PlanItem {
                target_path: path.to_string_lossy().to_string(),
                estimated_size_bytes: 0,
                reason: "duplicate_symlink_candidate".into(),
            });
        }

        let total = items.iter().map(|i| i.estimated_size_bytes).sum();
        Ok(DryRunReport { items, total_estimated_bytes: total })
    }

    /// Execute symlinking for duplicate packages
    ///
    /// The first scanned copy of each (name, version) is kept as canonical;
    /// every later copy is handed to the deduplicator. Individual failures
    /// are logged to stderr and skipped, not fatal. Returns the number of
    /// packages successfully symlinked (0 when symlinking is disabled).
    pub fn execute_symlinking(&self, scan: &ScanOutput) -> Result<usize> {
        if let Some(ref dedup) = self.deduplication {
            let mut seen: HashMap<(String, String), PathBuf> = HashMap::new();
            let mut symlinked_count = 0;

            for pkg in &scan.packages {
                let key = (pkg.name.clone(), pkg.version.clone());

                // Keep first occurrence as canonical
                let canonical = seen.entry(key.clone()).or_insert_with(|| PathBuf::from(&pkg.path));

                // Symlink duplicates
                if canonical.to_string_lossy() != pkg.path {
                    let pkg_path = PathBuf::from(&pkg.path);
                    if let Err(e) = dedup.deduplicate_package(&pkg_path, &pkg.name, &pkg.version) {
                        eprintln!("Failed to symlink {:?}: {}", pkg_path, e);
                    } else {
                        symlinked_count += 1;
                    }
                }
            }

            Ok(symlinked_count)
        } else {
            Ok(0)
        }
    }
}
|
|
259
|
+
|
|
260
|
+
fn detect_project_type(project_path: &str) -> String {
|
|
261
|
+
use std::fs;
|
|
262
|
+
use std::path::Path;
|
|
263
|
+
|
|
264
|
+
let path = Path::new(project_path);
|
|
265
|
+
let package_json = path.join("package.json");
|
|
266
|
+
|
|
267
|
+
// Check package.json for project type indicators
|
|
268
|
+
if package_json.exists() {
|
|
269
|
+
if let Ok(content) = fs::read_to_string(&package_json) {
|
|
270
|
+
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
|
|
271
|
+
// Check for framework-specific dependencies
|
|
272
|
+
if let Some(deps) = json.get("dependencies").and_then(|d| d.as_object()) {
|
|
273
|
+
if deps.contains_key("react") || deps.contains_key("next") {
|
|
274
|
+
return "react".into();
|
|
275
|
+
}
|
|
276
|
+
if deps.contains_key("vue") || deps.contains_key("nuxt") {
|
|
277
|
+
return "vue".into();
|
|
278
|
+
}
|
|
279
|
+
if deps.contains_key("angular") || deps.contains_key("@angular/core") {
|
|
280
|
+
return "angular".into();
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
// Check devDependencies
|
|
285
|
+
if let Some(dev_deps) = json.get("devDependencies").and_then(|d| d.as_object()) {
|
|
286
|
+
if dev_deps.contains_key("typescript") || dev_deps.contains_key("tsc") {
|
|
287
|
+
return "typescript".into();
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
// Check for TypeScript config files
|
|
294
|
+
if path.join("tsconfig.json").exists() {
|
|
295
|
+
return "typescript".into();
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
// Check for Next.js
|
|
299
|
+
if path.join("next.config.js").exists() || path.join("next.config.ts").exists() {
|
|
300
|
+
return "nextjs".into();
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
// Check path-based heuristics as fallback
|
|
304
|
+
let path_lower = project_path.to_lowercase();
|
|
305
|
+
if path_lower.contains("react") || path_lower.contains("next") {
|
|
306
|
+
return "react".into();
|
|
307
|
+
}
|
|
308
|
+
if path_lower.contains("typescript") || path_lower.contains("ts") {
|
|
309
|
+
return "typescript".into();
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
"node".into()
|
|
314
|
+
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
use anyhow::{Context, Result};
|
|
2
|
+
use chrono::Utc;
|
|
3
|
+
use sha2::{Digest, Sha256};
|
|
4
|
+
use std::{fs, path::{Path, PathBuf}};
|
|
5
|
+
|
|
6
|
+
use crate::types::QuarantineRecord;
|
|
7
|
+
|
|
8
|
+
fn quarantine_dir() -> PathBuf {
|
|
9
|
+
let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
|
|
10
|
+
home.join(".packagepurge").join("quarantine")
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
/// Path of the JSON index file that tracks quarantined entries.
fn index_path() -> PathBuf {
    quarantine_dir().join("index.json")
}
|
|
16
|
+
|
|
17
|
+
fn read_index() -> Vec<QuarantineRecord> {
|
|
18
|
+
let p = index_path();
|
|
19
|
+
if let Ok(text) = fs::read_to_string(&p) {
|
|
20
|
+
if let Ok(list) = serde_json::from_str::<Vec<QuarantineRecord>>(&text) { return list; }
|
|
21
|
+
}
|
|
22
|
+
Vec::new()
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
fn write_index(mut list: Vec<QuarantineRecord>) -> Result<()> {
|
|
26
|
+
// keep only recent N entries (e.g., 200) to bound file size
|
|
27
|
+
if list.len() > 200 { let keep = list.split_off(list.len() - 200); list = keep; }
|
|
28
|
+
let data = serde_json::to_string_pretty(&list)?;
|
|
29
|
+
fs::write(index_path(), data).context("Failed to write quarantine index")?;
|
|
30
|
+
Ok(())
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
fn sha256_dir(path: &Path) -> Result<(String, u64)> {
|
|
34
|
+
let mut hasher = Sha256::new();
|
|
35
|
+
let mut total: u64 = 0;
|
|
36
|
+
for entry in walkdir::WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
|
|
37
|
+
let p = entry.path();
|
|
38
|
+
hasher.update(p.to_string_lossy().as_bytes());
|
|
39
|
+
if entry.file_type().is_file() {
|
|
40
|
+
let data = fs::read(p)?;
|
|
41
|
+
total += data.len() as u64;
|
|
42
|
+
hasher.update(&data);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
Ok((hex::encode(hasher.finalize()), total))
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/// Moves `target` into the quarantine directory and records it in the index.
///
/// Prefers an atomic `rename`; on failure (e.g. crossing filesystems) falls
/// back to copy-then-delete. The checksum is computed BEFORE the move, so it
/// reflects the original tree.
///
/// # Errors
/// Fails if checksumming, the fallback copy, the post-copy removal of the
/// original, or the index write fails.
pub fn move_to_quarantine(target: &Path) -> Result<QuarantineRecord> {
    let qdir = quarantine_dir();
    // Best-effort create; a failure here surfaces as a rename/copy error below.
    fs::create_dir_all(&qdir).ok();
    // Nanosecond timestamp used as a unique id.
    // NOTE(review): timestamp_nanos_opt() returns None for dates past
    // ~year 2262, making every id "0" (collisions) — confirm acceptable.
    let id = format!("{}", Utc::now().timestamp_nanos_opt().unwrap_or(0));
    let (checksum, size) = sha256_dir(target)?;
    let qpath = qdir.join(format!("{}_{}", id, target.file_name().unwrap_or_default().to_string_lossy()));
    if let Err(e) = fs::rename(target, &qpath) {
        // Handle cross-device link errors (os error 17 or 18 on Unix, or similar on Windows)
        // We simply try copy-and-delete as fallback for any rename failure
        // NOTE(review): with content_only(true), fs_extra copies the contents
        // of `target` into `qpath` — confirm fs_extra creates `qpath` itself
        // in this mode, since it does not exist yet at this point.
        if let Err(copy_err) = fs_extra::dir::copy(target, &qpath, &fs_extra::dir::CopyOptions::new().content_only(true)) {
            return Err(anyhow::anyhow!("Failed to move {:?} to quarantine (rename failed: {}, copy failed: {})", target, e, copy_err));
        }
        if let Err(rm_err) = fs::remove_dir_all(target) {
            // If we can't remove original, we should probably clean up the quarantine copy
            fs::remove_dir_all(&qpath).ok();
            return Err(anyhow::anyhow!("Failed to remove original {:?} after copy to quarantine: {}", target, rm_err));
        }
    }
    let rec = QuarantineRecord {
        id,
        original_path: target.to_string_lossy().to_string(),
        quarantine_path: qpath.to_string_lossy().to_string(),
        sha256: checksum,
        size_bytes: size,
        created_at: Utc::now(),
    };
    // Append the new record and persist the (size-bounded) index.
    let mut list = read_index();
    list.push(rec.clone());
    write_index(list)?;
    Ok(rec)
}
|
|
79
|
+
|
|
80
|
+
/// Returns every record currently in the quarantine index.
#[allow(dead_code)]
pub fn list_quarantine() -> Vec<QuarantineRecord> { read_index() }
|
|
82
|
+
|
|
83
|
+
pub fn latest_quarantine() -> Option<QuarantineRecord> {
|
|
84
|
+
let mut list = read_index();
|
|
85
|
+
list.sort_by_key(|r| r.created_at);
|
|
86
|
+
list.pop()
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/// Looks up a quarantine record by its id, if present in the index.
pub fn find_quarantine_by_id(id: &str) -> Option<QuarantineRecord> {
    read_index().into_iter().find(|r| r.id == id)
}
|
|
92
|
+
|
|
93
|
+
/// Restores a quarantined entry to its original location and drops it from
/// the index.
///
/// # Errors
/// Fails if the rename back to the original path fails (e.g. the quarantine
/// copy is gone, or the destination is on another filesystem — unlike
/// `move_to_quarantine` there is no copy fallback here) or if the index
/// rewrite fails.
pub fn rollback_record(rec: &QuarantineRecord) -> Result<()> {
    let orig = PathBuf::from(&rec.original_path);
    let q = PathBuf::from(&rec.quarantine_path);
    // Best-effort: ensure the destination's parent directory exists.
    if let Some(parent) = orig.parent() { fs::create_dir_all(parent).ok(); }
    fs::rename(&q, &orig).with_context(|| "Failed to rollback from quarantine")?;
    // remove from index
    let mut list = read_index();
    list.retain(|r| r.id != rec.id);
    write_index(list)?;
    Ok(())
}
|