packagepurge 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/src/ml.rs DELETED
@@ -1,188 +0,0 @@
1
- use chrono::Utc;
2
- use crate::types::{PackageUsageMetrics, ProjectMetadata, DeveloperBehavior};
3
-
4
- #[allow(dead_code)]
5
- pub trait MlRecommender {
6
- fn is_safe_to_evict(&self, package_id: &str) -> Option<bool>;
7
- fn should_keep(&self, package_id: &str, metrics: &PackageUsageMetrics, project: &ProjectMetadata, behavior: &DeveloperBehavior) -> bool;
8
- }
9
-
10
- #[allow(dead_code)]
11
- pub struct NoopRecommender;
12
- impl MlRecommender for NoopRecommender {
13
- fn is_safe_to_evict(&self, _package_id: &str) -> Option<bool> { None }
14
- fn should_keep(&self, _package_id: &str, _metrics: &PackageUsageMetrics, _project: &ProjectMetadata, _behavior: &DeveloperBehavior) -> bool {
15
- true // Conservative: keep by default
16
- }
17
- }
18
-
19
- /// Predictive Optimizer using rule-based ML (can be extended with actual ML models)
20
- #[allow(dead_code)]
21
- pub struct PredictiveOptimizer {
22
- /// Keep threshold in days (packages used within this period are likely needed)
23
- prediction_window_days: i64,
24
- }
25
-
26
- impl PredictiveOptimizer {
27
- pub fn new(prediction_window_days: i64) -> Self {
28
- Self { prediction_window_days }
29
- }
30
-
31
- /// Extract features from package metadata for ML prediction
32
- fn extract_features(
33
- &self,
34
- metrics: &PackageUsageMetrics,
35
- project: &ProjectMetadata,
36
- behavior: &DeveloperBehavior,
37
- ) -> Vec<f64> {
38
- let now = Utc::now();
39
-
40
- // Feature 1: Days since last access
41
- let days_since_access = (now - metrics.last_access_time).num_days() as f64;
42
-
43
- // Feature 2: Days since last script execution
44
- let days_since_script = metrics.last_script_execution
45
- .map(|t| (now - t).num_days() as f64)
46
- .unwrap_or(365.0); // High value if never executed
47
-
48
- // Feature 3: Days since last successful build
49
- let days_since_build = metrics.last_successful_build
50
- .map(|t| (now - t).num_days() as f64)
51
- .unwrap_or(365.0);
52
-
53
- // Feature 4: Access frequency (normalized)
54
- let access_frequency = metrics.access_count as f64 / 100.0; // Normalize
55
-
56
- // Feature 5: Script execution frequency
57
- let script_frequency = metrics.script_execution_count as f64 / 10.0;
58
-
59
- // Feature 6: Project activity (days since last commit)
60
- let days_since_commit = project.last_commit_date
61
- .map(|t| (now - t).num_days() as f64)
62
- .unwrap_or(365.0);
63
-
64
- // Feature 7: Project type score (higher for active project types)
65
- let project_type_score = match project.project_type.as_str() {
66
- "react" | "typescript" | "nextjs" => 1.0,
67
- "node" => 0.8,
68
- _ => 0.5,
69
- };
70
-
71
- // Feature 8: Dependency count (more deps = more likely to need packages)
72
- let dep_score = (project.dependency_count as f64 / 100.0).min(1.0);
73
-
74
- // Feature 9: Days since last build (from behavior)
75
- let behavior_days_since_build = behavior.days_since_last_build
76
- .map(|d| d as f64)
77
- .unwrap_or(365.0);
78
-
79
- // Feature 10: File access frequency
80
- let file_access_score = (behavior.file_access_frequency as f64 / 1000.0).min(1.0);
81
-
82
- vec![
83
- days_since_access,
84
- days_since_script,
85
- days_since_build,
86
- access_frequency,
87
- script_frequency,
88
- days_since_commit,
89
- project_type_score,
90
- dep_score,
91
- behavior_days_since_build,
92
- file_access_score,
93
- ]
94
- }
95
-
96
- /// Predict whether package should be kept (binary classification)
97
- /// Returns true if package is likely needed in the next prediction_window_days
98
- pub fn predict_keep(
99
- &self,
100
- metrics: &PackageUsageMetrics,
101
- project: &ProjectMetadata,
102
- behavior: &DeveloperBehavior,
103
- ) -> bool {
104
- let features = self.extract_features(metrics, project, behavior);
105
-
106
- // Simple rule-based classifier (can be replaced with actual ML model)
107
- // This implements a heuristic that mimics what a trained model would do
108
-
109
- // Rule 1: Recently accessed packages are likely needed
110
- let days_since_access = features[0];
111
- if days_since_access < 7.0 {
112
- return true; // Keep if accessed in last week
113
- }
114
-
115
- // Rule 2: Recently used in scripts
116
- let days_since_script = features[1];
117
- if days_since_script < 14.0 {
118
- return true; // Keep if used in script in last 2 weeks
119
- }
120
-
121
- // Rule 3: Recently built successfully
122
- let days_since_build = features[2];
123
- if days_since_build < 30.0 {
124
- return true; // Keep if built in last month
125
- }
126
-
127
- // Rule 4: High access frequency
128
- let access_frequency = features[3];
129
- if access_frequency > 0.5 {
130
- return true; // Keep if frequently accessed
131
- }
132
-
133
- // Rule 5: Active project with recent commits
134
- let days_since_commit = features[5];
135
- let project_type_score = features[6];
136
- if days_since_commit < 30.0 && project_type_score > 0.7 {
137
- return true; // Keep if project is active
138
- }
139
-
140
- // Rule 6: Weighted score combining all features
141
- // This is a simplified logistic regression-like decision
142
- let score = self.compute_keep_score(&features);
143
- score > 0.5
144
- }
145
-
146
- /// Compute a keep score (0.0 to 1.0) based on features
147
- /// This mimics a logistic regression output
148
- fn compute_keep_score(&self, features: &[f64]) -> f64 {
149
- // Weighted combination of features (weights learned from training data in real ML)
150
- // For now, use heuristic weights
151
- let weights = vec![
152
- -0.1, // days_since_access (negative: more days = lower score)
153
- -0.05, // days_since_script
154
- -0.03, // days_since_build
155
- 0.3, // access_frequency (positive: more access = higher score)
156
- 0.2, // script_frequency
157
- -0.02, // days_since_commit
158
- 0.15, // project_type_score
159
- 0.1, // dep_score
160
- -0.03, // behavior_days_since_build
161
- 0.1, // file_access_score
162
- ];
163
-
164
- let mut score = 0.5; // Base score
165
- for (feature, weight) in features.iter().zip(weights.iter()) {
166
- score += feature * weight;
167
- }
168
-
169
- // Apply sigmoid-like function to bound between 0 and 1
170
- 1.0 / (1.0 + (-score).exp())
171
- }
172
- }
173
-
174
- impl MlRecommender for PredictiveOptimizer {
175
- fn is_safe_to_evict(&self, _package_id: &str) -> Option<bool> {
176
- None // Use should_keep instead
177
- }
178
-
179
- fn should_keep(
180
- &self,
181
- _package_id: &str,
182
- metrics: &PackageUsageMetrics,
183
- project: &ProjectMetadata,
184
- behavior: &DeveloperBehavior,
185
- ) -> bool {
186
- self.predict_keep(metrics, project, behavior)
187
- }
188
- }
@@ -1,314 +0,0 @@
1
- use anyhow::Result;
2
- use chrono::{Duration, Utc};
3
- use std::collections::{HashMap, HashSet};
4
- use std::path::PathBuf;
5
-
6
- use crate::types::{DryRunReport, PlanItem, ScanOutput, PackageUsageMetrics, ProjectMetadata, DeveloperBehavior};
7
- use crate::symlink::SemanticDeduplication;
8
- use crate::cache::PackageLruCache;
9
- use crate::ml::{MlRecommender, PredictiveOptimizer};
10
-
11
/// Eviction strategy selector.
///
/// NOTE(review): nothing in the visible source constructs or matches on this
/// enum; the variant descriptions below are read off the names only.
#[allow(dead_code)]
pub enum EvictionPolicy {
    /// Presumably: try the ML recommender, then ARC, then LRU — confirm.
    MlThenArcThenLru,
    /// Presumably: rely on LRU alone — confirm.
    LruOnly,
}
16
-
17
/// Settings that drive cleanup planning.
// The struct-level `allow` already covers every field, so the per-field
// attributes are not repeated.
#[allow(dead_code)]
pub struct RulesConfig {
    /// Packages whose mtime is older than this many days count as "old".
    pub preserve_days: i64,
    /// When set, `OptimizationEngine::new` creates the deduplication helper.
    pub enable_symlinking: bool,
    /// When set, `OptimizationEngine::new` creates the predictive optimizer.
    pub enable_ml_prediction: bool,
    /// First argument handed to `PackageLruCache::new`.
    pub lru_max_packages: usize,
    /// Second argument handed to `PackageLruCache::new`.
    pub lru_max_size_bytes: u64,
}
29
-
30
- pub fn plan_basic_cleanup(scan: &ScanOutput, cfg: &RulesConfig) -> Result<DryRunReport> {
31
- let cutoff = Utc::now() - Duration::days(cfg.preserve_days);
32
-
33
- let mut used: HashSet<(String, String)> = HashSet::new();
34
- for proj in &scan.projects {
35
- for (n, v) in &proj.dependencies {
36
- used.insert((n.clone(), v.clone()));
37
- }
38
- }
39
-
40
- let mut seen_locations: HashMap<(String, String), Vec<PathBuf>> = HashMap::new();
41
-
42
- let mut items: Vec<PlanItem> = Vec::new();
43
- for pkg in &scan.packages {
44
- let key = (pkg.name.clone(), pkg.version.clone());
45
- seen_locations.entry(key.clone()).or_default().push(PathBuf::from(&pkg.path));
46
-
47
- let is_orphan = !used.contains(&key);
48
- let is_old = pkg.mtime < cutoff;
49
-
50
- if is_orphan || is_old {
51
- items.push(PlanItem {
52
- target_path: pkg.path.clone(),
53
- estimated_size_bytes: pkg.size_bytes,
54
- reason: if is_orphan { "orphaned".into() } else { "old".into() },
55
- });
56
- }
57
- }
58
-
59
- for (_key, paths) in seen_locations.into_iter() {
60
- if paths.len() > 1 {
61
- for p in paths.into_iter().skip(1) {
62
- items.push(PlanItem { target_path: p.to_string_lossy().to_string(), estimated_size_bytes: 0, reason: "duplicate".into() });
63
- }
64
- }
65
- }
66
-
67
- let total = items.iter().map(|i| i.estimated_size_bytes).sum();
68
- Ok(DryRunReport { items, total_estimated_bytes: total })
69
- }
70
-
71
- /// Optimization engine with symlinking and ML/LRU strategies
72
- #[allow(dead_code)]
73
- pub struct OptimizationEngine {
74
- deduplication: Option<SemanticDeduplication>,
75
- lru_cache: Option<PackageLruCache>,
76
- ml_predictor: Option<PredictiveOptimizer>,
77
- config: RulesConfig,
78
- }
79
-
80
- #[allow(dead_code)]
81
- impl OptimizationEngine {
82
- pub fn new(config: RulesConfig) -> Result<Self> {
83
- let deduplication = if config.enable_symlinking {
84
- Some(SemanticDeduplication::new()?)
85
- } else {
86
- None
87
- };
88
-
89
- let lru_cache = Some(PackageLruCache::new(
90
- config.lru_max_packages,
91
- config.lru_max_size_bytes,
92
- ));
93
-
94
- let ml_predictor = if config.enable_ml_prediction {
95
- Some(PredictiveOptimizer::new(config.preserve_days))
96
- } else {
97
- None
98
- };
99
-
100
- Ok(Self {
101
- deduplication,
102
- lru_cache,
103
- ml_predictor,
104
- config,
105
- })
106
- }
107
-
108
- /// Plan cleanup with symlinking and ML/LRU optimization
109
- pub fn plan_optimized_cleanup(
110
- &mut self,
111
- scan: &ScanOutput,
112
- ) -> Result<DryRunReport> {
113
- let cutoff = Utc::now() - Duration::days(self.config.preserve_days);
114
-
115
- // Build usage metrics map from scan
116
- let mut usage_map: HashMap<String, PackageUsageMetrics> = HashMap::new();
117
- for pkg in &scan.packages {
118
- let key = format!("{}@{}", pkg.name, pkg.version);
119
- let metrics = PackageUsageMetrics {
120
- package_key: key.clone(),
121
- last_access_time: pkg.atime,
122
- last_script_execution: None, // Would be populated from execution tracking
123
- access_count: 1, // Would be tracked over time
124
- script_execution_count: 0,
125
- last_successful_build: None,
126
- };
127
- usage_map.insert(key, metrics);
128
- }
129
-
130
- // Build project metadata map
131
- let mut project_map: HashMap<String, ProjectMetadata> = HashMap::new();
132
- for proj in &scan.projects {
133
- let metadata = ProjectMetadata {
134
- path: proj.path.clone(),
135
- project_type: detect_project_type(&proj.path),
136
- last_commit_date: None, // Would be populated from git
137
- dependency_count: proj.dependencies.len(),
138
- last_modified: proj.mtime,
139
- };
140
- project_map.insert(proj.path.clone(), metadata);
141
- }
142
-
143
- let mut used: HashSet<(String, String)> = HashSet::new();
144
- for proj in &scan.projects {
145
- for (n, v) in &proj.dependencies {
146
- used.insert((n.clone(), v.clone()));
147
- }
148
- }
149
-
150
- let mut seen_locations: HashMap<(String, String), Vec<PathBuf>> = HashMap::new();
151
- let mut items: Vec<PlanItem> = Vec::new();
152
- let mut symlink_candidates: Vec<(PathBuf, String, String)> = Vec::new();
153
-
154
- for pkg in &scan.packages {
155
- let key = (pkg.name.clone(), pkg.version.clone());
156
- seen_locations.entry(key.clone()).or_default().push(PathBuf::from(&pkg.path));
157
-
158
- let package_key = format!("{}@{}", pkg.name, pkg.version);
159
- let is_orphan = !used.contains(&key);
160
- let is_old = pkg.mtime < cutoff;
161
-
162
- // Record access in LRU cache
163
- if let Some(ref mut cache) = self.lru_cache {
164
- cache.record_access(&package_key, pkg.size_bytes);
165
- }
166
-
167
- // Check ML prediction
168
- let should_keep_ml = if let Some(ref predictor) = self.ml_predictor {
169
- if let (Some(metrics), Some(proj_path)) = (usage_map.get(&package_key), pkg.project_paths.first()) {
170
- if let Some(project_meta) = project_map.get(proj_path) {
171
- let behavior = DeveloperBehavior {
172
- npm_commands_executed: Vec::new(), // Would be populated from tracking
173
- file_access_frequency: 0,
174
- days_since_last_build: None,
175
- };
176
- predictor.should_keep(&package_key, metrics, project_meta, &behavior)
177
- } else {
178
- true // Conservative: keep if no project metadata
179
- }
180
- } else {
181
- true
182
- }
183
- } else {
184
- true
185
- };
186
-
187
- // Check LRU strategy
188
- let should_keep_lru = if let Some(ref mut cache) = self.lru_cache {
189
- cache.should_keep_lru(&package_key, self.config.preserve_days)
190
- } else {
191
- true
192
- };
193
-
194
- // Determine if package should be removed
195
- if is_orphan || (is_old && !should_keep_ml && !should_keep_lru) {
196
- items.push(PlanItem {
197
- target_path: pkg.path.clone(),
198
- estimated_size_bytes: pkg.size_bytes,
199
- reason: if is_orphan {
200
- "orphaned".into()
201
- } else if !should_keep_ml {
202
- "ml_predicted_unused".into()
203
- } else {
204
- "old".into()
205
- },
206
- });
207
- }
208
-
209
- // Collect symlink candidates (duplicates)
210
- if let Some(ref _dedup) = self.deduplication {
211
- if seen_locations.get(&key).map(|v| v.len()).unwrap_or(0) > 1 {
212
- symlink_candidates.push((PathBuf::from(&pkg.path), pkg.name.clone(), pkg.version.clone()));
213
- }
214
- }
215
- }
216
-
217
- // Process symlink candidates (in dry run, just mark them)
218
- for (path, _name, _version) in symlink_candidates {
219
- items.push(PlanItem {
220
- target_path: path.to_string_lossy().to_string(),
221
- estimated_size_bytes: 0,
222
- reason: "duplicate_symlink_candidate".into(),
223
- });
224
- }
225
-
226
- let total = items.iter().map(|i| i.estimated_size_bytes).sum();
227
- Ok(DryRunReport { items, total_estimated_bytes: total })
228
- }
229
-
230
- /// Execute symlinking for duplicate packages
231
- pub fn execute_symlinking(&self, scan: &ScanOutput) -> Result<usize> {
232
- if let Some(ref dedup) = self.deduplication {
233
- let mut seen: HashMap<(String, String), PathBuf> = HashMap::new();
234
- let mut symlinked_count = 0;
235
-
236
- for pkg in &scan.packages {
237
- let key = (pkg.name.clone(), pkg.version.clone());
238
-
239
- // Keep first occurrence as canonical
240
- let canonical = seen.entry(key.clone()).or_insert_with(|| PathBuf::from(&pkg.path));
241
-
242
- // Symlink duplicates
243
- if canonical.to_string_lossy() != pkg.path {
244
- let pkg_path = PathBuf::from(&pkg.path);
245
- if let Err(e) = dedup.deduplicate_package(&pkg_path, &pkg.name, &pkg.version) {
246
- eprintln!("Failed to symlink {:?}: {}", pkg_path, e);
247
- } else {
248
- symlinked_count += 1;
249
- }
250
- }
251
- }
252
-
253
- Ok(symlinked_count)
254
- } else {
255
- Ok(0)
256
- }
257
- }
258
- }
259
-
260
- fn detect_project_type(project_path: &str) -> String {
261
- use std::fs;
262
- use std::path::Path;
263
-
264
- let path = Path::new(project_path);
265
- let package_json = path.join("package.json");
266
-
267
- // Check package.json for project type indicators
268
- if package_json.exists() {
269
- if let Ok(content) = fs::read_to_string(&package_json) {
270
- if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
271
- // Check for framework-specific dependencies
272
- if let Some(deps) = json.get("dependencies").and_then(|d| d.as_object()) {
273
- if deps.contains_key("react") || deps.contains_key("next") {
274
- return "react".into();
275
- }
276
- if deps.contains_key("vue") || deps.contains_key("nuxt") {
277
- return "vue".into();
278
- }
279
- if deps.contains_key("angular") || deps.contains_key("@angular/core") {
280
- return "angular".into();
281
- }
282
- }
283
-
284
- // Check devDependencies
285
- if let Some(dev_deps) = json.get("devDependencies").and_then(|d| d.as_object()) {
286
- if dev_deps.contains_key("typescript") || dev_deps.contains_key("tsc") {
287
- return "typescript".into();
288
- }
289
- }
290
- }
291
- }
292
-
293
- // Check for TypeScript config files
294
- if path.join("tsconfig.json").exists() {
295
- return "typescript".into();
296
- }
297
-
298
- // Check for Next.js
299
- if path.join("next.config.js").exists() || path.join("next.config.ts").exists() {
300
- return "nextjs".into();
301
- }
302
-
303
- // Check path-based heuristics as fallback
304
- let path_lower = project_path.to_lowercase();
305
- if path_lower.contains("react") || path_lower.contains("next") {
306
- return "react".into();
307
- }
308
- if path_lower.contains("typescript") || path_lower.contains("ts") {
309
- return "typescript".into();
310
- }
311
- }
312
-
313
- "node".into()
314
- }
@@ -1,103 +0,0 @@
1
- use anyhow::{Context, Result};
2
- use chrono::Utc;
3
- use sha2::{Digest, Sha256};
4
- use std::{fs, path::{Path, PathBuf}};
5
-
6
- use crate::types::QuarantineRecord;
7
-
8
- fn quarantine_dir() -> PathBuf {
9
- let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
10
- home.join(".packagepurge").join("quarantine")
11
- }
12
-
13
- fn index_path() -> PathBuf {
14
- quarantine_dir().join("index.json")
15
- }
16
-
17
- fn read_index() -> Vec<QuarantineRecord> {
18
- let p = index_path();
19
- if let Ok(text) = fs::read_to_string(&p) {
20
- if let Ok(list) = serde_json::from_str::<Vec<QuarantineRecord>>(&text) { return list; }
21
- }
22
- Vec::new()
23
- }
24
-
25
- fn write_index(mut list: Vec<QuarantineRecord>) -> Result<()> {
26
- // keep only recent N entries (e.g., 200) to bound file size
27
- if list.len() > 200 { let keep = list.split_off(list.len() - 200); list = keep; }
28
- let data = serde_json::to_string_pretty(&list)?;
29
- fs::write(index_path(), data).context("Failed to write quarantine index")?;
30
- Ok(())
31
- }
32
-
33
- fn sha256_dir(path: &Path) -> Result<(String, u64)> {
34
- let mut hasher = Sha256::new();
35
- let mut total: u64 = 0;
36
- for entry in walkdir::WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
37
- let p = entry.path();
38
- hasher.update(p.to_string_lossy().as_bytes());
39
- if entry.file_type().is_file() {
40
- let data = fs::read(p)?;
41
- total += data.len() as u64;
42
- hasher.update(&data);
43
- }
44
- }
45
- Ok((hex::encode(hasher.finalize()), total))
46
- }
47
-
48
- pub fn move_to_quarantine(target: &Path) -> Result<QuarantineRecord> {
49
- let qdir = quarantine_dir();
50
- fs::create_dir_all(&qdir).ok();
51
- let id = format!("{}", Utc::now().timestamp_nanos_opt().unwrap_or(0));
52
- let (checksum, size) = sha256_dir(target)?;
53
- let qpath = qdir.join(format!("{}_{}", id, target.file_name().unwrap_or_default().to_string_lossy()));
54
- if let Err(e) = fs::rename(target, &qpath) {
55
- // Handle cross-device link errors (os error 17 or 18 on Unix, or similar on Windows)
56
- // We simply try copy-and-delete as fallback for any rename failure
57
- if let Err(copy_err) = fs_extra::dir::copy(target, &qpath, &fs_extra::dir::CopyOptions::new().content_only(true)) {
58
- return Err(anyhow::anyhow!("Failed to move {:?} to quarantine (rename failed: {}, copy failed: {})", target, e, copy_err));
59
- }
60
- if let Err(rm_err) = fs::remove_dir_all(target) {
61
- // If we can't remove original, we should probably clean up the quarantine copy
62
- fs::remove_dir_all(&qpath).ok();
63
- return Err(anyhow::anyhow!("Failed to remove original {:?} after copy to quarantine: {}", target, rm_err));
64
- }
65
- }
66
- let rec = QuarantineRecord {
67
- id,
68
- original_path: target.to_string_lossy().to_string(),
69
- quarantine_path: qpath.to_string_lossy().to_string(),
70
- sha256: checksum,
71
- size_bytes: size,
72
- created_at: Utc::now(),
73
- };
74
- let mut list = read_index();
75
- list.push(rec.clone());
76
- write_index(list)?;
77
- Ok(rec)
78
- }
79
-
80
- #[allow(dead_code)]
81
- pub fn list_quarantine() -> Vec<QuarantineRecord> { read_index() }
82
-
83
- pub fn latest_quarantine() -> Option<QuarantineRecord> {
84
- let mut list = read_index();
85
- list.sort_by_key(|r| r.created_at);
86
- list.pop()
87
- }
88
-
89
- pub fn find_quarantine_by_id(id: &str) -> Option<QuarantineRecord> {
90
- read_index().into_iter().find(|r| r.id == id)
91
- }
92
-
93
- pub fn rollback_record(rec: &QuarantineRecord) -> Result<()> {
94
- let orig = PathBuf::from(&rec.original_path);
95
- let q = PathBuf::from(&rec.quarantine_path);
96
- if let Some(parent) = orig.parent() { fs::create_dir_all(parent).ok(); }
97
- fs::rename(&q, &orig).with_context(|| "Failed to rollback from quarantine")?;
98
- // remove from index
99
- let mut list = read_index();
100
- list.retain(|r| r.id != rec.id);
101
- write_index(list)?;
102
- Ok(())
103
- }