packagepurge 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/.agent/workflows/build.md +58 -0
  2. package/.github/workflows/release.yml +176 -0
  3. package/README.md +215 -49
  4. package/dist/cli/index.d.ts +1 -0
  5. package/dist/cli/index.js +122 -132
  6. package/dist/cli/index.js.map +1 -1
  7. package/dist/core/bindings.d.ts +11 -0
  8. package/dist/core/bindings.d.ts.map +1 -1
  9. package/dist/core/bindings.js +40 -94
  10. package/dist/core/bindings.js.map +1 -1
  11. package/dist/utils/core-utils.d.ts +31 -0
  12. package/dist/utils/core-utils.d.ts.map +1 -0
  13. package/dist/utils/core-utils.js +121 -0
  14. package/dist/utils/core-utils.js.map +1 -0
  15. package/dist/utils/formatter.d.ts +63 -0
  16. package/dist/utils/formatter.d.ts.map +1 -0
  17. package/dist/utils/formatter.js +295 -0
  18. package/dist/utils/formatter.js.map +1 -0
  19. package/package.json +3 -3
  20. package/core/src/arc_lfu.rs +0 -91
  21. package/core/src/cache.rs +0 -205
  22. package/core/src/lockfiles.rs +0 -112
  23. package/core/src/main.rs +0 -125
  24. package/core/src/ml.rs +0 -188
  25. package/core/src/optimization.rs +0 -314
  26. package/core/src/safety.rs +0 -103
  27. package/core/src/scanner.rs +0 -136
  28. package/core/src/symlink.rs +0 -223
  29. package/core/src/types.rs +0 -87
  30. package/core/src/usage_tracker.rs +0 -107
  31. package/src/cli/index.ts +0 -212
  32. package/src/core/bindings.ts +0 -157
  33. package/src/managers/base-manager.ts +0 -117
  34. package/src/managers/index.ts +0 -32
  35. package/src/managers/npm-manager.ts +0 -96
  36. package/src/managers/pnpm-manager.ts +0 -107
  37. package/src/managers/yarn-manager.ts +0 -112
  38. package/src/types/index.ts +0 -97
  39. package/src/utils/logger.ts +0 -50
  40. package/tsconfig.json +0 -22
package/core/src/cache.rs DELETED
@@ -1,205 +0,0 @@
1
- use std::cell::RefCell;
2
- use std::collections::HashMap;
3
- use std::hash::Hash;
4
- use std::rc::Rc;
5
- use chrono::Utc;
6
- use crate::types::PackageUsageMetrics;
7
-
8
/// One entry of the recency list.
///
/// `prev` points toward the most-recently-used end of the list and `next`
/// toward the least-recently-used end. Both links are strong `Rc`s, so two
/// adjacent nodes keep each other alive until one of the links is cleared.
struct Node<K, V> {
    key: K,
    value: V,
    prev: Option<Rc<RefCell<Node<K, V>>>>,
    next: Option<Rc<RefCell<Node<K, V>>>>,
}

/// Fixed-capacity least-recently-used cache.
///
/// Entries live in a hash map for keyed lookup and in a doubly-linked list
/// that records recency: `head` is the most recently used entry, `tail` the
/// least recently used one.
pub struct LruCache<K, V> where K: Eq + Hash + Clone {
    capacity: usize,
    map: HashMap<K, Rc<RefCell<Node<K, V>>>>,
    head: Option<Rc<RefCell<Node<K, V>>>>, // MRU
    tail: Option<Rc<RefCell<Node<K, V>>>>, // LRU
}

impl<K, V> LruCache<K, V> where K: Eq + Hash + Clone {
    /// Creates an empty cache that holds at most `capacity` entries.
    pub fn new(capacity: usize) -> Self {
        Self { capacity, map: HashMap::new(), head: None, tail: None }
    }

    /// Number of entries currently stored.
    #[allow(dead_code)]
    pub fn len(&self) -> usize { self.map.len() }

    /// Returns `true` when the cache holds no entries.
    #[allow(dead_code)]
    pub fn is_empty(&self) -> bool { self.map.is_empty() }

    /// Returns a clone of the value stored under `key` and marks the entry as
    /// most recently used. Returns `None` when the key is absent.
    pub fn get(&mut self, key: &K) -> Option<V> where V: Clone {
        let node = self.map.get(key).cloned()?;
        self.move_to_head(&node);
        let value = node.borrow().value.clone();
        Some(value)
    }

    /// Inserts or updates `key`.
    ///
    /// Updating an existing key replaces its value, marks it most recently
    /// used and returns `None`. Inserting a new key returns the evicted
    /// `(key, value)` pair when the insert pushed the cache over capacity,
    /// otherwise `None`.
    pub fn put(&mut self, key: K, value: V) -> Option<(K, V)> where V: Clone {
        if let Some(existing) = self.map.get(&key).cloned() {
            existing.borrow_mut().value = value;
            self.move_to_head(&existing);
            return None;
        }

        let node = Rc::new(RefCell::new(Node { key: key.clone(), value, prev: None, next: None }));
        self.attach_head(&node);
        self.map.insert(key, node);

        if self.map.len() <= self.capacity {
            return None;
        }

        // Over capacity: drop the least recently used entry and report it.
        let lru = self.pop_tail()?;
        let evicted_key = lru.borrow().key.clone();
        let evicted_value = lru.borrow().value.clone();
        self.map.remove(&evicted_key);
        Some((evicted_key, evicted_value))
    }

    /// Unlinks `node` from the list, repairing `head`/`tail` and the
    /// neighbours' links. The node's own links are cleared.
    fn detach(&mut self, node: &Rc<RefCell<Node<K, V>>>) {
        // Take both links first so the node's borrow ends before a neighbour
        // is borrowed.
        let (prev, next) = {
            let mut inner = node.borrow_mut();
            (inner.prev.take(), inner.next.take())
        };
        match &prev {
            Some(p) => { p.borrow_mut().next = next.clone(); }
            None => { self.head = next.clone(); }
        }
        match &next {
            Some(n) => { n.borrow_mut().prev = prev; }
            None => { self.tail = prev; }
        }
    }

    /// Links a detached `node` in front of the current head.
    fn attach_head(&mut self, node: &Rc<RefCell<Node<K, V>>>) {
        let old_head = self.head.replace(Rc::clone(node));
        if let Some(h) = &old_head {
            h.borrow_mut().prev = Some(Rc::clone(node));
        }
        {
            let mut inner = node.borrow_mut();
            inner.prev = None;
            inner.next = old_head;
        }
        if self.tail.is_none() {
            self.tail = Some(Rc::clone(node));
        }
    }

    /// Marks `node` as most recently used.
    fn move_to_head(&mut self, node: &Rc<RefCell<Node<K, V>>>) {
        self.detach(node);
        self.attach_head(node);
    }

    /// Unlinks and returns the least recently used node, if any.
    fn pop_tail(&mut self) -> Option<Rc<RefCell<Node<K, V>>>> {
        let tail = self.tail.clone()?;
        self.detach(&tail);
        Some(tail)
    }
}

impl<K, V> Drop for LruCache<K, V> where K: Eq + Hash + Clone {
    /// Breaks the `prev`/`next` reference cycles between list nodes.
    ///
    /// Without this, neighbouring nodes hold strong references to each other
    /// and are never freed once the cache itself goes away.
    fn drop(&mut self) {
        self.tail = None;
        let mut cursor = self.head.take();
        while let Some(node) = cursor {
            let mut inner = node.borrow_mut();
            inner.prev = None;
            cursor = inner.next.take();
        }
    }
}
94
-
95
- /// LRU cache specialized for package versions with usage tracking
96
- #[allow(dead_code)]
97
- pub struct PackageLruCache {
98
- cache: LruCache<String, PackageUsageMetrics>,
99
- max_size_bytes: u64,
100
- current_size_bytes: u64,
101
- }
102
-
103
- impl PackageLruCache {
104
- pub fn new(max_packages: usize, max_size_bytes: u64) -> Self {
105
- Self {
106
- cache: LruCache::new(max_packages),
107
- max_size_bytes,
108
- current_size_bytes: 0,
109
- }
110
- }
111
-
112
- /// Record package access (updates atime and increments access count)
113
- pub fn record_access(&mut self, package_key: &str, size_bytes: u64) {
114
- let now = Utc::now();
115
- if let Some(metrics) = self.cache.get(&package_key.to_string()) {
116
- // Update existing metrics
117
- let mut updated = metrics;
118
- updated.last_access_time = now;
119
- updated.access_count += 1;
120
- self.cache.put(package_key.to_string(), updated);
121
- } else {
122
- // Create new metrics
123
- let metrics = PackageUsageMetrics {
124
- package_key: package_key.to_string(),
125
- last_access_time: now,
126
- last_script_execution: None,
127
- access_count: 1,
128
- script_execution_count: 0,
129
- last_successful_build: None,
130
- };
131
- if let Some((_evicted_key, _evicted_metrics)) = self.cache.put(package_key.to_string(), metrics) {
132
- // Handle eviction if needed
133
- // In a full implementation, we'd track size_bytes per package
134
- }
135
- self.current_size_bytes += size_bytes;
136
- }
137
- }
138
-
139
- /// Record successful script execution
140
- #[allow(dead_code)]
141
- pub fn record_script_execution(&mut self, package_key: &str) {
142
- let now = Utc::now();
143
- if let Some(metrics) = self.cache.get(&package_key.to_string()) {
144
- let mut updated = metrics;
145
- updated.last_script_execution = Some(now);
146
- updated.script_execution_count += 1;
147
- self.cache.put(package_key.to_string(), updated);
148
- }
149
- }
150
-
151
- /// Record successful build
152
- #[allow(dead_code)]
153
- pub fn record_build(&mut self, package_key: &str) {
154
- let now = Utc::now();
155
- if let Some(metrics) = self.cache.get(&package_key.to_string()) {
156
- let mut updated = metrics;
157
- updated.last_successful_build = Some(now);
158
- self.cache.put(package_key.to_string(), updated);
159
- }
160
- }
161
-
162
- /// Get metrics for a package (updates LRU position)
163
- pub fn get_metrics(&mut self, package_key: &str) -> Option<PackageUsageMetrics> {
164
- self.cache.get(&package_key.to_string())
165
- }
166
-
167
- /// Get least recently used packages (for eviction candidates)
168
- #[allow(dead_code)]
169
- pub fn get_lru_packages(&self, _count: usize) -> Vec<String> {
170
- // This is a simplified version - in a full implementation,
171
- // we'd need to iterate through the tail of the LRU cache
172
- Vec::new() // Placeholder
173
- }
174
-
175
- /// Check if package should be kept based on LRU strategy
176
- pub fn should_keep_lru(&mut self, package_key: &str, days_threshold: i64) -> bool {
177
- if let Some(metrics) = self.get_metrics(package_key) {
178
- let days_since_access = (Utc::now() - metrics.last_access_time).num_days();
179
- return days_since_access < days_threshold;
180
- }
181
- false
182
- }
183
- }
184
-
185
#[cfg(test)]
mod tests {
    use super::LruCache;

    /// Covers miss, insert, hit, and eviction of the least recently used key
    /// on a two-slot cache.
    #[test]
    fn test_lru_basic() {
        let mut cache = LruCache::new(2);

        // Empty cache: lookup misses, first insert evicts nothing.
        assert!(cache.get(&"a").is_none());
        assert!(cache.put("a", 1).is_none());
        assert_eq!(cache.get(&"a"), Some(1));

        // Second insert fills the cache without evicting.
        assert!(cache.put("b", 2).is_none());
        assert_eq!(cache.len(), 2);

        // Touching 'a' makes it the MRU entry, which leaves 'b' as the LRU
        // entry and therefore the one evicted by the next insert.
        cache.get(&"a");
        let evicted = cache.put("c", 3);
        assert!(evicted.is_some());

        let (evicted_key, evicted_value) = evicted.unwrap();
        assert_eq!(evicted_key, "b");
        assert_eq!(evicted_value, 2);
    }
}
@@ -1,112 +0,0 @@
1
- use std::fs;
2
- use std::path::Path;
3
-
4
- pub type DepList = Vec<(String, String)>; // (name, version)
5
-
6
- pub fn parse_npm_package_lock(path: &Path) -> DepList {
7
- let mut deps_list: DepList = Vec::new();
8
- let text = match fs::read_to_string(path) { Ok(t) => t, Err(_) => return deps_list };
9
- let json: serde_json::Value = match serde_json::from_str(&text) { Ok(v) => v, Err(_) => return deps_list };
10
-
11
- fn walk(node: &serde_json::Value, list: &mut DepList) {
12
- if let Some(deps) = node.get("dependencies").and_then(|d| d.as_object()) {
13
- for (name, dep_node) in deps {
14
- if let Some(ver) = dep_node.get("version").and_then(|v| v.as_str()) {
15
- list.push((name.clone(), ver.to_string()));
16
- }
17
- walk(dep_node, list);
18
- }
19
- }
20
- // Handle 'packages' in lockfile v2/v3
21
- if let Some(packages) = node.get("packages").and_then(|d| d.as_object()) {
22
- for (key, pkg_node) in packages {
23
- if key.is_empty() { continue; } // Root
24
-
25
- // Key is path like "node_modules/pkg" or "node_modules/a/node_modules/b"
26
- // We want the package name, which is after the last "node_modules/"
27
- let name = if let Some(idx) = key.rfind("node_modules/") {
28
- key[idx + "node_modules/".len()..].to_string()
29
- } else {
30
- key.clone()
31
- };
32
-
33
- if let Some(ver) = pkg_node.get("version").and_then(|v| v.as_str()) {
34
- list.push((name, ver.to_string()));
35
- }
36
- }
37
- }
38
- }
39
-
40
- walk(&json, &mut deps_list);
41
- deps_list
42
- }
43
-
44
- pub fn parse_yarn_lock(path: &Path) -> DepList {
45
- let mut list: DepList = Vec::new();
46
- let text = match fs::read_to_string(path) { Ok(t) => t, Err(_) => return list };
47
-
48
- let mut current_name: Option<String> = None;
49
-
50
- for line in text.lines() {
51
- let trimmed = line.trim();
52
- if trimmed.is_empty() || trimmed.starts_with('#') { continue; }
53
-
54
- if !line.starts_with(' ') {
55
- // Start of a block: "pkg@ver, pkg@ver:"
56
- let parts: Vec<&str> = trimmed.trim_end_matches(':').split(',').collect();
57
- if let Some(first) = parts.first() {
58
- // Extract name from "name@range"
59
- // This is heuristic; yarn lock keys are complex.
60
- // Simpler: wait for "version" line.
61
- // But we need the name.
62
- // Pattern: name@^1.2.3
63
- // Last '@' separates name and version range, but scoped packages start with @.
64
- let s = first.trim().trim_matches('"');
65
- if let Some(idx) = s.rfind('@') {
66
- if idx > 0 {
67
- current_name = Some(s[..idx].to_string());
68
- } else {
69
- current_name = None;
70
- }
71
- }
72
- }
73
- } else if let Some(name) = &current_name {
74
- if trimmed.starts_with("version") {
75
- // version "1.2.3"
76
- let parts: Vec<&str> = trimmed.split_whitespace().collect();
77
- if parts.len() >= 2 {
78
- let ver = parts[1].trim_matches('"');
79
- list.push((name.clone(), ver.to_string()));
80
- current_name = None; // Reset so we don't duplicate
81
- }
82
- }
83
- }
84
- }
85
- list
86
- }
87
-
88
- pub fn parse_pnpm_lock(path: &Path) -> DepList {
89
- let mut list: DepList = Vec::new();
90
- let text = match fs::read_to_string(path) { Ok(t) => t, Err(_) => return list };
91
-
92
- for line in text.lines() {
93
- let l = line.trim();
94
- // /name/version:
95
- if l.starts_with('/') && l.ends_with(':') {
96
- let content = l.trim_end_matches(':');
97
- // content is like /@babel/core/7.2.0
98
- // extract name and version.
99
- // Split by '/'
100
- let parts: Vec<&str> = content.split('/').collect();
101
- // parts[0] is empty
102
- // if scoped: "", "@scope", "pkg", "ver" -> len 4
103
- // if unscoped: "", "pkg", "ver" -> len 3
104
- if parts.len() >= 3 {
105
- let ver = parts.last().unwrap().to_string();
106
- let name = parts[1..parts.len()-1].join("/");
107
- list.push((name, ver));
108
- }
109
- }
110
- }
111
- list
112
- }
package/core/src/main.rs DELETED
@@ -1,125 +0,0 @@
1
- mod types;
2
- mod scanner;
3
- mod safety;
4
- mod optimization;
5
- mod cache;
6
- mod ml;
7
- mod arc_lfu;
8
- mod lockfiles;
9
- mod symlink;
10
- mod usage_tracker;
11
-
12
- use anyhow::Result;
13
- use clap::{Parser, Subcommand};
14
- use std::path::PathBuf;
15
-
16
- use optimization::{plan_basic_cleanup, RulesConfig, OptimizationEngine};
17
-
18
- #[derive(Parser)]
19
- #[command(name = "packagepurge-core", version)]
20
- struct Cli {
21
- #[command(subcommand)]
22
- command: Commands,
23
- }
24
-
25
- #[derive(Subcommand)]
26
- enum Commands {
27
- /// Scan filesystem and output dependency/caches JSON
28
- Scan { #[arg(short, long)] paths: Vec<PathBuf> },
29
- /// Produce cleanup plan without mutating filesystem
30
- DryRun { #[arg(short, long, default_value_t = 90)] preserve_days: i64, #[arg(short, long)] paths: Vec<PathBuf> },
31
- /// Move targets to quarantine (atomic move) based on paths provided
32
- Quarantine { #[arg(required=true)] targets: Vec<PathBuf> },
33
- /// Rollback by id or latest
34
- Rollback {
35
- #[arg(long)] id: Option<String>,
36
- #[arg(long)] latest: bool,
37
- },
38
- /// Optimize with ML/LRU and symlinking (dry run)
39
- Optimize {
40
- #[arg(short, long, default_value_t = 90)] preserve_days: i64,
41
- #[arg(short, long)] paths: Vec<PathBuf>,
42
- #[arg(long)] enable_symlinking: bool,
43
- #[arg(long)] enable_ml: bool,
44
- #[arg(long, default_value_t = 1000)] lru_max_packages: usize,
45
- #[arg(long, default_value_t = 10_000_000_000)] lru_max_size_bytes: u64,
46
- },
47
- /// Execute symlinking for duplicate packages
48
- Symlink {
49
- #[arg(short, long)] paths: Vec<PathBuf>,
50
- },
51
- }
52
-
53
- fn main() -> Result<()> {
54
- let cli = Cli::parse();
55
- match cli.command {
56
- Commands::Scan { paths } => {
57
- let out = scanner::scan(&paths)?;
58
- println!("{}", serde_json::to_string_pretty(&out)?);
59
- }
60
- Commands::DryRun { preserve_days, paths } => {
61
- let scan = scanner::scan(&paths)?;
62
- let report = plan_basic_cleanup(&scan, &RulesConfig {
63
- preserve_days,
64
- enable_symlinking: false,
65
- enable_ml_prediction: false,
66
- lru_max_packages: 1000,
67
- lru_max_size_bytes: 10_000_000_000, // 10GB default
68
- })?;
69
- println!("{}", serde_json::to_string_pretty(&report)?);
70
- }
71
- Commands::Quarantine { targets } => {
72
- let mut recs = Vec::new();
73
- for t in targets {
74
- match safety::move_to_quarantine(&t) {
75
- Ok(r) => recs.push(r),
76
- Err(e) => eprintln!("Failed to quarantine {:?}: {}", t, e),
77
- }
78
- }
79
- println!("{}", serde_json::to_string_pretty(&recs)?);
80
- }
81
- Commands::Rollback { id, latest } => {
82
- let rec = if let Some(i) = id { safety::find_quarantine_by_id(&i) } else if latest { safety::latest_quarantine() } else { None };
83
- if let Some(r) = rec {
84
- if let Err(e) = safety::rollback_record(&r) {
85
- eprintln!("{}", e);
86
- std::process::exit(1);
87
- }
88
- println!("{}", serde_json::to_string_pretty(&serde_json::json!({"status":"ok","id": r.id}))?);
89
- } else {
90
- eprintln!("No matching quarantine record found");
91
- std::process::exit(2);
92
- }
93
- }
94
- Commands::Optimize { preserve_days, paths, enable_symlinking, enable_ml, lru_max_packages, lru_max_size_bytes } => {
95
- let scan = scanner::scan(&paths)?;
96
- let config = RulesConfig {
97
- preserve_days,
98
- enable_symlinking,
99
- enable_ml_prediction: enable_ml,
100
- lru_max_packages,
101
- lru_max_size_bytes,
102
- };
103
- let mut engine = OptimizationEngine::new(config)?;
104
- let report = engine.plan_optimized_cleanup(&scan)?;
105
- println!("{}", serde_json::to_string_pretty(&report)?);
106
- }
107
- Commands::Symlink { paths } => {
108
- let scan = scanner::scan(&paths)?;
109
- let config = RulesConfig {
110
- preserve_days: 90,
111
- enable_symlinking: true,
112
- enable_ml_prediction: false,
113
- lru_max_packages: 1000,
114
- lru_max_size_bytes: 10_000_000_000,
115
- };
116
- let engine = OptimizationEngine::new(config)?;
117
- let count = engine.execute_symlinking(&scan)?;
118
- println!("{}", serde_json::to_string_pretty(&serde_json::json!({
119
- "status": "ok",
120
- "symlinked_count": count
121
- }))?);
122
- }
123
- }
124
- Ok(())
125
- }
package/core/src/ml.rs DELETED
@@ -1,188 +0,0 @@
1
- use chrono::Utc;
2
- use crate::types::{PackageUsageMetrics, ProjectMetadata, DeveloperBehavior};
3
-
4
- #[allow(dead_code)]
5
- pub trait MlRecommender {
6
- fn is_safe_to_evict(&self, package_id: &str) -> Option<bool>;
7
- fn should_keep(&self, package_id: &str, metrics: &PackageUsageMetrics, project: &ProjectMetadata, behavior: &DeveloperBehavior) -> bool;
8
- }
9
-
10
- #[allow(dead_code)]
11
- pub struct NoopRecommender;
12
- impl MlRecommender for NoopRecommender {
13
- fn is_safe_to_evict(&self, _package_id: &str) -> Option<bool> { None }
14
- fn should_keep(&self, _package_id: &str, _metrics: &PackageUsageMetrics, _project: &ProjectMetadata, _behavior: &DeveloperBehavior) -> bool {
15
- true // Conservative: keep by default
16
- }
17
- }
18
-
19
/// Rule-based keep/evict predictor; the rules stand in for a trained model
/// and can be replaced by one later.
#[allow(dead_code)]
pub struct PredictiveOptimizer {
    /// Look-ahead window in days: packages used within this period are
    /// considered likely to be needed.
    prediction_window_days: i64,
}
25
-
26
- impl PredictiveOptimizer {
27
- pub fn new(prediction_window_days: i64) -> Self {
28
- Self { prediction_window_days }
29
- }
30
-
31
- /// Extract features from package metadata for ML prediction
32
- fn extract_features(
33
- &self,
34
- metrics: &PackageUsageMetrics,
35
- project: &ProjectMetadata,
36
- behavior: &DeveloperBehavior,
37
- ) -> Vec<f64> {
38
- let now = Utc::now();
39
-
40
- // Feature 1: Days since last access
41
- let days_since_access = (now - metrics.last_access_time).num_days() as f64;
42
-
43
- // Feature 2: Days since last script execution
44
- let days_since_script = metrics.last_script_execution
45
- .map(|t| (now - t).num_days() as f64)
46
- .unwrap_or(365.0); // High value if never executed
47
-
48
- // Feature 3: Days since last successful build
49
- let days_since_build = metrics.last_successful_build
50
- .map(|t| (now - t).num_days() as f64)
51
- .unwrap_or(365.0);
52
-
53
- // Feature 4: Access frequency (normalized)
54
- let access_frequency = metrics.access_count as f64 / 100.0; // Normalize
55
-
56
- // Feature 5: Script execution frequency
57
- let script_frequency = metrics.script_execution_count as f64 / 10.0;
58
-
59
- // Feature 6: Project activity (days since last commit)
60
- let days_since_commit = project.last_commit_date
61
- .map(|t| (now - t).num_days() as f64)
62
- .unwrap_or(365.0);
63
-
64
- // Feature 7: Project type score (higher for active project types)
65
- let project_type_score = match project.project_type.as_str() {
66
- "react" | "typescript" | "nextjs" => 1.0,
67
- "node" => 0.8,
68
- _ => 0.5,
69
- };
70
-
71
- // Feature 8: Dependency count (more deps = more likely to need packages)
72
- let dep_score = (project.dependency_count as f64 / 100.0).min(1.0);
73
-
74
- // Feature 9: Days since last build (from behavior)
75
- let behavior_days_since_build = behavior.days_since_last_build
76
- .map(|d| d as f64)
77
- .unwrap_or(365.0);
78
-
79
- // Feature 10: File access frequency
80
- let file_access_score = (behavior.file_access_frequency as f64 / 1000.0).min(1.0);
81
-
82
- vec![
83
- days_since_access,
84
- days_since_script,
85
- days_since_build,
86
- access_frequency,
87
- script_frequency,
88
- days_since_commit,
89
- project_type_score,
90
- dep_score,
91
- behavior_days_since_build,
92
- file_access_score,
93
- ]
94
- }
95
-
96
- /// Predict whether package should be kept (binary classification)
97
- /// Returns true if package is likely needed in the next prediction_window_days
98
- pub fn predict_keep(
99
- &self,
100
- metrics: &PackageUsageMetrics,
101
- project: &ProjectMetadata,
102
- behavior: &DeveloperBehavior,
103
- ) -> bool {
104
- let features = self.extract_features(metrics, project, behavior);
105
-
106
- // Simple rule-based classifier (can be replaced with actual ML model)
107
- // This implements a heuristic that mimics what a trained model would do
108
-
109
- // Rule 1: Recently accessed packages are likely needed
110
- let days_since_access = features[0];
111
- if days_since_access < 7.0 {
112
- return true; // Keep if accessed in last week
113
- }
114
-
115
- // Rule 2: Recently used in scripts
116
- let days_since_script = features[1];
117
- if days_since_script < 14.0 {
118
- return true; // Keep if used in script in last 2 weeks
119
- }
120
-
121
- // Rule 3: Recently built successfully
122
- let days_since_build = features[2];
123
- if days_since_build < 30.0 {
124
- return true; // Keep if built in last month
125
- }
126
-
127
- // Rule 4: High access frequency
128
- let access_frequency = features[3];
129
- if access_frequency > 0.5 {
130
- return true; // Keep if frequently accessed
131
- }
132
-
133
- // Rule 5: Active project with recent commits
134
- let days_since_commit = features[5];
135
- let project_type_score = features[6];
136
- if days_since_commit < 30.0 && project_type_score > 0.7 {
137
- return true; // Keep if project is active
138
- }
139
-
140
- // Rule 6: Weighted score combining all features
141
- // This is a simplified logistic regression-like decision
142
- let score = self.compute_keep_score(&features);
143
- score > 0.5
144
- }
145
-
146
- /// Compute a keep score (0.0 to 1.0) based on features
147
- /// This mimics a logistic regression output
148
- fn compute_keep_score(&self, features: &[f64]) -> f64 {
149
- // Weighted combination of features (weights learned from training data in real ML)
150
- // For now, use heuristic weights
151
- let weights = vec![
152
- -0.1, // days_since_access (negative: more days = lower score)
153
- -0.05, // days_since_script
154
- -0.03, // days_since_build
155
- 0.3, // access_frequency (positive: more access = higher score)
156
- 0.2, // script_frequency
157
- -0.02, // days_since_commit
158
- 0.15, // project_type_score
159
- 0.1, // dep_score
160
- -0.03, // behavior_days_since_build
161
- 0.1, // file_access_score
162
- ];
163
-
164
- let mut score = 0.5; // Base score
165
- for (feature, weight) in features.iter().zip(weights.iter()) {
166
- score += feature * weight;
167
- }
168
-
169
- // Apply sigmoid-like function to bound between 0 and 1
170
- 1.0 / (1.0 + (-score).exp())
171
- }
172
- }
173
-
174
- impl MlRecommender for PredictiveOptimizer {
175
- fn is_safe_to_evict(&self, _package_id: &str) -> Option<bool> {
176
- None // Use should_keep instead
177
- }
178
-
179
- fn should_keep(
180
- &self,
181
- _package_id: &str,
182
- metrics: &PackageUsageMetrics,
183
- project: &ProjectMetadata,
184
- behavior: &DeveloperBehavior,
185
- ) -> bool {
186
- self.predict_keep(metrics, project, behavior)
187
- }
188
- }