packagepurge 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,136 +0,0 @@
1
- use anyhow::Result;
2
- use chrono::{DateTime, Utc};
3
- use rayon::prelude::*;
4
- use std::{fs, path::{Path, PathBuf}, time::SystemTime};
5
- use walkdir::WalkDir;
6
-
7
- use crate::types::{PackageRecord, ProjectRecord, ScanOutput, PackageManager};
8
- use crate::lockfiles::{parse_npm_package_lock, parse_yarn_lock, parse_pnpm_lock};
9
-
10
- fn to_utc(st: SystemTime) -> DateTime<Utc> { st.into() }
11
-
12
- fn dir_size(path: &Path) -> u64 {
13
- let mut total: u64 = 0;
14
- for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
15
- if entry.file_type().is_file() {
16
- if let Ok(meta) = entry.metadata() {
17
- total += meta.len();
18
- }
19
- }
20
- }
21
- total
22
- }
23
-
24
- fn detect_manager_from_lock(dir: &Path) -> Option<PackageManager> {
25
- if dir.join("package-lock.json").exists() { return Some(PackageManager::Npm); }
26
- if dir.join("yarn.lock").exists() { return Some(PackageManager::Yarn); }
27
- if dir.join("pnpm-lock.yaml").exists() { return Some(PackageManager::Pnpm); }
28
- None
29
- }
30
-
31
- fn collect_projects_and_edges(root: &Path) -> (Vec<ProjectRecord>, Vec<(String, String)>) {
32
- let mut projects = Vec::new();
33
- let edges: Vec<(String, String)> = Vec::new();
34
- for entry in WalkDir::new(root).max_depth(6).into_iter().filter_map(|e| e.ok()) {
35
- if entry.file_type().is_file() && entry.file_name() == "package.json" {
36
- let dir = entry.path().parent().unwrap_or(root);
37
- let manager = detect_manager_from_lock(dir);
38
- let mtime = fs::metadata(entry.path()).and_then(|m| m.modified()).ok()
39
- .map(to_utc).unwrap_or_else(|| Utc::now());
40
- // Basic dependency extraction from package.json
41
- let mut deps: Vec<(String, String)> = Vec::new();
42
- if let Ok(content) = fs::read_to_string(entry.path()) {
43
- if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
44
- for key in ["dependencies", "devDependencies", "peerDependencies"] {
45
- if let Some(obj) = json.get(key).and_then(|v| v.as_object()) {
46
- for (name, ver) in obj {
47
- if let Some(ver_str) = ver.as_str() {
48
- deps.push((name.clone(), ver_str.to_string()));
49
- }
50
- }
51
- }
52
- }
53
- }
54
- }
55
- // Lockfile dependencies
56
- let lock_deps = match manager {
57
- Some(PackageManager::Npm) => parse_npm_package_lock(&dir.join("package-lock.json")),
58
- Some(PackageManager::Yarn) => parse_yarn_lock(&dir.join("yarn.lock")),
59
- Some(PackageManager::Pnpm) => parse_pnpm_lock(&dir.join("pnpm-lock.yaml")),
60
- None => Vec::new(),
61
- };
62
- deps.extend(lock_deps);
63
-
64
- projects.push(ProjectRecord {
65
- path: dir.to_string_lossy().to_string(),
66
- manager,
67
- dependencies: deps,
68
- mtime,
69
- });
70
- }
71
- }
72
- (projects, edges)
73
- }
74
-
75
/// Heuristic: does `path` look like a package-manager cache directory
/// (npm's `.npm`, yarn's `cache`, or pnpm's store)?
///
/// Matching is case-insensitive and separator-insensitive.
fn is_cache_dir(path: &Path) -> bool {
    // Normalize backslashes so the substring checks below also work on
    // Windows, where `yarn\cache` would otherwise never match "yarn/cache".
    let p = path.to_string_lossy().to_lowercase().replace('\\', "/");
    p.ends_with(".npm") || p.contains("yarn/cache") || p.contains("pnpm/store")
}
79
-
80
/// Scan `paths` (or the current working directory when empty) for JavaScript
/// projects and installed packages.
///
/// Two passes: first collect project manifests via
/// `collect_projects_and_edges`, then enumerate candidate package directories
/// inside every `node_modules` tree and package-manager cache, sizing and
/// identifying each one in parallel.
pub fn scan(paths: &[PathBuf]) -> Result<ScanOutput> {
    // Default to the current working directory when no roots were given.
    let roots: Vec<PathBuf> = if paths.is_empty() { vec![std::env::current_dir()?] } else { paths.to_vec() };

    let mut all_projects: Vec<ProjectRecord> = Vec::new();
    let mut all_edges: Vec<(String, String)> = Vec::new();
    for root in &roots {
        let (projects, edges) = collect_projects_and_edges(root);
        all_projects.extend(projects);
        all_edges.extend(edges);
    }

    // Collect packages in node_modules and caches.
    // NOTE(review): nested node_modules directories are pushed as their own
    // entries here AND remain reachable from their parent node_modules walk
    // below (depth <= 3), so some packages may be recorded more than once —
    // confirm whether downstream consumers deduplicate by path.
    let mut package_dirs: Vec<PathBuf> = Vec::new();
    for root in &roots {
        for entry in WalkDir::new(root).into_iter().filter_map(|e| e.ok()) {
            if entry.file_type().is_dir() {
                let name = entry.file_name().to_string_lossy();
                if name == "node_modules" || is_cache_dir(entry.path()) {
                    package_dirs.push(entry.into_path());
                }
            }
        }
    }

    // Size and identify every candidate package directory in parallel.
    // Depth 1..=3 covers plain packages (depth 1) and scoped packages
    // (@scope/name at depth 2) plus one extra level of nesting.
    let packages: Vec<PackageRecord> = package_dirs.par_iter().flat_map(|dir| {
        WalkDir::new(dir).min_depth(1).max_depth(3).into_iter().filter_map(|e| e.ok())
            .filter(|e| e.file_type().is_dir())
            .filter_map(|pkg_dir| {
                let pkg_path = pkg_dir.path().to_path_buf();
                // Only directories that contain a manifest count as packages.
                let package_json = pkg_path.join("package.json");
                if !package_json.exists() { return None; }
                let meta = fs::metadata(&pkg_path).ok()?;
                // Missing atime/mtime (platform-dependent) falls back to "now".
                let atime = meta.accessed().ok().map(to_utc).unwrap_or_else(|| Utc::now());
                let mtime = meta.modified().ok().map(to_utc).unwrap_or_else(|| Utc::now());
                // Recursive on-disk size; this is the expensive part and runs
                // inside the rayon worker.
                let size = dir_size(&pkg_path);
                // Name/version from the manifest; "unknown" on any read or
                // parse failure.
                let (name, version) = if let Ok(text) = fs::read_to_string(&package_json) {
                    if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
                        let n = json.get("name").and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
                        let v = json.get("version").and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
                        (n, v)
                    } else { ("unknown".into(), "unknown".into()) }
                } else { ("unknown".into(), "unknown".into()) };
                Some(PackageRecord {
                    name,
                    version,
                    path: pkg_path.to_string_lossy().to_string(),
                    size_bytes: size,
                    atime,
                    mtime,
                    manager: None,             // not determined during scan
                    project_paths: Vec::new(), // not linked during scan
                })
            }).collect::<Vec<_>>()
    }).collect();

    Ok(ScanOutput { packages, projects: all_projects, edges: all_edges })
}
@@ -1,223 +0,0 @@
1
- use anyhow::{Context, Result};
2
- use std::fs;
3
- use std::path::{Path, PathBuf};
4
-
5
- #[cfg(windows)]
6
- use std::os::windows::fs as win_fs;
7
-
8
- #[cfg(unix)]
9
- use std::os::unix::fs as unix_fs;
10
-
11
- /// Global store path (platform-specific)
12
- pub fn get_global_store_path() -> Result<PathBuf> {
13
- let home = dirs::home_dir().context("Could not determine home directory")?;
14
- Ok(home.join(".packagepurge").join("global_store"))
15
- }
16
-
17
- /// Initialize global store directory
18
- pub fn ensure_global_store() -> Result<PathBuf> {
19
- let store_path = get_global_store_path()?;
20
- fs::create_dir_all(&store_path)
21
- .with_context(|| format!("Failed to create global store at {:?}", store_path))?;
22
- Ok(store_path)
23
- }
24
-
25
- /// Generate content-addressable path for a package
26
- /// Format: global_store/{name}/{version}/{hash}
27
- pub fn get_canonical_path(store_path: &Path, name: &str, version: &str) -> Result<PathBuf> {
28
- // Use a simple hash of name@version for content addressing
29
- use sha2::{Digest, Sha256};
30
- let mut hasher = Sha256::new();
31
- hasher.update(format!("{}@{}", name, version).as_bytes());
32
- let hash = hex::encode(&hasher.finalize()[..8]);
33
-
34
- Ok(store_path
35
- .join(sanitize_name(name))
36
- .join(version)
37
- .join(&hash))
38
- }
39
-
40
/// Make a package name safe to use as a single path component by replacing
/// path separators and drive markers with underscores.
fn sanitize_name(name: &str) -> String {
    name.chars()
        .map(|c| match c {
            '/' | '\\' | ':' => '_',
            other => other,
        })
        .collect()
}
43
-
44
/// True when `path` is a symbolic link — or, on Windows, anything
/// `read_link` can resolve, which also covers junctions.
pub fn is_symlink(path: &Path) -> bool {
    #[cfg(windows)]
    {
        // Junctions can resolve via read_link even when the metadata file
        // type does not report a symlink, so test read_link first and fall
        // back to the metadata check.
        fs::read_link(path).is_ok()
            || fs::symlink_metadata(path)
                .map(|meta| meta.file_type().is_symlink())
                .unwrap_or(false)
    }

    #[cfg(unix)]
    {
        fs::symlink_metadata(path)
            .map(|meta| meta.file_type().is_symlink())
            .unwrap_or(false)
    }
}
68
-
69
- /// Create hard links for all files in source directory to target directory
70
- pub fn hard_link_directory(src: &Path, dst: &Path) -> Result<()> {
71
- if dst.exists() {
72
- fs::remove_dir_all(dst)
73
- .with_context(|| format!("Failed to remove existing directory {:?}", dst))?;
74
- }
75
- fs::create_dir_all(dst)
76
- .with_context(|| format!("Failed to create directory {:?}", dst))?;
77
-
78
- // Recursively hard link all files
79
- copy_directory_with_hard_links(src, dst)?;
80
- Ok(())
81
- }
82
-
83
/// Recursively mirror `src` into `dst`, hard-linking every regular file.
///
/// Directories are recreated; files are hard-linked so no extra data is
/// written. On Windows, a failed hard link (e.g. across volumes) falls back
/// to a plain copy.
fn copy_directory_with_hard_links(src: &Path, dst: &Path) -> Result<()> {
    use walkdir::WalkDir;

    for entry in WalkDir::new(src).into_iter().filter_map(|e| e.ok()) {
        let src_path = entry.path();
        // Rebuild the same relative layout under dst.
        let rel_path = src_path.strip_prefix(src)
            .with_context(|| format!("Failed to get relative path from {:?}", src))?;
        let dst_path = dst.join(rel_path);

        if src_path.is_dir() {
            fs::create_dir_all(&dst_path)
                .with_context(|| format!("Failed to create directory {:?}", dst_path))?;
        } else if src_path.is_file() {
            // The walker yields parents before children, but guard anyway in
            // case a directory entry was dropped by the error filter above.
            if let Some(parent) = dst_path.parent() {
                fs::create_dir_all(parent)
                    .with_context(|| format!("Failed to create parent directory {:?}", parent))?;
            }

            #[cfg(unix)]
            {
                fs::hard_link(src_path, &dst_path)
                    .with_context(|| format!("Failed to create hard link from {:?} to {:?}", src_path, dst_path))?;
            }

            #[cfg(windows)]
            {
                // Windows: try hard link first, fall back to copy
                // (hard links cannot span volumes).
                if fs::hard_link(src_path, &dst_path).is_err() {
                    // If hard link fails (e.g., different volumes), copy the file
                    fs::copy(src_path, &dst_path)
                        .with_context(|| format!("Failed to copy file from {:?} to {:?}", src_path, dst_path))?;
                }
            }
        }
    }
    Ok(())
}
120
-
121
/// Create a symlink (or junction on Windows) at `target` pointing to `source`.
///
/// Anything already at `target` is removed first, and missing parent
/// directories are created. On Windows, directory symlinks are used for
/// directories and file symlinks for files; creating them may require
/// administrator privileges or Developer Mode.
pub fn create_symlink(target: &Path, source: &Path) -> Result<()> {
    // Remove existing target if it exists.
    if target.exists() {
        if target.is_dir() {
            fs::remove_dir_all(target)
                .with_context(|| format!("Failed to remove existing directory {:?}", target))?;
        } else {
            fs::remove_file(target)
                .with_context(|| format!("Failed to remove existing file {:?}", target))?;
        }
    }

    // Ensure parent directory exists.
    if let Some(parent) = target.parent() {
        fs::create_dir_all(parent)
            .with_context(|| format!("Failed to create parent directory {:?}", parent))?;
    }

    #[cfg(windows)]
    {
        // On Windows, use directory symlink for directories, symlink for files.
        if source.is_dir() {
            // Try to create a directory symlink (requires admin privileges or Developer Mode).
            win_fs::symlink_dir(source, target)
                .with_context(|| format!("Failed to create directory symlink from {:?} to {:?}. Note: On Windows, this may require administrator privileges or Developer Mode enabled.", target, source))?;
        } else {
            win_fs::symlink_file(source, target)
                .with_context(|| format!("Failed to create file symlink from {:?} to {:?}", target, source))?;
        }
    }

    #[cfg(unix)]
    {
        unix_fs::symlink(source, target)
            .with_context(|| format!("Failed to create symlink from {:?} to {:?}", target, source))?;
    }

    Ok(())
}
161
-
162
- /// Deduplicate packages by creating symlinks to global store
163
- #[allow(dead_code)]
164
- pub struct SemanticDeduplication {
165
- store_path: PathBuf,
166
- }
167
-
168
- impl SemanticDeduplication {
169
- pub fn new() -> Result<Self> {
170
- let store_path = ensure_global_store()?;
171
- Ok(Self { store_path })
172
- }
173
-
174
- /// Process a package: hard link to global store, then symlink from original location
175
- pub fn deduplicate_package(&self, package_path: &Path, name: &str, version: &str) -> Result<()> {
176
- let canonical_path = get_canonical_path(&self.store_path, name, version)?;
177
-
178
- // If canonical doesn't exist, create it by hard linking from package_path
179
- if !canonical_path.exists() {
180
- hard_link_directory(package_path, &canonical_path)
181
- .with_context(|| format!("Failed to create canonical package at {:?}", canonical_path))?;
182
- }
183
-
184
- // If package_path is not already a symlink, replace it with one
185
- if !is_symlink(package_path) {
186
- // Create a temporary path for safe replacement
187
- let temp_path = package_path.with_extension(".packagepurge.tmp");
188
-
189
- // Create symlink at temp location first
190
- create_symlink(&temp_path, &canonical_path)?;
191
-
192
- // Remove original and rename temp
193
- if package_path.is_dir() {
194
- fs::remove_dir_all(package_path)
195
- .with_context(|| format!("Failed to remove original directory {:?}", package_path))?;
196
- } else {
197
- fs::remove_file(package_path)
198
- .with_context(|| format!("Failed to remove original file {:?}", package_path))?;
199
- }
200
-
201
- fs::rename(&temp_path, package_path)
202
- .with_context(|| format!("Failed to rename temp symlink to {:?}", package_path))?;
203
- }
204
-
205
- Ok(())
206
- }
207
- }
208
-
209
#[cfg(test)]
mod tests {
    // Removed unused `std::fs` and `tempfile::tempdir` imports (they
    // produced unused-import warnings; no test touches the filesystem).
    use super::*;

    #[test]
    fn test_get_canonical_path() {
        let store = PathBuf::from("/tmp/store");
        let path = get_canonical_path(&store, "react", "18.2.0").unwrap();
        let rendered = path.to_string_lossy();
        assert!(rendered.contains("react"));
        assert!(rendered.contains("18.2.0"));
    }
}
223
-
package/core/src/types.rs DELETED
@@ -1,87 +0,0 @@
1
- use chrono::{DateTime, Utc};
2
- use serde::{Deserialize, Serialize};
3
-
4
/// Which JavaScript package manager a project or package belongs to.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PackageManager { Npm, Yarn, Pnpm }

/// One installed package directory discovered during a scan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PackageRecord {
    /// Package name from its package.json ("unknown" when unreadable).
    pub name: String,
    /// Package version from its package.json ("unknown" when unreadable).
    pub version: String,
    /// Path of the package directory, lossily converted to UTF-8.
    pub path: String,
    /// Recursive on-disk size of the package directory.
    pub size_bytes: u64,
    /// Last access time reported for the directory.
    pub atime: DateTime<Utc>,
    /// Last modification time reported for the directory.
    pub mtime: DateTime<Utc>,
    /// Owning package manager, when known.
    pub manager: Option<PackageManager>,
    /// Paths of projects that reference this package.
    pub project_paths: Vec<String>,
}

/// One project (a directory containing package.json) found during a scan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectRecord {
    /// Project directory path, lossily converted to UTF-8.
    pub path: String,
    /// Package manager inferred from the lockfile present, if any.
    pub manager: Option<PackageManager>,
    /// (name, version-requirement) pairs from the manifest and lockfile.
    pub dependencies: Vec<(String, String)>,
    /// Last modification time of the project's package.json.
    pub mtime: DateTime<Utc>,
}

/// Aggregate result of scanning one or more root directories.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanOutput {
    /// Package directories found in node_modules trees and caches.
    pub packages: Vec<PackageRecord>,
    /// Projects (package.json directories) found.
    pub projects: Vec<ProjectRecord>,
    /// Currently left empty by the scanner.
    pub edges: Vec<(String, String)>, // parent -> dependency
}
33
-
34
/// One candidate cleanup action in a plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PlanItem {
    /// Path the action targets.
    pub target_path: String,
    /// Estimated bytes reclaimed by the action.
    pub estimated_size_bytes: u64,
    /// Human-readable justification for the action.
    pub reason: String,
}

/// Summary of a dry run: planned items plus their combined size.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DryRunReport {
    pub items: Vec<PlanItem>,
    /// Total of `estimated_size_bytes` over `items`.
    pub total_estimated_bytes: u64,
}

/// Bookkeeping for a path that was moved into quarantine.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuarantineRecord {
    /// Unique identifier of this quarantine entry.
    pub id: String,
    /// Where the content originally lived.
    pub original_path: String,
    /// Where the content is held while quarantined.
    pub quarantine_path: String,
    /// SHA-256 digest — presumably of the quarantined content; confirm
    /// against whichever code produces these records.
    pub sha256: String,
    /// Size of the quarantined content in bytes.
    pub size_bytes: u64,
    /// When the quarantine entry was created.
    pub created_at: DateTime<Utc>,
}
56
-
57
/// Usage metrics for a package
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PackageUsageMetrics {
    /// Package identity key.
    pub package_key: String, // Format: "name@version"
    /// Most recent recorded access to the package.
    pub last_access_time: DateTime<Utc>,
    /// Most recent recorded npm/yarn script execution involving it, if any.
    pub last_script_execution: Option<DateTime<Utc>>,
    /// Total recorded accesses.
    pub access_count: u64,
    /// Total recorded script executions.
    pub script_execution_count: u64,
    /// Most recent recorded successful build involving it, if any.
    pub last_successful_build: Option<DateTime<Utc>>,
}

/// Project metadata for ML features
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectMetadata {
    /// Project directory path.
    pub path: String,
    /// Free-form project category.
    pub project_type: String, // e.g., "react", "node", "typescript"
    /// Timestamp of the most recent commit, when version control is present.
    pub last_commit_date: Option<DateTime<Utc>>,
    /// Number of declared dependencies.
    pub dependency_count: usize,
    /// Last modification time of the project.
    pub last_modified: DateTime<Utc>,
}

/// Developer behavior metrics
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeveloperBehavior {
    /// Recorded package-manager invocations.
    pub npm_commands_executed: Vec<(String, DateTime<Utc>)>, // (command, timestamp)
    /// Aggregate file access counter.
    pub file_access_frequency: u64,
    /// Days since the last recorded build, when one is known.
    pub days_since_last_build: Option<i64>,
}
@@ -1,107 +0,0 @@
1
- #![allow(dead_code)]
2
- use anyhow::{Context, Result};
3
- use std::collections::HashMap;
4
- use std::fs;
5
- use std::path::{Path, PathBuf};
6
-
7
- use crate::types::PackageUsageMetrics;
8
- use crate::cache::PackageLruCache;
9
-
10
/// Tracks and persists package usage metrics across runs
pub struct UsageTracker {
    /// Location of the JSON metrics cache on disk.
    cache_path: PathBuf,
    /// In-memory LRU view of package usage, bounded by count and total size.
    lru_cache: PackageLruCache,
}
15
-
16
- impl UsageTracker {
17
- pub fn new(cache_path: PathBuf, max_packages: usize, max_size_bytes: u64) -> Result<Self> {
18
- // Ensure cache directory exists
19
- if let Some(parent) = cache_path.parent() {
20
- fs::create_dir_all(parent)
21
- .with_context(|| format!("Failed to create cache directory {:?}", parent))?;
22
- }
23
-
24
- let mut tracker = Self {
25
- cache_path: cache_path.clone(),
26
- lru_cache: PackageLruCache::new(max_packages, max_size_bytes),
27
- };
28
-
29
- // Load existing metrics if available
30
- if cache_path.exists() {
31
- if let Ok(metrics) = tracker.load_metrics() {
32
- for (key, _metric) in metrics {
33
- tracker.lru_cache.record_access(&key, 0); // Size will be updated on scan
34
- }
35
- }
36
- }
37
-
38
- Ok(tracker)
39
- }
40
-
41
- /// Load persisted metrics from disk
42
- fn load_metrics(&self) -> Result<HashMap<String, PackageUsageMetrics>> {
43
- let content = fs::read_to_string(&self.cache_path)
44
- .with_context(|| format!("Failed to read cache file {:?}", self.cache_path))?;
45
- let metrics: HashMap<String, PackageUsageMetrics> = serde_json::from_str(&content)
46
- .with_context(|| "Failed to parse metrics cache")?;
47
- Ok(metrics)
48
- }
49
-
50
- /// Persist metrics to disk
51
- pub fn save_metrics(&self) -> Result<()> {
52
- // In a full implementation, we'd collect all metrics from the LRU cache
53
- // For now, this is a placeholder that would be called after optimization runs
54
- Ok(())
55
- }
56
-
57
- /// Record a script execution (e.g., npm run build, npm test)
58
- /// This should be called when monitoring detects script execution
59
- pub fn record_script_execution(&mut self, package_key: &str) {
60
- self.lru_cache.record_script_execution(package_key);
61
- }
62
-
63
- /// Record a successful build
64
- pub fn record_build(&mut self, package_key: &str) {
65
- self.lru_cache.record_build(package_key);
66
- }
67
-
68
- /// Get the LRU cache for direct access
69
- pub fn lru_cache_mut(&mut self) -> &mut PackageLruCache {
70
- &mut self.lru_cache
71
- }
72
- }
73
-
74
- /// Helper to detect script execution from package.json scripts
75
- /// This would be integrated with npm/yarn execution monitoring
76
- pub fn detect_script_execution(project_path: &Path, script_name: &str) -> Vec<String> {
77
- use std::fs;
78
- use serde_json::Value;
79
-
80
- let package_json = project_path.join("package.json");
81
- if !package_json.exists() {
82
- return Vec::new();
83
- }
84
-
85
- let mut affected_packages = Vec::new();
86
-
87
- if let Ok(content) = fs::read_to_string(&package_json) {
88
- if let Ok(json) = serde_json::from_str::<Value>(&content) {
89
- // Check if script exists
90
- if let Some(scripts) = json.get("scripts").and_then(|s| s.as_object()) {
91
- if scripts.contains_key(script_name) {
92
- // In a full implementation, we'd parse the script to find dependencies
93
- // For now, we'll extract direct dependencies
94
- if let Some(deps) = json.get("dependencies").and_then(|d| d.as_object()) {
95
- for (name, version) in deps {
96
- if let Some(ver_str) = version.as_str() {
97
- affected_packages.push(format!("{}@{}", name, ver_str));
98
- }
99
- }
100
- }
101
- }
102
- }
103
- }
104
- }
105
-
106
- affected_packages
107
- }