@anysphere/file-service 0.0.0-e6124fba → 0.0.0-e68f3241

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/file_utils.rs DELETED
@@ -1,328 +0,0 @@
1
- // what methods do i want to support here.
2
- // 1. isInBadDir
3
- // 2. isBadFile
4
- // 3. vscode.workspace.asRelativePath
5
- // 4. vscode.fs.stat
6
-
7
- use anyhow::Error;
8
- use encoding_rs::UTF_8;
9
- use std::path::Path;
10
- use tokio::fs;
11
-
12
- pub fn is_in_bad_dir(file_path: &Path) -> Result<bool, Error> {
13
- let item_path = file_path
14
- .to_str()
15
- .ok_or(anyhow::anyhow!("Failed to convert path to string"))?;
16
- let is_bad_dir =
17
- item_path.contains("node_modules") || item_path.contains(".git");
18
- Ok(is_bad_dir)
19
- }
20
-
21
- pub fn is_good_file(file_path: &Path) -> Result<(), Error> {
22
- let item_path = file_path
23
- .to_str()
24
- .ok_or(anyhow::anyhow!("Failed to convert path to string"))?;
25
-
26
- let path = Path::new(item_path);
27
- let file_name = path
28
- .file_name()
29
- .ok_or(anyhow::anyhow!("Failed to get file name"))?
30
- .to_str()
31
- .ok_or(anyhow::anyhow!("Failed to convert file name to string"))?;
32
-
33
- let extension = path
34
- .extension()
35
- .ok_or(anyhow::anyhow!("Failed to get extension"))?
36
- .to_str()
37
- .ok_or(anyhow::anyhow!("Failed to convert extension to string"))?;
38
-
39
- match file_name {
40
- "package-lock.json" | "pnpm-lock.yaml" | "yarn.lock" | "composer.lock"
41
- | "Gemfile.lock" | "bun.lockb" => {
42
- return Err(anyhow::anyhow!("File is just a lock file"));
43
- }
44
- _ => {}
45
- }
46
-
47
- let bad_extensions = vec![
48
- "lock",
49
- "bak",
50
- "tmp",
51
- "bin",
52
- "exe",
53
- "dll",
54
- "so",
55
- "lockb",
56
- "qwoff",
57
- "isl",
58
- "csv",
59
- "pdf",
60
- // add ms word, excel, powerpoint, etc.
61
- "doc",
62
- "docx",
63
- "xls",
64
- "xlsx",
65
- "ppt",
66
- "pptx",
67
- "odt",
68
- "ods",
69
- "odp",
70
- "odg",
71
- "odf",
72
- "sxw",
73
- "sxc",
74
- "sxi",
75
- "sxd",
76
- "sdc",
77
- // add images
78
- "jpg",
79
- "jpeg",
80
- "png",
81
- "gif",
82
- "bmp",
83
- "tif",
84
- // add audio
85
- "mp3",
86
- "wav",
87
- "wma",
88
- "ogg",
89
- "flac",
90
- "aac",
91
- // add video
92
- "mp4",
93
- "mov",
94
- "wmv",
95
- "flv",
96
- "avi",
97
- // add archives
98
- "zip",
99
- "tar",
100
- "gz",
101
- "7z",
102
- "rar",
103
- "tgz",
104
- "dmg",
105
- "iso",
106
- "cue",
107
- "mdf",
108
- "mds",
109
- "vcd",
110
- "toast",
111
- "img",
112
- "apk",
113
- "msi",
114
- "cab",
115
- "tar.gz",
116
- "tar.xz",
117
- "tar.bz2",
118
- "tar.lzma",
119
- "tar.Z",
120
- "tar.sz",
121
- "lzma",
122
- // add fonts
123
- "ttf",
124
- "otf",
125
- "woff",
126
- "woff2",
127
- "eot",
128
- ];
129
- match bad_extensions.contains(&extension) {
130
- true => {
131
- return Err(anyhow::anyhow!("File is just a lock file"));
132
- }
133
- _ => {}
134
- }
135
-
136
- if item_path.contains(".git")
137
- || item_path.contains(".svn")
138
- || item_path.contains(".hg")
139
- {
140
- return Err(anyhow::anyhow!("File is just a lock file"));
141
- }
142
-
143
- let bad_extensions = vec![".exe", ".dll", ".so", ".o", ".bin"];
144
- match Path::new(item_path).extension() {
145
- Some(extension) => match extension.to_str() {
146
- Some(ext_str) => {
147
- if bad_extensions.contains(&ext_str) {
148
- return Err(anyhow::anyhow!("Binary file excluded from indexing."));
149
- }
150
- }
151
- None => {
152
- return Err(anyhow::anyhow!("Failed to convert extension to string"))
153
- }
154
- },
155
- None => return Err(anyhow::anyhow!("Failed to get extension")),
156
- }
157
-
158
- // #[cfg(not(test))]
159
- // {
160
- let path = Path::new(item_path);
161
- for part in path.iter() {
162
- match part.to_str() {
163
- Some(s) if s.starts_with(".") => {
164
- return Err(anyhow::anyhow!("File is hidden"))
165
- }
166
- _ => {}
167
- }
168
- }
169
- // }
170
-
171
- Ok(())
172
- }
173
-
174
- // use binaryornot::is_binary;
175
- // use anyhow::Context;
176
- // implement the buffer above:
177
- pub async fn is_good_file_runtime_check(
178
- file_path: &Path,
179
- // _buffer: &[u8],
180
- ) -> Result<(), Error> {
181
- match get_file_size(file_path).await {
182
- Ok(size) if size > 2 * 1024 * 1024 => {
183
- return Err(anyhow::anyhow!("Buffer is too large"));
184
- }
185
- Err(e) => return Err(e),
186
- _ => {}
187
- }
188
-
189
- // if is_binary(file_path).context("Failed to check if file is binary")? {
190
- // return Err(anyhow::anyhow!("File is binary"));
191
- // }
192
-
193
- Ok(())
194
- }
195
-
196
- pub async fn read_string_without_bom(
197
- file_path: &Path,
198
- ) -> Result<String, Error> {
199
- let file_buffer = match fs::read(file_path).await {
200
- Ok(buffer) => buffer,
201
- Err(e) => {
202
- return Err(anyhow::anyhow!(
203
- "Failed to read file buffer: {}",
204
- e.to_string()
205
- ))
206
- }
207
- };
208
-
209
- let (cow, _) = UTF_8.decode_with_bom_removal(&file_buffer);
210
-
211
- Ok(cow.to_string())
212
- }
213
-
214
- pub fn as_relative_path(
215
- base_path: &Path,
216
- file_path: &Path,
217
- ) -> Result<String, Error> {
218
- let relative_path = file_path.strip_prefix(base_path)?;
219
- Ok(
220
- relative_path
221
- .to_str()
222
- .ok_or(anyhow::anyhow!("Failed to convert relative path to string"))?
223
- .to_string(),
224
- )
225
- }
226
-
227
- pub async fn get_file_size(file_path: &Path) -> Result<u64, Error> {
228
- let metadata = fs::metadata(file_path).await?;
229
-
230
- Ok(metadata.len())
231
- }
232
-
233
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;
    use tokio::io::AsyncWriteExt;

    /// Creates `path` with `contents` and flushes before returning.
    ///
    /// The explicit flush matters: without it the test may read or stat the
    /// file before the write has completed.
    async fn write_file(path: &Path, contents: &[u8]) {
        let mut file = fs::File::create(path).await.unwrap();
        file.write_all(contents).await.unwrap();
        file.flush().await.unwrap();
    }

    #[test]
    fn test_is_in_bad_dir() {
        assert!(is_in_bad_dir(Path::new("src/node_modules/test.rs")).unwrap());
        assert!(is_in_bad_dir(Path::new("src/.git/test.rs")).unwrap());
        assert!(!is_in_bad_dir(Path::new("src/test.rs")).unwrap());
    }

    #[test]
    fn test_is_good_file() {
        assert!(is_good_file(Path::new("src/test.rs")).is_ok());
        assert!(is_good_file(Path::new("src/test.exe")).is_err());
        assert!(is_good_file(Path::new("src/.hidden")).is_err());
    }

    #[tokio::test]
    async fn test_is_good_file_runtime_check() {
        let temp_dir = tempfile::tempdir().unwrap();

        let small_path = temp_dir.path().join("test_file");
        write_file(&small_path, b"Hello, world!").await;
        assert!(is_good_file_runtime_check(&small_path).await.is_ok());

        // One byte over the 2 MiB limit must be rejected.
        let large_path = temp_dir.path().join("large_file");
        write_file(&large_path, &vec![b'a'; 2 * 1024 * 1024 + 1]).await;
        assert!(is_good_file_runtime_check(&large_path).await.is_err());

        temp_dir.close().unwrap();
    }

    #[tokio::test]
    async fn test_bom_file() {
        const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
        const CONTENT: &str = "Hello, world!";

        let temp_dir = tempfile::tempdir().unwrap();
        let temp_file_path = temp_dir.path().join("test_file");
        write_file(&temp_file_path, &[&BOM[..], CONTENT.as_bytes()].concat()).await;

        // The BOM must be stripped, leaving exactly CONTENT.
        let file_contents = read_string_without_bom(&temp_file_path).await.unwrap();
        assert_eq!(CONTENT, file_contents);
    }

    #[test]
    fn test_as_relative_path() {
        let base_path = Path::new("/home/user/src");

        let inside = Path::new("/home/user/src/test.rs");
        assert_eq!(as_relative_path(base_path, inside).unwrap(), "test.rs");

        let outside = Path::new("/home/user/test.rs");
        assert!(as_relative_path(base_path, outside).is_err());
    }

    #[tokio::test]
    async fn test_get_file_size() {
        let temp_dir = tempfile::tempdir().unwrap();
        let temp_file_path = temp_dir.path().join("test_file.txt");
        write_file(&temp_file_path, b"Hello, world!").await;

        let size = get_file_size(&temp_file_path).await.unwrap();
        assert_eq!(size, 13);
        temp_dir.close().unwrap();
    }
}
package/src/git_utils.rs DELETED
@@ -1,355 +0,0 @@
1
- use std::collections::HashSet;
2
- use std::path::MAIN_SEPARATOR_STR;
3
- use std::process::Command;
4
-
5
/// Lists files and directories that git ignores in the workspace and in its
/// submodules.
///
/// Two commands are run inside `workspace_root_path`: `git ls-files` for the
/// top-level repository, and a `git submodule foreach` pipeline (run through
/// `sh -c`) that prefixes each submodule entry with the submodule path. A
/// command that exits unsuccessfully (for example because the directory is
/// not a repository) contributes nothing and is not an error.
///
/// Every `/` in git's output is replaced with the platform separator. When
/// `should_return_absolute_paths` is true each entry is joined onto the
/// workspace root and canonicalized; entries that cannot be canonicalized
/// are kept in their relative form.
///
/// # Errors
///
/// Returns an error if a command cannot be spawned (missing `git`/`sh`, or a
/// non-existent workspace directory) or if its output is not valid UTF-8.
pub fn list_ignored_files_and_directories(
    workspace_root_path: &str,
    should_return_absolute_paths: bool,
) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
    // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
    const SUBMODULE_LISTING: &str = "git submodule foreach --quiet 'git -C $toplevel/$path ls-files --others --ignored --exclude-standard --directory --no-empty-directory | (while read line; do echo $path/$line; done)'";

    let commands: [&[&str]; 2] = [
        &[
            "git",
            "ls-files",
            "--others",
            "--ignored",
            "--exclude-standard",
            "--directory",
            "--no-empty-directory",
        ],
        // The pipeline is shell syntax, so it must be handed to a shell;
        // passing it as the program name makes the spawn fail every time.
        &["sh", "-c", SUBMODULE_LISTING],
    ];

    let mut gitignored_files = HashSet::new();

    for command in commands {
        let output = Command::new(command[0])
            .args(&command[1..])
            .current_dir(workspace_root_path)
            .output()?;

        if !output.status.success() {
            continue;
        }

        let stdout = String::from_utf8(output.stdout)?;
        let entries = stdout
            .lines()
            .filter(|line| !line.is_empty())
            .map(|line| {
                let native = line.replace('/', MAIN_SEPARATOR_STR);
                if !should_return_absolute_paths {
                    return native;
                }

                match std::path::Path::new(workspace_root_path)
                    .join(&native)
                    .canonicalize()
                {
                    Ok(canonical) => canonical.to_string_lossy().into_owned(),
                    Err(_) => native,
                }
            });

        gitignored_files.extend(entries);
    }

    Ok(gitignored_files)
}
63
-
64
/// Lists the files git ignores in the workspace and in its submodules.
///
/// Runs `git ls-files` for the top-level repository and a shell pipeline
/// over `git submodule foreach` that prefixes each submodule entry with the
/// submodule path. Output of a command that exits unsuccessfully is skipped.
///
/// NOTE(review): despite the name, each line is stored exactly as the
/// command printed it; nothing here joins it onto `workspace_root_path`.
///
/// # Errors
///
/// Returns an error if a command cannot be spawned or if its output is not
/// valid UTF-8.
pub fn list_ignored_files_with_absolute_paths(
    workspace_root_path: &str,
) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
    let root_listing: &[&str] = &[
        "git",
        "ls-files",
        "--others",
        "--ignored",
        "--exclude-standard",
    ];
    // FIXME(sualeh): this is super sketchy and might totally break in like a bazillion ways. i dont like it.
    let submodule_listing: &[&str] = &[
        "sh",
        "-c",
        "git submodule foreach --quiet 'git ls-files --others --ignored --exclude-standard | sed \"s|^|$path/|\"'",
    ];

    let mut ignored = HashSet::new();

    for argv in [root_listing, submodule_listing] {
        let (program, arguments) = (argv[0], &argv[1..]);
        let output = Command::new(program)
            .args(arguments)
            .current_dir(workspace_root_path)
            .output()?;

        if !output.status.success() {
            continue;
        }

        let stdout = String::from_utf8(output.stdout)?;
        for line in stdout.lines() {
            if !line.is_empty() {
                ignored.insert(line.to_string());
            }
        }
    }

    Ok(ignored)
}
104
-
105
- pub async fn is_git_ignored(
106
- workspace_root_path: &str,
107
- file_path: &str,
108
- ) -> Result<bool, anyhow::Error> {
109
- let output = tokio::process::Command::new("git")
110
- .args(&["check-ignore", file_path])
111
- .current_dir(workspace_root_path)
112
- .output()
113
- .await?;
114
-
115
- Ok(output.status.success())
116
- }
117
-
118
- pub async fn is_git_directory(
119
- workspace_root_path: &str,
120
- ) -> Result<bool, anyhow::Error> {
121
- let output = tokio::process::Command::new("git")
122
- .args(&["rev-parse", "--is-inside-work-tree"])
123
- .current_dir(workspace_root_path)
124
- .output()
125
- .await?;
126
-
127
- Ok(output.status.success())
128
- }
129
-
130
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use std::path::Path;

    /// Runs `git <args>` inside `dir`, panicking only if git cannot be spawned.
    fn git(dir: &Path, args: &[&str]) -> std::process::Output {
        Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .unwrap()
    }

    /// Creates `path` containing `line` followed by a newline.
    fn write_line(path: &Path, line: &str) {
        let mut file = File::create(path).unwrap();
        writeln!(file, "{}", line).unwrap();
    }

    /// Returns the temporary directory's path as a `&str`.
    fn root_of(dir: &tempfile::TempDir) -> &str {
        dir.path().to_str().unwrap()
    }

    #[test]
    fn test_no_ignored_files() {
        let dir = tempfile::tempdir().unwrap();
        // Initialise the repository first so the listing runs against a real
        // (empty) repository rather than a plain directory.
        git(dir.path(), &["init"]);

        let gitignored_files =
            list_ignored_files_and_directories(root_of(&dir), false).unwrap();
        assert_eq!(gitignored_files.len(), 0);
    }

    #[test]
    fn test_one_ignored_file() {
        let dir = tempfile::tempdir().unwrap();
        write_line(&dir.path().join("ignored.txt"), "This is an ignored file.");
        write_line(&dir.path().join(".gitignore"), "ignored.txt");
        git(dir.path(), &["init"]);

        let gitignored_files =
            list_ignored_files_and_directories(root_of(&dir), false).unwrap();
        assert!(
            gitignored_files.contains("ignored.txt"),
            "ignored files: {:?}",
            gitignored_files
        );
    }

    #[test]
    fn test_multiple_ignored_files() {
        let dir = tempfile::tempdir().unwrap();
        write_line(&dir.path().join("ignored1.txt"), "This is an ignored file.");
        write_line(
            &dir.path().join("ignored2.txt"),
            "This is another ignored file.",
        );
        write_line(&dir.path().join(".gitignore"), "*.txt");
        git(dir.path(), &["init"]);

        let gitignored_files =
            list_ignored_files_and_directories(root_of(&dir), false).unwrap();
        assert!(
            gitignored_files.contains("ignored1.txt"),
            "ignored files: {:?}",
            gitignored_files
        );
        assert!(
            gitignored_files.contains("ignored2.txt"),
            "ignored files: {:?}",
            gitignored_files
        );
    }

    #[test]
    fn test_git_submodule_ignored_files() {
        let dir = tempfile::tempdir().unwrap();
        let submodule_path = dir.path().join("submodule");
        std::fs::create_dir(&submodule_path).unwrap();

        git(&submodule_path, &["init"]);
        write_line(&submodule_path.join("ignored.txt"), "This is an ignored file.");
        write_line(
            &submodule_path.join("ignored2.txt"),
            "This is another ignored file.",
        );
        write_line(&submodule_path.join(".gitignore"), "*.txt");

        git(dir.path(), &["init"]);

        // Commit inside the submodule so it can be registered in the parent.
        git(&submodule_path, &["add", "."]);
        // An explicit identity keeps the commit from failing on machines
        // without a global git user configuration.
        git(
            &submodule_path,
            &[
                "-c",
                "user.name=Test",
                "-c",
                "user.email=test@example.com",
                "commit",
                "-m",
                "initial commit",
            ],
        );
        git(dir.path(), &["submodule", "add", "./submodule"]);

        let gitignored_files =
            list_ignored_files_and_directories(root_of(&dir), false).unwrap();

        // The function converts `/` to the platform separator, so build the
        // expected entries the same way.
        for name in ["ignored.txt", "ignored2.txt"] {
            let expected = ["submodule", name].join(MAIN_SEPARATOR_STR);
            assert!(
                gitignored_files.contains(&expected),
                "missing {:?} in {:?}",
                expected,
                gitignored_files
            );
        }
    }

    #[test]
    fn test_multiple_ignored_files_in_current_dir() {
        // NOTE: depends on the test process running inside a git checkout
        // that contains more than one ignored entry.
        let gitignored_files =
            list_ignored_files_and_directories(".", false).unwrap();
        assert!(gitignored_files.len() > 1);

        // Print a sample of the ignored files.
        for file in gitignored_files.iter().take(11) {
            println!("ignored file: {:?}", file);
        }
    }

    #[tokio::test]
    async fn test_file_not_ignored() {
        let dir = tempfile::tempdir().unwrap();
        write_line(
            &dir.path().join("not_ignored.txt"),
            "This is not an ignored file.",
        );
        git(dir.path(), &["init"]);

        let is_ignored = is_git_ignored(root_of(&dir), "not_ignored.txt")
            .await
            .unwrap();
        assert!(!is_ignored);
    }

    #[tokio::test]
    async fn test_file_ignored() {
        let dir = tempfile::tempdir().unwrap();
        write_line(&dir.path().join("ignored.txt"), "This is an ignored file.");
        write_line(&dir.path().join(".gitignore"), "ignored.txt");
        git(dir.path(), &["init"]);

        let is_ignored = is_git_ignored(root_of(&dir), "ignored.txt")
            .await
            .unwrap();
        assert!(is_ignored);
    }

    #[tokio::test]
    async fn test_file_ignored_with_wildcard() {
        let dir = tempfile::tempdir().unwrap();
        write_line(&dir.path().join("ignored.txt"), "This is an ignored file.");
        write_line(&dir.path().join(".gitignore"), "*.txt");
        git(dir.path(), &["init"]);

        let is_ignored = is_git_ignored(root_of(&dir), "ignored.txt")
            .await
            .unwrap();
        assert!(is_ignored);
    }
}