beswarm 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. beswarm/queries/tree-sitter-language-pack/README.md +7 -0
  2. beswarm/queries/tree-sitter-language-pack/arduino-tags.scm +5 -0
  3. beswarm/queries/tree-sitter-language-pack/c-tags.scm +9 -0
  4. beswarm/queries/tree-sitter-language-pack/chatito-tags.scm +16 -0
  5. beswarm/queries/tree-sitter-language-pack/commonlisp-tags.scm +122 -0
  6. beswarm/queries/tree-sitter-language-pack/cpp-tags.scm +15 -0
  7. beswarm/queries/tree-sitter-language-pack/csharp-tags.scm +26 -0
  8. beswarm/queries/tree-sitter-language-pack/d-tags.scm +26 -0
  9. beswarm/queries/tree-sitter-language-pack/dart-tags.scm +92 -0
  10. beswarm/queries/tree-sitter-language-pack/elisp-tags.scm +5 -0
  11. beswarm/queries/tree-sitter-language-pack/elixir-tags.scm +54 -0
  12. beswarm/queries/tree-sitter-language-pack/elm-tags.scm +19 -0
  13. beswarm/queries/tree-sitter-language-pack/gleam-tags.scm +41 -0
  14. beswarm/queries/tree-sitter-language-pack/go-tags.scm +42 -0
  15. beswarm/queries/tree-sitter-language-pack/java-tags.scm +20 -0
  16. beswarm/queries/tree-sitter-language-pack/javascript-tags.scm +88 -0
  17. beswarm/queries/tree-sitter-language-pack/lua-tags.scm +34 -0
  18. beswarm/queries/tree-sitter-language-pack/pony-tags.scm +39 -0
  19. beswarm/queries/tree-sitter-language-pack/properties-tags.scm +5 -0
  20. beswarm/queries/tree-sitter-language-pack/python-tags.scm +14 -0
  21. beswarm/queries/tree-sitter-language-pack/r-tags.scm +21 -0
  22. beswarm/queries/tree-sitter-language-pack/racket-tags.scm +12 -0
  23. beswarm/queries/tree-sitter-language-pack/ruby-tags.scm +64 -0
  24. beswarm/queries/tree-sitter-language-pack/rust-tags.scm +60 -0
  25. beswarm/queries/tree-sitter-language-pack/solidity-tags.scm +43 -0
  26. beswarm/queries/tree-sitter-language-pack/swift-tags.scm +51 -0
  27. beswarm/queries/tree-sitter-language-pack/udev-tags.scm +20 -0
  28. beswarm/queries/tree-sitter-languages/README.md +23 -0
  29. beswarm/queries/tree-sitter-languages/c-tags.scm +9 -0
  30. beswarm/queries/tree-sitter-languages/c_sharp-tags.scm +46 -0
  31. beswarm/queries/tree-sitter-languages/cpp-tags.scm +15 -0
  32. beswarm/queries/tree-sitter-languages/dart-tags.scm +91 -0
  33. beswarm/queries/tree-sitter-languages/elisp-tags.scm +8 -0
  34. beswarm/queries/tree-sitter-languages/elixir-tags.scm +54 -0
  35. beswarm/queries/tree-sitter-languages/elm-tags.scm +19 -0
  36. beswarm/queries/tree-sitter-languages/go-tags.scm +30 -0
  37. beswarm/queries/tree-sitter-languages/hcl-tags.scm +77 -0
  38. beswarm/queries/tree-sitter-languages/java-tags.scm +20 -0
  39. beswarm/queries/tree-sitter-languages/javascript-tags.scm +88 -0
  40. beswarm/queries/tree-sitter-languages/kotlin-tags.scm +27 -0
  41. beswarm/queries/tree-sitter-languages/ocaml-tags.scm +115 -0
  42. beswarm/queries/tree-sitter-languages/php-tags.scm +26 -0
  43. beswarm/queries/tree-sitter-languages/python-tags.scm +12 -0
  44. beswarm/queries/tree-sitter-languages/ql-tags.scm +26 -0
  45. beswarm/queries/tree-sitter-languages/ruby-tags.scm +64 -0
  46. beswarm/queries/tree-sitter-languages/rust-tags.scm +60 -0
  47. beswarm/queries/tree-sitter-languages/scala-tags.scm +65 -0
  48. beswarm/queries/tree-sitter-languages/typescript-tags.scm +41 -0
  49. beswarm/tools/__init__.py +13 -0
  50. beswarm/tools/edit_file.py +162 -0
  51. beswarm/tools/planner.py +33 -0
  52. beswarm/tools/repomap.py +1289 -0
  53. beswarm/tools/search_arxiv.py +206 -0
  54. beswarm/tools/think.py +40 -0
  55. beswarm/tools/worker.py +118 -0
  56. {beswarm-0.1.2.dist-info → beswarm-0.1.3.dist-info}/METADATA +1 -1
  57. beswarm-0.1.3.dist-info/RECORD +60 -0
  58. beswarm-0.1.2.dist-info/RECORD +0 -5
  59. {beswarm-0.1.2.dist-info → beswarm-0.1.3.dist-info}/WHEEL +0 -0
  60. {beswarm-0.1.2.dist-info → beswarm-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1289 @@
1
+ import os
2
+ import math
3
+ import time
4
+ import random
5
+ import shutil
6
+ import sqlite3
7
+ import colorsys
8
+ import warnings
9
+ from pathlib import Path
10
+ from collections import Counter, defaultdict, namedtuple
11
+
12
+ from aient.plugins import register_tool
13
+
14
+ from tqdm import tqdm
15
+ from diskcache import Cache
16
+ from grep_ast import TreeContext, filename_to_lang
17
+ from pygments.token import Token
18
+ from pygments.lexers import guess_lexer_for_filename
19
+
20
# Well-known project-level files, expressed as paths relative to the
# repository root. Each path appears exactly once.
ROOT_IMPORTANT_FILES = [
    # Version Control
    ".gitignore",
    ".gitattributes",
    # Documentation
    "README",
    "README.md",
    "README.txt",
    "README.rst",
    "CONTRIBUTING",
    "CONTRIBUTING.md",
    "CONTRIBUTING.txt",
    "CONTRIBUTING.rst",
    "LICENSE",
    "LICENSE.md",
    "LICENSE.txt",
    "CHANGELOG",
    "CHANGELOG.md",
    "CHANGELOG.txt",
    "CHANGELOG.rst",
    "SECURITY",
    "SECURITY.md",
    "SECURITY.txt",
    "CODEOWNERS",
    # Package Management and Dependencies
    "requirements.txt",
    "Pipfile",
    "Pipfile.lock",
    "pyproject.toml",
    "setup.py",
    "setup.cfg",
    "package.json",
    "package-lock.json",
    "yarn.lock",
    "npm-shrinkwrap.json",
    "Gemfile",
    "Gemfile.lock",
    "composer.json",
    "composer.lock",
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    "build.sbt",
    "go.mod",
    "go.sum",
    "Cargo.toml",
    "Cargo.lock",
    "mix.exs",
    "rebar.config",
    "project.clj",
    "Podfile",
    "Cartfile",
    "dub.json",
    "dub.sdl",
    # Configuration and Settings
    ".env",
    ".env.example",
    ".editorconfig",
    "tsconfig.json",
    "jsconfig.json",
    ".babelrc",
    "babel.config.js",
    ".eslintrc",
    ".eslintignore",
    ".prettierrc",
    ".stylelintrc",
    "tslint.json",
    ".pylintrc",
    ".flake8",
    ".rubocop.yml",
    ".scalafmt.conf",
    ".dockerignore",
    ".gitpod.yml",
    "sonar-project.properties",
    "renovate.json",
    "dependabot.yml",
    ".pre-commit-config.yaml",
    "mypy.ini",
    "tox.ini",
    ".yamllint",
    "pyrightconfig.json",
    # Build and Compilation
    "webpack.config.js",
    "rollup.config.js",
    "parcel.config.js",
    "gulpfile.js",
    "Gruntfile.js",
    "build.xml",
    "build.boot",
    "project.json",
    "build.cake",
    "MANIFEST.in",
    # Testing
    "pytest.ini",
    "phpunit.xml",
    "karma.conf.js",
    "jest.config.js",
    "cypress.json",
    ".nycrc",
    ".nycrc.json",
    # CI/CD
    ".travis.yml",
    ".gitlab-ci.yml",
    "Jenkinsfile",
    "azure-pipelines.yml",
    "bitbucket-pipelines.yml",
    "appveyor.yml",
    "circle.yml",
    ".circleci/config.yml",
    ".github/dependabot.yml",
    "codecov.yml",
    ".coveragerc",
    # Docker and Containers
    "Dockerfile",
    "docker-compose.yml",
    "docker-compose.override.yml",
    # Cloud and Serverless
    "serverless.yml",
    "firebase.json",
    "now.json",
    "netlify.toml",
    "vercel.json",
    "app.yaml",
    "terraform.tf",
    "main.tf",
    "cloudformation.yaml",
    "cloudformation.json",
    "ansible.cfg",
    "kubernetes.yaml",
    "k8s.yaml",
    # Database
    "schema.sql",
    "liquibase.properties",
    "flyway.conf",
    # Framework-specific
    "next.config.js",
    "nuxt.config.js",
    "vue.config.js",
    "angular.json",
    "gatsby-config.js",
    "gridsome.config.js",
    # API Documentation
    "swagger.yaml",
    "swagger.json",
    "openapi.yaml",
    "openapi.json",
    # Development environment
    ".nvmrc",
    ".ruby-version",
    ".python-version",
    "Vagrantfile",
    # Quality and metrics ("codecov.yml" is already listed under CI/CD)
    ".codeclimate.yml",
    # Documentation tooling
    "mkdocs.yml",
    "_config.yml",
    "book.toml",
    "readthedocs.yml",
    ".readthedocs.yaml",
    # Package registries
    ".npmrc",
    ".yarnrc",
    # Linting and formatting
    ".isort.cfg",
    ".markdownlint.json",
    ".markdownlint.yaml",
    # Security
    ".bandit",
    ".secrets.baseline",
    # Misc
    ".pypirc",
    ".gitkeep",
    ".npmignore",
]


# Normalize the paths once so membership tests use the platform's separators.
NORMALIZED_ROOT_IMPORTANT_FILES = {os.path.normpath(path) for path in ROOT_IMPORTANT_FILES}
199
+
200
+
201
def is_important(file_path):
    """Return True when *file_path* names a commonly important project file.

    A path qualifies when it is a GitHub Actions workflow (a ``.yml`` or
    ``.yaml`` file directly inside ``.github/workflows``) or when its
    normalized form is one of ``NORMALIZED_ROOT_IMPORTANT_FILES``.

    :param file_path: Path to check, relative to the repository root.
    :return: True if the path matches, False otherwise.
    """
    file_name = os.path.basename(file_path)
    dir_name = os.path.normpath(os.path.dirname(file_path))
    normalized_path = os.path.normpath(file_path)

    # GitHub Actions accepts both extensions for workflow files.
    if dir_name == os.path.normpath(".github/workflows") and file_name.endswith(
        (".yml", ".yaml")
    ):
        return True

    return normalized_path in NORMALIZED_ROOT_IMPORTANT_FILES
211
+
212
+
213
def filter_important_files(file_paths):
    """
    Keep only the paths that ``is_important`` accepts, preserving input order.

    :param file_paths: Iterable of file paths to check
    :return: New list holding the paths recognised as important
    """
    return [candidate for candidate in file_paths if is_important(candidate)]
221
+
222
+ import os
223
+ import base64
224
+ from pathlib import Path
225
+
226
# Extensions treated as image-like content (PDF is included); all lowercase.
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp", ".pdf"}


def is_image_file(file_name):
    """
    Check if the given file name has an image file extension.

    The comparison ignores case, so ``photo.PNG`` matches just like
    ``photo.png``.

    :param file_name: The name (or path object) of the file to check.
    :return: True if the file is an image, False otherwise.
    """
    # str() lets callers pass pathlib.Path objects as well as plain strings.
    return str(file_name).lower().endswith(tuple(IMAGE_EXTENSIONS))
237
+
238
def ensure_hash_prefix(color):
    """Return *color* with a leading ``#`` when it is a bare 3- or 6-digit hex value.

    Anything else (empty values, non-strings, named colors, values that
    already carry a ``#``) is returned unchanged.
    """
    hex_digits = frozenset("0123456789ABCDEFabcdef")
    is_bare_hex = (
        isinstance(color, str)
        and len(color) in (3, 6)
        and hex_digits.issuperset(color)
    )
    return f"#{color}" if is_bare_hex else color
247
+
248
class InputOutput:
    """Holds display/encoding settings and reads files on behalf of tools.

    The class stores the colour and history options it is given, reads text
    and image files from disk, and reports problems through ``tool_error``,
    ``tool_warning`` and ``tool_output``, which print the message.
    """

    num_error_outputs = 0
    num_user_asks = 0
    clipboard_watcher = None
    bell_on_next_input = False
    notifications_command = None

    def __init__(
        self,
        pretty=True,
        yes=None,
        input_history_file=None,
        chat_history_file=None,
        input=None,
        output=None,
        user_input_color="blue",
        tool_output_color=None,
        tool_error_color="red",
        tool_warning_color="#FFA500",
        assistant_output_color="blue",
        completion_menu_color=None,
        completion_menu_bg_color=None,
        completion_menu_current_color=None,
        completion_menu_current_bg_color=None,
        code_theme="default",
        encoding="utf-8",
        line_endings="platform",
        dry_run=False,
        llm_history_file=None,
        fancy_input=True,
        file_watcher=None,
        multiline_mode=False,
        root=".",
        notifications=False,
        notifications_command=None,
    ):
        """Store the supplied options on the instance.

        Colours are only kept when ``pretty`` is true; a non-empty
        ``NO_COLOR`` environment variable or an explicit ``output`` turns
        ``pretty`` off.

        :raises ValueError: if ``line_endings`` is not ``"platform"``,
            ``"lf"`` or ``"crlf"``.
        """
        self.placeholder = None
        self.interrupted = False
        self.never_prompts = set()
        self.multiline_mode = multiline_mode
        self.bell_on_next_input = False
        self.notifications = notifications
        if notifications and notifications_command is None:
            self.notifications_command = self.get_default_notification_command()
        else:
            self.notifications_command = notifications_command

        no_color = os.environ.get("NO_COLOR")
        if no_color is not None and no_color != "":
            pretty = False

        self.user_input_color = ensure_hash_prefix(user_input_color) if pretty else None
        self.tool_output_color = ensure_hash_prefix(tool_output_color) if pretty else None
        self.tool_error_color = ensure_hash_prefix(tool_error_color) if pretty else None
        self.tool_warning_color = ensure_hash_prefix(tool_warning_color) if pretty else None
        # NOTE(review): unlike its siblings this colour is kept even when
        # pretty is off — left as-is; confirm whether that is intentional.
        self.assistant_output_color = ensure_hash_prefix(assistant_output_color)
        self.completion_menu_color = ensure_hash_prefix(completion_menu_color) if pretty else None
        self.completion_menu_bg_color = (
            ensure_hash_prefix(completion_menu_bg_color) if pretty else None
        )
        self.completion_menu_current_color = (
            ensure_hash_prefix(completion_menu_current_color) if pretty else None
        )
        self.completion_menu_current_bg_color = (
            ensure_hash_prefix(completion_menu_current_bg_color) if pretty else None
        )

        self.code_theme = code_theme

        self.input = input
        self.output = output

        self.pretty = pretty
        if self.output:
            self.pretty = False

        self.yes = yes

        self.input_history_file = input_history_file
        self.llm_history_file = llm_history_file
        if chat_history_file is not None:
            self.chat_history_file = Path(chat_history_file)
        else:
            self.chat_history_file = None

        self.encoding = encoding
        valid_line_endings = {"platform", "lf", "crlf"}
        if line_endings not in valid_line_endings:
            # sorted() keeps the message stable; set order varies between runs.
            raise ValueError(
                f"Invalid line_endings value: {line_endings}. "
                f"Must be one of: {', '.join(sorted(valid_line_endings))}"
            )
        self.newline = (
            None if line_endings == "platform" else "\n" if line_endings == "lf" else "\r\n"
        )
        self.dry_run = dry_run

        self.prompt_session = None

        self.file_watcher = file_watcher
        self.root = root

    def get_default_notification_command(self):
        """Return the default notification command; none is configured here.

        Exists so that ``notifications=True`` without an explicit command
        does not fail with ``AttributeError`` during construction.
        """
        return None

    def tool_output(self, *messages):
        """Print informational *messages*."""
        print(*messages)

    def tool_error(self, message=""):
        """Print an error *message* and count it in ``num_error_outputs``."""
        self.num_error_outputs += 1
        print(message)

    def tool_warning(self, message=""):
        """Print a warning *message*."""
        print(message)

    def read_image(self, filename):
        """Return the file's bytes as a base64 string, or None on failure.

        Failures are reported through ``tool_error``.
        """
        try:
            with open(str(filename), "rb") as image_file:
                encoded_string = base64.b64encode(image_file.read())
                return encoded_string.decode("utf-8")
        # The specific OSError subclasses must be listed before OSError
        # itself, otherwise their handlers can never run.
        except FileNotFoundError:
            self.tool_error(f"{filename}: file not found error")
            return
        except IsADirectoryError:
            self.tool_error(f"{filename}: is a directory")
            return
        except OSError as err:
            self.tool_error(f"{filename}: unable to read: {err}")
            return
        except Exception as e:
            self.tool_error(f"{filename}: {e}")
            return

    def read_text(self, filename, silent=False):
        """Return the file's text decoded with ``self.encoding``.

        Files with an image extension are returned base64-encoded via
        ``read_image``. On failure None is returned; the problem is reported
        through ``tool_error`` unless ``silent`` is true.
        """
        if is_image_file(filename):
            return self.read_image(filename)

        try:
            with open(str(filename), "r", encoding=self.encoding) as f:
                return f.read()
        except FileNotFoundError:
            if not silent:
                self.tool_error(f"{filename}: file not found error")
            return
        except IsADirectoryError:
            if not silent:
                self.tool_error(f"{filename}: is a directory")
            return
        except OSError as err:
            if not silent:
                self.tool_error(f"{filename}: unable to read: {err}")
            return
        except UnicodeError as e:
            if not silent:
                self.tool_error(f"{filename}: {e}")
                self.tool_error("Use --encoding to set the unicode encoding.")
            return
394
+
395
+
396
+ # tree_sitter is throwing a FutureWarning
397
+ warnings.simplefilter("ignore", category=FutureWarning)
398
+ from grep_ast.tsl import USING_TSL_PACK, get_language, get_parser # noqa: E402
399
+
400
# Record describing one definition or reference found in a source file.
Tag = namedtuple("Tag", ["rel_fname", "fname", "line", "name", "kind"])


# Exceptions that are treated as "the on-disk tags cache is unusable".
SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)


# The cache version depends on which tree-sitter language bundle is in use.
CACHE_VERSION = 4 if USING_TSL_PACK else 3
409
+
410
+
411
+ class RepoMap:
412
+ TAGS_CACHE_DIR = f".beswarm.tags.cache.v{CACHE_VERSION}"
413
+
414
+ warned_files = set()
415
+
416
def __init__(
    self,
    map_tokens=8192,
    root=None,
    main_model=None,
    io=None,
    repo_content_prefix=None,
    verbose=False,
    max_context_window=None,
    map_mul_no_files=8,
    refresh="auto",
):
    """Record the configuration, open the tags cache and reset the in-memory caches.

    ``root`` falls back to the current working directory when not given.
    """
    # Reporting, verbosity and the root must be in place before the cache
    # is opened, because the cache-loading path reads them.
    self.io = io
    self.verbose = verbose
    self.refresh = refresh
    self.root = root or os.getcwd()

    self.load_tags_cache()
    self.cache_threshold = 0.95

    # Token budget settings.
    self.max_map_tokens = map_tokens
    self.map_mul_no_files = map_mul_no_files
    self.max_context_window = max_context_window

    self.repo_content_prefix = repo_content_prefix
    self.main_model = main_model

    # Per-instance caches and bookkeeping for generated maps.
    self.tree_cache = {}
    self.tree_context_cache = {}
    self.map_cache = {}
    self.map_processing_time = 0
    self.last_map = None

    if not self.verbose:
        return
    self.io.tool_output(
        f"RepoMap initialized with map_mul_no_files: {self.map_mul_no_files}"
    )
457
+
458
def token_count(self, text):
    """Estimate the number of tokens in *text* as one token per four characters.

    The previous body contained a model-based sampling estimator after the
    first ``return``; it could never run and has been removed.

    :param text: The string to measure.
    :return: ``len(text) / 4`` as a float.
    """
    return len(text) / 4
472
+
473
def get_repo_map(
    self,
    chat_files,
    other_files,
    mentioned_fnames=None,
    mentioned_idents=None,
    force_refresh=False,
):
    """Build the repo-map text for *other_files*, or return None.

    None is returned when the map is disabled (``max_map_tokens <= 0``),
    when there are no other files, when ranking hits a ``RecursionError``
    (which also disables the map), or when the listing comes back empty.
    Otherwise the result is the formatted ``repo_content_prefix`` (if any)
    followed by the ranked listing.
    """
    if self.max_map_tokens <= 0 or not other_files:
        return

    mentioned_fnames = mentioned_fnames or set()
    mentioned_idents = mentioned_idents or set()

    token_budget = self.max_map_tokens

    # With no files in the chat, give a bigger view of the entire repo
    padding = 4096
    target = (
        min(
            int(token_budget * self.map_mul_no_files),
            self.max_context_window - padding,
        )
        if token_budget and self.max_context_window
        else 0
    )
    if target > 0 and not chat_files:
        token_budget = target

    try:
        files_listing = self.get_ranked_tags_map(
            chat_files,
            other_files,
            token_budget,
            mentioned_fnames,
            mentioned_idents,
            force_refresh,
        )
    except RecursionError:
        self.io.tool_error("Disabling repo map, git repo too large?")
        self.max_map_tokens = 0
        return

    if not files_listing:
        return

    if self.verbose:
        num_tokens = self.token_count(files_listing)
        self.io.tool_output(f"Repo-map: {num_tokens / 1024:.1f} k-tokens")

    # The prefix template may mention "other" files when some are in the chat.
    other = "other " if chat_files else ""
    prefix = self.repo_content_prefix.format(other=other) if self.repo_content_prefix else ""
    return prefix + files_listing
538
+
539
def get_rel_fname(self, fname):
    """Return *fname* relative to ``self.root``, or *fname* itself if that is impossible."""
    try:
        relative = os.path.relpath(fname, self.root)
    except ValueError:
        # Issue #1288: ValueError: path is on mount 'C:', start on mount 'D:'
        # No relative form exists, so hand back the name unchanged.
        relative = fname
    return relative
546
+
547
def tags_cache_error(self, original_error=None):
    """Recover from a tags-cache failure.

    Does nothing when the cache is already an in-memory dict. Otherwise the
    on-disk cache directory is deleted and recreated; if the new cache
    passes a write/read/delete round trip it becomes ``self.TAGS_CACHE``,
    and if recreating it fails with one of ``SQLITE_ERRORS`` an empty dict
    is used instead.
    """
    if self.verbose and original_error:
        self.io.tool_warning(f"Tags cache error: {str(original_error)}")

    # Already running on the in-memory fallback: nothing left to repair.
    if isinstance(getattr(self, "TAGS_CACHE", None), dict):
        return

    cache_dir = Path(self.root) / self.TAGS_CACHE_DIR

    try:
        # Start from a clean directory.
        if cache_dir.exists():
            shutil.rmtree(cache_dir)

        fresh_cache = Cache(cache_dir)

        # Round-trip a throwaway key to prove the new cache is usable.
        probe_key = "test"
        fresh_cache[probe_key] = "test"
        _ = fresh_cache[probe_key]
        del fresh_cache[probe_key]
    except SQLITE_ERRORS as e:
        # If anything goes wrong, warn and fall back to dict
        self.io.tool_warning(
            f"Unable to use tags cache at {cache_dir}, falling back to memory cache"
        )
        if self.verbose:
            self.io.tool_warning(f"Cache recreation error: {str(e)}")
        self.TAGS_CACHE = dict()
    else:
        self.TAGS_CACHE = fresh_cache
586
+
587
def load_tags_cache(self):
    """Open the on-disk tags cache under the root, delegating failures to ``tags_cache_error``."""
    cache_dir = Path(self.root) / self.TAGS_CACHE_DIR
    try:
        opened = Cache(cache_dir)
    except SQLITE_ERRORS as err:
        self.tags_cache_error(err)
    else:
        self.TAGS_CACHE = opened
593
+
594
def save_tags_cache(self):
    """Do nothing; kept as a hook that ``get_tags`` calls after updating the cache."""
    return None
596
+
597
def get_mtime(self, fname):
    """Return the modification time of *fname* under the root, or None if it is missing.

    A missing file is reported through ``self.io.tool_warning``.
    """
    try:
        mtime = os.path.getmtime(Path(self.root, fname))
    except FileNotFoundError:
        self.io.tool_warning(f"File not found error: {fname}")
        return None
    return mtime
602
+
603
def get_tags(self, fname, rel_fname):
    """Return the tags for *fname*, using the tags cache when it is still fresh.

    A cached record is reused only when its stored ``mtime`` equals the
    file's current modification time; otherwise the tags are recomputed
    with ``get_tags_raw`` and written back to the cache.

    :param fname: File name as passed by the caller; also the cache key.
    :param rel_fname: Name forwarded to ``get_tags_raw``.
    :return: A list of tags; empty when the file's mtime cannot be read.
    """
    file_mtime = self.get_mtime(fname)
    if file_mtime is None:
        return []

    cache_key = fname
    try:
        val = self.TAGS_CACHE.get(cache_key)  # Issue #1308
    except SQLITE_ERRORS as e:
        self.tags_cache_error(e)
        val = self.TAGS_CACHE.get(cache_key)

    if val is not None and val.get("mtime") == file_mtime:
        # Serve the hit from the record already in hand. Re-reading the
        # cache here could fail, and a retry after a cache reset would
        # look the key up in an empty cache and raise KeyError.
        return val["data"]

    # miss!
    data = list(self.get_tags_raw(fname, rel_fname))

    # Update the cache
    entry = {"mtime": file_mtime, "data": data}
    try:
        self.TAGS_CACHE[cache_key] = entry
        self.save_tags_cache()
    except SQLITE_ERRORS as e:
        # Retry once against whatever cache the error handler installed.
        self.tags_cache_error(e)
        self.TAGS_CACHE[cache_key] = entry

    return data
635
+
636
def get_tags_raw(self, fname, rel_fname):
    """Yield ``Tag`` records for the definitions and references in *fname*.

    Tags come from running the language's tags query over the parsed file.
    When the query produced definitions but no references, every name token
    reported by the pygments lexer is yielded as a reference with
    ``line=-1``. Nothing is yielded when the language is unknown, the
    parser or query file is unavailable, or the file has no readable
    content.
    """
    abs_fname = str(self.root / Path(fname))

    lang = filename_to_lang(abs_fname)
    if not lang:
        return

    try:
        language = get_language(lang)
        parser = get_parser(lang)
    except Exception as err:
        print(f"Skipping file {fname}: {err}")
        return

    scm_path = get_scm_fname(lang)
    if not scm_path.exists():
        return
    query_text = scm_path.read_text()

    code = self.io.read_text(abs_fname)
    if not code:
        return
    tree = parser.parse(code.encode("utf-8"))

    # Run the tags queries
    captures = language.query(query_text).captures(tree.root_node)

    # The two bundles shape their results differently: one maps each tag
    # name to its nodes, the other is iterated as (node, tag) pairs.
    if USING_TSL_PACK:
        node_tag_pairs = []
        for tag, nodes in captures.items():
            node_tag_pairs.extend((node, tag) for node in nodes)
    else:
        node_tag_pairs = list(captures)

    kinds_seen = set()
    for node, tag in node_tag_pairs:
        if tag.startswith("name.definition."):
            kind = "def"
        elif tag.startswith("name.reference."):
            kind = "ref"
        else:
            continue

        kinds_seen.add(kind)

        yield Tag(
            rel_fname=rel_fname,
            fname=fname,
            name=node.text.decode("utf-8"),
            kind=kind,
            line=node.start_point[0],
        )

    # Backfilling only applies when definitions were seen without references.
    if "ref" in kinds_seen or "def" not in kinds_seen:
        return

    # We saw defs, without any refs
    # Some tags files only provide defs (cpp, for example)
    # Use pygments to backfill refs
    try:
        lexer = guess_lexer_for_filename(fname, code)
    except Exception:  # On Windows, bad ref to time.clock which is deprecated?
        return

    name_tokens = [text for token_type, text in list(lexer.get_tokens(code)) if token_type in Token.Name]

    for name in name_tokens:
        yield Tag(
            rel_fname=rel_fname,
            fname=fname,
            name=name,
            kind="ref",
            line=-1,
        )
720
+
721
def get_ranked_tags(
    self, chat_fnames, other_fnames, mentioned_fnames, mentioned_idents, progress=None
):
    """Rank definitions across the given files with personalized PageRank.

    Files are graph nodes. An edge runs from each file that references an
    identifier to each file that defines it, weighted by how often it is
    referenced and by heuristics on the identifier's name. Each file's rank
    is then shared out along its outgoing edges to score individual
    ``(file, identifier)`` definitions.

    :param chat_fnames: Files in the chat; their definitions are left out
        of the result, and references made from them are weighted up.
    :param other_fnames: The remaining files to consider.
    :param mentioned_fnames: Relative file names to boost.
    :param mentioned_idents: Identifiers to boost; also matched against
        path components.
    :param progress: Optional zero-argument callable invoked periodically.
    :return: A list of ``Tag`` objects for ranked definitions, followed by
        1-tuples naming files that contributed no ranked tag. Empty when
        there are no files or PageRank cannot be computed.
    """
    fnames = sorted(set(chat_fnames).union(other_fnames))
    if not fnames:
        # Nothing to rank; this also avoids dividing by zero below.
        return []

    import networkx as nx

    defines = defaultdict(set)
    references = defaultdict(list)
    definitions = defaultdict(set)

    personalization = dict()
    chat_rel_fnames = set()

    # Default personalization for unspecified files is 1/num_nodes
    # https://networkx.org/documentation/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank
    personalize = 100 / len(fnames)

    try:
        cache_size = len(self.TAGS_CACHE)
    except SQLITE_ERRORS as e:
        self.tags_cache_error(e)
        cache_size = len(self.TAGS_CACHE)

    # Show a progress bar when many files are likely to miss the cache.
    if len(fnames) - cache_size > 100:
        fnames = tqdm(fnames, desc="Scanning repo")

    for fname in fnames:
        if self.verbose:
            self.io.tool_output(f"Processing {fname}")

        try:
            file_ok = (self.root / Path(fname)).is_file()
        except OSError:
            file_ok = False

        if not file_ok:
            continue

        rel_fname = self.get_rel_fname((self.root / Path(fname)))
        current_pers = 0.0  # Start with 0 personalization score

        if fname in chat_fnames:
            current_pers += personalize
            chat_rel_fnames.add(rel_fname)

        if rel_fname in mentioned_fnames:
            # Use max to avoid double counting if in chat_fnames and mentioned_fnames
            current_pers = max(current_pers, personalize)

        # Check path components against mentioned_idents
        path_obj = self.root / Path(rel_fname)
        path_components = set(path_obj.parts)
        basename_with_ext = path_obj.name
        basename_without_ext, _ = os.path.splitext(basename_with_ext)
        components_to_check = path_components.union({basename_with_ext, basename_without_ext})

        matched_idents = components_to_check.intersection(mentioned_idents)
        if matched_idents:
            # Add personalization *once* if any path component matches a mentioned ident
            current_pers += personalize

        if current_pers > 0:
            personalization[rel_fname] = current_pers  # Assign the final calculated value

        for tag in self.get_tags(fname, rel_fname):
            if tag.kind == "def":
                defines[tag.name].add(rel_fname)
                key = (rel_fname, tag.name)
                definitions[key].add(tag)

            elif tag.kind == "ref":
                references[tag.name].append(rel_fname)

    # With no references at all, treat every definition as its own reference.
    if not references:
        references = {k: list(v) for k, v in defines.items()}

    idents = set(defines).intersection(references)

    G = nx.MultiDiGraph()

    # Add a small self-edge for every definition that has no references
    # Helps with tree-sitter 0.23.2 with ruby, where "def greet(name)"
    # isn't counted as a def AND a ref. tree-sitter 0.24.0 does.
    for ident in defines.keys():
        if ident in references:
            continue
        for definer in defines[ident]:
            G.add_edge(definer, definer, weight=0.1, ident=ident)

    for ident in idents:
        if progress:
            progress()

        definers = defines[ident]

        mul = 1.0

        is_snake = ("_" in ident) and any(c.isalpha() for c in ident)
        is_camel = any(c.isupper() for c in ident) and any(c.islower() for c in ident)
        if ident in mentioned_idents:
            mul *= 10
        if (is_snake or is_camel) and len(ident) >= 8:
            mul *= 10
        if ident.startswith("_"):
            mul *= 0.1
        if len(defines[ident]) > 5:
            mul *= 0.1

        for referencer, num_refs in Counter(references[ident]).items():
            use_mul = mul
            if referencer in chat_rel_fnames:
                use_mul *= 50

            # scale down so high freq (low value) mentions don't dominate.
            # Computed once per referencer: doing it inside the definer loop
            # took the root again for every definer, making the weights
            # depend on set iteration order.
            scaled_refs = math.sqrt(num_refs)

            for definer in definers:
                G.add_edge(referencer, definer, weight=use_mul * scaled_refs, ident=ident)

    if personalization:
        pers_args = dict(personalization=personalization, dangling=personalization)
    else:
        pers_args = dict()

    try:
        ranked = nx.pagerank(G, weight="weight", **pers_args)
    except ZeroDivisionError:
        # Issue #1536
        try:
            ranked = nx.pagerank(G, weight="weight")
        except ZeroDivisionError:
            return []

    # distribute the rank from each source node, across all of its out edges
    ranked_definitions = defaultdict(float)
    for src in G.nodes:
        if progress:
            progress()

        src_rank = ranked[src]
        total_weight = sum(data["weight"] for _src, _dst, data in G.out_edges(src, data=True))
        for _src, dst, data in G.out_edges(src, data=True):
            data["rank"] = src_rank * data["weight"] / total_weight
            ident = data["ident"]
            ranked_definitions[(dst, ident)] += data["rank"]

    ranked_tags = []
    ranked_definitions = sorted(
        ranked_definitions.items(), reverse=True, key=lambda x: (x[1], x[0])
    )

    for (fname, ident), rank in ranked_definitions:
        if fname in chat_rel_fnames:
            continue
        ranked_tags += list(definitions.get((fname, ident), []))

    rel_other_fnames_without_tags = {
        self.get_rel_fname((self.root / Path(fname))) for fname in other_fnames
    }

    fnames_already_included = {rt[0] for rt in ranked_tags}

    # Files that were ranked but contributed no tag are listed by name only.
    top_rank = sorted([(rank, node) for (node, rank) in ranked.items()], reverse=True)
    for rank, fname in top_rank:
        if fname in rel_other_fnames_without_tags:
            rel_other_fnames_without_tags.remove(fname)
        if fname not in fnames_already_included:
            ranked_tags.append((fname,))

    # NOTE(review): these leftover entries are root-joined paths, whereas the
    # entries above are relative names — kept as-is; confirm this is intended.
    for fname in rel_other_fnames_without_tags:
        ranked_tags.append((str(self.root / Path(fname)),))

    return ranked_tags
938
+
939
def get_ranked_tags_map(
    self,
    chat_fnames,
    other_fnames=None,
    max_map_tokens=None,
    mentioned_fnames=None,
    mentioned_idents=None,
    force_refresh=False,
):
    """Return the ranked tags map, reusing a cached result when the refresh mode allows.

    Refresh modes (``self.refresh``), all bypassed by ``force_refresh``:
    ``"manual"`` returns the last map if there is one; ``"files"`` always
    consults the cache; ``"auto"`` consults it only when the previous
    build took more than a second; ``"always"`` never does. Every freshly
    built map is stored in the cache and remembered as the last map.
    """

    def frozen(names):
        # Hashable, order-independent form of a collection (None when empty).
        return tuple(sorted(names)) if names else None

    key_parts = [frozen(chat_fnames), frozen(other_fnames), max_map_tokens]
    if self.refresh == "auto":
        key_parts.extend([frozen(mentioned_fnames), frozen(mentioned_idents)])
    cache_key = tuple(key_parts)

    if not force_refresh:
        mode = self.refresh
        if mode == "manual" and self.last_map:
            return self.last_map

        use_cache = mode == "files" or (mode == "auto" and self.map_processing_time > 1.0)

        # Check if the result is in the cache
        if use_cache and cache_key in self.map_cache:
            return self.map_cache[cache_key]

    # Not cached (or a refresh was forced): build the map and time it.
    started = time.time()
    result = self.get_ranked_tags_map_uncached(
        chat_fnames, other_fnames, max_map_tokens, mentioned_fnames, mentioned_idents
    )
    self.map_processing_time = time.time() - started

    # Remember the result for later calls.
    self.map_cache[cache_key] = result
    self.last_map = result

    return result
994
+
995
def get_ranked_tags_map_uncached(
    self,
    chat_fnames,
    other_fnames=None,
    max_map_tokens=None,
    mentioned_fnames=None,
    mentioned_idents=None,
):
    """Build the repo map from scratch, sized to fit a token budget.

    Ranked tags are obtained from ``get_ranked_tags``; entries chosen by
    ``filter_important_files`` that are not already present are put in
    front. A binary search over the number of leading tags then looks for
    the rendered tree whose token count is closest to ``max_map_tokens``
    without exceeding it (a result within 15% of the budget is accepted
    immediately, even if slightly over).

    Args:
        chat_fnames: File names currently in the chat.
        other_fnames: Remaining candidate file names; empty when falsy.
        max_map_tokens: Token budget; ``self.max_map_tokens`` when falsy.
        mentioned_fnames: Mentioned file names; empty set when falsy.
        mentioned_idents: Mentioned identifiers; empty set when falsy.

    Returns:
        The best rendered tree found, or ``None`` if no candidate
        qualified.
    """
    other_fnames = other_fnames or list()
    # NOTE(review): if the argument and self.max_map_tokens are both 0 the
    # error computation below divides by zero.
    max_map_tokens = max_map_tokens or self.max_map_tokens
    mentioned_fnames = mentioned_fnames or set()
    mentioned_idents = mentioned_idents or set()

    ranked_tags = self.get_ranked_tags(
        chat_fnames,
        other_fnames,
        mentioned_fnames,
        mentioned_idents,
    )

    # Put the "important" files first, skipping any that are already ranked.
    other_rel_fnames = sorted({self.get_rel_fname(fname) for fname in other_fnames})
    important_fnames = filter_important_files(other_rel_fnames)
    already_ranked = {tag[0] for tag in ranked_tags}
    leading_entries = [(fn,) for fn in important_fnames if fn not in already_ranked]
    ranked_tags = leading_entries + ranked_tags

    num_tags = len(ranked_tags)
    low, high = 0, num_tags
    best_tree, best_tree_tokens = None, 0

    chat_rel_fnames = {self.get_rel_fname(fname) for fname in chat_fnames}

    # Rendered snippets from earlier runs are discarded before searching.
    self.tree_cache = dict()

    ok_err = 0.15
    # Initial guess for how many tags fit; presumably ~25 tokens per tag.
    middle = min(int(max_map_tokens // 25), num_tags)
    while low <= high:
        tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames)
        num_tokens = self.token_count(tree)

        pct_err = abs(num_tokens - max_map_tokens) / max_map_tokens
        close_enough = pct_err < ok_err
        fits_and_improves = best_tree_tokens < num_tokens <= max_map_tokens
        if fits_and_improves or close_enough:
            best_tree, best_tree_tokens = tree, num_tokens
            if close_enough:
                break

        # Standard bisection step on the number of tags to include.
        if num_tokens < max_map_tokens:
            low = middle + 1
        else:
            high = middle - 1

        middle = int((low + high) // 2)

    return best_tree
1073
+
1074
# Default (initially empty) cache of rendered snippets, keyed in render_tree by
# (rel_fname, sorted lines of interest, mtime).
tree_cache = {}
1075
+
1076
def render_tree(self, abs_fname, rel_fname, lois):
    """Render the given lines of interest of one file as a context snippet.

    Two caches are consulted: ``self.tree_cache`` holds finished renderings
    keyed by file, lines of interest and mtime, while
    ``self.tree_context_cache`` holds one ``TreeContext`` per file that is
    rebuilt whenever the file's mtime changes.

    Args:
        abs_fname: Path used to read the file and its mtime.
        rel_fname: Name used as cache key and passed to ``TreeContext``.
        lois: Iterable of lines of interest.

    Returns:
        The formatted snippet produced by ``TreeContext.format()``.
    """
    mtime = self.get_mtime(abs_fname)
    key = (rel_fname, tuple(sorted(lois)), mtime)

    if key in self.tree_cache:
        return self.tree_cache[key]

    entry = self.tree_context_cache.get(rel_fname)
    if entry is None or entry["mtime"] != mtime:
        # No context yet, or the file changed on disk: read and build it again.
        code = self.io.read_text(abs_fname) or ""
        if not code.endswith("\n"):
            code += "\n"

        entry = {
            "context": TreeContext(
                rel_fname,
                code,
                color=False,
                line_number=False,
                child_context=False,
                last_line=False,
                margin=0,
                mark_lois=False,
                loi_pad=0,
                show_top_of_file_parent_scope=False,
            ),
            "mtime": mtime,
        }
        self.tree_context_cache[rel_fname] = entry

    context = entry["context"]
    # The context object is reused across calls, so reset its selection first.
    context.lines_of_interest = set()
    context.add_lines_of_interest(lois)
    context.add_context()
    rendered = context.format()
    self.tree_cache[key] = rendered
    return rendered
1120
+
1121
def to_tree(self, tags, chat_rel_fnames):
    """Render ranked tags as text, grouped per file.

    Tags are processed in sorted order. Files whose name is in
    ``chat_rel_fnames`` are skipped. A file that contributed ``Tag``
    entries is written as ``<root>/<file>:`` followed by the snippet from
    ``render_tree``; a file represented only by a bare ``(fname,)`` tuple
    is written as its name alone. Every output line is cut to 100
    characters.

    Args:
        tags: Iterable of ``Tag`` objects and/or ``(fname,)`` tuples.
        chat_rel_fnames: Container of file names to leave out.

    Returns:
        The rendered text, or ``""`` when ``tags`` is empty.
    """
    if not tags:
        return ""

    pieces = []
    cur_fname = None
    cur_abs_fname = None
    lois = None

    # The trailing sentinel never matches a real file name, so it triggers the
    # "file changed" branch once more and flushes the last real file.
    for tag in [*sorted(tags), (None,)]:
        this_rel_fname = tag[0]
        if this_rel_fname in chat_rel_fnames:
            continue

        if this_rel_fname != cur_fname:
            if lois is not None:
                # Previous file had tagged lines: emit header plus snippet.
                pieces.append("\n")
                pieces.append(str(self.root / Path(cur_fname)) + ":\n")
                pieces.append(
                    self.render_tree(self.root / Path(cur_abs_fname), cur_fname, lois)
                )
                lois = None
            elif cur_fname:
                # Previous file had no tagged lines: emit just its name.
                pieces.append("\n" + cur_fname + "\n")
            if type(tag) is Tag:
                lois = []
                cur_abs_fname = tag.fname
            cur_fname = this_rel_fname

        if lois is not None:
            lois.append(tag.line)

    # truncate long lines, in case we get minified js or something else crazy
    clipped = [line[:100] for line in "".join(pieces).splitlines()]
    return "\n".join(clipped) + "\n"
1163
+
1164
+
1165
def find_src_files(directory):
    """Return every file path found below ``directory``.

    If ``directory`` is not an existing directory it is treated as a single
    file name and returned unchanged as a one-element list.
    """
    if not os.path.isdir(directory):
        return [directory]

    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
    ]
1174
+
1175
+
1176
def get_random_color():
    """Return a random ``#rrggbb`` color with full saturation and value 0.75."""
    hue = random.random()
    channels = colorsys.hsv_to_rgb(hue, 1, 0.75)
    return "#" + "".join(f"{int(channel * 255):02x}" for channel in channels)
1181
+
1182
+
1183
def get_scm_fname(lang):
    """Return the path of the tree-sitter tags query file for ``lang``.

    When ``USING_TSL_PACK`` is set, the ``tree-sitter-language-pack`` query
    is preferred, but only if that file exists. Otherwise the path inside
    ``tree-sitter-languages`` is returned *without* an existence check, so
    callers must test the result themselves.

    Args:
        lang: Language name used to form ``<lang>-tags.scm``.

    Returns:
        A ``pathlib.Path`` under the package's ``queries`` directory.
    """
    queries_dir = Path(__file__).parent.parent / "queries"
    scm_name = f"{lang}-tags.scm"

    if USING_TSL_PACK:
        path = queries_dir / "tree-sitter-language-pack" / scm_name
        if path.exists():
            return path

    # Fall back to tree-sitter-languages
    return queries_dir / "tree-sitter-languages" / scm_name
1212
+
1213
+
1214
def get_supported_languages_md():
    """Return a Markdown table of the languages known to ``grep_ast``.

    One row is produced per ``(language, extension)`` pair from ``PARSERS``,
    sorted by language. The "Repo map" column is ticked when the tags query
    file reported by ``get_scm_fname`` exists; the "Linter" column is always
    ticked.
    """
    from grep_ast.parsers import PARSERS

    header = """
| Language | File extension | Repo map | Linter |
|:--------:|:--------------:|:--------:|:------:|
"""
    rows = []
    for lang, ext in sorted((lang, ex) for ex, lang in PARSERS.items()):
        repo_map = "✓" if Path(get_scm_fname(lang)).exists() else ""
        linter_support = "✓"
        rows.append(f"| {lang:20} | {ext:20} | {repo_map:^8} | {linter_support:^6} |\n")

    return header + "".join(rows) + "\n"
1232
+
1233
def find_all_files(dir_path):
    """Return the paths of all files under ``dir_path``, relative to it.

    Directories named ``.git``, ``__pycache__``, ``.venv``, ``.env`` or
    ``node_modules`` are not descended into.
    """
    # Directories to exclude from the walk.
    excluded_dirs = {'.git', '__pycache__', '.venv', '.env', 'node_modules'}
    other_fnames = []
    for root, dirs, files in os.walk(dir_path):
        # Prune in place so os.walk skips the excluded directories entirely.
        dirs[:] = [name for name in dirs if name not in excluded_dirs]
        other_fnames.extend(
            os.path.relpath(os.path.join(root, name), dir_path) for name in files
        )
    return other_fnames
1244
+
1245
+
1246
@register_tool()
def get_code_repo_map(dir_path):
    """
    Get a structural map or summary of the specified code repository. This tool
    must be used first when investigating a code repository.

    The tool analyses the code repository under the given directory, scans the
    source files, identifies key definitions (such as functions and classes)
    and the references between them, and produces a summary of the code
    structure ranked by importance.
    This helps to quickly understand how a large code base is organised and
    what its core components are.

    Args:
        dir_path: str - Path of the root directory of the repository to analyse.

    Returns:
        str - A string containing the structural map or summary of the repository.
              If the directory is invalid or an error occurs during analysis,
              an error message or an empty string may be returned.
              The map usually contains the paths of important files and the
              most relevant code snippets (definitions) in those files.
    """
    # NOTE(review): the docstring above was translated from Chinese; if
    # register_tool exposes it at runtime (e.g. as a tool description),
    # confirm that the language change is acceptable.
    # NOTE(review): no validation or error handling happens here; any error
    # string or empty result would have to come from RepoMap itself.
    repo_mapper = RepoMap(root=dir_path, io=InputOutput())
    return repo_mapper.get_ranked_tags_map([], find_all_files(dir_path))
1267
+
1268
if __name__ == "__main__":
    # Print the repo map of the directory given as the first command-line
    # argument; without an argument the current directory is mapped.
    import sys

    target_dir = sys.argv[1] if len(sys.argv) > 1 else "."
    print(get_code_repo_map(target_dir))