aider-ce 0.88.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279)
  1. aider/__init__.py +20 -0
  2. aider/__main__.py +4 -0
  3. aider/_version.py +34 -0
  4. aider/analytics.py +258 -0
  5. aider/args.py +1056 -0
  6. aider/args_formatter.py +228 -0
  7. aider/change_tracker.py +133 -0
  8. aider/coders/__init__.py +36 -0
  9. aider/coders/agent_coder.py +2166 -0
  10. aider/coders/agent_prompts.py +104 -0
  11. aider/coders/architect_coder.py +48 -0
  12. aider/coders/architect_prompts.py +40 -0
  13. aider/coders/ask_coder.py +9 -0
  14. aider/coders/ask_prompts.py +35 -0
  15. aider/coders/base_coder.py +3613 -0
  16. aider/coders/base_prompts.py +87 -0
  17. aider/coders/chat_chunks.py +64 -0
  18. aider/coders/context_coder.py +53 -0
  19. aider/coders/context_prompts.py +75 -0
  20. aider/coders/editblock_coder.py +657 -0
  21. aider/coders/editblock_fenced_coder.py +10 -0
  22. aider/coders/editblock_fenced_prompts.py +143 -0
  23. aider/coders/editblock_func_coder.py +141 -0
  24. aider/coders/editblock_func_prompts.py +27 -0
  25. aider/coders/editblock_prompts.py +175 -0
  26. aider/coders/editor_diff_fenced_coder.py +9 -0
  27. aider/coders/editor_diff_fenced_prompts.py +11 -0
  28. aider/coders/editor_editblock_coder.py +9 -0
  29. aider/coders/editor_editblock_prompts.py +21 -0
  30. aider/coders/editor_whole_coder.py +9 -0
  31. aider/coders/editor_whole_prompts.py +12 -0
  32. aider/coders/help_coder.py +16 -0
  33. aider/coders/help_prompts.py +46 -0
  34. aider/coders/patch_coder.py +706 -0
  35. aider/coders/patch_prompts.py +159 -0
  36. aider/coders/search_replace.py +757 -0
  37. aider/coders/shell.py +37 -0
  38. aider/coders/single_wholefile_func_coder.py +102 -0
  39. aider/coders/single_wholefile_func_prompts.py +27 -0
  40. aider/coders/udiff_coder.py +429 -0
  41. aider/coders/udiff_prompts.py +115 -0
  42. aider/coders/udiff_simple.py +14 -0
  43. aider/coders/udiff_simple_prompts.py +25 -0
  44. aider/coders/wholefile_coder.py +144 -0
  45. aider/coders/wholefile_func_coder.py +134 -0
  46. aider/coders/wholefile_func_prompts.py +27 -0
  47. aider/coders/wholefile_prompts.py +65 -0
  48. aider/commands.py +2173 -0
  49. aider/copypaste.py +72 -0
  50. aider/deprecated.py +126 -0
  51. aider/diffs.py +128 -0
  52. aider/dump.py +29 -0
  53. aider/editor.py +147 -0
  54. aider/exceptions.py +115 -0
  55. aider/format_settings.py +26 -0
  56. aider/gui.py +545 -0
  57. aider/help.py +163 -0
  58. aider/help_pats.py +19 -0
  59. aider/helpers/__init__.py +9 -0
  60. aider/helpers/similarity.py +98 -0
  61. aider/history.py +180 -0
  62. aider/io.py +1608 -0
  63. aider/linter.py +304 -0
  64. aider/llm.py +55 -0
  65. aider/main.py +1415 -0
  66. aider/mcp/__init__.py +174 -0
  67. aider/mcp/server.py +149 -0
  68. aider/mdstream.py +243 -0
  69. aider/models.py +1313 -0
  70. aider/onboarding.py +429 -0
  71. aider/openrouter.py +129 -0
  72. aider/prompts.py +56 -0
  73. aider/queries/tree-sitter-language-pack/README.md +7 -0
  74. aider/queries/tree-sitter-language-pack/arduino-tags.scm +5 -0
  75. aider/queries/tree-sitter-language-pack/c-tags.scm +9 -0
  76. aider/queries/tree-sitter-language-pack/chatito-tags.scm +16 -0
  77. aider/queries/tree-sitter-language-pack/clojure-tags.scm +7 -0
  78. aider/queries/tree-sitter-language-pack/commonlisp-tags.scm +122 -0
  79. aider/queries/tree-sitter-language-pack/cpp-tags.scm +15 -0
  80. aider/queries/tree-sitter-language-pack/csharp-tags.scm +26 -0
  81. aider/queries/tree-sitter-language-pack/d-tags.scm +26 -0
  82. aider/queries/tree-sitter-language-pack/dart-tags.scm +92 -0
  83. aider/queries/tree-sitter-language-pack/elisp-tags.scm +5 -0
  84. aider/queries/tree-sitter-language-pack/elixir-tags.scm +54 -0
  85. aider/queries/tree-sitter-language-pack/elm-tags.scm +19 -0
  86. aider/queries/tree-sitter-language-pack/gleam-tags.scm +41 -0
  87. aider/queries/tree-sitter-language-pack/go-tags.scm +42 -0
  88. aider/queries/tree-sitter-language-pack/java-tags.scm +20 -0
  89. aider/queries/tree-sitter-language-pack/javascript-tags.scm +88 -0
  90. aider/queries/tree-sitter-language-pack/lua-tags.scm +34 -0
  91. aider/queries/tree-sitter-language-pack/matlab-tags.scm +10 -0
  92. aider/queries/tree-sitter-language-pack/ocaml-tags.scm +115 -0
  93. aider/queries/tree-sitter-language-pack/ocaml_interface-tags.scm +98 -0
  94. aider/queries/tree-sitter-language-pack/pony-tags.scm +39 -0
  95. aider/queries/tree-sitter-language-pack/properties-tags.scm +5 -0
  96. aider/queries/tree-sitter-language-pack/python-tags.scm +14 -0
  97. aider/queries/tree-sitter-language-pack/r-tags.scm +21 -0
  98. aider/queries/tree-sitter-language-pack/racket-tags.scm +12 -0
  99. aider/queries/tree-sitter-language-pack/ruby-tags.scm +64 -0
  100. aider/queries/tree-sitter-language-pack/rust-tags.scm +60 -0
  101. aider/queries/tree-sitter-language-pack/solidity-tags.scm +43 -0
  102. aider/queries/tree-sitter-language-pack/swift-tags.scm +51 -0
  103. aider/queries/tree-sitter-language-pack/udev-tags.scm +20 -0
  104. aider/queries/tree-sitter-languages/README.md +24 -0
  105. aider/queries/tree-sitter-languages/c-tags.scm +9 -0
  106. aider/queries/tree-sitter-languages/c_sharp-tags.scm +46 -0
  107. aider/queries/tree-sitter-languages/cpp-tags.scm +15 -0
  108. aider/queries/tree-sitter-languages/dart-tags.scm +91 -0
  109. aider/queries/tree-sitter-languages/elisp-tags.scm +8 -0
  110. aider/queries/tree-sitter-languages/elixir-tags.scm +54 -0
  111. aider/queries/tree-sitter-languages/elm-tags.scm +19 -0
  112. aider/queries/tree-sitter-languages/fortran-tags.scm +15 -0
  113. aider/queries/tree-sitter-languages/go-tags.scm +30 -0
  114. aider/queries/tree-sitter-languages/haskell-tags.scm +3 -0
  115. aider/queries/tree-sitter-languages/hcl-tags.scm +77 -0
  116. aider/queries/tree-sitter-languages/java-tags.scm +20 -0
  117. aider/queries/tree-sitter-languages/javascript-tags.scm +88 -0
  118. aider/queries/tree-sitter-languages/julia-tags.scm +60 -0
  119. aider/queries/tree-sitter-languages/kotlin-tags.scm +27 -0
  120. aider/queries/tree-sitter-languages/matlab-tags.scm +10 -0
  121. aider/queries/tree-sitter-languages/ocaml-tags.scm +115 -0
  122. aider/queries/tree-sitter-languages/ocaml_interface-tags.scm +98 -0
  123. aider/queries/tree-sitter-languages/php-tags.scm +26 -0
  124. aider/queries/tree-sitter-languages/python-tags.scm +12 -0
  125. aider/queries/tree-sitter-languages/ql-tags.scm +26 -0
  126. aider/queries/tree-sitter-languages/ruby-tags.scm +64 -0
  127. aider/queries/tree-sitter-languages/rust-tags.scm +60 -0
  128. aider/queries/tree-sitter-languages/scala-tags.scm +65 -0
  129. aider/queries/tree-sitter-languages/typescript-tags.scm +41 -0
  130. aider/queries/tree-sitter-languages/zig-tags.scm +3 -0
  131. aider/reasoning_tags.py +82 -0
  132. aider/repo.py +621 -0
  133. aider/repomap.py +1174 -0
  134. aider/report.py +260 -0
  135. aider/resources/__init__.py +3 -0
  136. aider/resources/model-metadata.json +776 -0
  137. aider/resources/model-settings.yml +2068 -0
  138. aider/run_cmd.py +133 -0
  139. aider/scrape.py +293 -0
  140. aider/sendchat.py +242 -0
  141. aider/sessions.py +256 -0
  142. aider/special.py +203 -0
  143. aider/tools/__init__.py +72 -0
  144. aider/tools/command.py +105 -0
  145. aider/tools/command_interactive.py +122 -0
  146. aider/tools/delete_block.py +182 -0
  147. aider/tools/delete_line.py +155 -0
  148. aider/tools/delete_lines.py +184 -0
  149. aider/tools/extract_lines.py +341 -0
  150. aider/tools/finished.py +48 -0
  151. aider/tools/git_branch.py +129 -0
  152. aider/tools/git_diff.py +60 -0
  153. aider/tools/git_log.py +57 -0
  154. aider/tools/git_remote.py +53 -0
  155. aider/tools/git_show.py +51 -0
  156. aider/tools/git_status.py +46 -0
  157. aider/tools/grep.py +256 -0
  158. aider/tools/indent_lines.py +221 -0
  159. aider/tools/insert_block.py +288 -0
  160. aider/tools/list_changes.py +86 -0
  161. aider/tools/ls.py +93 -0
  162. aider/tools/make_editable.py +85 -0
  163. aider/tools/make_readonly.py +69 -0
  164. aider/tools/remove.py +91 -0
  165. aider/tools/replace_all.py +126 -0
  166. aider/tools/replace_line.py +173 -0
  167. aider/tools/replace_lines.py +217 -0
  168. aider/tools/replace_text.py +187 -0
  169. aider/tools/show_numbered_context.py +147 -0
  170. aider/tools/tool_utils.py +313 -0
  171. aider/tools/undo_change.py +95 -0
  172. aider/tools/update_todo_list.py +156 -0
  173. aider/tools/view.py +57 -0
  174. aider/tools/view_files_matching.py +141 -0
  175. aider/tools/view_files_with_symbol.py +129 -0
  176. aider/urls.py +17 -0
  177. aider/utils.py +456 -0
  178. aider/versioncheck.py +113 -0
  179. aider/voice.py +205 -0
  180. aider/waiting.py +38 -0
  181. aider/watch.py +318 -0
  182. aider/watch_prompts.py +12 -0
  183. aider/website/Gemfile +8 -0
  184. aider/website/_includes/blame.md +162 -0
  185. aider/website/_includes/get-started.md +22 -0
  186. aider/website/_includes/help-tip.md +5 -0
  187. aider/website/_includes/help.md +24 -0
  188. aider/website/_includes/install.md +5 -0
  189. aider/website/_includes/keys.md +4 -0
  190. aider/website/_includes/model-warnings.md +67 -0
  191. aider/website/_includes/multi-line.md +22 -0
  192. aider/website/_includes/python-m-aider.md +5 -0
  193. aider/website/_includes/recording.css +228 -0
  194. aider/website/_includes/recording.md +34 -0
  195. aider/website/_includes/replit-pipx.md +9 -0
  196. aider/website/_includes/works-best.md +1 -0
  197. aider/website/_sass/custom/custom.scss +103 -0
  198. aider/website/docs/config/adv-model-settings.md +2261 -0
  199. aider/website/docs/config/agent-mode.md +194 -0
  200. aider/website/docs/config/aider_conf.md +548 -0
  201. aider/website/docs/config/api-keys.md +90 -0
  202. aider/website/docs/config/dotenv.md +493 -0
  203. aider/website/docs/config/editor.md +127 -0
  204. aider/website/docs/config/mcp.md +95 -0
  205. aider/website/docs/config/model-aliases.md +104 -0
  206. aider/website/docs/config/options.md +890 -0
  207. aider/website/docs/config/reasoning.md +210 -0
  208. aider/website/docs/config.md +44 -0
  209. aider/website/docs/faq.md +384 -0
  210. aider/website/docs/git.md +76 -0
  211. aider/website/docs/index.md +47 -0
  212. aider/website/docs/install/codespaces.md +39 -0
  213. aider/website/docs/install/docker.md +57 -0
  214. aider/website/docs/install/optional.md +100 -0
  215. aider/website/docs/install/replit.md +8 -0
  216. aider/website/docs/install.md +115 -0
  217. aider/website/docs/languages.md +264 -0
  218. aider/website/docs/legal/contributor-agreement.md +111 -0
  219. aider/website/docs/legal/privacy.md +104 -0
  220. aider/website/docs/llms/anthropic.md +77 -0
  221. aider/website/docs/llms/azure.md +48 -0
  222. aider/website/docs/llms/bedrock.md +132 -0
  223. aider/website/docs/llms/cohere.md +34 -0
  224. aider/website/docs/llms/deepseek.md +32 -0
  225. aider/website/docs/llms/gemini.md +49 -0
  226. aider/website/docs/llms/github.md +111 -0
  227. aider/website/docs/llms/groq.md +36 -0
  228. aider/website/docs/llms/lm-studio.md +39 -0
  229. aider/website/docs/llms/ollama.md +75 -0
  230. aider/website/docs/llms/openai-compat.md +39 -0
  231. aider/website/docs/llms/openai.md +58 -0
  232. aider/website/docs/llms/openrouter.md +78 -0
  233. aider/website/docs/llms/other.md +117 -0
  234. aider/website/docs/llms/vertex.md +50 -0
  235. aider/website/docs/llms/warnings.md +10 -0
  236. aider/website/docs/llms/xai.md +53 -0
  237. aider/website/docs/llms.md +54 -0
  238. aider/website/docs/more/analytics.md +127 -0
  239. aider/website/docs/more/edit-formats.md +116 -0
  240. aider/website/docs/more/infinite-output.md +165 -0
  241. aider/website/docs/more-info.md +8 -0
  242. aider/website/docs/recordings/auto-accept-architect.md +31 -0
  243. aider/website/docs/recordings/dont-drop-original-read-files.md +35 -0
  244. aider/website/docs/recordings/index.md +21 -0
  245. aider/website/docs/recordings/model-accepts-settings.md +69 -0
  246. aider/website/docs/recordings/tree-sitter-language-pack.md +80 -0
  247. aider/website/docs/repomap.md +112 -0
  248. aider/website/docs/scripting.md +100 -0
  249. aider/website/docs/sessions.md +203 -0
  250. aider/website/docs/troubleshooting/aider-not-found.md +24 -0
  251. aider/website/docs/troubleshooting/edit-errors.md +76 -0
  252. aider/website/docs/troubleshooting/imports.md +62 -0
  253. aider/website/docs/troubleshooting/models-and-keys.md +54 -0
  254. aider/website/docs/troubleshooting/support.md +79 -0
  255. aider/website/docs/troubleshooting/token-limits.md +96 -0
  256. aider/website/docs/troubleshooting/warnings.md +12 -0
  257. aider/website/docs/troubleshooting.md +11 -0
  258. aider/website/docs/usage/browser.md +57 -0
  259. aider/website/docs/usage/caching.md +49 -0
  260. aider/website/docs/usage/commands.md +133 -0
  261. aider/website/docs/usage/conventions.md +119 -0
  262. aider/website/docs/usage/copypaste.md +121 -0
  263. aider/website/docs/usage/images-urls.md +48 -0
  264. aider/website/docs/usage/lint-test.md +118 -0
  265. aider/website/docs/usage/modes.md +211 -0
  266. aider/website/docs/usage/not-code.md +179 -0
  267. aider/website/docs/usage/notifications.md +87 -0
  268. aider/website/docs/usage/tips.md +79 -0
  269. aider/website/docs/usage/tutorials.md +30 -0
  270. aider/website/docs/usage/voice.md +121 -0
  271. aider/website/docs/usage/watch.md +294 -0
  272. aider/website/docs/usage.md +102 -0
  273. aider/website/share/index.md +101 -0
  274. aider_ce-0.88.20.dist-info/METADATA +187 -0
  275. aider_ce-0.88.20.dist-info/RECORD +279 -0
  276. aider_ce-0.88.20.dist-info/WHEEL +5 -0
  277. aider_ce-0.88.20.dist-info/entry_points.txt +2 -0
  278. aider_ce-0.88.20.dist-info/licenses/LICENSE.txt +202 -0
  279. aider_ce-0.88.20.dist-info/top_level.txt +1 -0
aider/repomap.py ADDED
@@ -0,0 +1,1174 @@
+ import math
+ import os
+ import shutil
+ import sqlite3
+ import sys
+ import time
+ import warnings
+ from collections import Counter, defaultdict, namedtuple
+ from importlib import resources
+ from pathlib import Path
+
+ import tree_sitter
+ from diskcache import Cache
+ from grep_ast import TreeContext, filename_to_lang
+ from pygments.lexers import guess_lexer_for_filename
+ from pygments.token import Token
+ from tqdm import tqdm
+
+ from aider.dump import dump
+ from aider.helpers.similarity import (
+     cosine_similarity,
+     create_bigram_vector,
+     normalize_vector,
+ )
+ from aider.special import filter_important_files
+ from aider.tools.tool_utils import ToolError
+
+ # tree_sitter is throwing a FutureWarning
+ warnings.simplefilter("ignore", category=FutureWarning)
+ from grep_ast.tsl import USING_TSL_PACK, get_language, get_parser  # noqa: E402
+
+
+ # Define the Tag namedtuple with a default for specific_kind to maintain compatibility
+ # with cached entries that might have been created with the old definition
+ class TagBase(
+     namedtuple(
+         "TagBase",
+         "rel_fname fname line name kind specific_kind start_line end_line start_byte end_byte",
+     )
+ ):
+     __slots__ = ()
+
+     def __new__(
+         cls,
+         rel_fname,
+         fname,
+         line,
+         name,
+         kind,
+         specific_kind=None,
+         start_line=None,
+         end_line=None,
+         start_byte=None,
+         end_byte=None,
+     ):
+         # Provide a default value for specific_kind to handle old cached objects
+         return super(TagBase, cls).__new__(
+             cls,
+             rel_fname,
+             fname,
+             line,
+             name,
+             kind,
+             specific_kind,
+             start_line,
+             end_line,
+             start_byte,
+             end_byte,
+         )
+
+
+ Tag = TagBase
+
+
+ SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)
+
+
+ CACHE_VERSION = 5
+ if USING_TSL_PACK:
+     CACHE_VERSION = 7
+
+ UPDATING_REPO_MAP_MESSAGE = "Updating repo map"
+
+
+ class RepoMap:
+     TAGS_CACHE_DIR = f".aider.tags.cache.v{CACHE_VERSION}"
+
+     warned_files = set()
+
+     # Class variable to store initial ranked tags results
+     _initial_ranked_tags = None
+     _initial_ident_to_files = None
+
+     # Define kinds that typically represent definitions across languages
+     # Used by AgentCoder to filter tags for the symbol outline
+     definition_kinds = {
+         "class",
+         "struct",
+         "enum",
+         "interface",
+         "trait",  # Structure definitions
+         "function",
+         "method",
+         "constructor",  # Function/method definitions
+         "module",
+         "namespace",  # Module/namespace definitions
+         "constant",
+         "variable",  # Top-level/class variable definitions (consider refining)
+         "type",  # Type definitions
+         # Add more based on tree-sitter queries if needed
+     }
+
+     @staticmethod
+     def get_file_stub(fname, io):
+         """Generate a complete structural outline of a source code file.
+
+         Args:
+             fname (str): Absolute path to the source file
+             io: InputOutput instance for file operations
+
+         Returns:
+             str: Formatted outline showing the file's structure
+         """
+         # Use cached instance if available
+         if not hasattr(RepoMap, "_stub_instance"):
+             RepoMap._stub_instance = RepoMap(map_tokens=0, io=io)
+
+         rm = RepoMap._stub_instance
+
+         rel_fname = rm.get_rel_fname(fname)
+
+         # Reuse existing tag parsing
+         tags = rm.get_tags(fname, rel_fname)
+         if not tags:
+             return "# No outline available"
+
+         # Get all definition lines
+         lois = [tag.line for tag in tags if tag.kind == "def"]
+
+         # Reuse existing tree rendering
+         outline = rm.render_tree(fname, rel_fname, lois)
+
+         return f"{outline}"
+
+     def __init__(
+         self,
+         map_tokens=1024,
+         map_cache_dir=".",
+         main_model=None,
+         io=None,
+         repo_content_prefix=None,
+         verbose=False,
+         max_context_window=None,
+         map_mul_no_files=8,
+         refresh="auto",
+         max_code_line_length=100,
+         repo_root=None,
+         use_memory_cache=False,
+     ):
+         self.io = io
+         self.verbose = verbose
+         self.refresh = refresh
+
+         self.map_cache_dir = map_cache_dir
+         # Prefer an explicit repo root (eg per-test repo), fallback to CWD
+         self.root = repo_root or os.getcwd()
+
+         # Allow opting into an in-memory tags cache to avoid disk/SQLite locks
+         if use_memory_cache:
+             self.TAGS_CACHE = dict()
+         else:
+             self.load_tags_cache()
+         self.cache_threshold = 0.95
+
+         self.max_map_tokens = map_tokens
+         self.map_mul_no_files = map_mul_no_files
+         self.max_context_window = max_context_window
+
+         self.max_code_line_length = max_code_line_length
+
+         self.repo_content_prefix = repo_content_prefix
+
+         self.main_model = main_model
+
+         self.tree_cache = {}
+         self.tree_context_cache = {}
+         self.map_cache = {}
+         self.map_processing_time = 0
+         self.last_map = None
+
+         # Initialize cache for mentioned identifiers similarity
+         self._last_mentioned_idents = None
+         self._last_mentioned_idents_vector = None
+         self._has_last_mentioned_idents = False
+         self._mentioned_ident_similarity = 0.8
+
+         if self.verbose:
+             self.io.tool_output(
+                 f"RepoMap initialized with map_mul_no_files: {self.map_mul_no_files}"
+             )
+             self.io.tool_output(f"RepoMap initialized with map_cache_dir: {self.map_cache_dir}")
+             self.io.tool_output(f"RepoMap assumes repo root is: {self.root}")
+
+     def token_count(self, text):
+         len_text = len(text)
+         if len_text < 200:
+             return self.main_model.token_count(text)
+
+         lines = text.splitlines(keepends=True)
+         num_lines = len(lines)
+         step = num_lines // 100 or 1
+         lines = lines[::step]
+         sample_text = "".join(lines)
+         sample_tokens = self.main_model.token_count(sample_text)
+         est_tokens = sample_tokens / len(sample_text) * len_text
+         return est_tokens
+
+     def get_repo_map(
+         self,
+         chat_files,
+         other_files,
+         mentioned_fnames=None,
+         mentioned_idents=None,
+         force_refresh=False,
+     ):
+         if self.max_map_tokens <= 0:
+             return
+         if not other_files:
+             return
+         if not mentioned_fnames:
+             mentioned_fnames = set()
+         if not mentioned_idents:
+             mentioned_idents = set()
+
+         max_map_tokens = self.max_map_tokens
+
+         # With no files in the chat, give a bigger view of the entire repo
+         padding = 4096
+         if max_map_tokens and self.max_context_window:
+             target = min(
+                 int(max_map_tokens * self.map_mul_no_files),
+                 self.max_context_window - padding,
+             )
+         else:
+             target = 0
+         if not chat_files and self.max_context_window and target > 0:
+             max_map_tokens = target
+
+         try:
+             files_listing = self.get_ranked_tags_map(
+                 chat_files,
+                 other_files,
+                 max_map_tokens,
+                 mentioned_fnames,
+                 mentioned_idents,
+                 force_refresh,
+             )
+         except RecursionError:
+             self.io.tool_error("Disabling repo map, git repo too large?")
+             self.max_map_tokens = 0
+             return
+
+         if not files_listing:
+             return
+
+         if self.verbose:
+             num_tokens = self.token_count(files_listing)
+             self.io.tool_output(f"Repo-map: {num_tokens / 1024:.1f} k-tokens")
+
+         if chat_files:
+             other = "other "
+         else:
+             other = ""
+
+         if self.repo_content_prefix:
+             repo_content = self.repo_content_prefix.format(other=other)
+         else:
+             repo_content = ""
+
+         repo_content += files_listing
+
+         return repo_content
+
+     def get_rel_fname(self, fname):
+         try:
+             return os.path.relpath(fname, self.root)
+         except ValueError:
+             # Issue #1288: ValueError: path is on mount 'C:', start on mount 'D:'
+             # Just return the full fname.
+             return fname
+
+     def tags_cache_error(self, original_error=None):
+         """Handle SQLite errors by trying to recreate cache, falling back to dict if needed"""
+
+         if self.verbose and original_error:
+             self.io.tool_warning(f"Tags cache error: {str(original_error)}")
+
+         if isinstance(getattr(self, "TAGS_CACHE", None), dict):
+             return
+
+         path = Path(self.map_cache_dir) / self.TAGS_CACHE_DIR
+
+         # Try to recreate the cache
+         try:
+             # Delete existing cache dir
+             if path.exists():
+                 shutil.rmtree(path)
+
+             # Try to create new cache
+             new_cache = Cache(path)
+
+             # Test that it works
+             test_key = "test"
+             new_cache[test_key] = "test"
+             _ = new_cache[test_key]
+             del new_cache[test_key]
+
+             # If we got here, the new cache works
+             self.TAGS_CACHE = new_cache
+             return
+
+         except SQLITE_ERRORS as e:
+             # If anything goes wrong, warn and fall back to dict
+             self.io.tool_warning(
+                 f"Unable to use tags cache at {path}, falling back to memory cache"
+             )
+             if self.verbose:
+                 self.io.tool_warning(f"Cache recreation error: {str(e)}")
+
+         self.TAGS_CACHE = dict()
+
+     def load_tags_cache(self):
+         path = Path(self.map_cache_dir) / self.TAGS_CACHE_DIR
+         try:
+             self.TAGS_CACHE = Cache(path)
+         except SQLITE_ERRORS as e:
+             self.tags_cache_error(e)
+
+     def save_tags_cache(self):
+         pass
+
+     def get_mtime(self, fname):
+         try:
+             return os.path.getmtime(fname)
+         except FileNotFoundError:
+             self.io.tool_warning(f"File not found error: {fname}")
+
+     def get_tags(self, fname, rel_fname):
+         # Check if the file is in the cache and if the modification time has not changed
+         file_mtime = self.get_mtime(fname)
+         if file_mtime is None:
+             return []
+
+         cache_key = fname
+         try:
+             val = self.TAGS_CACHE.get(cache_key)  # Issue #1308
+         except SQLITE_ERRORS as e:
+             self.tags_cache_error(e)
+             val = self.TAGS_CACHE.get(cache_key)
+
+         if val is not None and val.get("mtime") == file_mtime:
+             try:
+                 # Get the cached data
+                 data = self.TAGS_CACHE[cache_key]["data"]
+
+                 # Let our Tag class handle compatibility with old cache formats
+                 # No need for special handling as TagBase.__new__ will supply default specific_kind
+
+                 return data
+             except SQLITE_ERRORS as e:
+                 self.tags_cache_error(e)
+                 return self.TAGS_CACHE[cache_key]["data"]
+             except (TypeError, AttributeError) as e:
+                 # If we hit an error related to missing fields in old cached Tag objects,
+                 # force a cache refresh for this file
+                 if self.verbose:
+                     self.io.tool_warning(f"Cache format error for {fname}, refreshing: {e}")
+                 # Return empty list to trigger cache refresh
+                 return []
+
+         # miss!
+         data = list(self.get_tags_raw(fname, rel_fname))
+
+         # Update the cache
+         try:
+             self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
+             self.save_tags_cache()
+         except SQLITE_ERRORS as e:
+             self.tags_cache_error(e)
+             self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
+
+         return data
+
+     def get_symbol_definition_location(self, file_path, symbol_name):
+         """
+         Finds the unique definition location (start/end line) for a symbol in a file.
+
+         Args:
+             file_path (str): The relative path to the file.
+             symbol_name (str): The name of the symbol to find.
+
+         Returns:
+             tuple: (start_line, end_line) (0-based) if a unique definition is found.
+
+         Raises:
+             ToolError: If the symbol is not found, not unique, or not a definition.
+         """
+         abs_path = self.io.root_abs_path(file_path)  # Assuming io has this helper or similar
+         rel_path = self.get_rel_fname(abs_path)  # Ensure we use consistent relative path
+
+         tags = self.get_tags(abs_path, rel_path)
+         if not tags:
+             raise ToolError(f"Symbol '{symbol_name}' not found in '{file_path}' (no tags).")
+
+         definitions = []
+         for tag in tags:
+             # Check if it's a definition and the name matches
+             if tag.kind == "def" and tag.name == symbol_name:
+                 # Ensure we have valid location info
+                 if tag.start_line is not None and tag.end_line is not None and tag.start_line >= 0:
+                     definitions.append(tag)
+
+         if not definitions:
+             # Check if it exists as a non-definition tag
+             non_defs = [tag for tag in tags if tag.name == symbol_name and tag.kind != "def"]
+             if non_defs:
+                 raise ToolError(
+                     f"Symbol '{symbol_name}' found in '{file_path}', but not as a unique definition"
+                     f" (found as {non_defs[0].kind})."
+                 )
+             else:
+                 raise ToolError(f"Symbol '{symbol_name}' definition not found in '{file_path}'.")
+
+         if len(definitions) > 1:
+             # Provide more context about ambiguity if possible
+             lines = sorted([d.start_line + 1 for d in definitions])  # 1-based for user message
+             raise ToolError(
+                 f"Symbol '{symbol_name}' is ambiguous in '{file_path}'. Found definitions on lines:"
+                 f" {', '.join(map(str, lines))}."
+             )
+
+         # Unique definition found
+         definition_tag = definitions[0]
+         return definition_tag.start_line, definition_tag.end_line
+
+     def shared_path_components(self, path1_str, path2_str):
+         """
+         Calculates distance based on how many parent components are shared.
+         Distance = Total parts - (2 * Shared parts). Lower is closer.
+         """
+         p1 = Path(path1_str).parts
+         p2 = Path(path2_str).parts
+
+         # Count the number of common leading parts
+         common_count = 0
+         for comp1, comp2 in zip(p1, p2):
+             if comp1 == comp2:
+                 common_count += 1
+             else:
+                 break
+
+         # A simple metric of difference:
+         # (Total parts in P1 + Total parts in P2) - (2 * Common parts)
+         distance = len(p1) + len(p2) - (2 * common_count)
+         return distance
+
+     def get_tags_raw(self, fname, rel_fname):
+         lang = filename_to_lang(fname)
+         if not lang:
+             return
+
+         try:
+             language = get_language(lang)
+             parser = get_parser(lang)
+         except Exception as err:
+             print(f"Skipping file {fname}: {err}")
+             return
+
+         query_scm = get_scm_fname(lang)
+         if not query_scm.exists():
+             return
+         query_scm = query_scm.read_text()
+
+         code = self.io.read_text(fname)
+         if not code:
+             return
+         tree = parser.parse(bytes(code, "utf-8"))
+
+         # Run the tags queries
+         if sys.version_info >= (3, 10):
+             query = tree_sitter.Query(language, query_scm)
+             cursor = tree_sitter.QueryCursor(query)
+             captures = cursor.captures(tree.root_node)
+         else:
+             query = language.query(query_scm)
+             captures = query.captures(tree.root_node)
+
+         saw = set()
+         if USING_TSL_PACK:
+             all_nodes = []
+             for tag, nodes in captures.items():
+                 all_nodes += [(node, tag) for node in nodes]
+         else:
+             all_nodes = list(captures)
+
+         for node, tag in all_nodes:
+             if tag.startswith("name.definition."):
+                 kind = "def"
+             elif tag.startswith("name.reference."):
+                 kind = "ref"
+             else:
+                 continue
+
+             saw.add(kind)
+
+             # Extract specific kind from the tag, e.g., 'function' from 'name.definition.function'
+             specific_kind = tag.split(".")[-1] if "." in tag else None
+
+             result = Tag(
+                 rel_fname=rel_fname,
+                 fname=fname,
+                 name=node.text.decode("utf-8"),
+                 kind=kind,
+                 specific_kind=specific_kind,
+                 line=node.start_point[0],  # Legacy line number
+                 start_line=node.start_point[0],
+                 end_line=node.end_point[0],
+                 start_byte=node.start_byte,
+                 end_byte=node.end_byte,
+             )
+
+             yield result
+
+         if "ref" in saw:
+             return
+         if "def" not in saw:
+             return
+
+         # We saw defs, without any refs
+         # Some tags files only provide defs (cpp, for example)
+         # Use pygments to backfill refs
+
+         try:
+             lexer = guess_lexer_for_filename(fname, code)
+         except Exception:  # On Windows, bad ref to time.clock which is deprecated?
+             # self.io.tool_error(f"Error lexing {fname}")
+             return
+
+         tokens = list(lexer.get_tokens(code))
+         tokens = [token[1] for token in tokens if token[0] in Token.Name]
+
+         for token in tokens:
+             yield Tag(
+                 rel_fname=rel_fname,
+                 fname=fname,
+                 name=token,
+                 kind="ref",
+                 specific_kind="name",  # Default for pygments fallback
+                 line=-1,  # Pygments doesn't give precise locations easily
+                 start_line=-1,
+                 end_line=-1,
+                 start_byte=-1,
+                 end_byte=-1,
+             )
+
+     def get_ranked_tags(
+         self, chat_fnames, other_fnames, mentioned_fnames, mentioned_idents, progress=True
+     ):
+         import networkx as nx
+
+         defines = defaultdict(set)
+         references = defaultdict(list)
+         definitions = defaultdict(set)
+
+         personalization = dict()
+
+         fnames = set(chat_fnames).union(set(other_fnames))
+         chat_rel_fnames = set()
+
+         fnames = sorted(fnames)
+
+         # Default personalization for unspecified files is 1/num_nodes
+         # https://networkx.org/documentation/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank
+         personalize = 100 / len(fnames)
+
+         try:
+             cache_size = len(self.TAGS_CACHE)
+         except SQLITE_ERRORS as e:
+             self.tags_cache_error(e)
+             cache_size = len(self.TAGS_CACHE)
+
+         if len(fnames) - cache_size > 100:
+             self.io.tool_output(
+                 "Initial repo scan can be slow in larger repos, but only happens once."
+             )
+             fnames = tqdm(fnames, desc="Scanning repo")
+             showing_bar = True
+         else:
+             showing_bar = False
+
+         for fname in fnames:
+             if self.verbose:
+                 self.io.tool_output(f"Processing {fname}")
+             if progress and not showing_bar:
+                 self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {fname}")
+
+             try:
+                 file_ok = Path(fname).is_file()
+             except OSError:
+                 file_ok = False
+
+             if not file_ok:
+                 if fname not in self.warned_files:
+                     self.io.tool_warning(f"Repo-map can't include {fname}")
+                     self.io.tool_output(
+                         "Has it been deleted from the file system but not from git?"
+                     )
+                     self.warned_files.add(fname)
+                 continue
+
+             # dump(fname)
+             rel_fname = self.get_rel_fname(fname)
+             current_pers = 0.0  # Start with 0 personalization score
+
+             if fname in chat_fnames:
+                 current_pers += personalize
+                 chat_rel_fnames.add(rel_fname)
+
+             if rel_fname in mentioned_fnames:
+                 # Use max to avoid double counting if in chat_fnames and mentioned_fnames
+                 current_pers = max(current_pers, personalize)
+
+             # Check path components against mentioned_idents
+             path_obj = Path(rel_fname)
+             path_components = set(path_obj.parts)
+             basename_with_ext = path_obj.name
+             basename_without_ext, _ = os.path.splitext(basename_with_ext)
+             components_to_check = path_components.union({basename_with_ext, basename_without_ext})
+
+             matched_idents = components_to_check.intersection(mentioned_idents)
+             if matched_idents:
+                 # Add personalization *once* if any path component matches a mentioned ident
+                 current_pers += personalize
+
+             if current_pers > 0:
+                 personalization[rel_fname] = current_pers  # Assign the final calculated value
+
+             tags = list(self.get_tags(fname, rel_fname))
+
+             if tags is None:
+                 continue
+
+             for tag in tags:
+                 if tag.kind == "def":
+                     defines[tag.name].add(rel_fname)
+                     key = (rel_fname, tag.name)
+                     definitions[key].add(tag)
+
+                 elif tag.kind == "ref":
+                     references[tag.name].append(rel_fname)
+
+         ##
+         # dump(defines)
+         # dump(references)
+         # dump(personalization)
+
+         if not references:
+             references = dict((k, list(v)) for k, v in defines.items())
+
+         idents = set(defines.keys()).intersection(set(references.keys()))
+
+         G = nx.MultiDiGraph()
+
+         # Add a small self-edge for every definition that has no references
+         # Helps with tree-sitter 0.23.2 with ruby, where "def greet(name)"
+         # isn't counted as a def AND a ref. tree-sitter 0.24.0 does.
+         for ident in defines.keys():
+             if ident in references:
+                 continue
+             for definer in defines[ident]:
+                 G.add_edge(definer, definer, weight=0.000001, ident=ident)
+
+         for ident in idents:
+             if progress:
+                 self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {ident}")
+
+             definers = defines[ident]
+
+             mul = 1.0
+
+             is_snake = ("_" in ident) and any(c.isalpha() for c in ident)
+             is_kebab = ("-" in ident) and any(c.isalpha() for c in ident)
+             is_camel = any(c.isupper() for c in ident) and any(c.islower() for c in ident)
+             if ident in mentioned_idents:
+                 mul *= 16
+
+             # Prioritize function-like identifiers
+             if (
+                 (is_snake or is_kebab or is_camel)
+                 and len(ident) >= 8
+                 and "test" not in ident.lower()
+             ):
+                 mul *= 16
+
+             # Downplay repetitive definitions in case of common boiler plate
+             # Scale down logarithmically given the increasing number of references in a codebase
+             # Ideally, this will help downweight boiler plate in frameworks, interfaces, and abstract classes
+             if len(defines[ident]) > 4:
+                 exp = min(len(defines[ident]), 32)
+                 mul *= math.log2((4 / (2**exp)) + 1)
+
+             # Calculate multiplier: log(number of unique file references * total references ^ 2)
+             # Used to balance the number of times an identifier appears with its number of refs per file
+             # Penetration in code base is important
+             # So is the frequency
+             # And the logarithm keeps them from scaling out of bounds forever
+             # Combined with the above downweighting
+             # There should be a push/pull that balances repetitiveness of identifier defs
+             # With absolute number of references throughout a codebase
+             unique_file_refs = len(set(references[ident]))
+             total_refs = len(references[ident])
+             ext_mul = round(math.log2(unique_file_refs * total_refs**2 + 1))
+
+             for referencer, num_refs in Counter(references[ident]).items():
+                 for definer in definers:
+                     # dump(referencer, definer, num_refs, mul)
+
+                     # Only add edge if file extensions match
+                     referencer_ext = Path(referencer).suffix
+                     definer_ext = Path(definer).suffix
+                     if referencer_ext != definer_ext:
+                         continue
+
+                     use_mul = mul * ext_mul
+
+                     if referencer in chat_rel_fnames:
+                         use_mul *= 64
+                     elif referencer == definer:
+                         use_mul *= 1 / 128
+
+                     # scale down so high freq (low value) mentions don't dominate
+                     # num_refs = math.sqrt(num_refs)
+                     path_distance = self.shared_path_components(referencer, definer)
+                     weight = num_refs * use_mul * 2 ** (-1 * path_distance)
+                     G.add_edge(referencer, definer, weight=weight, ident=ident)
+
+         if not references:
+             pass
+
+         if personalization:
+             pers_args = dict(personalization=personalization, dangling=personalization)
+         else:
+             pers_args = dict()
+
+         try:
+             ranked = nx.pagerank(G, weight="weight", **pers_args)
+         except ZeroDivisionError:
+             # Issue #1536
+             try:
+                 ranked = nx.pagerank(G, weight="weight")
+             except ZeroDivisionError:
+                 return []
+
+         # distribute the rank from each source node, across all of its out edges
+         ranked_definitions = defaultdict(float)
+         for src in G.nodes:
+             if progress:
+                 self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {src}")
+
+             src_rank = ranked[src]
+             total_weight = sum(data["weight"] for _src, _dst, data in G.out_edges(src, data=True))
+             # dump(src, src_rank, total_weight)
+             for _src, dst, data in G.out_edges(src, data=True):
+                 data["rank"] = src_rank * data["weight"] / total_weight
+                 ident = data["ident"]
+                 ranked_definitions[(dst, ident)] += data["rank"]
+
+         ranked_tags = []
+         ranked_definitions = sorted(
+             ranked_definitions.items(), reverse=True, key=lambda x: (x[1], x[0])
+         )
+
+         # dump(ranked_definitions)
+         # with open('defs.txt', 'w') as out_file:
+         #     import pprint
+         #     printer = pprint.PrettyPrinter(indent=2, stream=out_file)
+         #     printer.pprint(ranked_definitions)
+
+         for (fname, ident), rank in ranked_definitions:
+             # print(f"{rank:.03f} {fname} {ident}")
+             if fname in chat_rel_fnames:
+                 continue
+             ranked_tags += list(definitions.get((fname, ident), []))
+
+         rel_other_fnames_without_tags = set(self.get_rel_fname(fname) for fname in other_fnames)
+
+         fnames_already_included = set(rt[0] for rt in ranked_tags)
+
+         top_rank = sorted([(rank, node) for (node, rank) in ranked.items()], reverse=True)
+         for rank, fname in top_rank:
+             if fname in rel_other_fnames_without_tags:
+                 rel_other_fnames_without_tags.remove(fname)
+             if fname not in fnames_already_included:
+                 ranked_tags.append((fname,))
+
+         for fname in rel_other_fnames_without_tags:
+             ranked_tags.append((fname,))
+
+         return ranked_tags
+
+     def get_ranked_tags_map(
+         self,
+         chat_fnames,
+         other_fnames=None,
+         max_map_tokens=None,
+         mentioned_fnames=None,
+         mentioned_idents=None,
+         force_refresh=False,
+     ):
+         if not other_fnames:
+             other_fnames = list()
+         if not max_map_tokens:
+             max_map_tokens = self.max_map_tokens
+         if not mentioned_fnames:
+             mentioned_fnames = set()
+         if not mentioned_idents:
+             mentioned_idents = set()
+
+         # Create a cache key
+         cache_key = [
+             tuple(sorted(chat_fnames)) if chat_fnames else None,
+             len(other_fnames) if other_fnames else None,
+             max_map_tokens,
+         ]
+
+         if self.refresh == "auto":
+             # Handle mentioned_fnames normally
+             cache_key += [
+                 tuple(sorted(mentioned_fnames)) if mentioned_fnames else None,
+             ]
+
+             # Handle mentioned_idents with similarity check
+             cache_key_component = self._get_mentioned_idents_cache_component(mentioned_idents)
+             cache_key.append(cache_key_component)
+
+         cache_key = hash(str(tuple(cache_key)))
+
+         use_cache = False
+         if not force_refresh:
+             if self.refresh == "manual" and self.last_map:
+                 return self.last_map
+
+             if self.refresh == "always":
+                 use_cache = False
+             elif self.refresh == "files":
+                 use_cache = True
+             elif self.refresh == "auto":
+                 use_cache = self.map_processing_time > 1.0
+
+             # Check if the result is in the cache
+             if use_cache and cache_key in self.map_cache:
+                 return self.map_cache[cache_key]
+
+         # If not in cache or force_refresh is True, generate the map
+         start_time = time.time()
+         result = self.get_ranked_tags_map_uncached(
+             chat_fnames, other_fnames, max_map_tokens, mentioned_fnames, mentioned_idents
+         )
+         end_time = time.time()
+         self.map_processing_time = end_time - start_time
+
+         # Store the result in the cache
+         self.map_cache[cache_key] = result
+         self.last_map = result
+
+         return result
+
+     def get_ranked_tags_map_uncached(
+         self,
+         chat_fnames,
+         other_fnames=None,
+         max_map_tokens=None,
+         mentioned_fnames=None,
+         mentioned_idents=None,
+     ):
+         if not other_fnames:
+             other_fnames = list()
+         if not max_map_tokens:
+             max_map_tokens = self.max_map_tokens
+         if not mentioned_fnames:
+             mentioned_fnames = set()
+         if not mentioned_idents:
+             mentioned_idents = set()
+
+         self.io.update_spinner(UPDATING_REPO_MAP_MESSAGE)
+
+         ranked_tags = self.get_ranked_tags(
+             chat_fnames, other_fnames, mentioned_fnames, mentioned_idents, True
+         )
+
+         other_rel_fnames = sorted(set(self.get_rel_fname(fname) for fname in other_fnames))
+         special_fnames = filter_important_files(other_rel_fnames)
+         ranked_tags_fnames = set(tag[0] for tag in ranked_tags)
+         special_fnames = [fn for fn in special_fnames if fn not in ranked_tags_fnames]
+         special_fnames = [(fn,) for fn in special_fnames]
+
+         ranked_tags = special_fnames + ranked_tags
+
+         num_tags = len(ranked_tags)
+         lower_bound = 0
+         upper_bound = num_tags
+         best_tree = None
+         best_tree_tokens = 0
+
+         chat_rel_fnames = set(self.get_rel_fname(fname) for fname in chat_fnames)
+
+         self.tree_cache = dict()
+
+         middle = min(int(max_map_tokens // 25), num_tags)
+         while lower_bound <= upper_bound:
+             # dump(lower_bound, middle, upper_bound)
+
+             if middle > 1500:
+                 show_tokens = f"{middle / 1000.0:.1f}K"
+             else:
+                 show_tokens = str(middle)
+
+             self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {show_tokens} tokens")
+
+             tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames)
+             num_tokens = self.token_count(tree)
+
+             pct_err = abs(num_tokens - max_map_tokens) / max_map_tokens
+             ok_err = 0.15
+             if (num_tokens <= max_map_tokens and num_tokens > best_tree_tokens) or pct_err < ok_err:
+                 best_tree = tree
+                 best_tree_tokens = num_tokens
+
+                 if pct_err < ok_err:
+                     break
+
+             if num_tokens < max_map_tokens:
+                 lower_bound = middle + 1
+             else:
+                 upper_bound = middle - 1
+
+             middle = int((lower_bound + upper_bound) // 2)
+
+         return best_tree
+
+     tree_cache = dict()
+
+     def render_tree(self, abs_fname, rel_fname, lois):
+         mtime = self.get_mtime(abs_fname)
+         key = (rel_fname, tuple(sorted(lois)), mtime)
+
+         if key in self.tree_cache:
+             return self.tree_cache[key]
+
+         if (
+             rel_fname not in self.tree_context_cache
+             or self.tree_context_cache[rel_fname]["mtime"] != mtime
+         ):
+             code = self.io.read_text(abs_fname) or ""
+             if not code.endswith("\n"):
+                 code += "\n"
+
+             context = TreeContext(
+                 rel_fname,
+                 code,
+                 color=False,
+                 line_number=False,
+                 child_context=False,
+                 last_line=False,
+                 margin=0,
+                 mark_lois=False,
+                 loi_pad=0,
+                 # header_max=30,
+                 show_top_of_file_parent_scope=False,
+             )
+             self.tree_context_cache[rel_fname] = {"context": context, "mtime": mtime}
+
+         context = self.tree_context_cache[rel_fname]["context"]
+         context.lines_of_interest = set()
+         context.add_lines_of_interest(lois)
+         context.add_context()
+         res = context.format()
+         self.tree_cache[key] = res
+         return res
+
+     def to_tree(self, tags, chat_rel_fnames):
+         if not tags:
+             return ""
+
+         cur_fname = None
+         cur_abs_fname = None
+         lois = None
+         output = ""
+
+         # add a bogus tag at the end so we trip the this_fname != cur_fname...
+         dummy_tag = (None,)
+         for tag in sorted(tags) + [dummy_tag]:
+             this_rel_fname = tag[0]
+             if this_rel_fname in chat_rel_fnames:
+                 continue
+
+             # ... here ... to output the final real entry in the list
+             if this_rel_fname != cur_fname:
+                 if lois is not None:
+                     output += "\n"
+                     output += cur_fname + ":\n"
+
+                     # truncate long lines, in case we get minified js or something else crazy
+                     output += truncate_long_lines(
+                         self.render_tree(cur_abs_fname, cur_fname, lois), self.max_code_line_length
+                     )
+
+                     lois = None
+                 elif cur_fname:
+                     output += "\n" + cur_fname + "\n"
+                 if type(tag) is Tag:
+                     lois = []
+                     cur_abs_fname = tag.fname
+                 cur_fname = this_rel_fname
+
+             if lois is not None:
+                 lois.append(tag.line)
+
+         return output
+
+     def _get_mentioned_idents_cache_component(self, mentioned_idents):
+         """
+         Determine the cache key component for mentioned_idents using similarity comparison.
+
+         This method compares the current mentioned_idents with the previous ones using
+         cosine similarity. If the similarity is high enough, it returns the previous
+         cache key component to maintain cache hits. Otherwise, it updates the stored
+         values and returns the current mentioned_idents.
+
+         Args:
+             mentioned_idents (set): Current set of mentioned identifiers
+
+         Returns:
+             tuple or None: Cache key component for mentioned_idents
+         """
+         if not mentioned_idents:
+             self._last_mentioned_idents = None
+             self._last_mentioned_idents_vector = None
+             self._has_last_mentioned_idents = False
+             return None
+
+         current_mentioned_idents = tuple(mentioned_idents)
+
+         # Check if we have a previous cached value to compare against
+         if self._has_last_mentioned_idents:
+             # Create vector for current mentioned_idents
+             current_vector = create_bigram_vector(current_mentioned_idents)
+             current_vector_norm = normalize_vector(current_vector)
+
+             # Calculate cosine similarity
+             similarity = cosine_similarity(self._last_mentioned_idents_vector, current_vector_norm)
+             # If similarity is high enough, use the previous cache key component
+             if similarity >= self._mentioned_ident_similarity:
+                 # Use the previous mentioned_idents for cache key to maintain cache hit
+                 cache_key_component = self._last_mentioned_idents
+
+                 # Make similarity more strict the more consecutive cache hits
+                 self._mentioned_ident_similarity = min(
+                     0.9, self._mentioned_ident_similarity + 0.025
+                 )
+             else:
+                 # Similarity is too low, use current mentioned_idents
+                 cache_key_component = current_mentioned_idents
+
+                 # Update stored values
+                 self._last_mentioned_idents = current_mentioned_idents
+                 self._last_mentioned_idents_vector = current_vector_norm
+
+                 # Make similarity less strict the more consecutive cache misses
+                 self._mentioned_ident_similarity = max(
+                     0.5, self._mentioned_ident_similarity - 0.025
+                 )
+         else:
+             # First time or no previous value, use current mentioned_idents
+             cache_key_component = current_mentioned_idents
+             current_vector = create_bigram_vector(current_mentioned_idents)
+
+             # Store for future comparisons
+             self._last_mentioned_idents = current_mentioned_idents
+             self._last_mentioned_idents_vector = normalize_vector(current_vector)
+
+         self._has_last_mentioned_idents = True
+         return cache_key_component
+
+
+ def truncate_long_lines(text, max_length):
+     return "\n".join([line[:max_length] for line in text.splitlines()]) + "\n"
+
+
+ def find_src_files(directory):
+     if not os.path.isdir(directory):
+         return [directory]
+
+     src_files = []
+     for root, dirs, files in os.walk(directory):
+         for file in files:
+             src_files.append(os.path.join(root, file))
+     return src_files
+
+
+ def get_scm_fname(lang):
+     # Load the tags queries
+     if USING_TSL_PACK:
+         subdir = "tree-sitter-language-pack"
+         try:
+             path = resources.files(__package__).joinpath(
+                 "queries",
+                 subdir,
+                 f"{lang}-tags.scm",
+             )
+             if path.exists():
+                 return path
+         except KeyError:
+             pass
+
+     # Fall back to tree-sitter-languages
+     subdir = "tree-sitter-languages"
+     try:
+         return resources.files(__package__).joinpath(
+             "queries",
+             subdir,
+             f"{lang}-tags.scm",
+         )
+     except KeyError:
+         return
+
+
+ def get_supported_languages_md():
+     from grep_ast.parsers import PARSERS
+
+     res = """
+ | Language | File extension | Repo map | Linter |
+ |:--------:|:--------------:|:--------:|:------:|
+ """
+     data = sorted((lang, ex) for ex, lang in PARSERS.items())
+
+     for lang, ext in data:
+         fn = get_scm_fname(lang)
+         repo_map = "✓" if Path(fn).exists() else ""
+         linter_support = "✓"
+         res += f"| {lang:20} | {ext:20} | {repo_map:^8} | {linter_support:^6} |\n"
+
+     res += "\n"
+
+     return res
+
+
+ if __name__ == "__main__":
+     fnames = sys.argv[1:]
+
+     chat_fnames = []
+     other_fnames = []
+     for fname in sys.argv[1:]:
+         if Path(fname).is_dir():
+             chat_fnames += find_src_files(fname)
+         else:
+             chat_fnames.append(fname)
+
+     rm = RepoMap(repo_root=".")
+     repo_map = rm.get_ranked_tags_map(chat_fnames, other_fnames)
+
+     dump(len(repo_map))
+     print(repo_map)