cecli-dev 0.93.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (366) hide show
  1. cecli/__init__.py +20 -0
  2. cecli/__main__.py +4 -0
  3. cecli/_version.py +34 -0
  4. cecli/args.py +1092 -0
  5. cecli/args_formatter.py +228 -0
  6. cecli/change_tracker.py +133 -0
  7. cecli/coders/__init__.py +38 -0
  8. cecli/coders/agent_coder.py +1872 -0
  9. cecli/coders/architect_coder.py +63 -0
  10. cecli/coders/ask_coder.py +8 -0
  11. cecli/coders/base_coder.py +3993 -0
  12. cecli/coders/chat_chunks.py +116 -0
  13. cecli/coders/context_coder.py +52 -0
  14. cecli/coders/copypaste_coder.py +269 -0
  15. cecli/coders/editblock_coder.py +656 -0
  16. cecli/coders/editblock_fenced_coder.py +9 -0
  17. cecli/coders/editblock_func_coder.py +140 -0
  18. cecli/coders/editor_diff_fenced_coder.py +8 -0
  19. cecli/coders/editor_editblock_coder.py +8 -0
  20. cecli/coders/editor_whole_coder.py +8 -0
  21. cecli/coders/help_coder.py +15 -0
  22. cecli/coders/patch_coder.py +705 -0
  23. cecli/coders/search_replace.py +757 -0
  24. cecli/coders/shell.py +37 -0
  25. cecli/coders/single_wholefile_func_coder.py +101 -0
  26. cecli/coders/udiff_coder.py +428 -0
  27. cecli/coders/udiff_simple.py +12 -0
  28. cecli/coders/wholefile_coder.py +143 -0
  29. cecli/coders/wholefile_func_coder.py +133 -0
  30. cecli/commands/__init__.py +192 -0
  31. cecli/commands/add.py +226 -0
  32. cecli/commands/agent.py +51 -0
  33. cecli/commands/architect.py +46 -0
  34. cecli/commands/ask.py +44 -0
  35. cecli/commands/chat_mode.py +0 -0
  36. cecli/commands/clear.py +37 -0
  37. cecli/commands/code.py +46 -0
  38. cecli/commands/command_prefix.py +44 -0
  39. cecli/commands/commit.py +52 -0
  40. cecli/commands/context.py +47 -0
  41. cecli/commands/context_blocks.py +124 -0
  42. cecli/commands/context_management.py +51 -0
  43. cecli/commands/copy.py +62 -0
  44. cecli/commands/copy_context.py +81 -0
  45. cecli/commands/core.py +287 -0
  46. cecli/commands/diff.py +68 -0
  47. cecli/commands/drop.py +217 -0
  48. cecli/commands/editor.py +78 -0
  49. cecli/commands/exit.py +55 -0
  50. cecli/commands/git.py +57 -0
  51. cecli/commands/help.py +140 -0
  52. cecli/commands/history_search.py +40 -0
  53. cecli/commands/lint.py +109 -0
  54. cecli/commands/list_sessions.py +56 -0
  55. cecli/commands/load.py +85 -0
  56. cecli/commands/load_session.py +48 -0
  57. cecli/commands/load_skill.py +68 -0
  58. cecli/commands/ls.py +75 -0
  59. cecli/commands/map.py +37 -0
  60. cecli/commands/map_refresh.py +35 -0
  61. cecli/commands/model.py +118 -0
  62. cecli/commands/models.py +41 -0
  63. cecli/commands/multiline_mode.py +38 -0
  64. cecli/commands/paste.py +91 -0
  65. cecli/commands/quit.py +32 -0
  66. cecli/commands/read_only.py +267 -0
  67. cecli/commands/read_only_stub.py +270 -0
  68. cecli/commands/reasoning_effort.py +70 -0
  69. cecli/commands/remove_skill.py +68 -0
  70. cecli/commands/report.py +40 -0
  71. cecli/commands/reset.py +88 -0
  72. cecli/commands/run.py +99 -0
  73. cecli/commands/save.py +49 -0
  74. cecli/commands/save_session.py +43 -0
  75. cecli/commands/settings.py +69 -0
  76. cecli/commands/test.py +58 -0
  77. cecli/commands/think_tokens.py +74 -0
  78. cecli/commands/tokens.py +207 -0
  79. cecli/commands/undo.py +145 -0
  80. cecli/commands/utils/__init__.py +0 -0
  81. cecli/commands/utils/base_command.py +131 -0
  82. cecli/commands/utils/helpers.py +142 -0
  83. cecli/commands/utils/registry.py +53 -0
  84. cecli/commands/utils/save_load_manager.py +98 -0
  85. cecli/commands/voice.py +78 -0
  86. cecli/commands/weak_model.py +123 -0
  87. cecli/commands/web.py +87 -0
  88. cecli/deprecated_args.py +185 -0
  89. cecli/diffs.py +129 -0
  90. cecli/dump.py +29 -0
  91. cecli/editor.py +147 -0
  92. cecli/exceptions.py +115 -0
  93. cecli/format_settings.py +26 -0
  94. cecli/help.py +119 -0
  95. cecli/help_pats.py +19 -0
  96. cecli/helpers/__init__.py +9 -0
  97. cecli/helpers/copypaste.py +123 -0
  98. cecli/helpers/coroutines.py +8 -0
  99. cecli/helpers/file_searcher.py +142 -0
  100. cecli/helpers/model_providers.py +552 -0
  101. cecli/helpers/plugin_manager.py +81 -0
  102. cecli/helpers/profiler.py +162 -0
  103. cecli/helpers/requests.py +77 -0
  104. cecli/helpers/similarity.py +98 -0
  105. cecli/helpers/skills.py +577 -0
  106. cecli/history.py +186 -0
  107. cecli/io.py +1782 -0
  108. cecli/linter.py +304 -0
  109. cecli/llm.py +101 -0
  110. cecli/main.py +1280 -0
  111. cecli/mcp/__init__.py +154 -0
  112. cecli/mcp/oauth.py +250 -0
  113. cecli/mcp/server.py +278 -0
  114. cecli/mdstream.py +243 -0
  115. cecli/models.py +1255 -0
  116. cecli/onboarding.py +301 -0
  117. cecli/prompts/__init__.py +0 -0
  118. cecli/prompts/agent.yml +71 -0
  119. cecli/prompts/architect.yml +35 -0
  120. cecli/prompts/ask.yml +31 -0
  121. cecli/prompts/base.yml +99 -0
  122. cecli/prompts/context.yml +60 -0
  123. cecli/prompts/copypaste.yml +5 -0
  124. cecli/prompts/editblock.yml +143 -0
  125. cecli/prompts/editblock_fenced.yml +106 -0
  126. cecli/prompts/editblock_func.yml +25 -0
  127. cecli/prompts/editor_diff_fenced.yml +115 -0
  128. cecli/prompts/editor_editblock.yml +121 -0
  129. cecli/prompts/editor_whole.yml +46 -0
  130. cecli/prompts/help.yml +37 -0
  131. cecli/prompts/patch.yml +110 -0
  132. cecli/prompts/single_wholefile_func.yml +24 -0
  133. cecli/prompts/udiff.yml +106 -0
  134. cecli/prompts/udiff_simple.yml +13 -0
  135. cecli/prompts/utils/__init__.py +0 -0
  136. cecli/prompts/utils/prompt_registry.py +167 -0
  137. cecli/prompts/utils/system.py +56 -0
  138. cecli/prompts/wholefile.yml +50 -0
  139. cecli/prompts/wholefile_func.yml +24 -0
  140. cecli/queries/tree-sitter-language-pack/README.md +7 -0
  141. cecli/queries/tree-sitter-language-pack/arduino-tags.scm +5 -0
  142. cecli/queries/tree-sitter-language-pack/c-tags.scm +12 -0
  143. cecli/queries/tree-sitter-language-pack/chatito-tags.scm +16 -0
  144. cecli/queries/tree-sitter-language-pack/clojure-tags.scm +12 -0
  145. cecli/queries/tree-sitter-language-pack/commonlisp-tags.scm +127 -0
  146. cecli/queries/tree-sitter-language-pack/cpp-tags.scm +18 -0
  147. cecli/queries/tree-sitter-language-pack/csharp-tags.scm +32 -0
  148. cecli/queries/tree-sitter-language-pack/d-tags.scm +26 -0
  149. cecli/queries/tree-sitter-language-pack/dart-tags.scm +97 -0
  150. cecli/queries/tree-sitter-language-pack/elisp-tags.scm +5 -0
  151. cecli/queries/tree-sitter-language-pack/elixir-tags.scm +59 -0
  152. cecli/queries/tree-sitter-language-pack/elm-tags.scm +22 -0
  153. cecli/queries/tree-sitter-language-pack/gleam-tags.scm +41 -0
  154. cecli/queries/tree-sitter-language-pack/go-tags.scm +49 -0
  155. cecli/queries/tree-sitter-language-pack/java-tags.scm +26 -0
  156. cecli/queries/tree-sitter-language-pack/javascript-tags.scm +96 -0
  157. cecli/queries/tree-sitter-language-pack/lua-tags.scm +39 -0
  158. cecli/queries/tree-sitter-language-pack/matlab-tags.scm +10 -0
  159. cecli/queries/tree-sitter-language-pack/ocaml-tags.scm +115 -0
  160. cecli/queries/tree-sitter-language-pack/ocaml_interface-tags.scm +101 -0
  161. cecli/queries/tree-sitter-language-pack/pony-tags.scm +39 -0
  162. cecli/queries/tree-sitter-language-pack/properties-tags.scm +5 -0
  163. cecli/queries/tree-sitter-language-pack/python-tags.scm +24 -0
  164. cecli/queries/tree-sitter-language-pack/r-tags.scm +27 -0
  165. cecli/queries/tree-sitter-language-pack/racket-tags.scm +12 -0
  166. cecli/queries/tree-sitter-language-pack/ruby-tags.scm +69 -0
  167. cecli/queries/tree-sitter-language-pack/rust-tags.scm +63 -0
  168. cecli/queries/tree-sitter-language-pack/solidity-tags.scm +43 -0
  169. cecli/queries/tree-sitter-language-pack/swift-tags.scm +54 -0
  170. cecli/queries/tree-sitter-language-pack/udev-tags.scm +20 -0
  171. cecli/queries/tree-sitter-languages/README.md +24 -0
  172. cecli/queries/tree-sitter-languages/c-tags.scm +12 -0
  173. cecli/queries/tree-sitter-languages/c_sharp-tags.scm +52 -0
  174. cecli/queries/tree-sitter-languages/cpp-tags.scm +18 -0
  175. cecli/queries/tree-sitter-languages/dart-tags.scm +92 -0
  176. cecli/queries/tree-sitter-languages/elisp-tags.scm +8 -0
  177. cecli/queries/tree-sitter-languages/elixir-tags.scm +59 -0
  178. cecli/queries/tree-sitter-languages/elm-tags.scm +22 -0
  179. cecli/queries/tree-sitter-languages/fortran-tags.scm +18 -0
  180. cecli/queries/tree-sitter-languages/go-tags.scm +36 -0
  181. cecli/queries/tree-sitter-languages/haskell-tags.scm +5 -0
  182. cecli/queries/tree-sitter-languages/hcl-tags.scm +77 -0
  183. cecli/queries/tree-sitter-languages/java-tags.scm +26 -0
  184. cecli/queries/tree-sitter-languages/javascript-tags.scm +96 -0
  185. cecli/queries/tree-sitter-languages/julia-tags.scm +60 -0
  186. cecli/queries/tree-sitter-languages/kotlin-tags.scm +30 -0
  187. cecli/queries/tree-sitter-languages/matlab-tags.scm +10 -0
  188. cecli/queries/tree-sitter-languages/ocaml-tags.scm +115 -0
  189. cecli/queries/tree-sitter-languages/ocaml_interface-tags.scm +104 -0
  190. cecli/queries/tree-sitter-languages/php-tags.scm +32 -0
  191. cecli/queries/tree-sitter-languages/python-tags.scm +22 -0
  192. cecli/queries/tree-sitter-languages/ql-tags.scm +26 -0
  193. cecli/queries/tree-sitter-languages/ruby-tags.scm +69 -0
  194. cecli/queries/tree-sitter-languages/rust-tags.scm +63 -0
  195. cecli/queries/tree-sitter-languages/scala-tags.scm +64 -0
  196. cecli/queries/tree-sitter-languages/typescript-tags.scm +44 -0
  197. cecli/queries/tree-sitter-languages/zig-tags.scm +20 -0
  198. cecli/reasoning_tags.py +82 -0
  199. cecli/repo.py +626 -0
  200. cecli/repomap.py +1368 -0
  201. cecli/report.py +260 -0
  202. cecli/resources/__init__.py +3 -0
  203. cecli/resources/model-metadata.json +25751 -0
  204. cecli/resources/model-settings.yml +2394 -0
  205. cecli/resources/providers.json +67 -0
  206. cecli/run_cmd.py +143 -0
  207. cecli/scrape.py +295 -0
  208. cecli/sendchat.py +250 -0
  209. cecli/sessions.py +281 -0
  210. cecli/special.py +203 -0
  211. cecli/tools/__init__.py +72 -0
  212. cecli/tools/command.py +103 -0
  213. cecli/tools/command_interactive.py +113 -0
  214. cecli/tools/context_manager.py +175 -0
  215. cecli/tools/delete_block.py +154 -0
  216. cecli/tools/delete_line.py +120 -0
  217. cecli/tools/delete_lines.py +144 -0
  218. cecli/tools/extract_lines.py +281 -0
  219. cecli/tools/finished.py +35 -0
  220. cecli/tools/git_branch.py +132 -0
  221. cecli/tools/git_diff.py +49 -0
  222. cecli/tools/git_log.py +43 -0
  223. cecli/tools/git_remote.py +39 -0
  224. cecli/tools/git_show.py +37 -0
  225. cecli/tools/git_status.py +32 -0
  226. cecli/tools/grep.py +242 -0
  227. cecli/tools/indent_lines.py +195 -0
  228. cecli/tools/insert_block.py +263 -0
  229. cecli/tools/list_changes.py +71 -0
  230. cecli/tools/load_skill.py +51 -0
  231. cecli/tools/ls.py +77 -0
  232. cecli/tools/remove_skill.py +51 -0
  233. cecli/tools/replace_all.py +113 -0
  234. cecli/tools/replace_line.py +135 -0
  235. cecli/tools/replace_lines.py +180 -0
  236. cecli/tools/replace_text.py +186 -0
  237. cecli/tools/show_numbered_context.py +137 -0
  238. cecli/tools/thinking.py +52 -0
  239. cecli/tools/undo_change.py +82 -0
  240. cecli/tools/update_todo_list.py +148 -0
  241. cecli/tools/utils/base_tool.py +64 -0
  242. cecli/tools/utils/helpers.py +359 -0
  243. cecli/tools/utils/output.py +119 -0
  244. cecli/tools/utils/registry.py +145 -0
  245. cecli/tools/view_files_matching.py +138 -0
  246. cecli/tools/view_files_with_symbol.py +117 -0
  247. cecli/tui/__init__.py +83 -0
  248. cecli/tui/app.py +971 -0
  249. cecli/tui/io.py +566 -0
  250. cecli/tui/styles.tcss +117 -0
  251. cecli/tui/widgets/__init__.py +19 -0
  252. cecli/tui/widgets/completion_bar.py +331 -0
  253. cecli/tui/widgets/file_list.py +76 -0
  254. cecli/tui/widgets/footer.py +165 -0
  255. cecli/tui/widgets/input_area.py +320 -0
  256. cecli/tui/widgets/key_hints.py +16 -0
  257. cecli/tui/widgets/output.py +354 -0
  258. cecli/tui/widgets/status_bar.py +279 -0
  259. cecli/tui/worker.py +160 -0
  260. cecli/urls.py +16 -0
  261. cecli/utils.py +499 -0
  262. cecli/versioncheck.py +90 -0
  263. cecli/voice.py +90 -0
  264. cecli/waiting.py +38 -0
  265. cecli/watch.py +316 -0
  266. cecli/watch_prompts.py +12 -0
  267. cecli/website/Gemfile +8 -0
  268. cecli/website/_includes/blame.md +162 -0
  269. cecli/website/_includes/get-started.md +22 -0
  270. cecli/website/_includes/help-tip.md +5 -0
  271. cecli/website/_includes/help.md +24 -0
  272. cecli/website/_includes/install.md +5 -0
  273. cecli/website/_includes/keys.md +4 -0
  274. cecli/website/_includes/model-warnings.md +67 -0
  275. cecli/website/_includes/multi-line.md +22 -0
  276. cecli/website/_includes/python-m-aider.md +5 -0
  277. cecli/website/_includes/recording.css +228 -0
  278. cecli/website/_includes/recording.md +34 -0
  279. cecli/website/_includes/replit-pipx.md +9 -0
  280. cecli/website/_includes/works-best.md +1 -0
  281. cecli/website/_sass/custom/custom.scss +103 -0
  282. cecli/website/docs/config/adv-model-settings.md +2498 -0
  283. cecli/website/docs/config/agent-mode.md +320 -0
  284. cecli/website/docs/config/aider_conf.md +548 -0
  285. cecli/website/docs/config/api-keys.md +90 -0
  286. cecli/website/docs/config/custom-commands.md +187 -0
  287. cecli/website/docs/config/dotenv.md +493 -0
  288. cecli/website/docs/config/editor.md +127 -0
  289. cecli/website/docs/config/mcp.md +210 -0
  290. cecli/website/docs/config/model-aliases.md +173 -0
  291. cecli/website/docs/config/options.md +890 -0
  292. cecli/website/docs/config/reasoning.md +210 -0
  293. cecli/website/docs/config/skills.md +172 -0
  294. cecli/website/docs/config/tui.md +126 -0
  295. cecli/website/docs/config.md +44 -0
  296. cecli/website/docs/faq.md +379 -0
  297. cecli/website/docs/git.md +76 -0
  298. cecli/website/docs/index.md +47 -0
  299. cecli/website/docs/install/codespaces.md +39 -0
  300. cecli/website/docs/install/docker.md +48 -0
  301. cecli/website/docs/install/optional.md +100 -0
  302. cecli/website/docs/install/replit.md +8 -0
  303. cecli/website/docs/install.md +115 -0
  304. cecli/website/docs/languages.md +264 -0
  305. cecli/website/docs/legal/contributor-agreement.md +111 -0
  306. cecli/website/docs/legal/privacy.md +104 -0
  307. cecli/website/docs/llms/anthropic.md +77 -0
  308. cecli/website/docs/llms/azure.md +48 -0
  309. cecli/website/docs/llms/bedrock.md +132 -0
  310. cecli/website/docs/llms/cohere.md +34 -0
  311. cecli/website/docs/llms/deepseek.md +32 -0
  312. cecli/website/docs/llms/gemini.md +49 -0
  313. cecli/website/docs/llms/github.md +111 -0
  314. cecli/website/docs/llms/groq.md +36 -0
  315. cecli/website/docs/llms/lm-studio.md +39 -0
  316. cecli/website/docs/llms/ollama.md +75 -0
  317. cecli/website/docs/llms/openai-compat.md +39 -0
  318. cecli/website/docs/llms/openai.md +58 -0
  319. cecli/website/docs/llms/openrouter.md +78 -0
  320. cecli/website/docs/llms/other.md +117 -0
  321. cecli/website/docs/llms/vertex.md +50 -0
  322. cecli/website/docs/llms/warnings.md +10 -0
  323. cecli/website/docs/llms/xai.md +53 -0
  324. cecli/website/docs/llms.md +54 -0
  325. cecli/website/docs/more/analytics.md +127 -0
  326. cecli/website/docs/more/edit-formats.md +116 -0
  327. cecli/website/docs/more/infinite-output.md +192 -0
  328. cecli/website/docs/more-info.md +8 -0
  329. cecli/website/docs/recordings/auto-accept-architect.md +31 -0
  330. cecli/website/docs/recordings/dont-drop-original-read-files.md +35 -0
  331. cecli/website/docs/recordings/index.md +21 -0
  332. cecli/website/docs/recordings/model-accepts-settings.md +69 -0
  333. cecli/website/docs/recordings/tree-sitter-language-pack.md +80 -0
  334. cecli/website/docs/repomap.md +112 -0
  335. cecli/website/docs/scripting.md +100 -0
  336. cecli/website/docs/sessions.md +213 -0
  337. cecli/website/docs/troubleshooting/aider-not-found.md +24 -0
  338. cecli/website/docs/troubleshooting/edit-errors.md +76 -0
  339. cecli/website/docs/troubleshooting/imports.md +62 -0
  340. cecli/website/docs/troubleshooting/models-and-keys.md +54 -0
  341. cecli/website/docs/troubleshooting/support.md +79 -0
  342. cecli/website/docs/troubleshooting/token-limits.md +96 -0
  343. cecli/website/docs/troubleshooting/warnings.md +12 -0
  344. cecli/website/docs/troubleshooting.md +11 -0
  345. cecli/website/docs/usage/browser.md +57 -0
  346. cecli/website/docs/usage/caching.md +49 -0
  347. cecli/website/docs/usage/commands.md +133 -0
  348. cecli/website/docs/usage/conventions.md +119 -0
  349. cecli/website/docs/usage/copypaste.md +136 -0
  350. cecli/website/docs/usage/images-urls.md +48 -0
  351. cecli/website/docs/usage/lint-test.md +118 -0
  352. cecli/website/docs/usage/modes.md +211 -0
  353. cecli/website/docs/usage/not-code.md +179 -0
  354. cecli/website/docs/usage/notifications.md +87 -0
  355. cecli/website/docs/usage/tips.md +79 -0
  356. cecli/website/docs/usage/tutorials.md +30 -0
  357. cecli/website/docs/usage/voice.md +121 -0
  358. cecli/website/docs/usage/watch.md +294 -0
  359. cecli/website/docs/usage.md +102 -0
  360. cecli/website/share/index.md +101 -0
  361. cecli_dev-0.93.1.dist-info/METADATA +549 -0
  362. cecli_dev-0.93.1.dist-info/RECORD +366 -0
  363. cecli_dev-0.93.1.dist-info/WHEEL +5 -0
  364. cecli_dev-0.93.1.dist-info/entry_points.txt +4 -0
  365. cecli_dev-0.93.1.dist-info/licenses/LICENSE.txt +202 -0
  366. cecli_dev-0.93.1.dist-info/top_level.txt +1 -0
cecli/repomap.py ADDED
@@ -0,0 +1,1368 @@
1
+ import math
2
+ import os
3
+ import re
4
+ import shutil
5
+ import sqlite3
6
+ import sys
7
+ import time
8
+ import warnings
9
+ from collections import defaultdict, namedtuple
10
+ from importlib import resources
11
+ from pathlib import Path
12
+
13
+ import tree_sitter
14
+ from diskcache import Cache
15
+ from grep_ast import TreeContext, filename_to_lang
16
+ from pygments.lexers import guess_lexer_for_filename
17
+ from pygments.token import Token
18
+
19
+ from cecli.dump import dump
20
+ from cecli.helpers.similarity import (
21
+ cosine_similarity,
22
+ create_bigram_vector,
23
+ normalize_vector,
24
+ )
25
+ from cecli.special import filter_important_files
26
+ from cecli.tools.utils.helpers import ToolError
27
+
28
+ # tree_sitter is throwing a FutureWarning
29
+ warnings.simplefilter("ignore", category=FutureWarning)
30
+ from grep_ast.tsl import USING_TSL_PACK, get_language, get_parser # noqa: E402
31
+
32
+
33
# Tag records carry defaults for the trailing optional fields so that cache
# entries pickled under the old (shorter) namedtuple definition still load.
class TagBase(
    namedtuple(
        "TagBase",
        "rel_fname fname line name kind specific_kind start_line end_line start_byte end_byte",
    )
):
    """One code tag (a definition or reference) extracted from a source file."""

    __slots__ = ()

    def __new__(cls, rel_fname, fname, line, name, kind,
                specific_kind=None, start_line=None, end_line=None,
                start_byte=None, end_byte=None):
        # Old cached objects may omit the trailing fields; defaulting them to
        # None keeps those entries constructible.
        return super().__new__(cls, rel_fname, fname, line, name, kind,
                               specific_kind, start_line, end_line,
                               start_byte, end_byte)


# Public alias used throughout the module.
Tag = TagBase
73
+
74
+
75
# Exceptions that indicate a broken or unusable on-disk tags cache.
SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)

# Bumping the version invalidates previously written tag caches; the
# tree-sitter language pack gets its own cache version.
CACHE_VERSION = 9 if USING_TSL_PACK else 7

UPDATING_REPO_MAP_MESSAGE = "Updating repo map"
83
+
84
+
85
class RepoMap:
    """Builds a ranked symbol map of the repository for use as LLM context."""

    # Versioned cache directory: bumping CACHE_VERSION invalidates stale caches.
    TAGS_CACHE_DIR = f".cecli/tags.cache.v{CACHE_VERSION}"

    # Files already warned about; class-level, so shared across instances.
    warned_files = set()

    # Class variable to store initial ranked tags results
    _initial_ranked_tags = None
    _initial_ident_to_files = None

    # Define kinds that typically represent definitions across languages
    # Used by AgentCoder to filter tags for the symbol outline
    definition_kinds = {
        "class",
        "struct",
        "enum",
        "interface",
        "trait",  # Structure definitions
        "function",
        "method",
        "constructor",  # Function/method definitions
        "module",
        "namespace",  # Module/namespace definitions
        "constant",
        "variable",  # Top-level/class variable definitions (consider refining)
        "type",  # Type definitions
        # Add more based on tree-sitter queries if needed
    }
112
+
113
+ @staticmethod
114
+ def get_file_stub(fname, io):
115
+ """Generate a complete structural outline of a source code file.
116
+
117
+ Args:
118
+ fname (str): Absolute path to the source file
119
+ io: InputOutput instance for file operations
120
+
121
+ Returns:
122
+ str: Formatted outline showing the file's structure
123
+ """
124
+ # Use cached instance if available
125
+ if not hasattr(RepoMap, "_stub_instance"):
126
+ RepoMap._stub_instance = RepoMap(map_tokens=0, io=io)
127
+
128
+ rm = RepoMap._stub_instance
129
+
130
+ rel_fname = rm.get_rel_fname(fname)
131
+
132
+ # Reuse existing tag parsing
133
+ tags = rm.get_tags(fname, rel_fname)
134
+ if not tags:
135
+ return "# No outline available"
136
+
137
+ # Get all definition lines
138
+ lois = [tag.line for tag in tags if tag.kind == "def"]
139
+
140
+ # Reuse existing tree rendering
141
+ outline = rm.render_tree(fname, rel_fname, lois)
142
+
143
+ return f"{outline}"
144
+
145
+ def __init__(
146
+ self,
147
+ map_tokens=1024,
148
+ map_cache_dir=".",
149
+ main_model=None,
150
+ io=None,
151
+ repo_content_prefix=None,
152
+ verbose=False,
153
+ max_context_window=None,
154
+ map_mul_no_files=8,
155
+ refresh="auto",
156
+ max_code_line_length=100,
157
+ repo_root=None,
158
+ use_memory_cache=False,
159
+ use_enhanced_map=False,
160
+ ):
161
+ self.io = io
162
+ self.verbose = verbose
163
+ self.refresh = refresh
164
+ self.use_enhanced_map = use_enhanced_map
165
+
166
+ self.map_cache_dir = map_cache_dir
167
+ # Prefer an explicit repo root (eg per-test repo), fallback to CWD
168
+ self.root = repo_root or os.getcwd()
169
+
170
+ # Allow opting into an in-memory tags cache to avoid disk/SQLite locks
171
+ if use_memory_cache:
172
+ self.TAGS_CACHE = dict()
173
+ else:
174
+ self.load_tags_cache()
175
+ self.cache_threshold = 0.95
176
+
177
+ self.max_map_tokens = map_tokens
178
+ self.map_mul_no_files = map_mul_no_files
179
+ self.max_context_window = max_context_window
180
+
181
+ self.max_code_line_length = max_code_line_length
182
+
183
+ self.repo_content_prefix = repo_content_prefix
184
+
185
+ self.main_model = main_model
186
+
187
+ self.tree_cache = {}
188
+ self.tree_context_cache = {}
189
+ self.map_cache = {}
190
+ self.map_processing_time = 0
191
+ self.last_map = None
192
+
193
+ # Initialize cache for mentioned identifiers similarity
194
+ self._last_mentioned_idents = None
195
+ self._last_mentioned_idents_vector = None
196
+ self._has_last_mentioned_idents = False
197
+ self._mentioned_ident_similarity = 0.8
198
+
199
+ if self.verbose:
200
+ self.io.tool_output(f"RepoMap loaded entries from tags cache: {len(self.TAGS_CACHE)}")
201
+ self.io.tool_output(
202
+ f"RepoMap initialized with map_mul_no_files: {self.map_mul_no_files}"
203
+ )
204
+ self.io.tool_output(f"RepoMap initialized with map_cache_dir: {self.map_cache_dir}")
205
+ self.io.tool_output(f"RepoMap assumes repo root is: {self.root}")
206
+
207
+ def token_count(self, text):
208
+ len_text = len(text)
209
+ if len_text < 200:
210
+ return self.main_model.token_count(text)
211
+
212
+ lines = text.splitlines(keepends=True)
213
+ num_lines = len(lines)
214
+ step = num_lines // 100 or 1
215
+ lines = lines[::step]
216
+ sample_text = "".join(lines)
217
+ sample_tokens = self.main_model.token_count(sample_text)
218
+ est_tokens = sample_tokens / len(sample_text) * len_text
219
+ return est_tokens
220
+
221
+ def get_repo_map(
222
+ self,
223
+ chat_files,
224
+ other_files,
225
+ mentioned_fnames=None,
226
+ mentioned_idents=None,
227
+ force_refresh=False,
228
+ ):
229
+ if self.max_map_tokens <= 0:
230
+ return
231
+ if not other_files:
232
+ return
233
+ if not mentioned_fnames:
234
+ mentioned_fnames = set()
235
+ if not mentioned_idents:
236
+ mentioned_idents = set()
237
+
238
+ max_map_tokens = self.max_map_tokens
239
+
240
+ # With no files in the chat, give a bigger view of the entire repo
241
+ padding = 4096
242
+ if max_map_tokens and self.max_context_window:
243
+ target = min(
244
+ int(max_map_tokens * self.map_mul_no_files),
245
+ self.max_context_window - padding,
246
+ )
247
+ else:
248
+ target = 0
249
+ if not chat_files and self.max_context_window and target > 0:
250
+ max_map_tokens = target
251
+
252
+ try:
253
+ files_listing = self.get_ranked_tags_map(
254
+ chat_files,
255
+ other_files,
256
+ max_map_tokens,
257
+ mentioned_fnames,
258
+ mentioned_idents,
259
+ force_refresh,
260
+ )
261
+ except RecursionError:
262
+ self.io.tool_error("Disabling repo map, git repo too large?")
263
+ self.max_map_tokens = 0
264
+ return
265
+
266
+ if not files_listing:
267
+ return
268
+
269
+ if self.verbose:
270
+ num_tokens = self.token_count(files_listing)
271
+ self.io.tool_output(f"Repo-map: {num_tokens / 1024:.1f} k-tokens")
272
+
273
+ if chat_files:
274
+ other = "other "
275
+ else:
276
+ other = ""
277
+
278
+ if self.repo_content_prefix:
279
+ repo_content = self.repo_content_prefix.format(other=other)
280
+ else:
281
+ repo_content = ""
282
+
283
+ repo_content += files_listing
284
+
285
+ return repo_content
286
+
287
+ def get_rel_fname(self, fname):
288
+ try:
289
+ return os.path.relpath(fname, self.root)
290
+ except ValueError:
291
+ # Issue #1288: ValueError: path is on mount 'C:', start on mount 'D:'
292
+ # Just return the full fname.
293
+ return fname
294
+
295
+ def tags_cache_error(self, original_error=None):
296
+ """Handle SQLite errors by trying to recreate cache, falling back to dict if needed"""
297
+
298
+ if self.verbose and original_error:
299
+ self.io.tool_warning(f"Tags cache error: {str(original_error)}")
300
+
301
+ if isinstance(getattr(self, "TAGS_CACHE", None), dict):
302
+ return
303
+
304
+ path = Path(self.map_cache_dir) / self.TAGS_CACHE_DIR
305
+
306
+ # Try to recreate the cache
307
+ try:
308
+ # Delete existing cache dir
309
+ if path.exists():
310
+ shutil.rmtree(path)
311
+
312
+ # Try to create new cache
313
+ new_cache = Cache(path)
314
+
315
+ # Test that it works
316
+ test_key = "test"
317
+ new_cache[test_key] = "test"
318
+ _ = new_cache[test_key]
319
+ del new_cache[test_key]
320
+
321
+ # If we got here, the new cache works
322
+ self.TAGS_CACHE = new_cache
323
+ return
324
+
325
+ except SQLITE_ERRORS as e:
326
+ # If anything goes wrong, warn and fall back to dict
327
+ self.io.tool_warning(
328
+ f"Unable to use tags cache at {path}, falling back to memory cache"
329
+ )
330
+ if self.verbose:
331
+ self.io.tool_warning(f"Cache recreation error: {str(e)}")
332
+
333
+ self.TAGS_CACHE = dict()
334
+
335
+ def load_tags_cache(self):
336
+ path = Path(self.map_cache_dir) / self.TAGS_CACHE_DIR
337
+ try:
338
+ self.TAGS_CACHE = Cache(path)
339
+ except SQLITE_ERRORS as e:
340
+ self.tags_cache_error(e)
341
+
342
    def save_tags_cache(self):
        # Intentional no-op: the diskcache backend persists entries as they are
        # written, so there is nothing to flush. Kept as an explicit hook so
        # callers have a stable API if an eager-save backend is added later.
        pass
344
+
345
+ def get_mtime(self, fname):
346
+ try:
347
+ return os.path.getmtime(fname)
348
+ except FileNotFoundError:
349
+ self.io.tool_warning(f"File not found error: {fname}")
350
+
351
    def _compute_file_summary(self, tags, rel_fname):
        """Compute file-level summary from tags.

        Args:
            tags: Iterable of Tag records for one file.
            rel_fname: Repo-relative filename (currently unused here).

        Returns:
            dict with "defines" (set), "references" (name -> count dict),
            and "imports" (set).
        """
        defines = set()
        references = defaultdict(int)
        imports = set()

        for tag in tags:
            if tag.kind == "def":
                defines.add(tag.name)
            elif tag.kind == "ref":
                references[tag.name] += 1
            # NOTE(review): in the flattened source the nesting of this check
            # is ambiguous; reconstructed at loop level so any tag whose
            # specific_kind is "import" is counted, regardless of kind --
            # confirm against the original formatting.
            if tag.specific_kind == "import":
                imports.add(tag.name)

        return {"defines": defines, "references": dict(references), "imports": imports}
366
+
367
    def _get_cached_summary(self, fname, file_mtime):
        """Get cached summary for a file if available and up-to-date.

        Args:
            fname: Absolute path used as the cache key.
            file_mtime: Current mtime; a cached entry must match it exactly.

        Returns:
            The summary dict, or None on a cache miss / stale entry.
        """
        cache_key = fname
        try:
            val = self.TAGS_CACHE.get(cache_key)  # Issue #1308
        except SQLITE_ERRORS as e:
            # Repair the cache (or fall back to a dict) and retry once.
            self.tags_cache_error(e)
            val = self.TAGS_CACHE.get(cache_key)

        if val is not None and val.get("mtime") == file_mtime:
            # Handle backward compatibility: old cache entries won't have "summary"
            summary = val.get("summary")
            if summary is None:
                # Compute summary from cached data
                data = val.get("data")
                if data is not None:
                    rel_fname = self.get_rel_fname(fname)
                    summary = self._compute_file_summary(data, rel_fname)
                    # Update cache with summary for future use
                    val["summary"] = summary
                    self.TAGS_CACHE[cache_key] = val
            return summary
        return None
390
+
391
    def get_tags(self, fname, rel_fname):
        """Return the Tag list for *fname*, served from cache when fresh.

        On a cache hit (matching mtime) the cached tags are returned; SQLite
        errors trigger a cache repair and one retry, and malformed old cache
        entries force a refresh. On a miss the file is re-parsed and the cache
        entry (tags + summary) is rewritten.
        """
        # Check if the file is in the cache and if the modification time has not changed
        file_mtime = self.get_mtime(fname)
        if file_mtime is None:
            # File vanished; nothing to tag.
            return []

        cache_key = fname
        try:
            val = self.TAGS_CACHE.get(cache_key)  # Issue #1308
        except SQLITE_ERRORS as e:
            # Repair the cache (or fall back to a dict) and retry once.
            self.tags_cache_error(e)
            val = self.TAGS_CACHE.get(cache_key)

        if val is not None and val.get("mtime") == file_mtime:
            try:
                # Get the cached data
                data = self.TAGS_CACHE[cache_key]["data"]

                # Let our Tag class handle compatibility with old cache formats
                # No need for special handling as TagBase.__new__ will supply default specific_kind

                return data
            except SQLITE_ERRORS as e:
                self.tags_cache_error(e)
                return self.TAGS_CACHE[cache_key]["data"]
            except (TypeError, AttributeError) as e:
                # If we hit an error related to missing fields in old cached Tag objects,
                # force a cache refresh for this file
                if self.verbose:
                    self.io.tool_warning(f"Cache format error for {fname}, refreshing: {e}")
                # Return empty list to trigger cache refresh
                return []

        # miss!
        data = list(self.get_tags_raw(fname, rel_fname))

        # Compute file summary
        summary = self._compute_file_summary(data, rel_fname)

        # Update the cache
        try:
            self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data, "summary": summary}
            self.save_tags_cache()
        except SQLITE_ERRORS as e:
            # Repair and retry the write once before giving up.
            self.tags_cache_error(e)
            self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data, "summary": summary}

        return data
439
+
440
    def get_symbol_definition_location(self, file_path, symbol_name):
        """
        Finds the unique definition location (start/end line) for a symbol in a file.

        Args:
            file_path (str): The relative path to the file.
            symbol_name (str): The name of the symbol to find.

        Returns:
            tuple: (start_line, end_line) (0-based) if a unique definition is found.

        Raises:
            ToolError: If the symbol is not found, not unique, or not a definition.
        """
        abs_path = self.io.root_abs_path(file_path)  # Assuming io has this helper or similar
        rel_path = self.get_rel_fname(abs_path)  # Ensure we use consistent relative path

        tags = self.get_tags(abs_path, rel_path)
        if not tags:
            raise ToolError(f"Symbol '{symbol_name}' not found in '{file_path}' (no tags).")

        definitions = []
        for tag in tags:
            # Check if it's a definition and the name matches
            if tag.kind == "def" and tag.name == symbol_name:
                # Ensure we have valid location info
                if tag.start_line is not None and tag.end_line is not None and tag.start_line >= 0:
                    definitions.append(tag)

        if not definitions:
            # Check if it exists as a non-definition tag
            non_defs = [tag for tag in tags if tag.name == symbol_name and tag.kind != "def"]
            if non_defs:
                raise ToolError(
                    f"Symbol '{symbol_name}' found in '{file_path}', but not as a unique definition"
                    f" (found as {non_defs[0].kind})."
                )
            else:
                raise ToolError(f"Symbol '{symbol_name}' definition not found in '{file_path}'.")

        if len(definitions) > 1:
            # Provide more context about ambiguity if possible
            lines = sorted([d.start_line + 1 for d in definitions])  # 1-based for user message
            raise ToolError(
                f"Symbol '{symbol_name}' is ambiguous in '{file_path}'. Found definitions on lines:"
                f" {', '.join(map(str, lines))}."
            )

        # Unique definition found
        definition_tag = definitions[0]
        return definition_tag.start_line, definition_tag.end_line
492
+
493
def shared_path_components(self, path1_str, path2_str):
    """
    Distance between two paths based on shared leading components.

    Computed as len(p1) + len(p2) - 2 * shared, so identical paths score 0
    and totally unrelated paths score the sum of their lengths.
    Lower is closer.
    """
    parts_a = Path(path1_str).parts
    parts_b = Path(path2_str).parts

    # Count how many leading components the two paths have in common.
    shared = 0
    for left, right in zip(parts_a, parts_b):
        if left != right:
            break
        shared += 1

    return len(parts_a) + len(parts_b) - 2 * shared
513
+
514
def check_import_match(self, definer, imports):
    """
    Decide whether any import string plausibly refers to *definer*.

    Each import is split on '.', '/' and '\\' and compared as a contiguous
    sub-sequence of the definer's path parts (file extension stripped).
    A suffix match always counts (standard module aliasing); a match in the
    middle counts only when the import has at least two parts.
    """
    parts = list(Path(definer).parts)
    if not parts:
        return False

    # Strip the extension from the final path component.
    parts[-1] = os.path.splitext(parts[-1])[0]

    for imp in imports:
        pieces = [piece for piece in re.split(r"[.\\/]", imp) if piece]
        if len(pieces) > len(parts):
            continue

        # Slide the import over the definer's parts looking for a match.
        for start in range(len(parts) - len(pieces) + 1):
            if parts[start : start + len(pieces)] != pieces:
                continue
            # Suffix match: standard aliasing.
            if start + len(pieces) == len(parts):
                return True
            # Partial/middle match: require enough specificity (>= 2 parts).
            if len(pieces) >= 2:
                return True
    return False
540
+
541
def get_tags_raw(self, fname, rel_fname):
    """
    Parse *fname* with tree-sitter and yield Tag records for every
    definition and reference captured by the language's tags query.

    Yields nothing (returns early) when the language is unknown, the
    parser/query is unavailable, or the file is empty/unreadable.
    When the query produced defs but no refs, falls back to pygments
    to backfill reference tags (with no location info).
    """
    lang = filename_to_lang(fname)
    if not lang:
        return

    try:
        language = get_language(lang)
        parser = get_parser(lang)
    except Exception as err:
        # Best-effort: unsupported/broken grammars just skip the file.
        if self.verbose:
            print(f"Skipping file {fname}: {err}")
        return

    query_scm = get_scm_fname(lang)
    if not query_scm.exists():
        return
    query_scm = query_scm.read_text()

    code = self.io.read_text(fname)
    if not code:
        return
    tree = parser.parse(bytes(code, "utf-8"))

    # Run the tags queries
    # NOTE(review): this selects the tree-sitter query API by *Python*
    # version; the Query/QueryCursor API presumably depends on the
    # tree-sitter package version instead — confirm this gate is intended.
    if sys.version_info >= (3, 10):
        query = tree_sitter.Query(language, query_scm)
        cursor = tree_sitter.QueryCursor(query)
        captures = cursor.captures(tree.root_node)
    else:
        query = language.query(query_scm)
        captures = query.captures(tree.root_node)

    saw = set()
    # The TSL pack returns {tag_name: [nodes]}; the legacy API returns
    # an iterable of (node, tag_name) pairs. Normalize both shapes.
    if USING_TSL_PACK:
        all_nodes = []
        for tag, nodes in captures.items():
            all_nodes += [(node, tag) for node in nodes]
    else:
        all_nodes = list(captures)

    for node, tag in all_nodes:
        if tag.startswith("name.definition."):
            kind = "def"
        elif tag.startswith("name.reference."):
            kind = "ref"
        else:
            continue

        saw.add(kind)

        # Extract specific kind from the tag, e.g., 'function' from 'name.definition.function'
        specific_kind = tag.split(".")[-1] if "." in tag else None

        result = Tag(
            rel_fname=rel_fname,
            fname=fname,
            name=node.text.decode("utf-8"),
            kind=kind,
            specific_kind=specific_kind,
            line=node.start_point[0],  # Legacy line number
            start_line=node.start_point[0],
            end_line=node.end_point[0],
            start_byte=node.start_byte,
            end_byte=node.end_byte,
        )

        yield result

    if "ref" in saw:
        return
    if "def" not in saw:
        return

    # We saw defs, without any refs
    # Some tags files only provide defs (cpp, for example)
    # Use pygments to backfill refs

    try:
        lexer = guess_lexer_for_filename(fname, code)
    except Exception:  # On Windows, bad ref to time.clock which is deprecated?
        # self.io.tool_error(f"Error lexing {fname}")
        return

    tokens = list(lexer.get_tokens(code))
    tokens = [token[1] for token in tokens if token[0] in Token.Name]

    for token in tokens:
        yield Tag(
            rel_fname=rel_fname,
            fname=fname,
            name=token,
            kind="ref",
            specific_kind="name",  # Default for pygments fallback
            line=-1,  # Pygments doesn't give precise locations easily
            start_line=-1,
            end_line=-1,
            start_byte=-1,
            end_byte=-1,
        )
640
+
641
def get_ranked_tags(
    self, chat_fnames, other_fnames, mentioned_fnames, mentioned_idents, progress=True
):
    """
    Rank repo symbols by building a file-dependency graph and running
    PageRank over it.

    Phases: (1) scan every file, collecting defines/references/imports
    (from cached summaries when available); (2) build a directed graph
    with referencer -> definer edges weighted by identifier heuristics
    and path proximity; (3) run PageRank (personalized toward chat /
    mentioned files); (4) distribute each node's rank across its out
    edges and emit Tag objects sorted by accumulated rank, followed by
    bare (fname,) entries for files without ranked tags.
    """
    import rustworkx

    defines = defaultdict(set)
    references = defaultdict(lambda: defaultdict(int))
    total_ref_count = defaultdict(int)  # Track total references per identifier
    definitions = defaultdict(set)
    file_imports = defaultdict(set)
    import_ast_mode = False

    personalization = dict()

    fnames = set(chat_fnames).union(set(other_fnames))
    chat_rel_fnames = set()

    fnames = sorted(fnames)

    # Default personalization for unspecified files is 1/num_nodes
    # NOTE(review): this divides by len(fnames) — raises ZeroDivisionError
    # when both chat_fnames and other_fnames are empty; confirm callers
    # never invoke this with no files.
    personalize = 100 / len(fnames)

    fname_to_parts = {}
    fname_to_suffix = {}

    try:
        cache_size = len(self.TAGS_CACHE)
    except SQLITE_ERRORS as e:
        # Corrupt/locked sqlite cache: reset it and retry once.
        self.tags_cache_error(e)
        cache_size = len(self.TAGS_CACHE)

    if len(fnames) - cache_size > 100:
        self.io.tool_output(
            "Initial repo scan can be slow in larger repos, but only happens once."
        )
        self.io.update_spinner("Scanning repo")
        showing_bar = True
    else:
        showing_bar = False

    num_fnames = len(fnames)
    fname_index = 0
    for fname in fnames:
        if self.verbose:
            self.io.tool_output(f"Processing {fname}")
        if progress:
            if showing_bar:
                fname_index += 1
                self.io.update_spinner(f"Scanning repo: {fname_index}/{num_fnames}")
            else:
                self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {fname}")

        try:
            file_ok = os.path.isfile(fname)
        except OSError:
            file_ok = False

        if not file_ok:
            if fname not in self.warned_files:
                self.io.tool_warning(f"Repo-map can't include {fname}")
                self.io.tool_output(
                    "Has it been deleted from the file system but not from git?"
                )
                self.warned_files.add(fname)
            continue

        # dump(fname)
        rel_fname = self.get_rel_fname(fname)
        current_pers = 0.0  # Start with 0 personalization score

        if fname in chat_fnames:
            current_pers += personalize
            chat_rel_fnames.add(rel_fname)

        if rel_fname in mentioned_fnames:
            # Use max to avoid double counting if in chat_fnames and mentioned_fnames
            current_pers = max(current_pers, personalize)

        # Check path components against mentioned_idents
        path_obj = Path(rel_fname)
        fname_to_parts[rel_fname] = path_obj.parts
        fname_to_suffix[rel_fname] = path_obj.suffix
        path_components = set(path_obj.parts)
        basename_with_ext = path_obj.name
        basename_without_ext, _ = os.path.splitext(basename_with_ext)
        components_to_check = path_components.union({basename_with_ext, basename_without_ext})

        matched_idents = components_to_check.intersection(mentioned_idents)
        if matched_idents:
            # Add personalization *once* if any path component matches a mentioned ident
            current_pers += personalize

        if current_pers > 0:
            personalization[rel_fname] = current_pers  # Assign the final calculated value

        # Get file mtime and check for cached summary
        file_mtime = self.get_mtime(fname)
        summary = None
        if file_mtime is not None:
            summary = self._get_cached_summary(fname, file_mtime)

        if summary is not None:
            # Use cached summary for defines and references
            for ident in summary["defines"]:
                defines[ident].add(rel_fname)
            for ident, count in summary["references"].items():
                references[ident][rel_fname] += count
                total_ref_count[ident] += count
            for imp in summary["imports"]:
                file_imports[rel_fname].add(imp)

            # Still need to parse tags for definitions (Tag objects)
            # But only if this file has definitions
            if summary["defines"]:
                tags = list(self.get_tags(fname, rel_fname))
                if tags is not None:
                    for tag in tags:
                        if tag.kind == "def":
                            key = (rel_fname, tag.name)
                            definitions[key].add(tag)
        else:
            # No cached summary, parse all tags
            tags = list(self.get_tags(fname, rel_fname))
            if tags is None:
                continue

            for tag in tags:
                if tag.kind == "def":
                    defines[tag.name].add(rel_fname)
                    key = (rel_fname, tag.name)
                    definitions[key].add(tag)

                elif tag.kind == "ref":
                    references[tag.name][rel_fname] += 1
                    total_ref_count[tag.name] += 1

                if tag.specific_kind == "import":
                    file_imports[rel_fname].add(tag.name)

    self.io.profile("Process Files")

    if self.use_enhanced_map and len(file_imports) > 0:
        import_ast_mode = True

    if len(references) == 0:
        # No refs at all (defs-only repo): treat every define as one
        # self-reference so the graph still has edges.
        # Convert defines to the new references structure: dict of dicts with counts
        references = {}
        for ident, files in defines.items():
            references[ident] = {file: 1 for file in files}
            total_ref_count[ident] = len(files)  # Each file has count 1

    idents = set(defines.keys()).intersection(set(references.keys()))

    G = rustworkx.PyDiGraph(multigraph=True)

    # Collect all unique file names that will be nodes
    all_files = set()
    for files in defines.values():
        all_files.update(files)
    for ref_dict in references.values():
        all_files.update(ref_dict.keys())
    all_files.update(file_imports.keys())
    all_files.update(personalization.keys())

    # Add all nodes and create mapping from file name to node index
    file_to_node = {}
    node_to_file = {}
    for fname in sorted(all_files):
        node_idx = G.add_node(fname)
        file_to_node[fname] = node_idx
        node_to_file[node_idx] = fname

    # Add a small self-edge for every definition that has no references
    # Helps with tree-sitter 0.23.2 with ruby, where "def greet(name)"
    # isn't counted as a def AND a ref. tree-sitter 0.24.0 does.

    unreferenced_weight = 2**-32 / (len(idents) + 1)
    for ident in defines.keys():
        if ident in references:
            continue
        for definer in defines[ident]:
            definer_idx = file_to_node[definer]
            G.add_edge(
                definer_idx, definer_idx, {"weight": unreferenced_weight, "ident": ident}
            )

    for ident in idents:
        if progress:
            self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {ident}")

        definers = defines[ident]

        mul = 1.0

        is_snake = ("_" in ident) and any(c.isalpha() for c in ident)
        is_kebab = ("-" in ident) and any(c.isalpha() for c in ident)
        is_camel = any(c.isupper() for c in ident) and any(c.islower() for c in ident)
        if ident in mentioned_idents:
            mul *= 16

        # Prioritize function-like identifiers
        if (
            (is_snake or is_kebab or is_camel)
            and len(ident) >= 8
            and "test" not in ident.lower()
        ):
            mul *= 16

        # Downplay repetitive definitions in case of common boiler plate
        # Scale down logarithmically given the increasing number of references in a codebase
        # Ideally, this will help downweight boiler plate in frameworks, interfaces, and abstract classes
        if len(defines[ident]) > 4:
            exp = min(len(defines[ident]), 32)
            mul *= math.log2((4 / (2**exp)) + 1)

        # Calculate multiplier: log(number of unique file references * total references ^ 2)
        # Used to balance the number of times an identifier appears with its number of refs per file
        # Penetration in code base is important
        # So is the frequency
        # And the logarithm keeps them from scaling out of bounds forever
        # Combined with the above downweighting
        # There should be a push/pull that balances repetitiveness of identifier defs
        # With absolute number of references throughout a codebase
        unique_file_refs = len(references[ident])
        total_refs = total_ref_count[ident]
        ext_mul = round(math.log2(unique_file_refs * total_refs**2 + 1))

        for referencer, num_refs in references[ident].items():
            relevant_definers = [] if import_ast_mode else definers

            # A referencer should not link to any definiers of an identifier it also defines
            if referencer in definers:
                relevant_definers = [referencer]
            elif import_ast_mode:
                if referencer in file_imports:
                    matches = [
                        d
                        for d in definers
                        if self.check_import_match(d, file_imports[referencer])
                    ]
                    if matches:
                        relevant_definers = matches

            for definer in relevant_definers:
                # dump(referencer, definer, num_refs, mul)
                # Only add edge if file extensions match
                referencer_ext = fname_to_suffix[referencer]
                definer_ext = fname_to_suffix[definer]
                if referencer_ext != definer_ext:
                    continue

                use_mul = mul * ext_mul

                if referencer in chat_rel_fnames:
                    use_mul *= 64
                elif referencer == definer:
                    use_mul *= num_refs / 128

                # scale down so high freq (low value) mentions don't dominate
                # num_refs = math.sqrt(num_refs)

                p1 = fname_to_parts[referencer]
                p2 = fname_to_parts[definer]

                # Count common leading parts
                common_count = 0
                for c1, c2 in zip(p1, p2):
                    if c1 == c2:
                        common_count += 1
                    else:
                        break

                path_distance = len(p1) + len(p2) - (2 * common_count)

                # Edges decay exponentially with path distance between files.
                weight = use_mul * 2 ** (-1 * path_distance)
                referencer_idx = file_to_node[referencer]
                definer_idx = file_to_node[definer]
                G.add_edge(
                    referencer_idx,
                    definer_idx,
                    {"weight": weight, "key": ident, "ident": ident},
                )

    self.io.profile("Build Graph")

    self.io.profile("PERSONALIZATION START")
    # Convert personalization from file names to node indices
    if personalization:
        pers_node = {file_to_node[fname]: val for fname, val in personalization.items()}
        pers_args = dict(personalization=pers_node, dangling=pers_node)
    else:
        pers_args = dict()
    self.io.profile("PERSONALIZATION END")
    try:
        ranked = rustworkx.pagerank(G, weight_fn=lambda edge: edge["weight"], **pers_args)
    except ZeroDivisionError:
        # Issue #1536
        try:
            ranked = rustworkx.pagerank(G, weight_fn=lambda edge: edge["weight"])
        except ZeroDivisionError:
            self.io.profile("zero")
            return []
        except Exception as e:
            self.io.profile(e)
    except Exception as e:
        # NOTE(review): these two broad handlers only log; if they fire,
        # `ranked` is never assigned and the loop below raises NameError.
        # Confirm whether they should return [] like the ZeroDivision path.
        self.io.profile(e)

    self.io.profile("PageRank")

    # distribute the rank from each source node, across all of its out edges
    ranked_definitions = defaultdict(float)
    for src in G.node_indices():
        if progress:
            self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {src}")

        src_rank = ranked[src]
        total_weight = sum(data["weight"] for _src, _dst, data in G.out_edges(src))
        # dump(src, src_rank, total_weight)
        for _src, dst, data in G.out_edges(src):
            data["rank"] = src_rank * data["weight"] / total_weight
            ident = data["ident"]
            fname = node_to_file[dst]
            ranked_definitions[(fname, ident)] += data["rank"]

    self.io.profile("Distribute Rank")

    ranked_tags = []
    ranked_definitions = sorted(
        ranked_definitions.items(), reverse=True, key=lambda x: (x[1], x[0])
    )

    # with open('defs.txt', 'w') as out_file:
    #     import pprint
    #     printer = pprint.PrettyPrinter(indent=2, stream=out_file)
    #     printer.pprint(ranked_definitions)

    for (fname, ident), rank in ranked_definitions:
        # print(f"{rank:.03f} {fname} {ident}")
        if fname in chat_rel_fnames:
            continue
        ranked_tags += list(definitions.get((fname, ident), []))

    rel_other_fnames_without_tags = set(self.get_rel_fname(fname) for fname in other_fnames)

    fnames_already_included = set(rt[0] for rt in ranked_tags)

    # Append bare (fname,) entries, highest-ranked files first, for files
    # that produced no ranked tags.
    top_rank = sorted([(rank, node_idx) for (node_idx, rank) in ranked.items()], reverse=True)
    for rank, node_idx in top_rank:
        fname = node_to_file[node_idx]
        if fname in rel_other_fnames_without_tags:
            rel_other_fnames_without_tags.remove(fname)
        if fname not in fnames_already_included:
            ranked_tags.append((fname,))

    for fname in rel_other_fnames_without_tags:
        ranked_tags.append((fname,))

    return ranked_tags
999
+
1000
def get_ranked_tags_map(
    self,
    chat_fnames,
    other_fnames=None,
    max_map_tokens=None,
    mentioned_fnames=None,
    mentioned_idents=None,
    force_refresh=False,
):
    """
    Return the repo map, serving a cached result when the refresh policy
    allows it; otherwise compute one and record how long it took.
    """
    other_fnames = other_fnames or list()
    max_map_tokens = max_map_tokens or self.max_map_tokens
    mentioned_fnames = mentioned_fnames or set()
    mentioned_idents = mentioned_idents or set()

    # Build the cache key from the inputs that affect the map.
    key_parts = [
        tuple(sorted(chat_fnames)) if chat_fnames else None,
        len(other_fnames) if other_fnames else None,
        max_map_tokens,
    ]

    if self.refresh == "auto":
        # In auto mode the mentioned files/idents also shape the key;
        # idents go through a similarity check to preserve cache hits.
        key_parts.append(tuple(sorted(mentioned_fnames)) if mentioned_fnames else None)
        key_parts.append(self._get_mentioned_idents_cache_component(mentioned_idents))

    cache_key = hash(str(tuple(key_parts)))

    use_cache = False
    if not force_refresh:
        # Manual mode: always serve the previous map if one exists.
        if self.refresh == "manual" and self.last_map:
            return self.last_map

        # Policy -> whether the map_cache may be consulted.
        use_cache = {
            "always": False,
            "files": True,
            "auto": self.map_processing_time > 1.0,
        }.get(self.refresh, False)

        if use_cache and cache_key in self.map_cache:
            return self.map_cache[cache_key]

    # Cache miss (or forced refresh): compute and time the map build.
    started = time.time()
    result = self.get_ranked_tags_map_uncached(
        chat_fnames, other_fnames, max_map_tokens, mentioned_fnames, mentioned_idents
    )
    self.map_processing_time = time.time() - started

    self.map_cache[cache_key] = result
    self.last_map = result

    return result
1066
+
1067
def get_ranked_tags_map_uncached(
    self,
    chat_fnames,
    other_fnames=None,
    max_map_tokens=None,
    mentioned_fnames=None,
    mentioned_idents=None,
):
    """
    Compute the repo-map text from scratch.

    Ranks tags, prepends "special" important files that weren't ranked,
    then binary-searches over how many ranked tags to render so the
    resulting tree is as close as possible to max_map_tokens (accepting
    within 15% error early).  Returns the best rendered tree, or None
    if nothing fit under the budget.
    """
    self.io.profile("Start Rank Tags Map Uncached", start=True)

    if not other_fnames:
        other_fnames = list()
    if not max_map_tokens:
        max_map_tokens = self.max_map_tokens
    if not mentioned_fnames:
        mentioned_fnames = set()
    if not mentioned_idents:
        mentioned_idents = set()

    self.io.update_spinner(UPDATING_REPO_MAP_MESSAGE)

    ranked_tags = self.get_ranked_tags(
        chat_fnames, other_fnames, mentioned_fnames, mentioned_idents, True
    )

    self.io.profile("Finish Getting Ranked Tags")

    # Important files (README, configs, ...) not already ranked get
    # bare (fname,) entries at the front of the list.
    other_rel_fnames = sorted(set(self.get_rel_fname(fname) for fname in other_fnames))
    special_fnames = filter_important_files(other_rel_fnames)
    ranked_tags_fnames = set(tag[0] for tag in ranked_tags)
    special_fnames = [fn for fn in special_fnames if fn not in ranked_tags_fnames]
    special_fnames = [(fn,) for fn in special_fnames]

    ranked_tags = special_fnames + ranked_tags

    num_tags = len(ranked_tags)
    lower_bound = 0
    upper_bound = num_tags
    best_tree = None
    best_tree_tokens = 0

    chat_rel_fnames = set(self.get_rel_fname(fname) for fname in chat_fnames)

    # Fresh render cache per map build (mtime-keyed entries would
    # otherwise accumulate across calls).
    self.tree_cache = dict()

    # Start near the expected tag count (~25 tokens per tag heuristic).
    middle = min(int(max_map_tokens // 25), num_tags)
    while lower_bound <= upper_bound:
        # dump(lower_bound, middle, upper_bound)

        if middle > 1500:
            show_tokens = f"{middle / 1000.0:.1f}K"
        else:
            show_tokens = str(middle)

        self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {show_tokens} tokens")

        tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames)
        num_tokens = self.token_count(tree)

        pct_err = abs(num_tokens - max_map_tokens) / max_map_tokens
        ok_err = 0.15
        # Keep the largest tree under budget; accept anything within 15%.
        if (num_tokens <= max_map_tokens and num_tokens > best_tree_tokens) or pct_err < ok_err:
            best_tree = tree
            best_tree_tokens = num_tokens

            if pct_err < ok_err:
                break

        if num_tokens < max_map_tokens:
            lower_bound = middle + 1
        else:
            upper_bound = middle - 1

        middle = int((lower_bound + upper_bound) // 2)

    self.io.profile("Calculate Best Tree")

    return best_tree
1145
+
1146
# Class-level default for the rendered-tree cache; get_ranked_tags_map_uncached
# replaces it with a fresh per-instance dict before each map build.
tree_cache = dict()
1147
+
1148
def render_tree(self, abs_fname, rel_fname, lois):
    """
    Render the lines-of-interest (*lois*) of a file as a context tree.

    Two caches are consulted: tree_cache keyed by (rel_fname, lois, mtime)
    for the final rendered string, and tree_context_cache keyed by
    rel_fname (invalidated on mtime change) for the parsed TreeContext.
    """
    mtime = self.get_mtime(abs_fname)
    key = (rel_fname, tuple(sorted(lois)), mtime)

    if key in self.tree_cache:
        return self.tree_cache[key]

    # (Re)build the TreeContext when the file is new or has changed on disk.
    if (
        rel_fname not in self.tree_context_cache
        or self.tree_context_cache[rel_fname]["mtime"] != mtime
    ):
        code = self.io.read_text(abs_fname) or ""
        # TreeContext expects newline-terminated input.
        if not code.endswith("\n"):
            code += "\n"

        context = TreeContext(
            rel_fname,
            code,
            color=False,
            line_number=False,
            child_context=False,
            last_line=False,
            margin=0,
            mark_lois=False,
            loi_pad=0,
            # header_max=30,
            show_top_of_file_parent_scope=False,
        )
        self.tree_context_cache[rel_fname] = {"context": context, "mtime": mtime}

    # The cached context is mutated in place: reset its lines of interest
    # before formatting this particular set of lois.
    context = self.tree_context_cache[rel_fname]["context"]
    context.lines_of_interest = set()
    context.add_lines_of_interest(lois)
    context.add_context()
    res = context.format()
    self.tree_cache[key] = res
    return res
1185
+
1186
def to_tree(self, tags, chat_rel_fnames):
    """
    Render a list of ranked tags (Tag objects and bare (fname,) tuples)
    into the repo-map text, skipping files already in the chat.

    Tags are grouped by file; Tag entries get a rendered code tree of
    their lines of interest, while bare-tuple entries just emit the
    filename.
    """
    if not tags:
        return ""

    cur_fname = None
    cur_abs_fname = None
    lois = None  # lines of interest for the file currently being grouped
    output = ""

    # add a bogus tag at the end so we trip the this_fname != cur_fname...
    dummy_tag = (None,)
    for tag in sorted(tags) + [dummy_tag]:
        this_rel_fname = tag[0]
        if this_rel_fname in chat_rel_fnames:
            continue

        # ... here ... to output the final real entry in the list
        if this_rel_fname != cur_fname:
            if lois is not None:
                output += "\n"
                output += cur_fname + ":\n"

                # truncate long lines, in case we get minified js or something else crazy
                output += truncate_long_lines(
                    self.render_tree(cur_abs_fname, cur_fname, lois), self.max_code_line_length
                )

                lois = None
            elif cur_fname:
                # Previous entry was a bare (fname,) tuple: name only, no tree.
                output += "\n" + cur_fname + "\n"
            if type(tag) is Tag:
                lois = []
                cur_abs_fname = tag.fname
            cur_fname = this_rel_fname

        if lois is not None:
            lois.append(tag.line)

    return output
1225
+
1226
def _get_mentioned_idents_cache_component(self, mentioned_idents):
    """
    Determine the cache key component for mentioned_idents using similarity comparison.

    This method compares the current mentioned_idents with the previous ones using
    cosine similarity. If the similarity is high enough, it returns the previous
    cache key component to maintain cache hits. Otherwise, it updates the stored
    values and returns the current mentioned_idents.

    The similarity threshold adapts: consecutive hits tighten it (up to 0.9),
    consecutive misses loosen it (down to 0.5).

    Args:
        mentioned_idents (set): Current set of mentioned identifiers

    Returns:
        tuple or None: Cache key component for mentioned_idents
    """
    if not mentioned_idents:
        # Nothing mentioned: clear the comparison state entirely.
        self._last_mentioned_idents = None
        self._last_mentioned_idents_vector = None
        self._has_last_mentioned_idents = False
        return None

    current_mentioned_idents = tuple(mentioned_idents)

    # Check if we have a previous cached value to compare against
    if self._has_last_mentioned_idents:
        # Create vector for current mentioned_idents
        # (create_bigram_vector / normalize_vector / cosine_similarity are
        # module-level helpers defined elsewhere in this file.)
        current_vector = create_bigram_vector(current_mentioned_idents)
        current_vector_norm = normalize_vector(current_vector)

        # Calculate cosine similarity
        similarity = cosine_similarity(self._last_mentioned_idents_vector, current_vector_norm)
        # If similarity is high enough, use the previous cache key component
        if similarity >= self._mentioned_ident_similarity:
            # Use the previous mentioned_idents for cache key to maintain cache hit
            cache_key_component = self._last_mentioned_idents

            # Make similarity more strict the more consecutive cache hits
            self._mentioned_ident_similarity = min(
                0.9, self._mentioned_ident_similarity + 0.025
            )
        else:
            # Similarity is too low, use current mentioned_idents
            cache_key_component = current_mentioned_idents

            # Update stored values
            self._last_mentioned_idents = current_mentioned_idents
            self._last_mentioned_idents_vector = current_vector_norm

            # Make similarity less strict the more consecutive cache misses
            self._mentioned_ident_similarity = max(
                0.5, self._mentioned_ident_similarity - 0.025
            )
    else:
        # First time or no previous value, use current mentioned_idents
        cache_key_component = current_mentioned_idents
        current_vector = create_bigram_vector(current_mentioned_idents)

        # Store for future comparisons
        self._last_mentioned_idents = current_mentioned_idents
        self._last_mentioned_idents_vector = normalize_vector(current_vector)

    self._has_last_mentioned_idents = True
    return cache_key_component
1289
+
1290
+
1291
def truncate_long_lines(text, max_length):
    """Clip each line of *text* to *max_length* chars; result ends with a newline."""
    clipped = (line[:max_length] for line in text.splitlines())
    return "\n".join(clipped) + "\n"
1293
+
1294
+
1295
def find_src_files(directory):
    """Return every file under *directory*; a non-directory path is returned as-is."""
    if not os.path.isdir(directory):
        return [directory]

    # Flatten the walk into full paths, preserving os.walk order.
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
    ]
1304
+
1305
+
1306
def get_scm_fname(lang):
    """
    Locate the bundled tree-sitter tags query (.scm) for *lang*.

    Prefers the tree-sitter-language-pack queries when that pack is in
    use, falling back to the tree-sitter-languages set.  Returns None
    when the package resources cannot be resolved.
    """
    if USING_TSL_PACK:
        try:
            candidate = resources.files(__package__).joinpath(
                "queries",
                "tree-sitter-language-pack",
                f"{lang}-tags.scm",
            )
            if candidate.exists():
                return candidate
        except KeyError:
            pass

    # Fall back to the tree-sitter-languages query set.
    try:
        return resources.files(__package__).joinpath(
            "queries",
            "tree-sitter-languages",
            f"{lang}-tags.scm",
        )
    except KeyError:
        return
1331
+
1332
+
1333
def get_supported_languages_md():
    """Build a markdown table of languages with repo-map / linter support."""
    from grep_ast.parsers import PARSERS

    res = """
| Language | File extension | Repo map | Linter |
|:--------:|:--------------:|:--------:|:------:|
"""
    # PARSERS maps extension -> language; present rows sorted by language.
    for lang, ext in sorted((lang, ex) for ex, lang in PARSERS.items()):
        fn = get_scm_fname(lang)
        repo_map = "✓" if fn and os.path.exists(fn) else ""
        linter_support = "✓"
        res += f"| {lang:20} | {ext:20} | {repo_map:^8} | {linter_support:^6} |\n"

    res += "\n"

    return res
1351
+
1352
+
1353
if __name__ == "__main__":
    # CLI entry point: build and print a repo map for the paths given on
    # the command line.  Directories are expanded to their contained files;
    # plain paths are used directly.  All paths become chat files.
    # (Removed dead `fnames = sys.argv[1:]` assignment — it was never read;
    # the loop below re-reads sys.argv[1:] itself.)
    chat_fnames = []
    other_fnames = []
    for fname in sys.argv[1:]:
        if os.path.isdir(fname):
            chat_fnames += find_src_files(fname)
        else:
            chat_fnames.append(fname)

    rm = RepoMap(root=".")
    repo_map = rm.get_ranked_tags_map(chat_fnames, other_fnames)

    dump(len(repo_map))
    print(repo_map)