aider-ce 0.88.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aider/__init__.py +20 -0
- aider/__main__.py +4 -0
- aider/_version.py +34 -0
- aider/analytics.py +258 -0
- aider/args.py +1056 -0
- aider/args_formatter.py +228 -0
- aider/change_tracker.py +133 -0
- aider/coders/__init__.py +36 -0
- aider/coders/agent_coder.py +2166 -0
- aider/coders/agent_prompts.py +104 -0
- aider/coders/architect_coder.py +48 -0
- aider/coders/architect_prompts.py +40 -0
- aider/coders/ask_coder.py +9 -0
- aider/coders/ask_prompts.py +35 -0
- aider/coders/base_coder.py +3613 -0
- aider/coders/base_prompts.py +87 -0
- aider/coders/chat_chunks.py +64 -0
- aider/coders/context_coder.py +53 -0
- aider/coders/context_prompts.py +75 -0
- aider/coders/editblock_coder.py +657 -0
- aider/coders/editblock_fenced_coder.py +10 -0
- aider/coders/editblock_fenced_prompts.py +143 -0
- aider/coders/editblock_func_coder.py +141 -0
- aider/coders/editblock_func_prompts.py +27 -0
- aider/coders/editblock_prompts.py +175 -0
- aider/coders/editor_diff_fenced_coder.py +9 -0
- aider/coders/editor_diff_fenced_prompts.py +11 -0
- aider/coders/editor_editblock_coder.py +9 -0
- aider/coders/editor_editblock_prompts.py +21 -0
- aider/coders/editor_whole_coder.py +9 -0
- aider/coders/editor_whole_prompts.py +12 -0
- aider/coders/help_coder.py +16 -0
- aider/coders/help_prompts.py +46 -0
- aider/coders/patch_coder.py +706 -0
- aider/coders/patch_prompts.py +159 -0
- aider/coders/search_replace.py +757 -0
- aider/coders/shell.py +37 -0
- aider/coders/single_wholefile_func_coder.py +102 -0
- aider/coders/single_wholefile_func_prompts.py +27 -0
- aider/coders/udiff_coder.py +429 -0
- aider/coders/udiff_prompts.py +115 -0
- aider/coders/udiff_simple.py +14 -0
- aider/coders/udiff_simple_prompts.py +25 -0
- aider/coders/wholefile_coder.py +144 -0
- aider/coders/wholefile_func_coder.py +134 -0
- aider/coders/wholefile_func_prompts.py +27 -0
- aider/coders/wholefile_prompts.py +65 -0
- aider/commands.py +2173 -0
- aider/copypaste.py +72 -0
- aider/deprecated.py +126 -0
- aider/diffs.py +128 -0
- aider/dump.py +29 -0
- aider/editor.py +147 -0
- aider/exceptions.py +115 -0
- aider/format_settings.py +26 -0
- aider/gui.py +545 -0
- aider/help.py +163 -0
- aider/help_pats.py +19 -0
- aider/helpers/__init__.py +9 -0
- aider/helpers/similarity.py +98 -0
- aider/history.py +180 -0
- aider/io.py +1608 -0
- aider/linter.py +304 -0
- aider/llm.py +55 -0
- aider/main.py +1415 -0
- aider/mcp/__init__.py +174 -0
- aider/mcp/server.py +149 -0
- aider/mdstream.py +243 -0
- aider/models.py +1313 -0
- aider/onboarding.py +429 -0
- aider/openrouter.py +129 -0
- aider/prompts.py +56 -0
- aider/queries/tree-sitter-language-pack/README.md +7 -0
- aider/queries/tree-sitter-language-pack/arduino-tags.scm +5 -0
- aider/queries/tree-sitter-language-pack/c-tags.scm +9 -0
- aider/queries/tree-sitter-language-pack/chatito-tags.scm +16 -0
- aider/queries/tree-sitter-language-pack/clojure-tags.scm +7 -0
- aider/queries/tree-sitter-language-pack/commonlisp-tags.scm +122 -0
- aider/queries/tree-sitter-language-pack/cpp-tags.scm +15 -0
- aider/queries/tree-sitter-language-pack/csharp-tags.scm +26 -0
- aider/queries/tree-sitter-language-pack/d-tags.scm +26 -0
- aider/queries/tree-sitter-language-pack/dart-tags.scm +92 -0
- aider/queries/tree-sitter-language-pack/elisp-tags.scm +5 -0
- aider/queries/tree-sitter-language-pack/elixir-tags.scm +54 -0
- aider/queries/tree-sitter-language-pack/elm-tags.scm +19 -0
- aider/queries/tree-sitter-language-pack/gleam-tags.scm +41 -0
- aider/queries/tree-sitter-language-pack/go-tags.scm +42 -0
- aider/queries/tree-sitter-language-pack/java-tags.scm +20 -0
- aider/queries/tree-sitter-language-pack/javascript-tags.scm +88 -0
- aider/queries/tree-sitter-language-pack/lua-tags.scm +34 -0
- aider/queries/tree-sitter-language-pack/matlab-tags.scm +10 -0
- aider/queries/tree-sitter-language-pack/ocaml-tags.scm +115 -0
- aider/queries/tree-sitter-language-pack/ocaml_interface-tags.scm +98 -0
- aider/queries/tree-sitter-language-pack/pony-tags.scm +39 -0
- aider/queries/tree-sitter-language-pack/properties-tags.scm +5 -0
- aider/queries/tree-sitter-language-pack/python-tags.scm +14 -0
- aider/queries/tree-sitter-language-pack/r-tags.scm +21 -0
- aider/queries/tree-sitter-language-pack/racket-tags.scm +12 -0
- aider/queries/tree-sitter-language-pack/ruby-tags.scm +64 -0
- aider/queries/tree-sitter-language-pack/rust-tags.scm +60 -0
- aider/queries/tree-sitter-language-pack/solidity-tags.scm +43 -0
- aider/queries/tree-sitter-language-pack/swift-tags.scm +51 -0
- aider/queries/tree-sitter-language-pack/udev-tags.scm +20 -0
- aider/queries/tree-sitter-languages/README.md +24 -0
- aider/queries/tree-sitter-languages/c-tags.scm +9 -0
- aider/queries/tree-sitter-languages/c_sharp-tags.scm +46 -0
- aider/queries/tree-sitter-languages/cpp-tags.scm +15 -0
- aider/queries/tree-sitter-languages/dart-tags.scm +91 -0
- aider/queries/tree-sitter-languages/elisp-tags.scm +8 -0
- aider/queries/tree-sitter-languages/elixir-tags.scm +54 -0
- aider/queries/tree-sitter-languages/elm-tags.scm +19 -0
- aider/queries/tree-sitter-languages/fortran-tags.scm +15 -0
- aider/queries/tree-sitter-languages/go-tags.scm +30 -0
- aider/queries/tree-sitter-languages/haskell-tags.scm +3 -0
- aider/queries/tree-sitter-languages/hcl-tags.scm +77 -0
- aider/queries/tree-sitter-languages/java-tags.scm +20 -0
- aider/queries/tree-sitter-languages/javascript-tags.scm +88 -0
- aider/queries/tree-sitter-languages/julia-tags.scm +60 -0
- aider/queries/tree-sitter-languages/kotlin-tags.scm +27 -0
- aider/queries/tree-sitter-languages/matlab-tags.scm +10 -0
- aider/queries/tree-sitter-languages/ocaml-tags.scm +115 -0
- aider/queries/tree-sitter-languages/ocaml_interface-tags.scm +98 -0
- aider/queries/tree-sitter-languages/php-tags.scm +26 -0
- aider/queries/tree-sitter-languages/python-tags.scm +12 -0
- aider/queries/tree-sitter-languages/ql-tags.scm +26 -0
- aider/queries/tree-sitter-languages/ruby-tags.scm +64 -0
- aider/queries/tree-sitter-languages/rust-tags.scm +60 -0
- aider/queries/tree-sitter-languages/scala-tags.scm +65 -0
- aider/queries/tree-sitter-languages/typescript-tags.scm +41 -0
- aider/queries/tree-sitter-languages/zig-tags.scm +3 -0
- aider/reasoning_tags.py +82 -0
- aider/repo.py +621 -0
- aider/repomap.py +1174 -0
- aider/report.py +260 -0
- aider/resources/__init__.py +3 -0
- aider/resources/model-metadata.json +776 -0
- aider/resources/model-settings.yml +2068 -0
- aider/run_cmd.py +133 -0
- aider/scrape.py +293 -0
- aider/sendchat.py +242 -0
- aider/sessions.py +256 -0
- aider/special.py +203 -0
- aider/tools/__init__.py +72 -0
- aider/tools/command.py +105 -0
- aider/tools/command_interactive.py +122 -0
- aider/tools/delete_block.py +182 -0
- aider/tools/delete_line.py +155 -0
- aider/tools/delete_lines.py +184 -0
- aider/tools/extract_lines.py +341 -0
- aider/tools/finished.py +48 -0
- aider/tools/git_branch.py +129 -0
- aider/tools/git_diff.py +60 -0
- aider/tools/git_log.py +57 -0
- aider/tools/git_remote.py +53 -0
- aider/tools/git_show.py +51 -0
- aider/tools/git_status.py +46 -0
- aider/tools/grep.py +256 -0
- aider/tools/indent_lines.py +221 -0
- aider/tools/insert_block.py +288 -0
- aider/tools/list_changes.py +86 -0
- aider/tools/ls.py +93 -0
- aider/tools/make_editable.py +85 -0
- aider/tools/make_readonly.py +69 -0
- aider/tools/remove.py +91 -0
- aider/tools/replace_all.py +126 -0
- aider/tools/replace_line.py +173 -0
- aider/tools/replace_lines.py +217 -0
- aider/tools/replace_text.py +187 -0
- aider/tools/show_numbered_context.py +147 -0
- aider/tools/tool_utils.py +313 -0
- aider/tools/undo_change.py +95 -0
- aider/tools/update_todo_list.py +156 -0
- aider/tools/view.py +57 -0
- aider/tools/view_files_matching.py +141 -0
- aider/tools/view_files_with_symbol.py +129 -0
- aider/urls.py +17 -0
- aider/utils.py +456 -0
- aider/versioncheck.py +113 -0
- aider/voice.py +205 -0
- aider/waiting.py +38 -0
- aider/watch.py +318 -0
- aider/watch_prompts.py +12 -0
- aider/website/Gemfile +8 -0
- aider/website/_includes/blame.md +162 -0
- aider/website/_includes/get-started.md +22 -0
- aider/website/_includes/help-tip.md +5 -0
- aider/website/_includes/help.md +24 -0
- aider/website/_includes/install.md +5 -0
- aider/website/_includes/keys.md +4 -0
- aider/website/_includes/model-warnings.md +67 -0
- aider/website/_includes/multi-line.md +22 -0
- aider/website/_includes/python-m-aider.md +5 -0
- aider/website/_includes/recording.css +228 -0
- aider/website/_includes/recording.md +34 -0
- aider/website/_includes/replit-pipx.md +9 -0
- aider/website/_includes/works-best.md +1 -0
- aider/website/_sass/custom/custom.scss +103 -0
- aider/website/docs/config/adv-model-settings.md +2261 -0
- aider/website/docs/config/agent-mode.md +194 -0
- aider/website/docs/config/aider_conf.md +548 -0
- aider/website/docs/config/api-keys.md +90 -0
- aider/website/docs/config/dotenv.md +493 -0
- aider/website/docs/config/editor.md +127 -0
- aider/website/docs/config/mcp.md +95 -0
- aider/website/docs/config/model-aliases.md +104 -0
- aider/website/docs/config/options.md +890 -0
- aider/website/docs/config/reasoning.md +210 -0
- aider/website/docs/config.md +44 -0
- aider/website/docs/faq.md +384 -0
- aider/website/docs/git.md +76 -0
- aider/website/docs/index.md +47 -0
- aider/website/docs/install/codespaces.md +39 -0
- aider/website/docs/install/docker.md +57 -0
- aider/website/docs/install/optional.md +100 -0
- aider/website/docs/install/replit.md +8 -0
- aider/website/docs/install.md +115 -0
- aider/website/docs/languages.md +264 -0
- aider/website/docs/legal/contributor-agreement.md +111 -0
- aider/website/docs/legal/privacy.md +104 -0
- aider/website/docs/llms/anthropic.md +77 -0
- aider/website/docs/llms/azure.md +48 -0
- aider/website/docs/llms/bedrock.md +132 -0
- aider/website/docs/llms/cohere.md +34 -0
- aider/website/docs/llms/deepseek.md +32 -0
- aider/website/docs/llms/gemini.md +49 -0
- aider/website/docs/llms/github.md +111 -0
- aider/website/docs/llms/groq.md +36 -0
- aider/website/docs/llms/lm-studio.md +39 -0
- aider/website/docs/llms/ollama.md +75 -0
- aider/website/docs/llms/openai-compat.md +39 -0
- aider/website/docs/llms/openai.md +58 -0
- aider/website/docs/llms/openrouter.md +78 -0
- aider/website/docs/llms/other.md +117 -0
- aider/website/docs/llms/vertex.md +50 -0
- aider/website/docs/llms/warnings.md +10 -0
- aider/website/docs/llms/xai.md +53 -0
- aider/website/docs/llms.md +54 -0
- aider/website/docs/more/analytics.md +127 -0
- aider/website/docs/more/edit-formats.md +116 -0
- aider/website/docs/more/infinite-output.md +165 -0
- aider/website/docs/more-info.md +8 -0
- aider/website/docs/recordings/auto-accept-architect.md +31 -0
- aider/website/docs/recordings/dont-drop-original-read-files.md +35 -0
- aider/website/docs/recordings/index.md +21 -0
- aider/website/docs/recordings/model-accepts-settings.md +69 -0
- aider/website/docs/recordings/tree-sitter-language-pack.md +80 -0
- aider/website/docs/repomap.md +112 -0
- aider/website/docs/scripting.md +100 -0
- aider/website/docs/sessions.md +203 -0
- aider/website/docs/troubleshooting/aider-not-found.md +24 -0
- aider/website/docs/troubleshooting/edit-errors.md +76 -0
- aider/website/docs/troubleshooting/imports.md +62 -0
- aider/website/docs/troubleshooting/models-and-keys.md +54 -0
- aider/website/docs/troubleshooting/support.md +79 -0
- aider/website/docs/troubleshooting/token-limits.md +96 -0
- aider/website/docs/troubleshooting/warnings.md +12 -0
- aider/website/docs/troubleshooting.md +11 -0
- aider/website/docs/usage/browser.md +57 -0
- aider/website/docs/usage/caching.md +49 -0
- aider/website/docs/usage/commands.md +133 -0
- aider/website/docs/usage/conventions.md +119 -0
- aider/website/docs/usage/copypaste.md +121 -0
- aider/website/docs/usage/images-urls.md +48 -0
- aider/website/docs/usage/lint-test.md +118 -0
- aider/website/docs/usage/modes.md +211 -0
- aider/website/docs/usage/not-code.md +179 -0
- aider/website/docs/usage/notifications.md +87 -0
- aider/website/docs/usage/tips.md +79 -0
- aider/website/docs/usage/tutorials.md +30 -0
- aider/website/docs/usage/voice.md +121 -0
- aider/website/docs/usage/watch.md +294 -0
- aider/website/docs/usage.md +102 -0
- aider/website/share/index.md +101 -0
- aider_ce-0.88.20.dist-info/METADATA +187 -0
- aider_ce-0.88.20.dist-info/RECORD +279 -0
- aider_ce-0.88.20.dist-info/WHEEL +5 -0
- aider_ce-0.88.20.dist-info/entry_points.txt +2 -0
- aider_ce-0.88.20.dist-info/licenses/LICENSE.txt +202 -0
- aider_ce-0.88.20.dist-info/top_level.txt +1 -0
aider/repomap.py
ADDED
|
@@ -0,0 +1,1174 @@
|
|
|
1
|
+
import math
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
import sqlite3
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
import warnings
|
|
8
|
+
from collections import Counter, defaultdict, namedtuple
|
|
9
|
+
from importlib import resources
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import tree_sitter
|
|
13
|
+
from diskcache import Cache
|
|
14
|
+
from grep_ast import TreeContext, filename_to_lang
|
|
15
|
+
from pygments.lexers import guess_lexer_for_filename
|
|
16
|
+
from pygments.token import Token
|
|
17
|
+
from tqdm import tqdm
|
|
18
|
+
|
|
19
|
+
from aider.dump import dump
|
|
20
|
+
from aider.helpers.similarity import (
|
|
21
|
+
cosine_similarity,
|
|
22
|
+
create_bigram_vector,
|
|
23
|
+
normalize_vector,
|
|
24
|
+
)
|
|
25
|
+
from aider.special import filter_important_files
|
|
26
|
+
from aider.tools.tool_utils import ToolError
|
|
27
|
+
|
|
28
|
+
# tree_sitter is throwing a FutureWarning
|
|
29
|
+
warnings.simplefilter("ignore", category=FutureWarning)
|
|
30
|
+
from grep_ast.tsl import USING_TSL_PACK, get_language, get_parser # noqa: E402
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# A Tag records one named symbol occurrence (definition or reference) in a file.
# The trailing five fields default to None so that entries pickled by older
# cache versions (which lacked those fields) still unpickle cleanly; using
# namedtuple's `defaults=` replaces the previous hand-written __new__ override.
TagBase = namedtuple(
    "TagBase",
    "rel_fname fname line name kind specific_kind start_line end_line start_byte end_byte",
    defaults=(None, None, None, None, None),
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Public alias; the rest of the codebase refers to tag records as `Tag`.
Tag = TagBase


# Errors indicating the on-disk tags cache (diskcache/SQLite) is unusable.
SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)


# Bumped whenever the tag format changes, so stale on-disk caches are ignored.
CACHE_VERSION = 5
if USING_TSL_PACK:
    # The tree-sitter language pack yields different tags; use a separate cache.
    CACHE_VERSION = 7

# Spinner/status prefix shown while the repo map is recomputed.
UPDATING_REPO_MAP_MESSAGE = "Updating repo map"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class RepoMap:
    """Builds ranked "repo map" summaries of a repository's source files."""

    # Versioned so that a CACHE_VERSION bump invalidates old on-disk caches.
    TAGS_CACHE_DIR = f".aider.tags.cache.v{CACHE_VERSION}"

    # Shared across instances: files already warned about (e.g. missing on disk).
    warned_files = set()

    # Class variable to store initial ranked tags results
    _initial_ranked_tags = None
    _initial_ident_to_files = None

    # Define kinds that typically represent definitions across languages
    # Used by AgentCoder to filter tags for the symbol outline
    definition_kinds = {
        "class",
        "struct",
        "enum",
        "interface",
        "trait",  # Structure definitions
        "function",
        "method",
        "constructor",  # Function/method definitions
        "module",
        "namespace",  # Module/namespace definitions
        "constant",
        "variable",  # Top-level/class variable definitions (consider refining)
        "type",  # Type definitions
        # Add more based on tree-sitter queries if needed
    }
|
|
112
|
+
|
|
113
|
+
@staticmethod
def get_file_stub(fname, io):
    """Generate a complete structural outline of a source code file.

    Args:
        fname (str): Absolute path to the source file
        io: InputOutput instance for file operations

    Returns:
        str: Formatted outline showing the file's structure
    """
    # Use a cached throwaway RepoMap instance (map_tokens=0 disables mapping).
    if not hasattr(RepoMap, "_stub_instance"):
        RepoMap._stub_instance = RepoMap(map_tokens=0, io=io)

    rm = RepoMap._stub_instance
    # Bug fix: the cached singleton previously kept the first caller's io
    # forever, silently ignoring the io argument on later calls. Refresh it
    # so reads and warnings go through the caller's current io.
    rm.io = io

    rel_fname = rm.get_rel_fname(fname)

    # Reuse existing tag parsing
    tags = rm.get_tags(fname, rel_fname)
    if not tags:
        return "# No outline available"

    # Get all definition lines
    lois = [tag.line for tag in tags if tag.kind == "def"]

    # Reuse existing tree rendering
    outline = rm.render_tree(fname, rel_fname, lois)

    return f"{outline}"
|
|
144
|
+
|
|
145
|
+
def __init__(
    self,
    map_tokens=1024,
    map_cache_dir=".",
    main_model=None,
    io=None,
    repo_content_prefix=None,
    verbose=False,
    max_context_window=None,
    map_mul_no_files=8,
    refresh="auto",
    max_code_line_length=100,
    repo_root=None,
    use_memory_cache=False,
):
    """Create a RepoMap.

    Args:
        map_tokens: token budget for the map; <= 0 disables mapping.
        map_cache_dir: directory that holds the on-disk tags cache.
        main_model: model object used for token counting.
        io: InputOutput used for file reads, warnings and output.
        repo_content_prefix: format string prepended to the map listing.
        verbose: emit extra diagnostics via io.
        max_context_window: model context size, used to cap the map budget.
        map_mul_no_files: budget multiplier applied when no files are in chat.
        refresh: map refresh strategy (e.g. "auto").
        max_code_line_length: maximum rendered code line length.
        repo_root: repository root; defaults to the current working directory.
        use_memory_cache: use a plain dict instead of the on-disk SQLite cache.
    """
    self.io = io
    self.verbose = verbose
    self.refresh = refresh

    self.map_cache_dir = map_cache_dir
    # Prefer an explicit repo root (eg per-test repo), fallback to CWD
    self.root = repo_root or os.getcwd()

    # Allow opting into an in-memory tags cache to avoid disk/SQLite locks
    if use_memory_cache:
        self.TAGS_CACHE = dict()
    else:
        self.load_tags_cache()
    # NOTE(review): consumers of cache_threshold are outside this chunk —
    # confirm its semantics before documenting further.
    self.cache_threshold = 0.95

    self.max_map_tokens = map_tokens
    self.map_mul_no_files = map_mul_no_files
    self.max_context_window = max_context_window

    self.max_code_line_length = max_code_line_length

    self.repo_content_prefix = repo_content_prefix

    self.main_model = main_model

    # Per-instance caches for rendered trees, tree contexts and computed maps.
    self.tree_cache = {}
    self.tree_context_cache = {}
    self.map_cache = {}
    self.map_processing_time = 0
    self.last_map = None

    # Initialize cache for mentioned identifiers similarity
    self._last_mentioned_idents = None
    self._last_mentioned_idents_vector = None
    self._has_last_mentioned_idents = False
    self._mentioned_ident_similarity = 0.8

    if self.verbose:
        self.io.tool_output(
            f"RepoMap initialized with map_mul_no_files: {self.map_mul_no_files}"
        )
        self.io.tool_output(f"RepoMap initialized with map_cache_dir: {self.map_cache_dir}")
        self.io.tool_output(f"RepoMap assumes repo root is: {self.root}")
|
|
203
|
+
|
|
204
|
+
def token_count(self, text):
    """Estimate the token count of text, sampling lines on long inputs.

    Short strings are counted exactly; longer ones count ~1 in every
    len/100 lines and extrapolate by character ratio, which keeps the
    model's tokenizer off the hot path for large repo maps.
    """
    total_chars = len(text)
    if total_chars < 200:
        # Cheap enough to count exactly.
        return self.main_model.token_count(text)

    all_lines = text.splitlines(keepends=True)
    stride = max(len(all_lines) // 100, 1)
    sample = "".join(all_lines[::stride])
    tokens_in_sample = self.main_model.token_count(sample)
    # Scale the sampled count up by the character ratio.
    return tokens_in_sample / len(sample) * total_chars
|
|
217
|
+
|
|
218
|
+
def get_repo_map(
    self,
    chat_files,
    other_files,
    mentioned_fnames=None,
    mentioned_idents=None,
    force_refresh=False,
):
    """Build the repo-map text to include in the prompt.

    Args:
        chat_files: files currently in the chat.
        other_files: candidate files to summarize in the map.
        mentioned_fnames: filenames referenced in the conversation.
        mentioned_idents: identifiers referenced in the conversation.
        force_refresh: recompute even if a cached map exists.

    Returns:
        str or None: optional prefix plus the ranked file listing, or None
        when mapping is disabled, there are no other files, or the listing
        came back empty / failed to generate.
    """
    # max_map_tokens <= 0 means mapping is disabled (possibly by an earlier
    # RecursionError below).
    if self.max_map_tokens <= 0:
        return
    if not other_files:
        return
    if not mentioned_fnames:
        mentioned_fnames = set()
    if not mentioned_idents:
        mentioned_idents = set()

    max_map_tokens = self.max_map_tokens

    # With no files in the chat, give a bigger view of the entire repo
    padding = 4096  # tokens reserved for the rest of the prompt
    if max_map_tokens and self.max_context_window:
        # Expanded budget, capped so the map still fits the context window.
        target = min(
            int(max_map_tokens * self.map_mul_no_files),
            self.max_context_window - padding,
        )
    else:
        target = 0
    # Only apply the expanded budget when the chat has no files yet.
    if not chat_files and self.max_context_window and target > 0:
        max_map_tokens = target

    try:
        files_listing = self.get_ranked_tags_map(
            chat_files,
            other_files,
            max_map_tokens,
            mentioned_fnames,
            mentioned_idents,
            force_refresh,
        )
    except RecursionError:
        # Ranking can blow the stack on pathological repos; permanently
        # disable mapping for this instance.
        self.io.tool_error("Disabling repo map, git repo too large?")
        self.max_map_tokens = 0
        return

    if not files_listing:
        return

    if self.verbose:
        num_tokens = self.token_count(files_listing)
        self.io.tool_output(f"Repo-map: {num_tokens / 1024:.1f} k-tokens")

    # Phrase the prefix as "other files" only when chat files exist.
    if chat_files:
        other = "other "
    else:
        other = ""

    if self.repo_content_prefix:
        repo_content = self.repo_content_prefix.format(other=other)
    else:
        repo_content = ""

    repo_content += files_listing

    return repo_content
|
|
283
|
+
|
|
284
|
+
def get_rel_fname(self, fname):
    """Return fname relative to the repo root, or unchanged if impossible."""
    try:
        rel = os.path.relpath(fname, self.root)
    except ValueError:
        # Issue #1288: ValueError: path is on mount 'C:', start on mount 'D:'
        # Just return the full fname.
        return fname
    return rel
|
|
291
|
+
|
|
292
|
+
def tags_cache_error(self, original_error=None):
    """Recover from a SQLite cache failure: rebuild the cache on disk, or
    fall back to a plain in-memory dict if rebuilding also fails."""

    if self.verbose and original_error:
        self.io.tool_warning(f"Tags cache error: {str(original_error)}")

    # Already on the in-memory fallback; nothing further to recover.
    if isinstance(getattr(self, "TAGS_CACHE", None), dict):
        return

    cache_path = Path(self.map_cache_dir) / self.TAGS_CACHE_DIR

    try:
        # Wipe the broken cache directory, then build and smoke-test a
        # fresh one with a write/read/delete round trip.
        if cache_path.exists():
            shutil.rmtree(cache_path)

        rebuilt = Cache(cache_path)

        probe_key = "test"
        rebuilt[probe_key] = "test"
        _ = rebuilt[probe_key]
        del rebuilt[probe_key]

        # Round trip succeeded; adopt the rebuilt cache.
        self.TAGS_CACHE = rebuilt
        return

    except SQLITE_ERRORS as err:
        self.io.tool_warning(
            f"Unable to use tags cache at {cache_path}, falling back to memory cache"
        )
        if self.verbose:
            self.io.tool_warning(f"Cache recreation error: {str(err)}")

    self.TAGS_CACHE = dict()
|
|
331
|
+
|
|
332
|
+
def load_tags_cache(self):
    """Open the on-disk tags cache, degrading gracefully on SQLite errors."""
    cache_path = Path(self.map_cache_dir) / self.TAGS_CACHE_DIR
    try:
        self.TAGS_CACHE = Cache(cache_path)
    except SQLITE_ERRORS as err:
        # Rebuild the cache or fall back to an in-memory dict.
        self.tags_cache_error(err)
|
|
338
|
+
|
|
339
|
+
def save_tags_cache(self):
    # Intentionally a no-op, kept so get_tags has a stable post-write hook.
    # The diskcache backend presumably persists entries on assignment, so
    # there is nothing to flush here — confirm if the backend changes.
    pass
|
|
341
|
+
|
|
342
|
+
def get_mtime(self, fname):
    """Return fname's modification time, or None (after a warning) if it
    no longer exists on disk."""
    try:
        mtime = os.path.getmtime(fname)
    except FileNotFoundError:
        self.io.tool_warning(f"File not found error: {fname}")
        return None
    return mtime
|
|
347
|
+
|
|
348
|
+
def get_tags(self, fname, rel_fname):
    """Return the list of Tag objects for fname, via the mtime-keyed cache.

    Args:
        fname: absolute path to the file (also the cache key).
        rel_fname: path relative to the repo root, stored on each Tag.

    Returns:
        list: cached or freshly parsed tags; [] when the file is missing
        or its cached entry is unreadable.
    """
    # Check if the file is in the cache and if the modification time has not changed
    file_mtime = self.get_mtime(fname)
    if file_mtime is None:
        # File vanished; get_mtime already warned the user.
        return []

    cache_key = fname
    try:
        val = self.TAGS_CACHE.get(cache_key)  # Issue #1308
    except SQLITE_ERRORS as e:
        # tags_cache_error may swap in a rebuilt cache or a dict; retry once.
        self.tags_cache_error(e)
        val = self.TAGS_CACHE.get(cache_key)

    # Cache hit only counts if the stored mtime matches the file's mtime.
    if val is not None and val.get("mtime") == file_mtime:
        try:
            # Get the cached data
            data = self.TAGS_CACHE[cache_key]["data"]

            # Let our Tag class handle compatibility with old cache formats
            # No need for special handling as TagBase.__new__ will supply default specific_kind

            return data
        except SQLITE_ERRORS as e:
            # Retry the read once after cache recovery.
            self.tags_cache_error(e)
            return self.TAGS_CACHE[cache_key]["data"]
        except (TypeError, AttributeError) as e:
            # If we hit an error related to missing fields in old cached Tag objects,
            # force a cache refresh for this file
            if self.verbose:
                self.io.tool_warning(f"Cache format error for {fname}, refreshing: {e}")
            # Return empty list to trigger cache refresh
            return []

    # miss!
    data = list(self.get_tags_raw(fname, rel_fname))

    # Update the cache
    try:
        self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
        self.save_tags_cache()
    except SQLITE_ERRORS as e:
        # Retry the write once after cache recovery.
        self.tags_cache_error(e)
        self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}

    return data
|
|
393
|
+
|
|
394
|
+
def get_symbol_definition_location(self, file_path, symbol_name):
    """
    Locate the unique definition (start/end line) of a symbol in a file.

    Args:
        file_path (str): The relative path to the file.
        symbol_name (str): The name of the symbol to find.

    Returns:
        tuple: (start_line, end_line) (0-based) if a unique definition is found.

    Raises:
        ToolError: If the symbol is not found, not unique, or not a definition.
    """
    abs_path = self.io.root_abs_path(file_path)  # Assuming io has this helper or similar
    rel_path = self.get_rel_fname(abs_path)  # Ensure we use consistent relative path

    tags = self.get_tags(abs_path, rel_path)
    if not tags:
        raise ToolError(f"Symbol '{symbol_name}' not found in '{file_path}' (no tags).")

    # Keep only matching definition tags that carry usable location info.
    definitions = [
        tag
        for tag in tags
        if tag.kind == "def"
        and tag.name == symbol_name
        and tag.start_line is not None
        and tag.end_line is not None
        and tag.start_line >= 0
    ]

    if not definitions:
        # Distinguish "present but not a definition" from "absent entirely".
        non_defs = [tag for tag in tags if tag.name == symbol_name and tag.kind != "def"]
        if non_defs:
            raise ToolError(
                f"Symbol '{symbol_name}' found in '{file_path}', but not as a unique definition"
                f" (found as {non_defs[0].kind})."
            )
        raise ToolError(f"Symbol '{symbol_name}' definition not found in '{file_path}'.")

    if len(definitions) > 1:
        # Report every candidate line so the user can disambiguate.
        lines = sorted(d.start_line + 1 for d in definitions)  # 1-based for user message
        raise ToolError(
            f"Symbol '{symbol_name}' is ambiguous in '{file_path}'. Found definitions on lines:"
            f" {', '.join(map(str, lines))}."
        )

    # Exactly one definition remains.
    winner = definitions[0]
    return winner.start_line, winner.end_line
|
|
446
|
+
|
|
447
|
+
def shared_path_components(self, path1_str, path2_str):
    """
    Distance between two paths based on shared leading components.
    Distance = Total parts - (2 * Shared parts). Lower is closer.
    """
    parts_a = Path(path1_str).parts
    parts_b = Path(path2_str).parts

    # Length of the common leading prefix of the two component tuples.
    shared = 0
    for left, right in zip(parts_a, parts_b):
        if left != right:
            break
        shared += 1

    # Components unique to either path; identical paths score 0.
    return len(parts_a) + len(parts_b) - 2 * shared
|
|
467
|
+
|
|
468
|
+
def get_tags_raw(self, fname, rel_fname):
    """Parse fname with tree-sitter and yield Tag objects for defs/refs.

    Generator. Yields nothing when the language is unknown, the grammar or
    query file is unavailable, or the file is empty/unreadable. When the
    query produced only definitions (no references), falls back to pygments
    to yield approximate reference tags with no location info.
    """
    lang = filename_to_lang(fname)
    if not lang:
        return

    try:
        language = get_language(lang)
        parser = get_parser(lang)
    except Exception as err:
        # Grammar not installed / failed to load; skip this file.
        print(f"Skipping file {fname}: {err}")
        return

    query_scm = get_scm_fname(lang)
    if not query_scm.exists():
        return
    query_scm = query_scm.read_text()

    code = self.io.read_text(fname)
    if not code:
        return
    tree = parser.parse(bytes(code, "utf-8"))

    # Run the tags queries
    # NOTE(review): gates on the Python version as a proxy for which
    # tree_sitter API (Query/QueryCursor vs language.query) is installed —
    # confirm this matches the pinned tree_sitter dependency.
    if sys.version_info >= (3, 10):
        query = tree_sitter.Query(language, query_scm)
        cursor = tree_sitter.QueryCursor(query)
        captures = cursor.captures(tree.root_node)
    else:
        query = language.query(query_scm)
        captures = query.captures(tree.root_node)

    saw = set()
    if USING_TSL_PACK:
        # TSL pack returns {capture_name: [nodes]}; flatten to (node, tag) pairs.
        all_nodes = []
        for tag, nodes in captures.items():
            all_nodes += [(node, tag) for node in nodes]
    else:
        all_nodes = list(captures)

    for node, tag in all_nodes:
        if tag.startswith("name.definition."):
            kind = "def"
        elif tag.startswith("name.reference."):
            kind = "ref"
        else:
            continue

        saw.add(kind)

        # Extract specific kind from the tag, e.g., 'function' from 'name.definition.function'
        specific_kind = tag.split(".")[-1] if "." in tag else None

        result = Tag(
            rel_fname=rel_fname,
            fname=fname,
            name=node.text.decode("utf-8"),
            kind=kind,
            specific_kind=specific_kind,
            line=node.start_point[0],  # Legacy line number
            start_line=node.start_point[0],
            end_line=node.end_point[0],
            start_byte=node.start_byte,
            end_byte=node.end_byte,
        )

        yield result

    if "ref" in saw:
        return
    if "def" not in saw:
        return

    # We saw defs, without any refs
    # Some tags files only provide defs (cpp, for example)
    # Use pygments to backfill refs

    try:
        lexer = guess_lexer_for_filename(fname, code)
    except Exception:  # On Windows, bad ref to time.clock which is deprecated?
        # self.io.tool_error(f"Error lexing {fname}")
        return

    tokens = list(lexer.get_tokens(code))
    tokens = [token[1] for token in tokens if token[0] in Token.Name]

    for token in tokens:
        yield Tag(
            rel_fname=rel_fname,
            fname=fname,
            name=token,
            kind="ref",
            specific_kind="name",  # Default for pygments fallback
            line=-1,  # Pygments doesn't give precise locations easily
            start_line=-1,
            end_line=-1,
            start_byte=-1,
            end_byte=-1,
        )
|
|
566
|
+
|
|
567
|
+
def get_ranked_tags(
    self, chat_fnames, other_fnames, mentioned_fnames, mentioned_idents, progress=True
):
    """Rank repo symbols by importance using personalized PageRank.

    Builds a graph whose nodes are relative filenames and whose edges run
    from files that reference an identifier to files that define it, then
    runs PageRank (personalized toward chat/mentioned files) and converts
    the resulting ranks into an ordered list of Tag objects, followed by
    bare ``(rel_fname,)`` tuples for files that contributed no tags.

    :param chat_fnames: absolute paths of files currently in the chat
    :param other_fnames: absolute paths of the rest of the repo's files
    :param mentioned_fnames: relative filenames mentioned in conversation
    :param mentioned_idents: identifiers mentioned in conversation
    :param progress: when True, update the spinner while processing
    :return: list of Tag instances and ``(rel_fname,)`` tuples, best first
    """
    import networkx as nx

    # ident -> set of rel_fnames that define it
    defines = defaultdict(set)
    # ident -> list of rel_fnames that reference it (repeats = ref count)
    references = defaultdict(list)
    # (rel_fname, ident) -> set of Tag objects for that definition
    definitions = defaultdict(set)

    personalization = dict()

    fnames = set(chat_fnames).union(set(other_fnames))
    chat_rel_fnames = set()

    fnames = sorted(fnames)

    # Default personalization for unspecified files is 1/num_nodes
    # https://networkx.org/documentation/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank
    personalize = 100 / len(fnames)

    try:
        cache_size = len(self.TAGS_CACHE)
    except SQLITE_ERRORS as e:
        # Recover from a corrupt/locked tags cache, then retry once.
        self.tags_cache_error(e)
        cache_size = len(self.TAGS_CACHE)

    if len(fnames) - cache_size > 100:
        # Many uncached files: warn the user and show a progress bar.
        self.io.tool_output(
            "Initial repo scan can be slow in larger repos, but only happens once."
        )
        fnames = tqdm(fnames, desc="Scanning repo")
        showing_bar = True
    else:
        showing_bar = False

    for fname in fnames:
        if self.verbose:
            self.io.tool_output(f"Processing {fname}")
        if progress and not showing_bar:
            self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {fname}")

        try:
            file_ok = Path(fname).is_file()
        except OSError:
            file_ok = False

        if not file_ok:
            # Warn once per missing file (e.g. deleted on disk, still in git).
            if fname not in self.warned_files:
                self.io.tool_warning(f"Repo-map can't include {fname}")
                self.io.tool_output(
                    "Has it been deleted from the file system but not from git?"
                )
                self.warned_files.add(fname)
            continue

        # dump(fname)
        rel_fname = self.get_rel_fname(fname)
        current_pers = 0.0  # Start with 0 personalization score

        if fname in chat_fnames:
            current_pers += personalize
            chat_rel_fnames.add(rel_fname)

        if rel_fname in mentioned_fnames:
            # Use max to avoid double counting if in chat_fnames and mentioned_fnames
            current_pers = max(current_pers, personalize)

        # Check path components against mentioned_idents
        path_obj = Path(rel_fname)
        path_components = set(path_obj.parts)
        basename_with_ext = path_obj.name
        basename_without_ext, _ = os.path.splitext(basename_with_ext)
        components_to_check = path_components.union({basename_with_ext, basename_without_ext})

        matched_idents = components_to_check.intersection(mentioned_idents)
        if matched_idents:
            # Add personalization *once* if any path component matches a mentioned ident
            current_pers += personalize

        if current_pers > 0:
            personalization[rel_fname] = current_pers  # Assign the final calculated value

        tags = list(self.get_tags(fname, rel_fname))

        # NOTE(review): list() never returns None, so this guard appears to be
        # dead code — confirm before removing.
        if tags is None:
            continue

        for tag in tags:
            if tag.kind == "def":
                defines[tag.name].add(rel_fname)
                key = (rel_fname, tag.name)
                definitions[key].add(tag)

            elif tag.kind == "ref":
                references[tag.name].append(rel_fname)

    ##
    # dump(defines)
    # dump(references)
    # dump(personalization)

    if not references:
        # No refs at all (e.g. languages whose queries only emit defs):
        # treat every definition site as a self-reference.
        references = dict((k, list(v)) for k, v in defines.items())

    idents = set(defines.keys()).intersection(set(references.keys()))

    G = nx.MultiDiGraph()

    # Add a small self-edge for every definition that has no references
    # Helps with tree-sitter 0.23.2 with ruby, where "def greet(name)"
    # isn't counted as a def AND a ref. tree-sitter 0.24.0 does.
    for ident in defines.keys():
        if ident in references:
            continue
        for definer in defines[ident]:
            G.add_edge(definer, definer, weight=0.000001, ident=ident)

    for ident in idents:
        if progress:
            self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {ident}")

        definers = defines[ident]

        mul = 1.0

        # Heuristics: multi-word identifiers look like "real" API names.
        is_snake = ("_" in ident) and any(c.isalpha() for c in ident)
        is_kebab = ("-" in ident) and any(c.isalpha() for c in ident)
        is_camel = any(c.isupper() for c in ident) and any(c.islower() for c in ident)
        if ident in mentioned_idents:
            mul *= 16

        # Prioritize function-like identifiers
        if (
            (is_snake or is_kebab or is_camel)
            and len(ident) >= 8
            and "test" not in ident.lower()
        ):
            mul *= 16

        # Downplay repetitive definitions in case of common boiler plate
        # Scale down logarithmically given the increasing number of references in a codebase
        # Ideally, this will help downweight boiler plate in frameworks, interfaces, and abstract classes
        if len(defines[ident]) > 4:
            exp = min(len(defines[ident]), 32)
            mul *= math.log2((4 / (2**exp)) + 1)

        # Calculate multiplier: log(number of unique file references * total references ^ 2)
        # Used to balance the number of times an identifier appears with its number of refs per file
        # Penetration in code base is important
        # So is the frequency
        # And the logarithm keeps them from scaling out of bounds forever
        # Combined with the above downweighting
        # There should be a push/pull that balances repetitiveness of identifier defs
        # With absolute number of references throughout a codebase
        unique_file_refs = len(set(references[ident]))
        total_refs = len(references[ident])
        ext_mul = round(math.log2(unique_file_refs * total_refs**2 + 1))

        for referencer, num_refs in Counter(references[ident]).items():
            for definer in definers:
                # dump(referencer, definer, num_refs, mul)

                # Only add edge if file extensions match
                referencer_ext = Path(referencer).suffix
                definer_ext = Path(definer).suffix
                if referencer_ext != definer_ext:
                    continue

                use_mul = mul * ext_mul

                if referencer in chat_rel_fnames:
                    use_mul *= 64
                elif referencer == definer:
                    use_mul *= 1 / 128

                # scale down so high freq (low value) mentions don't dominate
                # num_refs = math.sqrt(num_refs)
                path_distance = self.shared_path_components(referencer, definer)
                weight = num_refs * use_mul * 2 ** (-1 * path_distance)
                G.add_edge(referencer, definer, weight=weight, ident=ident)

    # NOTE(review): this branch is a no-op (and `references` was already
    # backfilled above when empty) — appears to be dead code; confirm.
    if not references:
        pass

    if personalization:
        pers_args = dict(personalization=personalization, dangling=personalization)
    else:
        pers_args = dict()

    try:
        ranked = nx.pagerank(G, weight="weight", **pers_args)
    except ZeroDivisionError:
        # Issue #1536
        try:
            ranked = nx.pagerank(G, weight="weight")
        except ZeroDivisionError:
            return []

    # distribute the rank from each source node, across all of its out edges
    ranked_definitions = defaultdict(float)
    for src in G.nodes:
        if progress:
            self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {src}")

        src_rank = ranked[src]
        total_weight = sum(data["weight"] for _src, _dst, data in G.out_edges(src, data=True))
        # dump(src, src_rank, total_weight)
        for _src, dst, data in G.out_edges(src, data=True):
            data["rank"] = src_rank * data["weight"] / total_weight
            ident = data["ident"]
            ranked_definitions[(dst, ident)] += data["rank"]

    ranked_tags = []
    ranked_definitions = sorted(
        ranked_definitions.items(), reverse=True, key=lambda x: (x[1], x[0])
    )

    # dump(ranked_definitions)
    # with open('defs.txt', 'w') as out_file:
    #     import pprint
    #     printer = pprint.PrettyPrinter(indent=2, stream=out_file)
    #     printer.pprint(ranked_definitions)

    for (fname, ident), rank in ranked_definitions:
        # print(f"{rank:.03f} {fname} {ident}")
        if fname in chat_rel_fnames:
            continue
        ranked_tags += list(definitions.get((fname, ident), []))

    rel_other_fnames_without_tags = set(self.get_rel_fname(fname) for fname in other_fnames)

    fnames_already_included = set(rt[0] for rt in ranked_tags)

    top_rank = sorted([(rank, node) for (node, rank) in ranked.items()], reverse=True)
    for rank, fname in top_rank:
        if fname in rel_other_fnames_without_tags:
            rel_other_fnames_without_tags.remove(fname)
        if fname not in fnames_already_included:
            # Include tag-less but ranked files as bare (fname,) entries.
            ranked_tags.append((fname,))

    # Finally, any remaining other files that never got ranked at all.
    for fname in rel_other_fnames_without_tags:
        ranked_tags.append((fname,))

    return ranked_tags
|
|
811
|
+
|
|
812
|
+
def get_ranked_tags_map(
|
|
813
|
+
self,
|
|
814
|
+
chat_fnames,
|
|
815
|
+
other_fnames=None,
|
|
816
|
+
max_map_tokens=None,
|
|
817
|
+
mentioned_fnames=None,
|
|
818
|
+
mentioned_idents=None,
|
|
819
|
+
force_refresh=False,
|
|
820
|
+
):
|
|
821
|
+
if not other_fnames:
|
|
822
|
+
other_fnames = list()
|
|
823
|
+
if not max_map_tokens:
|
|
824
|
+
max_map_tokens = self.max_map_tokens
|
|
825
|
+
if not mentioned_fnames:
|
|
826
|
+
mentioned_fnames = set()
|
|
827
|
+
if not mentioned_idents:
|
|
828
|
+
mentioned_idents = set()
|
|
829
|
+
|
|
830
|
+
# Create a cache key
|
|
831
|
+
cache_key = [
|
|
832
|
+
tuple(sorted(chat_fnames)) if chat_fnames else None,
|
|
833
|
+
len(other_fnames) if other_fnames else None,
|
|
834
|
+
max_map_tokens,
|
|
835
|
+
]
|
|
836
|
+
|
|
837
|
+
if self.refresh == "auto":
|
|
838
|
+
# Handle mentioned_fnames normally
|
|
839
|
+
cache_key += [
|
|
840
|
+
tuple(sorted(mentioned_fnames)) if mentioned_fnames else None,
|
|
841
|
+
]
|
|
842
|
+
|
|
843
|
+
# Handle mentioned_idents with similarity check
|
|
844
|
+
cache_key_component = self._get_mentioned_idents_cache_component(mentioned_idents)
|
|
845
|
+
cache_key.append(cache_key_component)
|
|
846
|
+
|
|
847
|
+
cache_key = hash(str(tuple(cache_key)))
|
|
848
|
+
|
|
849
|
+
use_cache = False
|
|
850
|
+
if not force_refresh:
|
|
851
|
+
if self.refresh == "manual" and self.last_map:
|
|
852
|
+
return self.last_map
|
|
853
|
+
|
|
854
|
+
if self.refresh == "always":
|
|
855
|
+
use_cache = False
|
|
856
|
+
elif self.refresh == "files":
|
|
857
|
+
use_cache = True
|
|
858
|
+
elif self.refresh == "auto":
|
|
859
|
+
use_cache = self.map_processing_time > 1.0
|
|
860
|
+
|
|
861
|
+
# Check if the result is in the cache
|
|
862
|
+
if use_cache and cache_key in self.map_cache:
|
|
863
|
+
return self.map_cache[cache_key]
|
|
864
|
+
|
|
865
|
+
# If not in cache or force_refresh is True, generate the map
|
|
866
|
+
start_time = time.time()
|
|
867
|
+
result = self.get_ranked_tags_map_uncached(
|
|
868
|
+
chat_fnames, other_fnames, max_map_tokens, mentioned_fnames, mentioned_idents
|
|
869
|
+
)
|
|
870
|
+
end_time = time.time()
|
|
871
|
+
self.map_processing_time = end_time - start_time
|
|
872
|
+
|
|
873
|
+
# Store the result in the cache
|
|
874
|
+
self.map_cache[cache_key] = result
|
|
875
|
+
self.last_map = result
|
|
876
|
+
|
|
877
|
+
return result
|
|
878
|
+
|
|
879
|
+
def get_ranked_tags_map_uncached(
    self,
    chat_fnames,
    other_fnames=None,
    max_map_tokens=None,
    mentioned_fnames=None,
    mentioned_idents=None,
):
    """Build the repo-map text from scratch, fitting it to a token budget.

    Ranks tags via get_ranked_tags(), prepends important files that got no
    tags, then binary-searches over how many ranked entries to render so
    the resulting tree is as close to ``max_map_tokens`` as possible.

    :return: the best rendered tree found, or None if nothing fit
    """
    if not other_fnames:
        other_fnames = list()
    if not max_map_tokens:
        max_map_tokens = self.max_map_tokens
    if not mentioned_fnames:
        mentioned_fnames = set()
    if not mentioned_idents:
        mentioned_idents = set()

    self.io.update_spinner(UPDATING_REPO_MAP_MESSAGE)

    ranked_tags = self.get_ranked_tags(
        chat_fnames, other_fnames, mentioned_fnames, mentioned_idents, True
    )

    # "Important" files (per filter_important_files) that produced no
    # ranked tags are still surfaced, as bare (fname,) entries up front.
    other_rel_fnames = sorted(set(self.get_rel_fname(fname) for fname in other_fnames))
    special_fnames = filter_important_files(other_rel_fnames)
    ranked_tags_fnames = set(tag[0] for tag in ranked_tags)
    special_fnames = [fn for fn in special_fnames if fn not in ranked_tags_fnames]
    special_fnames = [(fn,) for fn in special_fnames]

    ranked_tags = special_fnames + ranked_tags

    # Binary search over the tag-count prefix to hit the token budget.
    num_tags = len(ranked_tags)
    lower_bound = 0
    upper_bound = num_tags
    best_tree = None
    best_tree_tokens = 0

    chat_rel_fnames = set(self.get_rel_fname(fname) for fname in chat_fnames)

    # Fresh render cache for this run (keyed per file/lois/mtime).
    self.tree_cache = dict()

    # Initial guess: roughly 25 tokens rendered per tag.
    middle = min(int(max_map_tokens // 25), num_tags)
    while lower_bound <= upper_bound:
        # dump(lower_bound, middle, upper_bound)

        if middle > 1500:
            show_tokens = f"{middle / 1000.0:.1f}K"
        else:
            show_tokens = str(middle)

        self.io.update_spinner(f"{UPDATING_REPO_MAP_MESSAGE}: {show_tokens} tokens")

        tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames)
        num_tokens = self.token_count(tree)

        pct_err = abs(num_tokens - max_map_tokens) / max_map_tokens
        ok_err = 0.15
        # Keep the largest tree that fits; accept early if within 15%.
        if (num_tokens <= max_map_tokens and num_tokens > best_tree_tokens) or pct_err < ok_err:
            best_tree = tree
            best_tree_tokens = num_tokens

            if pct_err < ok_err:
                break

        if num_tokens < max_map_tokens:
            lower_bound = middle + 1
        else:
            upper_bound = middle - 1

        middle = int((lower_bound + upper_bound) // 2)

    return best_tree
|
|
951
|
+
|
|
952
|
+
# Class-level default render cache; get_ranked_tags_map_uncached() rebinds
# self.tree_cache to a fresh dict per run, and render_tree() reads/writes it.
tree_cache = dict()
|
|
953
|
+
|
|
954
|
+
def render_tree(self, abs_fname, rel_fname, lois):
    """Render *rel_fname* showing the given lines of interest in context.

    Rendered output is memoized in ``self.tree_cache`` keyed by
    (rel_fname, sorted lois, mtime). Parsed TreeContext objects are
    reused from ``self.tree_context_cache`` until the file's mtime changes.

    :param abs_fname: absolute path, used for reading and mtime checks
    :param rel_fname: repo-relative path, used for display and cache keys
    :param lois: iterable of 0-based line numbers to highlight
    :return: the formatted snippet string
    """
    file_mtime = self.get_mtime(abs_fname)
    cache_key = (rel_fname, tuple(sorted(lois)), file_mtime)

    try:
        return self.tree_cache[cache_key]
    except KeyError:
        pass

    # (Re)parse the file if we have no TreeContext for it, or it changed.
    entry = self.tree_context_cache.get(rel_fname)
    if entry is None or entry["mtime"] != file_mtime:
        source = self.io.read_text(abs_fname) or ""
        if not source.endswith("\n"):
            source += "\n"

        entry = {
            "context": TreeContext(
                rel_fname,
                source,
                color=False,
                line_number=False,
                child_context=False,
                last_line=False,
                margin=0,
                mark_lois=False,
                loi_pad=0,
                # header_max=30,
                show_top_of_file_parent_scope=False,
            ),
            "mtime": file_mtime,
        }
        self.tree_context_cache[rel_fname] = entry

    ctx = entry["context"]
    # Reset any lines of interest left over from a previous render.
    ctx.lines_of_interest = set()
    ctx.add_lines_of_interest(lois)
    ctx.add_context()
    rendered = ctx.format()
    self.tree_cache[cache_key] = rendered
    return rendered
|
|
991
|
+
|
|
992
|
+
def to_tree(self, tags, chat_rel_fnames):
    """Format ranked tags into the textual repo map.

    Tags are grouped per file: Tag entries get their code rendered around
    the lines of interest; bare ``(fname,)`` entries are listed by name
    only. Files already in the chat are skipped entirely.

    :param tags: mix of Tag instances and (rel_fname,) tuples, pre-ranked
    :param chat_rel_fnames: relative names of files in the chat, to omit
    :return: the assembled map text
    """
    if not tags:
        return ""

    cur_fname = None
    cur_abs_fname = None
    # lois is None while the "current file" has no renderable Tag;
    # otherwise it accumulates that file's lines of interest.
    lois = None
    output = ""

    # add a bogus tag at the end so we trip the this_fname != cur_fname...
    dummy_tag = (None,)
    for tag in sorted(tags) + [dummy_tag]:
        this_rel_fname = tag[0]
        if this_rel_fname in chat_rel_fnames:
            continue

        # ... here ... to output the final real entry in the list
        if this_rel_fname != cur_fname:
            if lois is not None:
                # Flush the previous file: header plus rendered snippet.
                output += "\n"
                output += cur_fname + ":\n"

                # truncate long lines, in case we get minified js or something else crazy
                output += truncate_long_lines(
                    self.render_tree(cur_abs_fname, cur_fname, lois), self.max_code_line_length
                )

                lois = None
            elif cur_fname:
                # Previous file had no Tags (bare tuple): name it only.
                output += "\n" + cur_fname + "\n"
            if type(tag) is Tag:
                lois = []
                cur_abs_fname = tag.fname
            cur_fname = this_rel_fname

        if lois is not None:
            lois.append(tag.line)

    return output
|
|
1031
|
+
|
|
1032
|
+
def _get_mentioned_idents_cache_component(self, mentioned_idents):
    """
    Determine the cache key component for mentioned_idents using similarity comparison.

    This method compares the current mentioned_idents with the previous ones using
    cosine similarity. If the similarity is high enough, it returns the previous
    cache key component to maintain cache hits. Otherwise, it updates the stored
    values and returns the current mentioned_idents.

    The similarity threshold adapts: consecutive hits tighten it (up to 0.9)
    and consecutive misses loosen it (down to 0.5).

    Args:
        mentioned_idents (set): Current set of mentioned identifiers

    Returns:
        tuple or None: Cache key component for mentioned_idents
    """
    if not mentioned_idents:
        # Empty input resets all stored comparison state.
        self._last_mentioned_idents = None
        self._last_mentioned_idents_vector = None
        self._has_last_mentioned_idents = False
        return None

    current_mentioned_idents = tuple(mentioned_idents)

    # Check if we have a previous cached value to compare against
    if self._has_last_mentioned_idents:
        # Create vector for current mentioned_idents
        current_vector = create_bigram_vector(current_mentioned_idents)
        current_vector_norm = normalize_vector(current_vector)

        # Calculate cosine similarity
        similarity = cosine_similarity(self._last_mentioned_idents_vector, current_vector_norm)
        # If similarity is high enough, use the previous cache key component
        if similarity >= self._mentioned_ident_similarity:
            # Use the previous mentioned_idents for cache key to maintain cache hit
            cache_key_component = self._last_mentioned_idents

            # Make similarity more strict the more consecutive cache hits
            self._mentioned_ident_similarity = min(
                0.9, self._mentioned_ident_similarity + 0.025
            )
        else:
            # Similarity is too low, use current mentioned_idents
            cache_key_component = current_mentioned_idents

            # Update stored values
            self._last_mentioned_idents = current_mentioned_idents
            self._last_mentioned_idents_vector = current_vector_norm

            # Make similarity less strict the more consecutive cache misses
            self._mentioned_ident_similarity = max(
                0.5, self._mentioned_ident_similarity - 0.025
            )
    else:
        # First time or no previous value, use current mentioned_idents
        cache_key_component = current_mentioned_idents
        current_vector = create_bigram_vector(current_mentioned_idents)

        # Store for future comparisons
        self._last_mentioned_idents = current_mentioned_idents
        self._last_mentioned_idents_vector = normalize_vector(current_vector)

    self._has_last_mentioned_idents = True
    return cache_key_component
|
|
1095
|
+
|
|
1096
|
+
|
|
1097
|
+
def truncate_long_lines(text, max_length):
    """Clip every line of *text* to at most *max_length* characters.

    Returns the clipped lines joined with newlines, always ending with a
    trailing newline.
    """
    clipped = (line[:max_length] for line in text.splitlines())
    return "\n".join(clipped) + "\n"
|
|
1099
|
+
|
|
1100
|
+
|
|
1101
|
+
def find_src_files(directory):
    """Return every file path under *directory*, recursively.

    A path that is not a directory is returned unchanged as a
    single-element list.
    """
    if not os.path.isdir(directory):
        return [directory]

    collected = []
    for root, _dirs, files in os.walk(directory):
        collected.extend(os.path.join(root, name) for name in files)
    return collected
|
|
1110
|
+
|
|
1111
|
+
|
|
1112
|
+
def get_scm_fname(lang):
    """Locate the tree-sitter tags query (.scm) file for *lang*.

    Prefers the tree-sitter-language-pack queries (when that pack is in
    use and the file exists), then falls back to the tree-sitter-languages
    queries. Returns None when package resources can't be resolved.
    """
    if USING_TSL_PACK:
        try:
            candidate = resources.files(__package__).joinpath(
                "queries",
                "tree-sitter-language-pack",
                f"{lang}-tags.scm",
            )
        except KeyError:
            candidate = None
        # Only use the pack's query when it actually ships for this lang.
        if candidate is not None and candidate.exists():
            return candidate

    # Fall back to tree-sitter-languages
    try:
        return resources.files(__package__).joinpath(
            "queries",
            "tree-sitter-languages",
            f"{lang}-tags.scm",
        )
    except KeyError:
        return None
|
|
1137
|
+
|
|
1138
|
+
|
|
1139
|
+
def get_supported_languages_md():
    """Build a markdown table listing every parser language and whether
    repo-map (a tags query file exists) and linting are supported.

    :return: the markdown table as a string
    """
    from grep_ast.parsers import PARSERS

    res = """
| Language | File extension | Repo map | Linter |
|:--------:|:--------------:|:--------:|:------:|
"""
    data = sorted((lang, ex) for ex, lang in PARSERS.items())

    for lang, ext in data:
        fn = get_scm_fname(lang)
        # get_scm_fname() returns None when package resources can't be
        # resolved; Path(None) would raise TypeError, so guard explicitly.
        repo_map = "✓" if fn is not None and Path(fn).exists() else ""
        linter_support = "✓"
        res += f"| {lang:20} | {ext:20} | {repo_map:^8} | {linter_support:^6} |\n"

    res += "\n"

    return res
|
|
1157
|
+
|
|
1158
|
+
|
|
1159
|
+
if __name__ == "__main__":
    # Ad-hoc CLI: build and print a repo map for the paths given on the
    # command line. Directories are expanded to every file inside them.
    # (Removed a dead `fnames = sys.argv[1:]` assignment that was never read.)
    chat_fnames = []
    other_fnames = []
    for fname in sys.argv[1:]:
        if Path(fname).is_dir():
            chat_fnames += find_src_files(fname)
        else:
            chat_fnames.append(fname)

    rm = RepoMap(root=".")
    repo_map = rm.get_ranked_tags_map(chat_fnames, other_fnames)

    dump(len(repo_map))
    print(repo_map)
|