aider-ce 0.88.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aider/__init__.py +20 -0
- aider/__main__.py +4 -0
- aider/_version.py +34 -0
- aider/analytics.py +258 -0
- aider/args.py +1056 -0
- aider/args_formatter.py +228 -0
- aider/change_tracker.py +133 -0
- aider/coders/__init__.py +36 -0
- aider/coders/agent_coder.py +2166 -0
- aider/coders/agent_prompts.py +104 -0
- aider/coders/architect_coder.py +48 -0
- aider/coders/architect_prompts.py +40 -0
- aider/coders/ask_coder.py +9 -0
- aider/coders/ask_prompts.py +35 -0
- aider/coders/base_coder.py +3613 -0
- aider/coders/base_prompts.py +87 -0
- aider/coders/chat_chunks.py +64 -0
- aider/coders/context_coder.py +53 -0
- aider/coders/context_prompts.py +75 -0
- aider/coders/editblock_coder.py +657 -0
- aider/coders/editblock_fenced_coder.py +10 -0
- aider/coders/editblock_fenced_prompts.py +143 -0
- aider/coders/editblock_func_coder.py +141 -0
- aider/coders/editblock_func_prompts.py +27 -0
- aider/coders/editblock_prompts.py +175 -0
- aider/coders/editor_diff_fenced_coder.py +9 -0
- aider/coders/editor_diff_fenced_prompts.py +11 -0
- aider/coders/editor_editblock_coder.py +9 -0
- aider/coders/editor_editblock_prompts.py +21 -0
- aider/coders/editor_whole_coder.py +9 -0
- aider/coders/editor_whole_prompts.py +12 -0
- aider/coders/help_coder.py +16 -0
- aider/coders/help_prompts.py +46 -0
- aider/coders/patch_coder.py +706 -0
- aider/coders/patch_prompts.py +159 -0
- aider/coders/search_replace.py +757 -0
- aider/coders/shell.py +37 -0
- aider/coders/single_wholefile_func_coder.py +102 -0
- aider/coders/single_wholefile_func_prompts.py +27 -0
- aider/coders/udiff_coder.py +429 -0
- aider/coders/udiff_prompts.py +115 -0
- aider/coders/udiff_simple.py +14 -0
- aider/coders/udiff_simple_prompts.py +25 -0
- aider/coders/wholefile_coder.py +144 -0
- aider/coders/wholefile_func_coder.py +134 -0
- aider/coders/wholefile_func_prompts.py +27 -0
- aider/coders/wholefile_prompts.py +65 -0
- aider/commands.py +2173 -0
- aider/copypaste.py +72 -0
- aider/deprecated.py +126 -0
- aider/diffs.py +128 -0
- aider/dump.py +29 -0
- aider/editor.py +147 -0
- aider/exceptions.py +115 -0
- aider/format_settings.py +26 -0
- aider/gui.py +545 -0
- aider/help.py +163 -0
- aider/help_pats.py +19 -0
- aider/helpers/__init__.py +9 -0
- aider/helpers/similarity.py +98 -0
- aider/history.py +180 -0
- aider/io.py +1608 -0
- aider/linter.py +304 -0
- aider/llm.py +55 -0
- aider/main.py +1415 -0
- aider/mcp/__init__.py +174 -0
- aider/mcp/server.py +149 -0
- aider/mdstream.py +243 -0
- aider/models.py +1313 -0
- aider/onboarding.py +429 -0
- aider/openrouter.py +129 -0
- aider/prompts.py +56 -0
- aider/queries/tree-sitter-language-pack/README.md +7 -0
- aider/queries/tree-sitter-language-pack/arduino-tags.scm +5 -0
- aider/queries/tree-sitter-language-pack/c-tags.scm +9 -0
- aider/queries/tree-sitter-language-pack/chatito-tags.scm +16 -0
- aider/queries/tree-sitter-language-pack/clojure-tags.scm +7 -0
- aider/queries/tree-sitter-language-pack/commonlisp-tags.scm +122 -0
- aider/queries/tree-sitter-language-pack/cpp-tags.scm +15 -0
- aider/queries/tree-sitter-language-pack/csharp-tags.scm +26 -0
- aider/queries/tree-sitter-language-pack/d-tags.scm +26 -0
- aider/queries/tree-sitter-language-pack/dart-tags.scm +92 -0
- aider/queries/tree-sitter-language-pack/elisp-tags.scm +5 -0
- aider/queries/tree-sitter-language-pack/elixir-tags.scm +54 -0
- aider/queries/tree-sitter-language-pack/elm-tags.scm +19 -0
- aider/queries/tree-sitter-language-pack/gleam-tags.scm +41 -0
- aider/queries/tree-sitter-language-pack/go-tags.scm +42 -0
- aider/queries/tree-sitter-language-pack/java-tags.scm +20 -0
- aider/queries/tree-sitter-language-pack/javascript-tags.scm +88 -0
- aider/queries/tree-sitter-language-pack/lua-tags.scm +34 -0
- aider/queries/tree-sitter-language-pack/matlab-tags.scm +10 -0
- aider/queries/tree-sitter-language-pack/ocaml-tags.scm +115 -0
- aider/queries/tree-sitter-language-pack/ocaml_interface-tags.scm +98 -0
- aider/queries/tree-sitter-language-pack/pony-tags.scm +39 -0
- aider/queries/tree-sitter-language-pack/properties-tags.scm +5 -0
- aider/queries/tree-sitter-language-pack/python-tags.scm +14 -0
- aider/queries/tree-sitter-language-pack/r-tags.scm +21 -0
- aider/queries/tree-sitter-language-pack/racket-tags.scm +12 -0
- aider/queries/tree-sitter-language-pack/ruby-tags.scm +64 -0
- aider/queries/tree-sitter-language-pack/rust-tags.scm +60 -0
- aider/queries/tree-sitter-language-pack/solidity-tags.scm +43 -0
- aider/queries/tree-sitter-language-pack/swift-tags.scm +51 -0
- aider/queries/tree-sitter-language-pack/udev-tags.scm +20 -0
- aider/queries/tree-sitter-languages/README.md +24 -0
- aider/queries/tree-sitter-languages/c-tags.scm +9 -0
- aider/queries/tree-sitter-languages/c_sharp-tags.scm +46 -0
- aider/queries/tree-sitter-languages/cpp-tags.scm +15 -0
- aider/queries/tree-sitter-languages/dart-tags.scm +91 -0
- aider/queries/tree-sitter-languages/elisp-tags.scm +8 -0
- aider/queries/tree-sitter-languages/elixir-tags.scm +54 -0
- aider/queries/tree-sitter-languages/elm-tags.scm +19 -0
- aider/queries/tree-sitter-languages/fortran-tags.scm +15 -0
- aider/queries/tree-sitter-languages/go-tags.scm +30 -0
- aider/queries/tree-sitter-languages/haskell-tags.scm +3 -0
- aider/queries/tree-sitter-languages/hcl-tags.scm +77 -0
- aider/queries/tree-sitter-languages/java-tags.scm +20 -0
- aider/queries/tree-sitter-languages/javascript-tags.scm +88 -0
- aider/queries/tree-sitter-languages/julia-tags.scm +60 -0
- aider/queries/tree-sitter-languages/kotlin-tags.scm +27 -0
- aider/queries/tree-sitter-languages/matlab-tags.scm +10 -0
- aider/queries/tree-sitter-languages/ocaml-tags.scm +115 -0
- aider/queries/tree-sitter-languages/ocaml_interface-tags.scm +98 -0
- aider/queries/tree-sitter-languages/php-tags.scm +26 -0
- aider/queries/tree-sitter-languages/python-tags.scm +12 -0
- aider/queries/tree-sitter-languages/ql-tags.scm +26 -0
- aider/queries/tree-sitter-languages/ruby-tags.scm +64 -0
- aider/queries/tree-sitter-languages/rust-tags.scm +60 -0
- aider/queries/tree-sitter-languages/scala-tags.scm +65 -0
- aider/queries/tree-sitter-languages/typescript-tags.scm +41 -0
- aider/queries/tree-sitter-languages/zig-tags.scm +3 -0
- aider/reasoning_tags.py +82 -0
- aider/repo.py +621 -0
- aider/repomap.py +1174 -0
- aider/report.py +260 -0
- aider/resources/__init__.py +3 -0
- aider/resources/model-metadata.json +776 -0
- aider/resources/model-settings.yml +2068 -0
- aider/run_cmd.py +133 -0
- aider/scrape.py +293 -0
- aider/sendchat.py +242 -0
- aider/sessions.py +256 -0
- aider/special.py +203 -0
- aider/tools/__init__.py +72 -0
- aider/tools/command.py +105 -0
- aider/tools/command_interactive.py +122 -0
- aider/tools/delete_block.py +182 -0
- aider/tools/delete_line.py +155 -0
- aider/tools/delete_lines.py +184 -0
- aider/tools/extract_lines.py +341 -0
- aider/tools/finished.py +48 -0
- aider/tools/git_branch.py +129 -0
- aider/tools/git_diff.py +60 -0
- aider/tools/git_log.py +57 -0
- aider/tools/git_remote.py +53 -0
- aider/tools/git_show.py +51 -0
- aider/tools/git_status.py +46 -0
- aider/tools/grep.py +256 -0
- aider/tools/indent_lines.py +221 -0
- aider/tools/insert_block.py +288 -0
- aider/tools/list_changes.py +86 -0
- aider/tools/ls.py +93 -0
- aider/tools/make_editable.py +85 -0
- aider/tools/make_readonly.py +69 -0
- aider/tools/remove.py +91 -0
- aider/tools/replace_all.py +126 -0
- aider/tools/replace_line.py +173 -0
- aider/tools/replace_lines.py +217 -0
- aider/tools/replace_text.py +187 -0
- aider/tools/show_numbered_context.py +147 -0
- aider/tools/tool_utils.py +313 -0
- aider/tools/undo_change.py +95 -0
- aider/tools/update_todo_list.py +156 -0
- aider/tools/view.py +57 -0
- aider/tools/view_files_matching.py +141 -0
- aider/tools/view_files_with_symbol.py +129 -0
- aider/urls.py +17 -0
- aider/utils.py +456 -0
- aider/versioncheck.py +113 -0
- aider/voice.py +205 -0
- aider/waiting.py +38 -0
- aider/watch.py +318 -0
- aider/watch_prompts.py +12 -0
- aider/website/Gemfile +8 -0
- aider/website/_includes/blame.md +162 -0
- aider/website/_includes/get-started.md +22 -0
- aider/website/_includes/help-tip.md +5 -0
- aider/website/_includes/help.md +24 -0
- aider/website/_includes/install.md +5 -0
- aider/website/_includes/keys.md +4 -0
- aider/website/_includes/model-warnings.md +67 -0
- aider/website/_includes/multi-line.md +22 -0
- aider/website/_includes/python-m-aider.md +5 -0
- aider/website/_includes/recording.css +228 -0
- aider/website/_includes/recording.md +34 -0
- aider/website/_includes/replit-pipx.md +9 -0
- aider/website/_includes/works-best.md +1 -0
- aider/website/_sass/custom/custom.scss +103 -0
- aider/website/docs/config/adv-model-settings.md +2261 -0
- aider/website/docs/config/agent-mode.md +194 -0
- aider/website/docs/config/aider_conf.md +548 -0
- aider/website/docs/config/api-keys.md +90 -0
- aider/website/docs/config/dotenv.md +493 -0
- aider/website/docs/config/editor.md +127 -0
- aider/website/docs/config/mcp.md +95 -0
- aider/website/docs/config/model-aliases.md +104 -0
- aider/website/docs/config/options.md +890 -0
- aider/website/docs/config/reasoning.md +210 -0
- aider/website/docs/config.md +44 -0
- aider/website/docs/faq.md +384 -0
- aider/website/docs/git.md +76 -0
- aider/website/docs/index.md +47 -0
- aider/website/docs/install/codespaces.md +39 -0
- aider/website/docs/install/docker.md +57 -0
- aider/website/docs/install/optional.md +100 -0
- aider/website/docs/install/replit.md +8 -0
- aider/website/docs/install.md +115 -0
- aider/website/docs/languages.md +264 -0
- aider/website/docs/legal/contributor-agreement.md +111 -0
- aider/website/docs/legal/privacy.md +104 -0
- aider/website/docs/llms/anthropic.md +77 -0
- aider/website/docs/llms/azure.md +48 -0
- aider/website/docs/llms/bedrock.md +132 -0
- aider/website/docs/llms/cohere.md +34 -0
- aider/website/docs/llms/deepseek.md +32 -0
- aider/website/docs/llms/gemini.md +49 -0
- aider/website/docs/llms/github.md +111 -0
- aider/website/docs/llms/groq.md +36 -0
- aider/website/docs/llms/lm-studio.md +39 -0
- aider/website/docs/llms/ollama.md +75 -0
- aider/website/docs/llms/openai-compat.md +39 -0
- aider/website/docs/llms/openai.md +58 -0
- aider/website/docs/llms/openrouter.md +78 -0
- aider/website/docs/llms/other.md +117 -0
- aider/website/docs/llms/vertex.md +50 -0
- aider/website/docs/llms/warnings.md +10 -0
- aider/website/docs/llms/xai.md +53 -0
- aider/website/docs/llms.md +54 -0
- aider/website/docs/more/analytics.md +127 -0
- aider/website/docs/more/edit-formats.md +116 -0
- aider/website/docs/more/infinite-output.md +165 -0
- aider/website/docs/more-info.md +8 -0
- aider/website/docs/recordings/auto-accept-architect.md +31 -0
- aider/website/docs/recordings/dont-drop-original-read-files.md +35 -0
- aider/website/docs/recordings/index.md +21 -0
- aider/website/docs/recordings/model-accepts-settings.md +69 -0
- aider/website/docs/recordings/tree-sitter-language-pack.md +80 -0
- aider/website/docs/repomap.md +112 -0
- aider/website/docs/scripting.md +100 -0
- aider/website/docs/sessions.md +203 -0
- aider/website/docs/troubleshooting/aider-not-found.md +24 -0
- aider/website/docs/troubleshooting/edit-errors.md +76 -0
- aider/website/docs/troubleshooting/imports.md +62 -0
- aider/website/docs/troubleshooting/models-and-keys.md +54 -0
- aider/website/docs/troubleshooting/support.md +79 -0
- aider/website/docs/troubleshooting/token-limits.md +96 -0
- aider/website/docs/troubleshooting/warnings.md +12 -0
- aider/website/docs/troubleshooting.md +11 -0
- aider/website/docs/usage/browser.md +57 -0
- aider/website/docs/usage/caching.md +49 -0
- aider/website/docs/usage/commands.md +133 -0
- aider/website/docs/usage/conventions.md +119 -0
- aider/website/docs/usage/copypaste.md +121 -0
- aider/website/docs/usage/images-urls.md +48 -0
- aider/website/docs/usage/lint-test.md +118 -0
- aider/website/docs/usage/modes.md +211 -0
- aider/website/docs/usage/not-code.md +179 -0
- aider/website/docs/usage/notifications.md +87 -0
- aider/website/docs/usage/tips.md +79 -0
- aider/website/docs/usage/tutorials.md +30 -0
- aider/website/docs/usage/voice.md +121 -0
- aider/website/docs/usage/watch.md +294 -0
- aider/website/docs/usage.md +102 -0
- aider/website/share/index.md +101 -0
- aider_ce-0.88.20.dist-info/METADATA +187 -0
- aider_ce-0.88.20.dist-info/RECORD +279 -0
- aider_ce-0.88.20.dist-info/WHEEL +5 -0
- aider_ce-0.88.20.dist-info/entry_points.txt +2 -0
- aider_ce-0.88.20.dist-info/licenses/LICENSE.txt +202 -0
- aider_ce-0.88.20.dist-info/top_level.txt +1 -0
aider/help_pats.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# This needs to sync with MANIFEST.in
|
|
2
|
+
|
|
3
|
+
# Glob patterns for website files that are left out; the list has to be
# kept in step with MANIFEST.in.
exclude_website_pats = [
    # OS / build residue
    "**/.DS_Store",
    # Whole directories
    "examples/**",
    "_posts/**",
    # Individual documents
    "HISTORY.md",
    "docs/benchmarks*md",
    "docs/ctags.md",
    "docs/unified-diffs.md",
    "docs/leaderboards/index.md",
    # Static assets
    "assets/**",
    # Jekyll / Ruby site tooling
    ".jekyll-metadata",
    "Gemfile.lock",
    "Gemfile",
    "_config.yml",
    # Archived material, nested or at the top level
    "**/OLD/**",
    "OLD/**",
]
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def normalize_vector(vector):
    """Scale a vector so that its Euclidean (L2) length is 1.

    Args:
        vector (np.ndarray or list): Values to normalize.

    Returns:
        np.ndarray: float64 array divided by its L2 norm. When the norm is
        zero the float64 array is returned without scaling.
    """
    as_float = np.asarray(vector, dtype=np.float64)
    length = np.linalg.norm(as_float)
    # A zero-length vector has no direction; hand it back untouched rather
    # than dividing by zero.
    return as_float if length == 0 else as_float / length
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def cosine_similarity(vector1, vector2):
    """Return the cosine of the angle between two vectors.

    Args:
        vector1 (np.ndarray or list): First vector.
        vector2 (np.ndarray or list): Second vector.

    Returns:
        float: Dot product divided by the product of the two L2 norms, or
        0.0 when either norm is zero.

    Raises:
        ValueError: If the two inputs do not have the same length.
    """
    left = np.asarray(vector1, dtype=np.float64)
    right = np.asarray(vector2, dtype=np.float64)

    if len(left) != len(right):
        raise ValueError("Vectors must have the same length")

    numerator = np.dot(left, right)
    left_norm = np.linalg.norm(left)
    right_norm = np.linalg.norm(right)

    # Cosine is undefined for a zero vector; report "no similarity" instead
    # of dividing by zero.
    if not (left_norm != 0 and right_norm != 0):
        return 0.0

    return numerator / (left_norm * right_norm)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def create_bigram_vector(texts):
    """Count lowercase a-z letter bigrams across a collection of strings.

    Every text is lower-cased and scanned for pairs of adjacent characters.
    A pair is counted only when both characters are in 'a'..'z'; the pair
    (c1, c2) is stored at index ``(c1 - 'a') * 26 + (c2 - 'a')``, so 'aa' is
    slot 0, 'ab' is slot 1, and 'zz' is slot 675. Bigrams never span two
    different texts.

    Args:
        texts (tuple): Strings to process.

    Returns:
        np.ndarray: int32 vector of length 676 holding the bigram counts.
    """
    alphabet_size = 26
    base = ord("a")

    # Compute slot numbers arithmetically instead of rebuilding a 676-entry
    # lookup table on every call.
    indices = []
    for text in texts:
        lowered = text.lower()
        # zip() yields nothing for texts shorter than two characters.
        for first, second in zip(lowered, lowered[1:]):
            if "a" <= first <= "z" and "a" <= second <= "z":
                indices.append(
                    (ord(first) - base) * alphabet_size + (ord(second) - base)
                )

    size = alphabet_size * alphabet_size
    if not indices:
        return np.zeros(size, dtype=np.int32)

    # One bincount over all texts equals the sum of per-text counts.
    counts = np.bincount(np.asarray(indices, dtype=np.intp), minlength=size)
    return counts.astype(np.int32)
|
aider/history.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
|
|
3
|
+
from aider import models, prompts
|
|
4
|
+
from aider.dump import dump # noqa: F401
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ChatSummary:
    """Shrink a chat history to a token budget by summarizing older messages.

    Summaries are requested from the configured models in order; the first
    model that returns a non-None answer wins.
    """

    def __init__(self, models=None, max_tokens=1024):
        """Store the models and the token budget.

        Args:
            models: A single model or a list of models. Each is expected to
                provide ``token_count``, ``simple_send_with_retries`` and
                ``name``.
            max_tokens (int): Token budget the history should fit into.

        Raises:
            ValueError: If no model is supplied.
        """
        if not models:
            raise ValueError("At least one model must be provided")
        self.models = models if isinstance(models, list) else [models]
        self.max_tokens = max_tokens
        # Token counting is delegated to the first model.
        self.token_count = self.models[0].token_count

    def check_max_tokens(self, messages, max_tokens=None):
        """Return True when ``messages`` exceed the token budget.

        Args:
            messages (list): Chat messages to measure.
            max_tokens (int, optional): Budget to test against; defaults to
                ``self.max_tokens``. A falsy budget always yields False.
        """
        if max_tokens is None:
            max_tokens = self.max_tokens

        if not max_tokens:
            return False

        sized = self.tokenize(messages)
        total = sum(tokens for tokens, _msg in sized)
        return total > max_tokens

    def tokenize(self, messages):
        """Return a list of ``(token_count, message)`` pairs for ``messages``."""
        return [(self.token_count(msg), msg) for msg in messages]

    async def summarize(self, messages, depth=0):
        """Summarize ``messages`` and make sure the result ends with an assistant turn.

        Args:
            messages (list): Chat messages (dicts with "role" and "content").
            depth (int): Recursion depth forwarded to ``summarize_real``.

        Returns:
            list: The possibly summarized messages. If the last one is not
            from the assistant, an assistant "Ok." message is appended.
        """
        summarized = await self.summarize_real(messages, depth)
        if summarized and summarized[-1]["role"] != "assistant":
            # summarize_real can return the caller's own list when everything
            # already fits; copy before appending so the input is not mutated.
            summarized = list(summarized)
            summarized.append(dict(role="assistant", content="Ok."))
        return summarized

    async def summarize_real(self, messages, depth=0):
        """Recursively summarize the head of the history until it fits.

        The newest messages (up to roughly half the budget) are kept
        verbatim as the tail; the older head is summarized and the two are
        recombined, recursing again if the result is still too large.

        Args:
            messages (list): Chat messages to reduce.
            depth (int): Current recursion depth; 0 for the outermost call.

        Returns:
            list: Messages that replace the input.

        Raises:
            ValueError: If no models are configured, or if every model fails
                to produce a summary.
        """
        if not self.models:
            raise ValueError("No models available for summarization")

        sized = self.tokenize(messages)
        total = sum(tokens for tokens, _msg in sized)

        if total <= self.max_tokens:
            if depth == 0:
                # All fit, no summarization needed
                return messages
            # This is a chunk that's small enough to summarize in one go
            return await self.summarize_all(messages)

        min_split = 4
        if len(messages) <= min_split or depth > 4:
            return await self.summarize_all(messages)

        tail_tokens = 0
        split_index = len(messages)
        half_max_tokens = self.max_tokens // 2

        # Walk backwards, growing the verbatim tail while it stays under
        # half of the budget.
        for i in range(len(sized) - 1, -1, -1):
            tokens, _msg = sized[i]
            if tail_tokens + tokens < half_max_tokens:
                tail_tokens += tokens
                split_index = i
            else:
                break

        # If we couldn't find a split point from the end, it's because the
        # last message was too big. So just split off the last message and
        # summarize the rest. This prevents infinite recursion.
        if split_index == len(messages):
            split_index = len(messages) - 1

        # Ensure the head ends with an assistant message
        while messages[split_index - 1]["role"] != "assistant" and split_index > 1:
            split_index -= 1

        if split_index <= min_split:
            return await self.summarize_all(messages)

        # Split head and tail
        head = messages[:split_index]
        tail = messages[split_index:]

        summary = await self.summarize_real(head, depth + 1)

        # If the combined summary and tail still fits, return directly
        new_messages = summary + tail

        sized_new = self.tokenize(new_messages)
        total_new = sum(tokens for tokens, _msg in sized_new)

        if total_new < self.max_tokens:
            return new_messages

        # Otherwise recurse with increased depth
        return await self.summarize_real(new_messages, depth + 1)

    @staticmethod
    def _format_transcript(messages):
        """Render user/assistant messages as a "# ROLE" headed transcript.

        Messages with any other role, or with empty content, are skipped.
        Each message body is terminated with a newline if it lacks one.
        """
        parts = []
        for msg in messages:
            role = msg["role"].upper()
            if role not in ("USER", "ASSISTANT"):
                continue
            body = msg.get("content")
            if not body:
                continue
            parts.append(f"# {role}\n")
            parts.append(body)
            if not body.endswith("\n"):
                parts.append("\n")
        return "".join(parts)

    async def summarize_all(self, messages):
        """Summarize ``messages`` into a single user message.

        Returns:
            list: One user message whose content is ``prompts.summary_prefix``
            followed by the model's summary.

        Raises:
            ValueError: If no model returns a summary.
        """
        summarize_messages = [
            dict(role="system", content=prompts.summarize),
            dict(role="user", content=self._format_transcript(messages)),
        ]

        for model in self.models:
            try:
                summary = await model.simple_send_with_retries(summarize_messages)
                if summary is not None:
                    summary = prompts.summary_prefix + summary
                    return [dict(role="user", content=summary)]
            except Exception as e:
                # Best effort: report the failure and fall through to the
                # next model.
                print(f"Summarization failed for model {model.name}: {str(e)}")

        err = "summarizer unexpectedly failed for all models"
        print(err)
        raise ValueError(err)

    async def summarize_all_as_text(self, messages, prompt, max_tokens=None):
        """Summarize ``messages`` with a custom system prompt and return raw text.

        Args:
            messages (list): Chat messages to summarize.
            prompt (str): System prompt sent to the model.
            max_tokens (int, optional): Forwarded to
                ``simple_send_with_retries``.

        Returns:
            The first non-None response returned by a model.

        Raises:
            ValueError: If no model returns a summary.
        """
        summarize_messages = [
            dict(role="system", content=prompt),
            dict(role="user", content=self._format_transcript(messages)),
        ]

        for model in self.models:
            try:
                summary = await model.simple_send_with_retries(
                    summarize_messages, max_tokens=max_tokens
                )
                if summary is not None:
                    return summary
            except Exception as e:
                # Best effort: report the failure and fall through to the
                # next model.
                print(f"Summarization failed for model {model.name}: {str(e)}")

        raise ValueError("summarizer unexpectedly failed for all models")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def main():
    """Command-line entry point: summarize the contents of one markdown file.

    Reads the file named on the command line, runs it through a
    ``ChatSummary`` built from a fixed list of model names, and dumps the
    result.
    """
    # Imported locally because only this script entry point needs it.
    import asyncio

    parser = argparse.ArgumentParser()
    parser.add_argument("filename", help="Markdown file to parse")
    args = parser.parse_args()

    model_names = ["gpt-3.5-turbo", "gpt-4"]  # Add more model names as needed
    model_list = [models.Model(name) for name in model_names]
    summarizer = ChatSummary(model_list)

    with open(args.filename, "r") as f:
        text = f.read()

    # ChatSummary has no summarize_chat_history_markdown method, so the
    # previous call raised AttributeError. The file is passed as a single
    # user message to the async summarize() entry point instead.
    # NOTE(review): if the markdown should be split into per-role messages
    # first, do that here before calling summarize().
    messages = [dict(role="user", content=text)]
    summary = asyncio.run(summarizer.summarize(messages))
    dump(summary)


if __name__ == "__main__":
    main()
|