kash-shell 0.3.25__py3-none-any.whl → 0.3.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/commands/help/assistant_commands.py +4 -3
- kash/config/colors.py +5 -3
- kash/config/text_styles.py +1 -0
- kash/config/unified_live.py +251 -0
- kash/docs/markdown/assistant_instructions_template.md +3 -3
- kash/docs/markdown/topics/a1_what_is_kash.md +22 -20
- kash/docs/markdown/topics/a2_installation.md +10 -10
- kash/docs/markdown/topics/a3_getting_started.md +8 -8
- kash/docs/markdown/topics/a4_elements.md +3 -3
- kash/docs/markdown/topics/a5_tips_for_use_with_other_tools.md +12 -12
- kash/docs/markdown/topics/b0_philosophy_of_kash.md +17 -17
- kash/docs/markdown/topics/b1_kash_overview.md +7 -7
- kash/docs/markdown/topics/b2_workspace_and_file_formats.md +1 -1
- kash/docs/markdown/topics/b3_modern_shell_tool_recommendations.md +1 -1
- kash/docs/markdown/topics/b4_faq.md +7 -7
- kash/docs/markdown/welcome.md +1 -1
- kash/embeddings/embeddings.py +110 -39
- kash/embeddings/text_similarity.py +2 -2
- kash/exec/shell_callable_action.py +4 -3
- kash/help/help_embeddings.py +5 -2
- kash/model/graph_model.py +2 -0
- kash/model/items_model.py +3 -3
- kash/shell/output/shell_output.py +2 -2
- kash/utils/file_utils/csv_utils.py +105 -0
- kash/utils/rich_custom/multitask_status.py +19 -5
- kash/web_gen/templates/base_styles.css.jinja +348 -23
- kash/web_gen/templates/base_webpage.html.jinja +11 -0
- kash/web_gen/templates/components/toc_styles.css.jinja +15 -3
- kash/web_gen/templates/components/tooltip_styles.css.jinja +1 -0
- kash/web_gen/templates/content_styles.css.jinja +23 -9
- kash/web_gen/templates/item_view.html.jinja +12 -4
- kash/web_gen/templates/simple_webpage.html.jinja +2 -2
- kash/xonsh_custom/custom_shell.py +7 -4
- {kash_shell-0.3.25.dist-info → kash_shell-0.3.26.dist-info}/METADATA +58 -55
- {kash_shell-0.3.25.dist-info → kash_shell-0.3.26.dist-info}/RECORD +38 -36
- {kash_shell-0.3.25.dist-info → kash_shell-0.3.26.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.25.dist-info → kash_shell-0.3.26.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.25.dist-info → kash_shell-0.3.26.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
> “*Civilization advances by extending the number of important operations which we can
|
|
4
4
|
> perform without thinking about them.*” —Alfred North Whitehead
|
|
5
5
|
|
|
6
|
-
Here is a bit more motivation for experimenting with kash, why I think it
|
|
6
|
+
Here is a bit more motivation for experimenting with kash, why I think it’s potentially
|
|
7
7
|
so useful, and some design principles.
|
|
8
8
|
(You may skip ahead to the next section if you just want a more concrete overview!)
|
|
9
9
|
|
|
10
|
-
### Why Apps Can
|
|
10
|
+
### Why Apps Can’t Solve All Your Problems
|
|
11
11
|
|
|
12
12
|
AI has radically changed the way we use software.
|
|
13
|
-
With LLMs and other generative AI models, we've seen big improvements in two areas:
|
|
13
|
+
With LLMs and other generative AI models, we’ve seen big improvements in two areas:
|
|
14
14
|
|
|
15
15
|
1. Powerful general-purpose new AI tools (ChatGPT, Perplexity, etc.)
|
|
16
16
|
|
|
@@ -18,20 +18,20 @@ With LLMs and other generative AI models, we've seen big improvements in two are
|
|
|
18
18
|
want to solve, like Notion, Figma, Descript, etc.
|
|
19
19
|
|
|
20
20
|
While we have these powerful cloud apps, we all know numerous situations where our
|
|
21
|
-
problems aren
|
|
21
|
+
problems aren’t easily solved or automated with a single tool like ChatGPT, Notion, Google
|
|
22
22
|
Docs, Slack, Excel, and Zapier.
|
|
23
23
|
|
|
24
24
|
If you want to use any of the newest AI models and APIs for something not supported by
|
|
25
25
|
an existing tool, you generally have to design and build it yourself—in Python and/or a
|
|
26
26
|
full-stack web app.
|
|
27
27
|
|
|
28
|
-
It
|
|
28
|
+
It’s true tools like GitHub Copilot, Claude Code, and Cursor can help anyone write code
|
|
29
29
|
much faster. But even if you have a tool like this, building polished apps that are good
|
|
30
30
|
enough that people will pay for them takes time, and many good product ideas never get built.
|
|
31
|
-
And the curse of [Conway
|
|
32
|
-
companies won
|
|
31
|
+
And the curse of [Conway’s Law](https://en.wikipedia.org/wiki/Conway%27s_law) means many
|
|
32
|
+
companies won’t add specific features you want, or at best are likely to do it slowly.
|
|
33
33
|
|
|
34
|
-
In short, in spite of AI tools accelerating software, certain things don
|
|
34
|
+
In short, in spite of AI tools accelerating software, certain things don’t change: we
|
|
35
35
|
are waiting for developers, product managers, designers, and entrepreneurs to design and
|
|
36
36
|
ship solutions for us.
|
|
37
37
|
|
|
@@ -58,9 +58,9 @@ Command-line shells generally still suffer from three big issues:
|
|
|
58
58
|
- A text-based interface many find confusing or ugly
|
|
59
59
|
|
|
60
60
|
- No easy, “native” support for modern tools, apps, and APIs (especially LLMs—and using
|
|
61
|
-
`curl` to call OpenAI APIs doesn
|
|
61
|
+
`curl` to call OpenAI APIs doesn’t count!)
|
|
62
62
|
|
|
63
|
-
Even worse, command lines haven
|
|
63
|
+
Even worse, command lines haven’t gotten much better.
|
|
64
64
|
Few companies make money shipping new command-line tooling.
|
|
65
65
|
(In the last few years this has slowly started to change with tools like nushell, fish,
|
|
66
66
|
and Warp.)
|
|
@@ -73,7 +73,7 @@ developer, a designer, or an enterpreneur building a product.
|
|
|
73
73
|
Any tool that lets you solve complex problems yourself, without waiting for engineers
|
|
74
74
|
and designers, can radically improve your productivity.
|
|
75
75
|
|
|
76
|
-
I think it
|
|
76
|
+
I think it’s a good time to revisit this idea.
|
|
77
77
|
|
|
78
78
|
In a post-LLM world, it should be possible to do more things without so much time and
|
|
79
79
|
effort spent (even with the help of LLMs) on coding and UI/UX design.
|
|
@@ -84,7 +84,7 @@ to see how well it works.
|
|
|
84
84
|
|
|
85
85
|
### The Goals of Kash
|
|
86
86
|
|
|
87
|
-
Kash is an experimental attempt at building the tool I
|
|
87
|
+
Kash is an experimental attempt at building the tool I’ve wanted for a long time, using
|
|
88
88
|
a command line as a starting point, and with an initial focus on content-related tasks.
|
|
89
89
|
|
|
90
90
|
That brings us to the goals behind building a new, AI-native shell.
|
|
@@ -99,17 +99,17 @@ That brings us to the goals behind building a new, AI-native shell.
|
|
|
99
99
|
- **Make complex tasks possible:** Highly complex tasks and workflows should be easy to
|
|
100
100
|
assemble (and rerun if they need to be automated) by adding new primitive actions and
|
|
101
101
|
combining primitive actions into more complex workflows.
|
|
102
|
-
You shouldn
|
|
102
|
+
You shouldn’t need to be a programmer to use any task—but any task should be
|
|
103
103
|
extensible with arbitrary code (written by you and an LLM) when needed.
|
|
104
104
|
|
|
105
105
|
- **Augment human skills and judgement:** Many AI agent efforts aim for pure automation.
|
|
106
106
|
But even with powerful LLMs and tools, full automation is rare.
|
|
107
|
-
Invariably, the best results come from human review wherever it
|
|
107
|
+
Invariably, the best results come from human review wherever it’s needed—experimenting
|
|
108
108
|
with different models and prompts, looking at what works, focusing expert human
|
|
109
109
|
attention in the right places.
|
|
110
110
|
The most flexible tools augment, not replace, your ability to review and manipulate
|
|
111
111
|
information. It should help both very technical users, like developers, as well as less
|
|
112
|
-
technical but sophisticated users who aren
|
|
112
|
+
technical but sophisticated users who aren’t traditional programmers.
|
|
113
113
|
|
|
114
114
|
- **Accelerate discovery of the workflows that work best:** We have so many powerful
|
|
115
115
|
APIs, models, libraries, and tools now—but the real bottleneck is in discovering and
|
|
@@ -125,7 +125,7 @@ That brings us to the goals behind building a new, AI-native shell.
|
|
|
125
125
|
|
|
126
126
|
A better command line is like a first step toward an item-based information operating
|
|
127
127
|
system—an alternate, more flexible UX and information architecture for knowledge
|
|
128
|
-
workflows. My hope is that kash becomes the tool you need when you don
|
|
128
|
+
workflows. My hope is that kash becomes the tool you need when you don’t know what tool
|
|
129
129
|
you need.
|
|
130
130
|
|
|
131
131
|
### Design Principles
|
|
@@ -155,7 +155,7 @@ Key design choices:
|
|
|
155
155
|
transition)
|
|
156
156
|
|
|
157
157
|
7. **Maintain context in workspaces** (keep files organized by project or effort in a
|
|
158
|
-
folder that can be persisted, won
|
|
158
|
+
folder that can be persisted, won’t get lost, and includes content, metadata,
|
|
159
159
|
actions, settings, selections, caches, history, etc.)
|
|
160
160
|
|
|
161
161
|
8. **Maintain metadata on files** (so you always know where each piece of content comes
|
|
@@ -8,7 +8,7 @@ extensibility of a modern command line interface.
|
|
|
8
8
|
|
|
9
9
|
The philosophy behind kash is similar to Unix shell tools: simple commands that can be
|
|
10
10
|
combined flexibly in powerful ways.
|
|
11
|
-
It operates on
|
|
11
|
+
It operates on “items” such as URLs, files, or Markdown notes within a workspace
|
|
12
12
|
directory.
|
|
13
13
|
|
|
14
14
|
This command-line is also AI enabled.
|
|
@@ -29,7 +29,7 @@ intuitive than old Unix commands.
|
|
|
29
29
|
### MCP Support
|
|
30
30
|
|
|
31
31
|
If the idea of having lots of commands runnable by an LLM sounds to you a little like
|
|
32
|
-
MCP, you
|
|
32
|
+
MCP, you’re right. Any action in kash can also be an MCP tool!
|
|
33
33
|
|
|
34
34
|
You can connect Claude Desktop or Cursor or other MCP clients to kash and use any kash
|
|
35
35
|
action as a tool. However, unlike the complexity of writing a new MCP server, the idea
|
|
@@ -41,22 +41,22 @@ Anyone, including kash itself, can write new actions.
|
|
|
41
41
|
You write a simple Python function, add a decorator, and it becomes an action you can
|
|
42
42
|
use in your shell.
|
|
43
43
|
|
|
44
|
-
Finally, getting really useful things to work still takes effort, so I
|
|
44
|
+
Finally, getting really useful things to work still takes effort, so I’ve also added a
|
|
45
45
|
number of little libraries to help with this.
|
|
46
46
|
|
|
47
47
|
### Supporting Complex Tasks
|
|
48
48
|
|
|
49
|
-
Because it
|
|
49
|
+
Because it’s really just a set of Python libraries, kash is more capable than a typical
|
|
50
50
|
shell. It is starting to become a sort of AI-friendly scripting framework as well.
|
|
51
51
|
|
|
52
52
|
Inputs and outputs of commands are stored as files, so you can easily chain commands
|
|
53
53
|
together and inspect intermediate results.
|
|
54
54
|
|
|
55
55
|
When possible, actions are nondestructive and idempotent—that is, they will either
|
|
56
|
-
create new files or simply skip an operation if it
|
|
56
|
+
create new files or simply skip an operation if it’s already complete.
|
|
57
57
|
|
|
58
58
|
So it can work a bit like a Makefile: suppose you run a command like `transcribe` on a
|
|
59
|
-
video. If you
|
|
59
|
+
video. If you’ve already run that command on the same YouTube URL, kash knows it and can
|
|
60
60
|
recognize the downloaded video and transcribed text is already present in your current
|
|
61
61
|
workspace.
|
|
62
62
|
|
|
@@ -95,7 +95,7 @@ original document), the sources are listed in a `derived_from` array within the
|
|
|
95
95
|
This means actions can find citations or other data on the provenance of a given piece
|
|
96
96
|
of information.
|
|
97
97
|
|
|
98
|
-
This might sound a little complex, but it
|
|
98
|
+
This might sound a little complex, but it’s quite simple in practice.
|
|
99
99
|
All the metadata is in a standard format,
|
|
100
100
|
[Frontmatter Format](https://github.com/jlevy/frontmatter-format), and the information
|
|
101
101
|
is compatible with other apps and pretty self-explanatory.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
A kash workspace is simply a directory of files.
|
|
4
4
|
The goal is for a workspace to be easy to use not just with kash but with other editors
|
|
5
|
-
or tools, so it
|
|
5
|
+
or tools, so it’s possible to edit, share, or commit files to version control.
|
|
6
6
|
It makes sense to devote a workspace to a single topic, project, or area of research.
|
|
7
7
|
|
|
8
8
|
File formats and conventions:
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Many of us (myself included) have long believed in sticking with tried-and-true bash and
|
|
4
4
|
the classic command-line tools.
|
|
5
|
-
While it
|
|
5
|
+
While it’s still wise to know these tools, we’ve in recent years seen many new tools
|
|
6
6
|
emerge that are more powerful, modern, and cross-platform.
|
|
7
7
|
|
|
8
8
|
When using kash it makes sense to use these.
|
|
@@ -17,7 +17,7 @@ Anyone, including kash itself, can write new actions easily.
|
|
|
17
17
|
|
|
18
18
|
The philosophy behind kash is similar to Unix shell tools: simple commands that can be
|
|
19
19
|
combined in flexible and powerful ways.
|
|
20
|
-
It operates on
|
|
20
|
+
It operates on “items” such as URLs, files, or Markdown notes within a workspace
|
|
21
21
|
directory.
|
|
22
22
|
|
|
23
23
|
For more detailed information, you can run `help` to get background and a list of
|
|
@@ -42,7 +42,7 @@ questions.
|
|
|
42
42
|
### How does kash accept both shell and assistant requests to the LLM with natural language?
|
|
43
43
|
|
|
44
44
|
By default, if a command is valid shell or Python, kash will treat it as a shell
|
|
45
|
-
command, using xonsh
|
|
45
|
+
command, using xonsh’s conventions.
|
|
46
46
|
|
|
47
47
|
Commands that begin with a `?` are automatically considered assistant requests.
|
|
48
48
|
|
|
@@ -136,9 +136,9 @@ fit kash commands and actions, reading metadata on items, etc.
|
|
|
136
136
|
|
|
137
137
|
### Can kash replace my regular shell?
|
|
138
138
|
|
|
139
|
-
While kash doesn
|
|
139
|
+
While kash doesn’t aim to completely replace all uses of the shell—for example, that’s
|
|
140
140
|
hard to do in general for remote use, and people have many constraints, customizations,
|
|
141
|
-
and preferences—I
|
|
141
|
+
and preferences—I’ve found it’s highly useful for a lot of situations.
|
|
142
142
|
It is starting to replace bash or zsh for day-to-day local use on my laptop.
|
|
143
143
|
|
|
144
144
|
Kash basically wraps xonsh, so you have almost all the functionality of xonsh and Python
|
|
@@ -154,18 +154,18 @@ Any command you type on the command-line in kash is a command.
|
|
|
154
154
|
Some commands are basic, built-in commands.
|
|
155
155
|
The idea is there are relatively few of these, and they do important primitive things
|
|
156
156
|
like `select` (select or show selections), `show` (show an item), `files` (list
|
|
157
|
-
files—kash
|
|
157
|
+
files—kash’s better version of `ls`), `workspace` (shows information about the current
|
|
158
158
|
workspace), or `logs` (shows the detailed logs for the current workspace).
|
|
159
159
|
In Python, built-in commands are defined by simple functions.
|
|
160
160
|
|
|
161
161
|
But most commands are defined as an *action*. Actions are invoked just like any other
|
|
162
|
-
command but have a standard structure: they are assumed to perform an
|
|
162
|
+
command but have a standard structure: they are assumed to perform an “action” on a set
|
|
163
163
|
of items (files of known types) and then save those items, all within an existing
|
|
164
164
|
workspace. Actions are defined as a subclass of `Action` in Python.
|
|
165
165
|
|
|
166
166
|
### Does nvm (Node version manager) work in kash?
|
|
167
167
|
|
|
168
|
-
It
|
|
168
|
+
It’s hard to get nvm to work well in xonsh, but try [fnm](https://github.com/Schniz/fnm)
|
|
169
169
|
instead! It works just as well and kash supports `fnm` automatically so it auto-detects
|
|
170
170
|
and uses fnm to switch or install Node versions for directories with Node projects (i.e.
|
|
171
171
|
there is an `.nvmrc`, `.node-version`, or `package.json` file).
|
kash/docs/markdown/welcome.md
CHANGED
|
@@ -6,7 +6,7 @@ You may simply ask a question and the kash assistant will help you.
|
|
|
6
6
|
Press **space** (or type **?**), then write your question or request.
|
|
7
7
|
Use `logs` for detailed logs.
|
|
8
8
|
|
|
9
|
-
*I
|
|
9
|
+
*I’d love to hear from you with issues, bugs, and ideas.
|
|
10
10
|
Discuss at github.com/jlevy/kash or contact me at github.com/jlevy or x.com/ojoshe (DMs
|
|
11
11
|
open).*
|
|
12
12
|
|
kash/embeddings/embeddings.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import ast
|
|
4
|
+
import json
|
|
4
5
|
from collections.abc import Iterable
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
from typing import TYPE_CHECKING, TypeAlias
|
|
7
|
+
from typing import TYPE_CHECKING, Any, TypeAlias
|
|
7
8
|
|
|
9
|
+
import pandas as pd
|
|
8
10
|
from pydantic.dataclasses import dataclass
|
|
9
11
|
from strif import abbrev_list
|
|
10
12
|
|
|
11
13
|
from kash.config.logger import get_logger
|
|
12
14
|
from kash.llm_utils.init_litellm import init_litellm
|
|
13
|
-
from kash.llm_utils.llms import DEFAULT_EMBEDDING_MODEL
|
|
15
|
+
from kash.llm_utils.llms import DEFAULT_EMBEDDING_MODEL, EmbeddingModel
|
|
14
16
|
|
|
15
17
|
if TYPE_CHECKING:
|
|
16
18
|
from pandas import DataFrame
|
|
@@ -18,15 +20,26 @@ if TYPE_CHECKING:
|
|
|
18
20
|
log = get_logger(__name__)
|
|
19
21
|
|
|
20
22
|
|
|
21
|
-
BATCH_SIZE = 1024
|
|
23
|
+
BATCH_SIZE: int = 1024
|
|
22
24
|
|
|
23
25
|
Key: TypeAlias = str
|
|
24
26
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
|
|
29
|
+
class EmbValue:
|
|
30
|
+
emb_text: str
|
|
31
|
+
data: dict[str, Any] | None = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
|
|
35
|
+
class KeyVal:
|
|
36
|
+
"""
|
|
37
|
+
A key-value pair where the key is a unique identifier (such as the path)
|
|
38
|
+
and the value is the text to embed and any additional data.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
key: Key
|
|
42
|
+
value: EmbValue
|
|
30
43
|
|
|
31
44
|
|
|
32
45
|
@dataclass
|
|
@@ -36,39 +49,45 @@ class Embeddings:
|
|
|
36
49
|
small texts, the text itself).
|
|
37
50
|
"""
|
|
38
51
|
|
|
39
|
-
data: dict[Key, tuple[
|
|
40
|
-
"""Mapping of key to
|
|
52
|
+
data: dict[Key, tuple[EmbValue, list[float]]]
|
|
53
|
+
"""Mapping of key to EmbValue and embedding."""
|
|
41
54
|
|
|
42
|
-
def as_iterable(self) -> Iterable[tuple[Key,
|
|
43
|
-
return ((key,
|
|
55
|
+
def as_iterable(self) -> Iterable[tuple[Key, EmbValue, list[float]]]:
|
|
56
|
+
return ((key, emb_value, emb) for key, (emb_value, emb) in self.data.items())
|
|
44
57
|
|
|
45
58
|
def as_df(self) -> DataFrame:
|
|
46
59
|
from pandas import DataFrame
|
|
47
60
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
61
|
+
if not self.data:
|
|
62
|
+
return DataFrame({"key": [], "text": [], "data": [], "embedding": []})
|
|
63
|
+
|
|
64
|
+
items = [(key, emb_value, emb) for key, (emb_value, emb) in self.data.items()]
|
|
65
|
+
keys, emb_values, embeddings = zip(*items, strict=False)
|
|
66
|
+
|
|
51
67
|
return DataFrame(
|
|
52
68
|
{
|
|
53
|
-
"key": keys,
|
|
54
|
-
"text":
|
|
55
|
-
"
|
|
69
|
+
"key": list(keys),
|
|
70
|
+
"text": [ev.emb_text for ev in emb_values],
|
|
71
|
+
"data": [ev.data for ev in emb_values],
|
|
72
|
+
"embedding": list(embeddings),
|
|
56
73
|
}
|
|
57
74
|
)
|
|
58
75
|
|
|
59
|
-
def __getitem__(self, key: Key) -> tuple[
|
|
76
|
+
def __getitem__(self, key: Key) -> tuple[EmbValue, list[float]]:
|
|
60
77
|
if key in self.data:
|
|
61
78
|
return self.data[key]
|
|
62
79
|
else:
|
|
63
80
|
raise KeyError(f"Key '{key}' not found in embeddings")
|
|
64
81
|
|
|
65
82
|
@classmethod
|
|
66
|
-
def embed(
|
|
83
|
+
def embed(
|
|
84
|
+
cls, keyvals: list[KeyVal], model: EmbeddingModel = DEFAULT_EMBEDDING_MODEL
|
|
85
|
+
) -> Embeddings:
|
|
67
86
|
from litellm import embedding
|
|
68
87
|
|
|
69
88
|
init_litellm()
|
|
70
89
|
|
|
71
|
-
data = {}
|
|
90
|
+
data: dict[Key, tuple[EmbValue, list[float]]] = {}
|
|
72
91
|
log.info(
|
|
73
92
|
"Embedding %d texts (model %s, batch size %s)…",
|
|
74
93
|
len(keyvals),
|
|
@@ -76,21 +95,23 @@ class Embeddings:
|
|
|
76
95
|
BATCH_SIZE,
|
|
77
96
|
)
|
|
78
97
|
for batch_start in range(0, len(keyvals), BATCH_SIZE):
|
|
79
|
-
batch_end = batch_start + BATCH_SIZE
|
|
80
|
-
batch = keyvals[batch_start:batch_end]
|
|
81
|
-
keys = [kv
|
|
82
|
-
texts = [kv
|
|
98
|
+
batch_end: int = batch_start + BATCH_SIZE
|
|
99
|
+
batch: list[KeyVal] = keyvals[batch_start:batch_end]
|
|
100
|
+
keys: list[Key] = [kv.key for kv in batch]
|
|
101
|
+
texts: list[str] = [kv.value.emb_text for kv in batch]
|
|
83
102
|
|
|
84
103
|
response = embedding(model=model.litellm_name, input=texts)
|
|
85
104
|
|
|
86
105
|
if not response.data:
|
|
87
106
|
raise ValueError("No embedding response data")
|
|
88
107
|
|
|
89
|
-
batch_embeddings = [e["embedding"] for e in response.data]
|
|
108
|
+
batch_embeddings: list[list[float]] = [e["embedding"] for e in response.data]
|
|
90
109
|
data.update(
|
|
91
110
|
{
|
|
92
|
-
key: (
|
|
93
|
-
for key,
|
|
111
|
+
key: (emb_value, emb)
|
|
112
|
+
for key, emb_value, emb in zip(
|
|
113
|
+
keys, [kv.value for kv in batch], batch_embeddings, strict=False
|
|
114
|
+
)
|
|
94
115
|
}
|
|
95
116
|
)
|
|
96
117
|
|
|
@@ -110,32 +131,82 @@ class Embeddings:
|
|
|
110
131
|
def read_from_csv(cls, path: Path) -> Embeddings:
|
|
111
132
|
import pandas as pd
|
|
112
133
|
|
|
113
|
-
df = pd.read_csv(path)
|
|
134
|
+
df: pd.DataFrame = pd.read_csv(path)
|
|
114
135
|
df["embedding"] = df["embedding"].apply(ast.literal_eval)
|
|
115
|
-
|
|
116
|
-
|
|
136
|
+
|
|
137
|
+
# Handle missing data column just in case.
|
|
138
|
+
if "data" in df.columns:
|
|
139
|
+
df["data"] = df["data"].apply(lambda x: ast.literal_eval(x) if pd.notna(x) else None)
|
|
140
|
+
else:
|
|
141
|
+
df["data"] = None
|
|
142
|
+
|
|
143
|
+
data: dict[Key, tuple[EmbValue, list[float]]] = {}
|
|
144
|
+
for _, row in df.iterrows():
|
|
145
|
+
key = str(row["key"])
|
|
146
|
+
text = str(row["text"])
|
|
147
|
+
embedding = list(row["embedding"])
|
|
148
|
+
# Type-safe handling of data column
|
|
149
|
+
raw_data = row["data"] if "data" in df.columns else None
|
|
150
|
+
data_value: dict[str, Any] | None = (
|
|
151
|
+
raw_data if isinstance(raw_data, dict) or raw_data is None else None
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
data[key] = (
|
|
155
|
+
EmbValue(emb_text=text, data=data_value),
|
|
156
|
+
embedding,
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
return cls(data=data)
|
|
117
160
|
|
|
118
161
|
def to_npz(self, path: Path) -> None:
|
|
119
162
|
"""Save embeddings in numpy's compressed format."""
|
|
120
163
|
import numpy as np
|
|
121
164
|
|
|
122
165
|
keys: list[Key] = list(self.data.keys())
|
|
123
|
-
texts: list[str] = [self.data[k][0] for k in keys]
|
|
166
|
+
texts: list[str] = [self.data[k][0].emb_text for k in keys]
|
|
167
|
+
# Serialize data as JSON strings
|
|
168
|
+
data_strings: list[str] = [
|
|
169
|
+
json.dumps(self.data[k][0].data) if self.data[k][0].data is not None else ""
|
|
170
|
+
for k in keys
|
|
171
|
+
]
|
|
124
172
|
embeddings = np.array([self.data[k][1] for k in keys])
|
|
125
|
-
np.savez_compressed(
|
|
173
|
+
np.savez_compressed(
|
|
174
|
+
path,
|
|
175
|
+
keys=keys,
|
|
176
|
+
texts=texts,
|
|
177
|
+
data=data_strings,
|
|
178
|
+
embeddings=embeddings,
|
|
179
|
+
)
|
|
126
180
|
|
|
127
181
|
@classmethod
|
|
128
182
|
def read_from_npz(cls, path: Path) -> Embeddings:
|
|
129
183
|
"""Load embeddings from numpy's compressed format."""
|
|
130
184
|
import numpy as np
|
|
131
185
|
|
|
132
|
-
with np.load(path) as
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
186
|
+
with np.load(path) as npz_data:
|
|
187
|
+
if "data" in npz_data.files:
|
|
188
|
+
data_array = npz_data["data"]
|
|
189
|
+
else:
|
|
190
|
+
# No data column, so no data.
|
|
191
|
+
data_array = None
|
|
192
|
+
|
|
193
|
+
loaded_data: dict[Key, tuple[EmbValue, list[float]]] = {}
|
|
194
|
+
for i, (k, t, e) in enumerate(
|
|
195
|
+
zip(
|
|
196
|
+
npz_data["keys"],
|
|
197
|
+
npz_data["texts"],
|
|
198
|
+
npz_data["embeddings"],
|
|
199
|
+
strict=False,
|
|
200
|
+
)
|
|
201
|
+
):
|
|
202
|
+
data_str = data_array[i] if data_array is not None else ""
|
|
203
|
+
loaded_data[k] = (
|
|
204
|
+
EmbValue(emb_text=t, data=json.loads(data_str) if data_str else None),
|
|
205
|
+
e.tolist(),
|
|
206
|
+
)
|
|
207
|
+
|
|
137
208
|
return cls(data=loaded_data)
|
|
138
209
|
|
|
139
210
|
def __str__(self) -> str:
|
|
140
|
-
dims = -1 if len(self.data) == 0 else len(next(iter(self.data))[1])
|
|
211
|
+
dims: int = -1 if len(self.data) == 0 else len(next(iter(self.data.values()))[1])
|
|
141
212
|
return f"Embeddings({len(self.data)} items, {dims} dimensions)"
|
|
@@ -52,8 +52,8 @@ def rank_by_relatedness(
|
|
|
52
52
|
query_embedding = response.data[0]["embedding"]
|
|
53
53
|
|
|
54
54
|
scored_strings = [
|
|
55
|
-
(key,
|
|
56
|
-
for key,
|
|
55
|
+
(key, emb_value.emb_text, relatedness_fn(query_embedding, emb))
|
|
56
|
+
for key, emb_value, emb in embeddings.as_iterable()
|
|
57
57
|
]
|
|
58
58
|
scored_strings.sort(key=lambda x: x[2], reverse=True)
|
|
59
59
|
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from funlog import log_tallies
|
|
2
2
|
|
|
3
3
|
from kash.config.env_settings import KashEnv
|
|
4
|
-
from kash.config.logger import
|
|
5
|
-
from kash.config.text_styles import COLOR_ERROR
|
|
4
|
+
from kash.config.logger import get_logger
|
|
5
|
+
from kash.config.text_styles import COLOR_ERROR
|
|
6
|
+
from kash.config.unified_live import get_unified_live
|
|
6
7
|
from kash.exec.action_exec import run_action_with_shell_context
|
|
7
8
|
from kash.exec.history import record_command
|
|
8
9
|
from kash.exec_model.commands_model import Command
|
|
@@ -57,7 +58,7 @@ class ShellCallableAction:
|
|
|
57
58
|
log.info("Action shell args: %s", shell_args)
|
|
58
59
|
explicit_values = RawParamValues(shell_args.options)
|
|
59
60
|
if not action.interactive_input and not action.live_output:
|
|
60
|
-
with
|
|
61
|
+
with get_unified_live().status(f"Running action {action.name}…"):
|
|
61
62
|
result = run_action_with_shell_context(
|
|
62
63
|
action_cls,
|
|
63
64
|
explicit_values,
|
kash/help/help_embeddings.py
CHANGED
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from typing_extensions import override
|
|
7
7
|
|
|
8
8
|
from kash.config.logger import get_logger
|
|
9
|
-
from kash.embeddings.embeddings import Embeddings
|
|
9
|
+
from kash.embeddings.embeddings import Embeddings, EmbValue, KeyVal
|
|
10
10
|
from kash.embeddings.text_similarity import rank_by_relatedness
|
|
11
11
|
from kash.help.help_types import HelpDoc, HelpDocType
|
|
12
12
|
from kash.web_content.local_file_cache import Loadable
|
|
@@ -59,7 +59,10 @@ class HelpIndex:
|
|
|
59
59
|
from kash.web_content.file_cache_utils import cache_file
|
|
60
60
|
|
|
61
61
|
def calculate_and_save_help_embeddings(target_path: Path) -> None:
|
|
62
|
-
keyvals = [
|
|
62
|
+
keyvals = [
|
|
63
|
+
KeyVal(key=str(key), value=EmbValue(emb_text=doc.embedding_text()))
|
|
64
|
+
for key, doc in self._docs_by_key()
|
|
65
|
+
]
|
|
63
66
|
embeddings = Embeddings.embed(keyvals)
|
|
64
67
|
log.info("Embedded %d help documents, cached at: %s", len(embeddings.data), target_path)
|
|
65
68
|
embeddings.to_npz(target_path)
|
kash/model/graph_model.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from collections.abc import Iterable
|
|
2
2
|
from dataclasses import asdict, field
|
|
3
|
+
from typing import Any
|
|
3
4
|
|
|
4
5
|
from pydantic.dataclasses import dataclass
|
|
5
6
|
from strif import abbrev_list
|
|
@@ -18,6 +19,7 @@ class Node:
|
|
|
18
19
|
body: str | None = None
|
|
19
20
|
url: str | None = None
|
|
20
21
|
thumbnail_url: str | None = None
|
|
22
|
+
data: dict[str, Any] | None = None
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
@dataclass(frozen=True)
|
kash/model/items_model.py
CHANGED
|
@@ -935,8 +935,8 @@ class Item:
|
|
|
935
935
|
"type": 64,
|
|
936
936
|
"format": 64,
|
|
937
937
|
"title": 40,
|
|
938
|
-
"url":
|
|
939
|
-
"external_path":
|
|
938
|
+
"url": 128,
|
|
939
|
+
"external_path": 0,
|
|
940
940
|
},
|
|
941
941
|
)
|
|
942
942
|
+ f"[{len(self.body) if self.body else 0} body chars]"
|
|
@@ -961,7 +961,7 @@ class Item:
|
|
|
961
961
|
+ f"[{len(self.body) if self.body else 0} body chars]"
|
|
962
962
|
)
|
|
963
963
|
|
|
964
|
-
def __repr__(self):
|
|
964
|
+
def __repr__(self) -> str:
|
|
965
965
|
return self.as_str_brief()
|
|
966
966
|
|
|
967
967
|
|
|
@@ -86,8 +86,8 @@ def multitask_status(
|
|
|
86
86
|
) -> MultiTaskStatus | nullcontext:
|
|
87
87
|
"""
|
|
88
88
|
Create a `MultiTaskStatus` context manager for displaying multiple task progress
|
|
89
|
-
using the global shell console
|
|
90
|
-
to disable status display.
|
|
89
|
+
using the global shell console with live display conflict prevention. If disabled,
|
|
90
|
+
returns a null context, so it's convenient to disable status display.
|
|
91
91
|
"""
|
|
92
92
|
if not enabled:
|
|
93
93
|
return nullcontext()
|