embedding-visualizer 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- embedding_visualizer-0.1.0/.github/workflows/workflow.yaml +20 -0
- embedding_visualizer-0.1.0/.gitignore +8 -0
- embedding_visualizer-0.1.0/PKG-INFO +11 -0
- embedding_visualizer-0.1.0/README.md +72 -0
- embedding_visualizer-0.1.0/examples/repo_files.ipynb +2642 -0
- embedding_visualizer-0.1.0/pyproject.toml +20 -0
- embedding_visualizer-0.1.0/src/embedding_visualizer/__init__.py +13 -0
- embedding_visualizer-0.1.0/src/embedding_visualizer/_cache.py +26 -0
- embedding_visualizer-0.1.0/src/embedding_visualizer/_embed.py +60 -0
- embedding_visualizer-0.1.0/src/embedding_visualizer/_plot.py +20 -0
- embedding_visualizer-0.1.0/src/embedding_visualizer/_projections.py +43 -0
- embedding_visualizer-0.1.0/src/embedding_visualizer/_visualize.py +191 -0
- embedding_visualizer-0.1.0/uv.lock +1382 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
id-token: write
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.12"
|
|
18
|
+
- run: pip install build
|
|
19
|
+
- run: python -m build
|
|
20
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: embedding-visualizer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Visualize text embeddings with interactive plots
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: nbformat>=4.2.0
|
|
7
|
+
Requires-Dist: numpy>=1.24.0
|
|
8
|
+
Requires-Dist: openai>=1.0.0
|
|
9
|
+
Requires-Dist: plotly>=5.18.0
|
|
10
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
11
|
+
Requires-Dist: tiktoken>=0.5.0
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# embedding-visualizer
|
|
2
|
+
|
|
3
|
+
Interactive 2D visualization of text embeddings using OpenAI's embedding API and Plotly.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
cd embedding_visualizer
|
|
9
|
+
uv sync
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Requires an `OPENAI_API_KEY` environment variable.
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
from embedding_visualizer import visualize_embeddings, PrincipalComponent, TextEmbedding
|
|
18
|
+
|
|
19
|
+
docs = [
|
|
20
|
+
{
|
|
21
|
+
"text": "This text will be embedded",
|
|
22
|
+
"label": "optional, groups points in the legend",
|
|
23
|
+
"color": "optional, color for the point/label group",
|
|
24
|
+
"line-id": "optional, connects points with the same id",
|
|
25
|
+
"hover": "optional hover text",
|
|
26
|
+
},
|
|
27
|
+
# ...
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
# t-SNE or PCA projection
|
|
31
|
+
plot = visualize_embeddings(docs=docs, projection="t-sne") # or "pca"
|
|
32
|
+
|
|
33
|
+
# Custom per-axis projection
|
|
34
|
+
plot = visualize_embeddings(
|
|
35
|
+
docs=docs,
|
|
36
|
+
x_projection=PrincipalComponent(1),
|
|
37
|
+
y_projection=TextEmbedding("some text to project onto"),
|
|
38
|
+
title="My Embedding Plot",
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
plot.display() # show in Jupyter or browser
|
|
42
|
+
plot.to_html("plot.html") # self-contained HTML file
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Doc fields
|
|
46
|
+
|
|
47
|
+
| Field | Required | Description |
|
|
48
|
+
|-------|----------|-------------|
|
|
49
|
+
| `text` | yes | Text to embed |
|
|
50
|
+
| `label` | no | Legend group name; points with the same label share a color |
|
|
51
|
+
| `color` | no | Point color. If `label` is set, applies to the whole group |
|
|
52
|
+
| `line-id` | no | Connects points with the same id in document order |
|
|
53
|
+
| `hover` | no | Custom hover text (defaults to first 100 chars of `text`) |
|
|
54
|
+
|
|
55
|
+
## Projections
|
|
56
|
+
|
|
57
|
+
- **`projection="t-sne"`** — t-SNE with cosine metric (default)
|
|
58
|
+
- **`projection="pca"`** — PCA
|
|
59
|
+
- **`PrincipalComponent(n)`** — project onto the nth principal component (1-indexed)
|
|
60
|
+
- **`TextEmbedding("text")`** — cosine similarity with a reference text's embedding
|
|
61
|
+
|
|
62
|
+
## Example
|
|
63
|
+
|
|
64
|
+
The `examples/repo_files.ipynb` notebook embeds every Python file in this repository at multiple truncation points and connects versions of the same file with lines. Open it in Jupyter:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
uv run --with jupyter jupyter lab examples/repo_files.ipynb
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Caching
|
|
71
|
+
|
|
72
|
+
Embeddings are cached to `~/.cache/embedding_visualizer/` so repeated runs don't re-call the API.
|