diffusion-prompt-embedder 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusion_prompt_embedder-0.1.0/.gitignore +13 -0
- diffusion_prompt_embedder-0.1.0/.python-version +1 -0
- diffusion_prompt_embedder-0.1.0/.vscode/settings.json +5 -0
- diffusion_prompt_embedder-0.1.0/PKG-INFO +152 -0
- diffusion_prompt_embedder-0.1.0/README.md +119 -0
- diffusion_prompt_embedder-0.1.0/README.zh-CN.md +119 -0
- diffusion_prompt_embedder-0.1.0/pyproject.toml +94 -0
- diffusion_prompt_embedder-0.1.0/src/diffusion_prompt_embedder/__init__.py +17 -0
- diffusion_prompt_embedder-0.1.0/src/diffusion_prompt_embedder/clip/__init__.py +13 -0
- diffusion_prompt_embedder-0.1.0/src/diffusion_prompt_embedder/clip/tokenization.py +123 -0
- diffusion_prompt_embedder-0.1.0/src/diffusion_prompt_embedder/core/__init__.py +23 -0
- diffusion_prompt_embedder-0.1.0/src/diffusion_prompt_embedder/core/embedding.py +309 -0
- diffusion_prompt_embedder-0.1.0/src/diffusion_prompt_embedder/core/parser.py +178 -0
- diffusion_prompt_embedder-0.1.0/src/diffusion_prompt_embedder/py.typed +0 -0
- diffusion_prompt_embedder-0.1.0/tests/test_prompt_parser.py +235 -0
- diffusion_prompt_embedder-0.1.0/uv.lock +949 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12.6
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diffusion-prompt-embedder
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python library for parsing and processing prompts with support for embedding and tokenization
|
|
5
|
+
Project-URL: Homepage, https://github.com/jannchie/diffusion-prompt-embedder
|
|
6
|
+
Project-URL: Bug Tracker, https://github.com/jannchie/diffusion-prompt-embedder/issues
|
|
7
|
+
Project-URL: Documentation, https://github.com/jannchie/diffusion-prompt-embedder#readme
|
|
8
|
+
Author-email: Jianqi Pan <jannchie@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
Keywords: ai,embedding,nlp,prompt,tokenization
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Provides-Extra: all
|
|
21
|
+
Requires-Dist: torch>=2.0.0; extra == 'all'
|
|
22
|
+
Requires-Dist: transformers>=4.51.3; extra == 'all'
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest-cov>=6.1.1; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest>=8.3.5; extra == 'dev'
|
|
26
|
+
Requires-Dist: torch>=2.0.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: transformers>=4.51.3; extra == 'dev'
|
|
28
|
+
Provides-Extra: torch
|
|
29
|
+
Requires-Dist: torch>=2.0.0; extra == 'torch'
|
|
30
|
+
Provides-Extra: transformers
|
|
31
|
+
Requires-Dist: transformers>=4.51.3; extra == 'transformers'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# Diffusion Prompt Embedder
|
|
35
|
+
|
|
36
|
+
[](https://pypi.org/project/diffusion-prompt-embedder/)
|
|
37
|
+
[](https://pypi.org/project/diffusion-prompt-embedder/)
|
|
38
|
+
[](https://opensource.org/licenses/MIT)
|
|
39
|
+
[](https://github.com/jannchie/diffusion-prompt-embedder)
|
|
40
|
+
|
|
41
|
+
A Python library specialized for parsing and processing weighted prompt text, supporting embedding generation and tokenization to enhance text processing for AI models like Stable Diffusion. It's compatible with SD Web UI's weighted prompts but doesn't include scheduling.
|
|
42
|
+
|
|
43
|
+
## Features
|
|
44
|
+
|
|
45
|
+
- 💬 **Prompt Parsing**: Parse text prompts with weight markers (e.g., `a (cat:1.5) in the garden`)
|
|
46
|
+
- 🔢 **Weight Management**: Support for increasing weight with `(text)` and decreasing weight with `[text]` syntax
|
|
47
|
+
- 📚 **CLIP Integration**: Seamless integration with CLIP text models for embedding generation
|
|
48
|
+
- 🔄 **Batch Processing**: Efficiently process batches of multiple prompts
|
|
49
|
+
- 🪄 **Long Text Support**: Handle prompts that exceed standard CLIP context length
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
Install the base library using pip:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install diffusion-prompt-embedder
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Usage Examples
|
|
60
|
+
|
|
61
|
+
### Parse Weighted Prompts
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from diffusion_prompt_embedder import parse_prompt_attention
|
|
65
|
+
|
|
66
|
+
# Basic parsing
|
|
67
|
+
result = parse_prompt_attention("a (cat:1.5) in the garden")
|
|
68
|
+
print(result) # [['a ', 1.0], ['cat', 1.5], [' in the garden', 1.0]]
|
|
69
|
+
|
|
70
|
+
# Using brackets to lower weight
|
|
71
|
+
result = parse_prompt_attention("a [cat] in the garden")
|
|
72
|
+
print(result) # [['a ', 1.0], ['cat', 0.9090909090909091], [' in the garden', 1.0]]
|
|
73
|
+
|
|
74
|
+
# Complex prompt example
|
|
75
|
+
result = parse_prompt_attention("a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).")
|
|
76
|
+
print(result)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Generate CLIP Embeddings
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import torch
|
|
83
|
+
from transformers import CLIPTokenizer, CLIPTextModel
|
|
84
|
+
from diffusion_prompt_embedder import get_embeddings_sd15
|
|
85
|
+
|
|
86
|
+
# Initialize CLIP model
|
|
87
|
+
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
|
|
88
|
+
text_encoder = CLIPTextModel.from_pretrained(
|
|
89
|
+
"openai/clip-vit-large-patch14",
|
|
90
|
+
torch_dtype=torch.float16
|
|
91
|
+
).to("cuda")
|
|
92
|
+
|
|
93
|
+
# Generate embeddings
|
|
94
|
+
prompt_embeds, neg_prompt_embeds = get_embeddings_sd15(
|
|
95
|
+
tokenizer=tokenizer,
|
|
96
|
+
text_encoder=text_encoder,
|
|
97
|
+
prompt="a (white:1.2) cat",
|
|
98
|
+
neg_prompt="blur, bad quality",
|
|
99
|
+
clip_skip=1 # Optional: skip layers in CLIP model
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
# Batch processing multiple prompts
|
|
103
|
+
from diffusion_prompt_embedder import get_embeddings_sd_15_batch
|
|
104
|
+
|
|
105
|
+
batch_embeds = get_embeddings_sd_15_batch(
|
|
106
|
+
tokenizer=tokenizer,
|
|
107
|
+
text_encoder=text_encoder,
|
|
108
|
+
prompts=["a (white:1.2) cat", "a (blue:1.4) dog", "a red bird"]
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Prompt Syntax
|
|
113
|
+
|
|
114
|
+
### Basic Weight Syntax
|
|
115
|
+
|
|
116
|
+
- `(text)` - Increases the prompt weight by 1.1x
|
|
117
|
+
- `(text:1.5)` - Sets the prompt weight to 1.5
|
|
118
|
+
- `[text]` - Decreases the prompt weight to 1/1.1 of original
|
|
119
|
+
- `\( \[ \) \]` - Use backslash to escape bracket characters
|
|
120
|
+
|
|
121
|
+
### BREAK Syntax
|
|
122
|
+
|
|
123
|
+
Use the `BREAK` keyword to create breakpoints in prompts:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
result = parse_prompt_attention("text1 BREAK text2")
|
|
127
|
+
# Result: [["text1", 1.0], ["BREAK", -1], ["text2", 1.0]]
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Development
|
|
131
|
+
|
|
132
|
+
Clone the repository and install development dependencies:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
git clone https://github.com/jannchie/diffusion-prompt-embedder.git
|
|
136
|
+
cd diffusion-prompt-embedder
|
|
137
|
+
pip install -e ".[dev]"
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Run tests:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
pytest
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## License
|
|
147
|
+
|
|
148
|
+
[MIT](https://opensource.org/licenses/MIT)
|
|
149
|
+
|
|
150
|
+
## Author
|
|
151
|
+
|
|
152
|
+
- Jianqi Pan ([@jannchie](https://github.com/jannchie))
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Diffusion Prompt Embedder
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/diffusion-prompt-embedder/)
|
|
4
|
+
[](https://pypi.org/project/diffusion-prompt-embedder/)
|
|
5
|
+
[](https://opensource.org/licenses/MIT)
|
|
6
|
+
[](https://github.com/jannchie/diffusion-prompt-embedder)
|
|
7
|
+
|
|
8
|
+
A Python library specialized for parsing and processing weighted prompt text, supporting embedding generation and tokenization to enhance text processing for AI models like Stable Diffusion. It's compatible with SD Web UI's weighted prompts but doesn't include scheduling.
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
- 💬 **Prompt Parsing**: Parse text prompts with weight markers (e.g., `a (cat:1.5) in the garden`)
|
|
13
|
+
- 🔢 **Weight Management**: Support for increasing weight with `(text)` and decreasing weight with `[text]` syntax
|
|
14
|
+
- 📚 **CLIP Integration**: Seamless integration with CLIP text models for embedding generation
|
|
15
|
+
- 🔄 **Batch Processing**: Efficiently process batches of multiple prompts
|
|
16
|
+
- 🪄 **Long Text Support**: Handle prompts that exceed standard CLIP context length
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
Install the base library using pip:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install diffusion-prompt-embedder
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Usage Examples
|
|
27
|
+
|
|
28
|
+
### Parse Weighted Prompts
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from diffusion_prompt_embedder import parse_prompt_attention
|
|
32
|
+
|
|
33
|
+
# Basic parsing
|
|
34
|
+
result = parse_prompt_attention("a (cat:1.5) in the garden")
|
|
35
|
+
print(result) # [['a ', 1.0], ['cat', 1.5], [' in the garden', 1.0]]
|
|
36
|
+
|
|
37
|
+
# Using brackets to lower weight
|
|
38
|
+
result = parse_prompt_attention("a [cat] in the garden")
|
|
39
|
+
print(result) # [['a ', 1.0], ['cat', 0.9090909090909091], [' in the garden', 1.0]]
|
|
40
|
+
|
|
41
|
+
# Complex prompt example
|
|
42
|
+
result = parse_prompt_attention("a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).")
|
|
43
|
+
print(result)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Generate CLIP Embeddings
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
import torch
|
|
50
|
+
from transformers import CLIPTokenizer, CLIPTextModel
|
|
51
|
+
from diffusion_prompt_embedder import get_embeddings_sd15
|
|
52
|
+
|
|
53
|
+
# Initialize CLIP model
|
|
54
|
+
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
|
|
55
|
+
text_encoder = CLIPTextModel.from_pretrained(
|
|
56
|
+
"openai/clip-vit-large-patch14",
|
|
57
|
+
torch_dtype=torch.float16
|
|
58
|
+
).to("cuda")
|
|
59
|
+
|
|
60
|
+
# Generate embeddings
|
|
61
|
+
prompt_embeds, neg_prompt_embeds = get_embeddings_sd15(
|
|
62
|
+
tokenizer=tokenizer,
|
|
63
|
+
text_encoder=text_encoder,
|
|
64
|
+
prompt="a (white:1.2) cat",
|
|
65
|
+
neg_prompt="blur, bad quality",
|
|
66
|
+
clip_skip=1 # Optional: skip layers in CLIP model
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# Batch processing multiple prompts
|
|
70
|
+
from diffusion_prompt_embedder import get_embeddings_sd_15_batch
|
|
71
|
+
|
|
72
|
+
batch_embeds = get_embeddings_sd_15_batch(
|
|
73
|
+
tokenizer=tokenizer,
|
|
74
|
+
text_encoder=text_encoder,
|
|
75
|
+
prompts=["a (white:1.2) cat", "a (blue:1.4) dog", "a red bird"]
|
|
76
|
+
)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Prompt Syntax
|
|
80
|
+
|
|
81
|
+
### Basic Weight Syntax
|
|
82
|
+
|
|
83
|
+
- `(text)` - Increases the prompt weight by 1.1x
|
|
84
|
+
- `(text:1.5)` - Sets the prompt weight to 1.5
|
|
85
|
+
- `[text]` - Decreases the prompt weight to 1/1.1 of original
|
|
86
|
+
- `\( \[ \) \]` - Use backslash to escape bracket characters
|
|
87
|
+
|
|
88
|
+
### BREAK Syntax
|
|
89
|
+
|
|
90
|
+
Use the `BREAK` keyword to create breakpoints in prompts:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
result = parse_prompt_attention("text1 BREAK text2")
|
|
94
|
+
# Result: [["text1", 1.0], ["BREAK", -1], ["text2", 1.0]]
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Development
|
|
98
|
+
|
|
99
|
+
Clone the repository and install development dependencies:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
git clone https://github.com/jannchie/diffusion-prompt-embedder.git
|
|
103
|
+
cd diffusion-prompt-embedder
|
|
104
|
+
pip install -e ".[dev]"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Run tests:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pytest
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## License
|
|
114
|
+
|
|
115
|
+
[MIT](https://opensource.org/licenses/MIT)
|
|
116
|
+
|
|
117
|
+
## Author
|
|
118
|
+
|
|
119
|
+
- Jianqi Pan ([@jannchie](https://github.com/jannchie))
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Diffusion Prompt Embedder
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/diffusion-prompt-embedder/)
|
|
4
|
+
[](https://pypi.org/project/diffusion-prompt-embedder/)
|
|
5
|
+
[](https://opensource.org/licenses/MIT)
|
|
6
|
+
[](https://github.com/jannchie/diffusion-prompt-embedder)
|
|
7
|
+
|
|
8
|
+
一个专门用于解析和处理带有权重的提示文本的 Python 库,支持嵌入生成和标记化,为 Stable Diffusion 等 AI 模型提供增强的文本处理能力。它兼容 SD Web UI 的权重提示,但不包括调度部分。
|
|
9
|
+
|
|
10
|
+
## 特性
|
|
11
|
+
|
|
12
|
+
- 💬 **提示解析**: 解析带有权重标记的文本提示(例如 `a (cat:1.5) in the garden`)
|
|
13
|
+
- 🔢 **权重管理**: 支持使用 `(text)` 提高权重、使用 `[text]` 降低权重的语法
|
|
14
|
+
- 📚 **CLIP 集成**: 无缝集成 CLIP 文本模型进行嵌入生成
|
|
15
|
+
- 🔄 **批处理支持**: 高效处理多个提示的批处理
|
|
16
|
+
- 🪄 **长文本处理**: 支持超出 CLIP 标准上下文长度的长提示
|
|
17
|
+
|
|
18
|
+
## 安装
|
|
19
|
+
|
|
20
|
+
使用 pip 安装基础库:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install diffusion-prompt-embedder
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## 使用示例
|
|
27
|
+
|
|
28
|
+
### 解析带有权重的提示
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from diffusion_prompt_embedder import parse_prompt_attention
|
|
32
|
+
|
|
33
|
+
# 基本解析
|
|
34
|
+
result = parse_prompt_attention("a (cat:1.5) in the garden")
|
|
35
|
+
print(result) # [['a ', 1.0], ['cat', 1.5], [' in the garden', 1.0]]
|
|
36
|
+
|
|
37
|
+
# 使用方括号降低权重
|
|
38
|
+
result = parse_prompt_attention("a [cat] in the garden")
|
|
39
|
+
print(result) # [['a ', 1.0], ['cat', 0.9090909090909091], [' in the garden', 1.0]]
|
|
40
|
+
|
|
41
|
+
# 复杂提示示例
|
|
42
|
+
result = parse_prompt_attention("a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).")
|
|
43
|
+
print(result)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### 生成 CLIP 嵌入
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
import torch
|
|
50
|
+
from transformers import CLIPTokenizer, CLIPTextModel
|
|
51
|
+
from diffusion_prompt_embedder import get_embeddings_sd15
|
|
52
|
+
|
|
53
|
+
# 初始化 CLIP 模型
|
|
54
|
+
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
|
|
55
|
+
text_encoder = CLIPTextModel.from_pretrained(
|
|
56
|
+
"openai/clip-vit-large-patch14",
|
|
57
|
+
torch_dtype=torch.float16
|
|
58
|
+
).to("cuda")
|
|
59
|
+
|
|
60
|
+
# 生成嵌入
|
|
61
|
+
prompt_embeds, neg_prompt_embeds = get_embeddings_sd15(
|
|
62
|
+
tokenizer=tokenizer,
|
|
63
|
+
text_encoder=text_encoder,
|
|
64
|
+
prompt="a (white:1.2) cat",
|
|
65
|
+
neg_prompt="blur, bad quality",
|
|
66
|
+
clip_skip=1 # 可选:跳过 CLIP 模型中的层
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# 批处理多个提示
|
|
70
|
+
from diffusion_prompt_embedder import get_embeddings_sd_15_batch
|
|
71
|
+
|
|
72
|
+
batch_embeds = get_embeddings_sd_15_batch(
|
|
73
|
+
tokenizer=tokenizer,
|
|
74
|
+
text_encoder=text_encoder,
|
|
75
|
+
prompts=["a (white:1.2) cat", "a (blue:1.4) dog", "a red bird"]
|
|
76
|
+
)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## 提示语法
|
|
80
|
+
|
|
81
|
+
### 基本权重语法
|
|
82
|
+
|
|
83
|
+
- `(text)` - 将提示的权重提升 1.1 倍
|
|
84
|
+
- `(text:1.5)` - 将提示的权重设置为 1.5
|
|
85
|
+
- `[text]` - 将提示的权重降低为原来的 1/1.1
|
|
86
|
+
- `\( \[ \) \]` - 使用反斜杠转义括号字符
|
|
87
|
+
|
|
88
|
+
### BREAK 语法
|
|
89
|
+
|
|
90
|
+
使用 `BREAK` 关键字在提示中创建断点:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
result = parse_prompt_attention("text1 BREAK text2")
|
|
94
|
+
# 结果: [["text1", 1.0], ["BREAK", -1], ["text2", 1.0]]
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## 开发
|
|
98
|
+
|
|
99
|
+
克隆仓库并安装开发依赖:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
git clone https://github.com/jannchie/diffusion-prompt-embedder.git
|
|
103
|
+
cd diffusion-prompt-embedder
|
|
104
|
+
pip install -e ".[dev]"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
运行测试:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pytest
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## 许可证
|
|
114
|
+
|
|
115
|
+
[MIT](https://opensource.org/licenses/MIT)
|
|
116
|
+
|
|
117
|
+
## 作者
|
|
118
|
+
|
|
119
|
+
- Jianqi Pan ([@jannchie](https://github.com/jannchie))
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "diffusion-prompt-embedder"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A Python library for parsing and processing prompts with support for embedding and tokenization"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [{ name = "Jianqi Pan", email = "jannchie@gmail.com" }]
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
license = { text = "MIT" }
|
|
9
|
+
keywords = ["nlp", "prompt", "tokenization", "embedding", "ai"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3.10",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
19
|
+
]
|
|
20
|
+
dependencies = []
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
"Homepage" = "https://github.com/jannchie/diffusion-prompt-embedder"
|
|
24
|
+
"Bug Tracker" = "https://github.com/jannchie/diffusion-prompt-embedder/issues"
|
|
25
|
+
"Documentation" = "https://github.com/jannchie/diffusion-prompt-embedder#readme"
|
|
26
|
+
|
|
27
|
+
[build-system]
|
|
28
|
+
requires = ["hatchling"]
|
|
29
|
+
build-backend = "hatchling.build"
|
|
30
|
+
|
|
31
|
+
[tool.hatch.build.targets.wheel]
|
|
32
|
+
packages = ["src/diffusion_prompt_embedder"]
|
|
33
|
+
|
|
34
|
+
[tool.ruff]
|
|
35
|
+
line-length = 300
|
|
36
|
+
select = ["ALL"]
|
|
37
|
+
|
|
38
|
+
ignore = [
|
|
39
|
+
"PGH",
|
|
40
|
+
"RUF003",
|
|
41
|
+
"BLE001",
|
|
42
|
+
"ERA001",
|
|
43
|
+
"FIX002",
|
|
44
|
+
"TD002",
|
|
45
|
+
"TD003",
|
|
46
|
+
"D",
|
|
47
|
+
"PLR2004",
|
|
48
|
+
"INP001",
|
|
49
|
+
"N812",
|
|
50
|
+
"FBT003",
|
|
51
|
+
"S311",
|
|
52
|
+
"ANN401",
|
|
53
|
+
"S105",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
[tool.ruff.per-file-ignores]
|
|
57
|
+
"tests/**/*.py" = ["S101"]
|
|
58
|
+
|
|
59
|
+
[project.optional-dependencies]
|
|
60
|
+
torch = ["torch>=2.0.0"]
|
|
61
|
+
transformers = ["transformers>=4.51.3"]
|
|
62
|
+
all = ["torch>=2.0.0", "transformers>=4.51.3"]
|
|
63
|
+
dev = [
|
|
64
|
+
"pytest>=8.3.5",
|
|
65
|
+
"pytest-cov>=6.1.1",
|
|
66
|
+
"torch>=2.0.0",
|
|
67
|
+
"transformers>=4.51.3",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
[dependency-groups]
|
|
71
|
+
dev = [
|
|
72
|
+
"pytest>=8.3.5",
|
|
73
|
+
"pytest-cov>=6.1.1",
|
|
74
|
+
"torch>=2.0.0",
|
|
75
|
+
"transformers>=4.51.3",
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
[tool.pytest.ini_options]
|
|
79
|
+
addopts = "--cov=diffusion_prompt_embedder --cov-report=term --cov-report=xml --tb=short"
|
|
80
|
+
testpaths = ["tests"]
|
|
81
|
+
|
|
82
|
+
[tool.coverage.run]
|
|
83
|
+
source = ["diffusion_prompt_embedder"]
|
|
84
|
+
omit = ["*/__pycache__/*", "*/tests/*"]
|
|
85
|
+
|
|
86
|
+
[tool.coverage.report]
|
|
87
|
+
exclude_lines = [
|
|
88
|
+
"pragma: no cover",
|
|
89
|
+
"def __repr__",
|
|
90
|
+
"raise NotImplementedError",
|
|
91
|
+
"if __name__ == .__main__.:",
|
|
92
|
+
"pass",
|
|
93
|
+
"raise ImportError",
|
|
94
|
+
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
diffusion_prompt_embedder: A library for parsing and processing text prompts with attention weights.
|
|
3
|
+
|
|
4
|
+
This package provides tools for parsing text prompts with attention weights syntax,
|
|
5
|
+
tokenizing prompts, and generating embeddings for use with Stable Diffusion models.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from diffusion_prompt_embedder.core.embedding import get_embeddings_sd15, get_embeddings_sd_15_batch
|
|
11
|
+
from diffusion_prompt_embedder.core.parser import parse_prompt_attention
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"get_embeddings_sd15",
|
|
15
|
+
"get_embeddings_sd_15_batch",
|
|
16
|
+
"parse_prompt_attention",
|
|
17
|
+
]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLIP model functionality for embedding generation.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from diffusion_prompt_embedder.clip.tokenization import (
|
|
6
|
+
get_prompts_tokens_with_weights,
|
|
7
|
+
group_tokens_and_weights,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"get_prompts_tokens_with_weights",
|
|
12
|
+
"group_tokens_and_weights",
|
|
13
|
+
]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from transformers import CLIPTokenizer
|
|
2
|
+
|
|
3
|
+
from diffusion_prompt_embedder.core.parser import parse_prompt_attention
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def group_tokens_and_weights(
    token_ids: list[int],
    weights: list[float],
    *,
    pad_last_block: bool = True,
) -> tuple[list[list[int]], list[list[float]]]:
    """
    Group token IDs and weights into CLIP-compatible chunks of 77 entries.

    The tokens are split into runs of 75 content tokens. Every run is wrapped
    in a BOS token (49406) and an EOS token (49407), whose weights are always
    1.0. A trailing run shorter than 75 tokens is emitted as a final chunk
    and, when ``pad_last_block`` is true, filled up to 77 entries with EOS
    tokens of weight 1.0.

    The input lists are only read; they are never modified.

    Args:
        token_ids (list): Token IDs generated from the CLIP tokenizer
            (without BOS/EOS).
        weights (list): Weight for each token; expected to have the same
            length as ``token_ids``.
        pad_last_block (bool): Whether to pad the last, incomplete chunk with
            EOS tokens. When false, the last chunk only contains
            BOS + remaining tokens + EOS and is shorter than 77 entries.

    Returns:
        tuple: A tuple containing:
            - list[list[int]]: Grouped token IDs, one sublist per chunk
            - list[list[float]]: Grouped weights matching the token ID structure

        Both lists are empty when ``token_ids`` is empty.

    Example:
        token_groups, weight_groups = group_tokens_and_weights(
            token_ids=token_id_list,
            weights=token_weight_list
        )
    """
    # Beginning-of-sequence and end-of-sequence token IDs
    bos, eos = 49406, 49407
    # Content tokens per chunk; BOS and EOS bring a full chunk to 77 entries
    chunk_size = 75

    new_token_ids: list[list[int]] = []
    new_weights: list[list[float]] = []

    # Number of leading tokens that fill complete chunks
    full_len = len(token_ids) - len(token_ids) % chunk_size

    # Slice instead of popping from the front: this keeps the caller's lists
    # intact and avoids the O(n) cost of every list.pop(0)
    for start in range(0, full_len, chunk_size):
        stop = start + chunk_size
        new_token_ids.append([bos, *token_ids[start:stop], eos])
        new_weights.append([1.0, *weights[start:stop], 1.0])

    # Whatever is left over forms the last (possibly padded) chunk
    tail_tokens = token_ids[full_len:]
    if tail_tokens:
        tail_weights = weights[full_len:]
        padding_len = chunk_size - len(tail_tokens) if pad_last_block else 0

        new_token_ids.append([bos, *tail_tokens, *([eos] * padding_len), eos])
        new_weights.append([1.0, *tail_weights, *([1.0] * padding_len), 1.0])

    return new_token_ids, new_weights
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def get_prompts_tokens_with_weights(
    clip_tokenizer: CLIPTokenizer,
    prompt: str | None,
) -> tuple[list[int], list[float]]:
    """
    Tokenize a prompt with attention weights into token IDs and per-token weights.

    The prompt is split by ``parse_prompt_attention`` into (text, weight)
    pairs. Each text piece is tokenized on its own, without truncation, and
    the first and last IDs returned by the tokenizer (BOS/EOS) are dropped.
    Every remaining token receives the weight of the piece it came from.

    Args:
        clip_tokenizer (CLIPTokenizer): The CLIP tokenizer instance
        prompt (str | None): A prompt string with optional weights in parentheses,
            e.g. "a (cat:1.2) in the garden". If None or empty, the literal
            prompt "empty" is used instead.

    Returns:
        tuple: A tuple containing:
            - list[int]: List of token IDs (without BOS/EOS)
            - list[float]: List of weights corresponding to each token

    Example:
        token_id_list, token_weight_list = get_prompts_tokens_with_weights(
            clip_tokenizer=clip_tokenizer,
            prompt="a (red:1.5) cat"
        )
    """
    # Use "empty" as default if prompt is None or empty
    if not prompt:
        prompt = "empty"

    text_tokens: list[int] = []
    text_weights: list[float] = []

    # NOTE(review): the README documents that parse_prompt_attention emits
    # ["BREAK", -1] markers; they are tokenized here like ordinary text and
    # carry weight -1 -- confirm that downstream code expects this.
    for word, weight in parse_prompt_attention(prompt):
        # Tokenize the text piece and strip BOS/EOS (positions 0 and -1);
        # truncation is disabled so prompts of any length are kept in full
        token = clip_tokenizer(word, truncation=False).input_ids[1:-1]

        # Grow the result lists in place instead of rebuilding them on every
        # iteration, which would be quadratic in the prompt length
        text_tokens.extend(token)
        text_weights.extend([weight] * len(token))

    return text_tokens, text_weights
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core prompt parsing and embedding functionality.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from diffusion_prompt_embedder.core.embedding import (
|
|
6
|
+
get_embeddings_sd15,
|
|
7
|
+
get_embeddings_sd_15_batch,
|
|
8
|
+
)
|
|
9
|
+
from diffusion_prompt_embedder.core.parser import (
|
|
10
|
+
apply_multiplier_to_range,
|
|
11
|
+
merge_identical_weights,
|
|
12
|
+
parse_prompt_attention,
|
|
13
|
+
process_text_token,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"apply_multiplier_to_range",
|
|
18
|
+
"get_embeddings_sd15",
|
|
19
|
+
"get_embeddings_sd_15_batch",
|
|
20
|
+
"merge_identical_weights",
|
|
21
|
+
"parse_prompt_attention",
|
|
22
|
+
"process_text_token",
|
|
23
|
+
]
|