mailwise 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mailwise-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Petr
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,206 @@
1
+ Metadata-Version: 2.4
2
+ Name: mailwise
3
+ Version: 0.1.0
4
+ Summary: Turn email threads into a searchable knowledge base. Parse EML files, index with embeddings, and use RAG to learn how your best engineers analyze issues.
5
+ Author: Petr
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/PetrGuan/MailWise
8
+ Project-URL: Issues, https://github.com/PetrGuan/MailWise/issues
9
+ Keywords: email,eml,rag,embeddings,issue-tracking,knowledge-base
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Communications :: Email
17
+ Classifier: Topic :: Text Processing :: Indexing
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: sentence-transformers>=2.2.0
22
+ Requires-Dist: numpy>=1.24.0
23
+ Requires-Dist: click>=8.0
24
+ Requires-Dist: pyyaml>=6.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
28
+ Dynamic: license-file
29
+
30
+ # MailWise
31
+
32
+ [![CI](https://github.com/PetrGuan/MailWise/actions/workflows/ci.yml/badge.svg)](https://github.com/PetrGuan/MailWise/actions/workflows/ci.yml)
33
+ [![PyPI version](https://badge.fury.io/py/mailwise.svg)](https://pypi.org/project/mailwise/)
34
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://python.org)
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
36
+
37
+ [English](README.md) | [中文](README.zh-CN.md)
38
+
39
+ Turn email threads into a searchable knowledge base. Parse EML files, index with embeddings, and use RAG to learn how your best engineers analyze issues.
40
+
41
+ ## What it does
42
+
43
+ MailWise reads `.eml` files (exported from Outlook, Thunderbird, etc.), splits email threads into individual replies, and builds a semantic search index. You can then:
44
+
45
+ - **Search** for similar past issues using natural language
46
+ - **Analyze** new issues with RAG — Claude reads how your experts solved similar problems and synthesizes advice
47
+ - **Tag expert engineers** whose replies get boosted in search results and highlighted in output
48
+
49
+ ## Why
50
+
51
+ If your team handles bugs/incidents via email, years of tribal knowledge is buried in threads. MailWise makes that knowledge searchable and actionable.
52
+
53
+ ## Quick start
54
+
55
+ ### Prerequisites
56
+
57
+ - Python 3.10+
58
+ - [Claude Code](https://claude.ai/code) (for the `analyze` command — uses your existing auth, no API key needed)
59
+
60
+ ### Install
61
+
62
+ From PyPI:
63
+
64
+ ```bash
65
+ pip install mailwise
66
+ ```
67
+
68
+ Or from source:
69
+
70
+ ```bash
71
+ git clone https://github.com/PetrGuan/MailWise.git
72
+ cd MailWise
73
+ pip install -e .
74
+ ```
75
+
76
+ ### Configure
77
+
78
+ The easiest way to get started:
79
+
80
+ ```bash
81
+ mailwise init
82
+ ```
83
+
84
+ This will walk you through setting up your EML directory, adding expert engineers, and verifying the setup.
85
+
86
+ Or configure manually:
87
+
88
+ ```bash
89
+ cp config.example.yaml config.yaml
90
+ ```
91
+
92
+ Edit `config.yaml` with your settings:
93
+
94
+ ```yaml
95
+ eml_directory: /path/to/your/eml/files
96
+ database: data/index.db
97
+ markdown_directory: markdown
98
+ embedding_model: all-MiniLM-L6-v2
99
+ expert_boost: 1.5
100
+
101
+ experts:
102
+ - email: senior.dev@company.com
103
+ name: Jane Doe
104
+ ```
105
+
106
+ ### Usage
107
+
108
+ ```bash
109
+ # Index your emails (incremental — only processes new/changed files)
110
+ mailwise index
111
+
112
+ # Search for similar past issues
113
+ mailwise search "sync failure after folder migration"
114
+
115
+ # Search with previews
116
+ mailwise search "calendar not updating" --show-body
117
+
118
+ # Only show expert replies
119
+ mailwise search "deleted emails reappear" --expert-only
120
+
121
+ # Deep analysis — Claude reasons over similar expert threads
122
+ mailwise analyze "User reports emails moved to local folder keep reappearing in Inbox"
123
+
124
+ # View full markdown of a specific email thread
125
+ mailwise show 42
126
+
127
+ # Check index stats
128
+ mailwise stats
129
+ ```
130
+
131
+ ### Managing experts
132
+
133
+ ```bash
134
+ # Add an expert
135
+ mailwise experts add engineer@company.com --name "Jane Doe"
136
+
137
+ # List all experts
138
+ mailwise experts list
139
+
140
+ # Remove an expert
141
+ mailwise experts remove engineer@company.com
142
+ ```
143
+
144
+ ## How it works
145
+
146
+ ```
147
+ EML files → Parser → Markdown + Embeddings → SQLite index
148
+
149
+ Query → Semantic search → Top matches
150
+
151
+ Claude (via RAG) → Expert-informed analysis
152
+ ```
153
+
154
+ 1. **Parse**: EML files are parsed in parallel and threads are split into individual replies using Outlook-style `From:/Sent:` delimiters
155
+ 2. **Clean**: Microsoft SafeLinks are unwrapped, mailto artifacts are removed
156
+ 3. **Markdown**: Each thread becomes a structured markdown file with `[Expert]` tags on replies from your designated engineers
157
+ 4. **Embed**: Each reply is embedded using `all-MiniLM-L6-v2` (runs locally, no API calls)
158
+ 5. **Index**: Embeddings and metadata are stored in SQLite for fast retrieval
159
+ 6. **Search**: Cosine similarity with expert score boosting finds relevant past issues
160
+ 7. **Analyze**: Top matches are fed to Claude (via Claude Code CLI) with a system prompt that focuses on expert reasoning patterns
161
+
162
+ ## Performance
163
+
164
+ Designed for large mailboxes (25K+ emails, 16GB+):
165
+
166
+ | Operation | Performance |
167
+ |---|---|
168
+ | Incremental check (no changes) | ~2-3s for 25K files (stat-based, no file reads) |
169
+ | Full index | ~5-10 min (parallel parsing + batch embedding) |
170
+ | Search query | <100ms (single matrix multiply over 100K+ vectors) |
171
+ | RAG analysis | ~10-20s (retrieval + Claude response) |
172
+
173
+ Key optimizations:
174
+ - **Two-phase change detection**: mtime+size stat check before SHA256 hashing
175
+ - **Parallel EML parsing**: multiprocessing with configurable workers
176
+ - **Batch embedding**: pre-computed offset arrays, no O(n²) lookups
177
+ - **Optimized search**: loads only embedding BLOBs into contiguous numpy array; fetches metadata only for top-k results
178
+ - **SQLite tuning**: WAL journal, 64MB cache, 256MB mmap, batch inserts via `executemany`
179
+
180
+ ## Architecture
181
+
182
+ ```
183
+ src/email_issue_indexer/
184
+ ├── cli.py # Click-based CLI
185
+ ├── parser.py # EML parsing + thread splitting (parallel-safe)
186
+ ├── markdown.py # Markdown conversion with expert tags
187
+ ├── safelinks.py # Microsoft SafeLinks URL cleaning
188
+ ├── embeddings.py # sentence-transformers embeddings + vector search
189
+ ├── store.py # SQLite storage layer (performance-tuned)
190
+ ├── indexer.py # Parallel batch orchestrator with progress tracking
191
+ ├── search.py # Optimized similarity search with expert boosting
192
+ └── rag.py # RAG layer using Claude Code CLI
193
+ ```
194
+
195
+ ## Privacy
196
+
197
+ Indexing and search run entirely on your machine; only the `analyze` command sends data to an external service:
198
+ - Embeddings run on your machine (no data sent to any API for indexing)
199
+ - Email content stays in your local SQLite database and markdown files
200
+ - The `analyze` command sends relevant thread excerpts to Claude — same as chatting in Claude Code
201
+
202
+ Your `config.yaml` file and the `emails/`, `data/`, and `markdown/` directories are gitignored by default. Only `config.example.yaml` (with no real data) is committed. A pre-commit hook (`scripts/install-hooks.sh`) scans for accidental PII leaks.
203
+
204
+ ## License
205
+
206
+ MIT
@@ -0,0 +1,177 @@
1
+ # MailWise
2
+
3
+ [![CI](https://github.com/PetrGuan/MailWise/actions/workflows/ci.yml/badge.svg)](https://github.com/PetrGuan/MailWise/actions/workflows/ci.yml)
4
+ [![PyPI version](https://badge.fury.io/py/mailwise.svg)](https://pypi.org/project/mailwise/)
5
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://python.org)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
7
+
8
+ [English](README.md) | [中文](README.zh-CN.md)
9
+
10
+ Turn email threads into a searchable knowledge base. Parse EML files, index with embeddings, and use RAG to learn how your best engineers analyze issues.
11
+
12
+ ## What it does
13
+
14
+ MailWise reads `.eml` files (exported from Outlook, Thunderbird, etc.), splits email threads into individual replies, and builds a semantic search index. You can then:
15
+
16
+ - **Search** for similar past issues using natural language
17
+ - **Analyze** new issues with RAG — Claude reads how your experts solved similar problems and synthesizes advice
18
+ - **Tag expert engineers** whose replies get boosted in search results and highlighted in output
19
+
20
+ ## Why
21
+
22
+ If your team handles bugs/incidents via email, years of tribal knowledge is buried in threads. MailWise makes that knowledge searchable and actionable.
23
+
24
+ ## Quick start
25
+
26
+ ### Prerequisites
27
+
28
+ - Python 3.10+
29
+ - [Claude Code](https://claude.ai/code) (for the `analyze` command — uses your existing auth, no API key needed)
30
+
31
+ ### Install
32
+
33
+ From PyPI:
34
+
35
+ ```bash
36
+ pip install mailwise
37
+ ```
38
+
39
+ Or from source:
40
+
41
+ ```bash
42
+ git clone https://github.com/PetrGuan/MailWise.git
43
+ cd MailWise
44
+ pip install -e .
45
+ ```
46
+
47
+ ### Configure
48
+
49
+ The easiest way to get started:
50
+
51
+ ```bash
52
+ mailwise init
53
+ ```
54
+
55
+ This will walk you through setting up your EML directory, adding expert engineers, and verifying the setup.
56
+
57
+ Or configure manually:
58
+
59
+ ```bash
60
+ cp config.example.yaml config.yaml
61
+ ```
62
+
63
+ Edit `config.yaml` with your settings:
64
+
65
+ ```yaml
66
+ eml_directory: /path/to/your/eml/files
67
+ database: data/index.db
68
+ markdown_directory: markdown
69
+ embedding_model: all-MiniLM-L6-v2
70
+ expert_boost: 1.5
71
+
72
+ experts:
73
+ - email: senior.dev@company.com
74
+ name: Jane Doe
75
+ ```
76
+
77
+ ### Usage
78
+
79
+ ```bash
80
+ # Index your emails (incremental — only processes new/changed files)
81
+ mailwise index
82
+
83
+ # Search for similar past issues
84
+ mailwise search "sync failure after folder migration"
85
+
86
+ # Search with previews
87
+ mailwise search "calendar not updating" --show-body
88
+
89
+ # Only show expert replies
90
+ mailwise search "deleted emails reappear" --expert-only
91
+
92
+ # Deep analysis — Claude reasons over similar expert threads
93
+ mailwise analyze "User reports emails moved to local folder keep reappearing in Inbox"
94
+
95
+ # View full markdown of a specific email thread
96
+ mailwise show 42
97
+
98
+ # Check index stats
99
+ mailwise stats
100
+ ```
101
+
102
+ ### Managing experts
103
+
104
+ ```bash
105
+ # Add an expert
106
+ mailwise experts add engineer@company.com --name "Jane Doe"
107
+
108
+ # List all experts
109
+ mailwise experts list
110
+
111
+ # Remove an expert
112
+ mailwise experts remove engineer@company.com
113
+ ```
114
+
115
+ ## How it works
116
+
117
+ ```
118
+ EML files → Parser → Markdown + Embeddings → SQLite index
119
+
120
+ Query → Semantic search → Top matches
121
+
122
+ Claude (via RAG) → Expert-informed analysis
123
+ ```
124
+
125
+ 1. **Parse**: EML files are parsed in parallel and threads are split into individual replies using Outlook-style `From:/Sent:` delimiters
126
+ 2. **Clean**: Microsoft SafeLinks are unwrapped, mailto artifacts are removed
127
+ 3. **Markdown**: Each thread becomes a structured markdown file with `[Expert]` tags on replies from your designated engineers
128
+ 4. **Embed**: Each reply is embedded using `all-MiniLM-L6-v2` (runs locally, no API calls)
129
+ 5. **Index**: Embeddings and metadata are stored in SQLite for fast retrieval
130
+ 6. **Search**: Cosine similarity with expert score boosting finds relevant past issues
131
+ 7. **Analyze**: Top matches are fed to Claude (via Claude Code CLI) with a system prompt that focuses on expert reasoning patterns
132
+
133
+ ## Performance
134
+
135
+ Designed for large mailboxes (25K+ emails, 16GB+):
136
+
137
+ | Operation | Performance |
138
+ |---|---|
139
+ | Incremental check (no changes) | ~2-3s for 25K files (stat-based, no file reads) |
140
+ | Full index | ~5-10 min (parallel parsing + batch embedding) |
141
+ | Search query | <100ms (single matrix multiply over 100K+ vectors) |
142
+ | RAG analysis | ~10-20s (retrieval + Claude response) |
143
+
144
+ Key optimizations:
145
+ - **Two-phase change detection**: mtime+size stat check before SHA256 hashing
146
+ - **Parallel EML parsing**: multiprocessing with configurable workers
147
+ - **Batch embedding**: pre-computed offset arrays, no O(n²) lookups
148
+ - **Optimized search**: loads only embedding BLOBs into contiguous numpy array; fetches metadata only for top-k results
149
+ - **SQLite tuning**: WAL journal, 64MB cache, 256MB mmap, batch inserts via `executemany`
150
+
151
+ ## Architecture
152
+
153
+ ```
154
+ src/email_issue_indexer/
155
+ ├── cli.py # Click-based CLI
156
+ ├── parser.py # EML parsing + thread splitting (parallel-safe)
157
+ ├── markdown.py # Markdown conversion with expert tags
158
+ ├── safelinks.py # Microsoft SafeLinks URL cleaning
159
+ ├── embeddings.py # sentence-transformers embeddings + vector search
160
+ ├── store.py # SQLite storage layer (performance-tuned)
161
+ ├── indexer.py # Parallel batch orchestrator with progress tracking
162
+ ├── search.py # Optimized similarity search with expert boosting
163
+ └── rag.py # RAG layer using Claude Code CLI
164
+ ```
165
+
166
+ ## Privacy
167
+
168
+ Indexing and search run entirely on your machine; only the `analyze` command sends data to an external service:
169
+ - Embeddings run on your machine (no data sent to any API for indexing)
170
+ - Email content stays in your local SQLite database and markdown files
171
+ - The `analyze` command sends relevant thread excerpts to Claude — same as chatting in Claude Code
172
+
173
+ Your `config.yaml` file and the `emails/`, `data/`, and `markdown/` directories are gitignored by default. Only `config.example.yaml` (with no real data) is committed. A pre-commit hook (`scripts/install-hooks.sh`) scans for accidental PII leaks.
174
+
175
+ ## License
176
+
177
+ MIT
@@ -0,0 +1,50 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3"]  # SPDX string form of project.license (PEP 639) is rejected by older setuptools
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "mailwise"
7
+ version = "0.1.0"
8
+ description = "Turn email threads into a searchable knowledge base. Parse EML files, index with embeddings, and use RAG to learn how your best engineers analyze issues."
9
+ readme = "README.md"
10
+ license = "MIT"  # SPDX license expression (PEP 639), not the legacy { text = ... } table
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Petr" },
14
+ ]
15
+ keywords = ["email", "eml", "rag", "embeddings", "issue-tracking", "knowledge-base"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Communications :: Email",
24
+ "Topic :: Text Processing :: Indexing",
25
+ ]
26
+ dependencies = [
27
+ "sentence-transformers>=2.2.0",
28
+ "numpy>=1.24.0",
29
+ "click>=8.0",
30
+ "pyyaml>=6.0",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ dev = [  # test tooling only; selected with the "dev" extra
35
+ "pytest>=7.0",
36
+ "pytest-cov>=4.0",
37
+ ]
38
+
39
+ [project.urls]
40
+ Homepage = "https://github.com/PetrGuan/MailWise"
41
+ Issues = "https://github.com/PetrGuan/MailWise/issues"
42
+
43
+ [project.scripts]
44
+ mailwise = "email_issue_indexer.cli:cli"  # import package is email_issue_indexer, not mailwise
45
+
46
+ [tool.setuptools.packages.find]
47
+ where = ["src"]  # src layout: packages are discovered under src/
48
+
49
+ [tool.pytest.ini_options]
50
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,2 @@
1
+ from setuptools import setup
2
+ setup()  # no arguments: project metadata is declared in the [project] table of pyproject.toml
@@ -0,0 +1,2 @@
1
+ """Email Issue Indexer - Parse, index, and search email issue threads."""
2
+ __version__ = "0.1.0"  # same literal as [project].version in pyproject.toml; update both together