mail2task 0.5.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mail2task-0.5.0 → mail2task-0.7.0}/PKG-INFO +19 -1
- {mail2task-0.5.0 → mail2task-0.7.0}/README.md +17 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/pyproject.toml +3 -2
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task/cli.py +6 -2
- mail2task-0.7.0/src/mail2task/email_parser.py +22 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task/extractor.py +17 -19
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task.egg-info/PKG-INFO +19 -1
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task.egg-info/SOURCES.txt +1 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task.egg-info/requires.txt +1 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/setup.cfg +0 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task/__init__.py +0 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task/parser.py +0 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task.egg-info/dependency_links.txt +0 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task.egg-info/entry_points.txt +0 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/src/mail2task.egg-info/top_level.txt +0 -0
- {mail2task-0.5.0 → mail2task-0.7.0}/tests/test_extractor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mail2task
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Extract actionable tasks from emails using Python
|
|
5
5
|
Author: Eby J Kavungal
|
|
6
6
|
Project-URL: Homepage, https://github.com/EbyJK/mail2task
|
|
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
|
|
|
12
12
|
Requires-Python: >=3.9
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
Requires-Dist: dateparser
|
|
15
|
+
Requires-Dist: spacy
|
|
15
16
|
|
|
16
17
|
# mail2task
|
|
17
18
|
|
|
@@ -134,6 +135,23 @@ mail2task sample_email.txt --format markdown
|
|
|
134
135
|
- Priority detection
|
|
135
136
|
- Confidence scoring
|
|
136
137
|
|
|
138
|
+
## NLP Engine
|
|
139
|
+
|
|
140
|
+
mail2task uses spaCy for:
|
|
141
|
+
- sentence segmentation
|
|
142
|
+
- tokenization
|
|
143
|
+
- action verb detection
|
|
144
|
+
- lightweight NLP parsing
|
|
145
|
+
|
|
146
|
+
## `.eml` Email Support
|
|
147
|
+
|
|
148
|
+
mail2task can process raw email files directly.
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
mail2task sample_email.eml --pretty
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
|
|
137
155
|
## Project Structure
|
|
138
156
|
|
|
139
157
|
```text
|
|
@@ -119,6 +119,23 @@ mail2task sample_email.txt --format markdown
|
|
|
119
119
|
- Priority detection
|
|
120
120
|
- Confidence scoring
|
|
121
121
|
|
|
122
|
+
## NLP Engine
|
|
123
|
+
|
|
124
|
+
mail2task uses spaCy for:
|
|
125
|
+
- sentence segmentation
|
|
126
|
+
- tokenization
|
|
127
|
+
- action verb detection
|
|
128
|
+
- lightweight NLP parsing
|
|
129
|
+
|
|
130
|
+
## `.eml` Email Support
|
|
131
|
+
|
|
132
|
+
mail2task can process raw email files directly.
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
mail2task sample_email.eml --pretty
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
|
|
122
139
|
## Project Structure
|
|
123
140
|
|
|
124
141
|
```text
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "mail2task"
|
|
7
|
-
version = "0.5.0"
|
|
7
|
+
version = "0.7.0"
|
|
8
8
|
description = "Extract actionable tasks from emails using Python"
|
|
9
9
|
authors = [
|
|
10
10
|
{ name="Eby J Kavungal" }
|
|
@@ -13,7 +13,8 @@ readme = "README.md"
|
|
|
13
13
|
requires-python = ">=3.9"
|
|
14
14
|
|
|
15
15
|
dependencies = [
|
|
16
|
-
"dateparser"
|
|
16
|
+
"dateparser",
|
|
17
|
+
"spacy"
|
|
17
18
|
]
|
|
18
19
|
|
|
19
20
|
keywords = ["nlp", "tasks", "email", "automation", "productivity"]
|
|
@@ -2,6 +2,7 @@ import argparse
|
|
|
2
2
|
import json
|
|
3
3
|
import csv
|
|
4
4
|
from .extractor import extract_tasks
|
|
5
|
+
from .email_parser import extract_email_body
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
def save_csv(tasks, output_file):
|
|
@@ -57,8 +58,11 @@ def main():
|
|
|
57
58
|
args = parser.parse_args()
|
|
58
59
|
|
|
59
60
|
try:
|
|
60
|
-
|
|
61
|
-
content
|
|
61
|
+
if args.file.endswith(".eml"):
|
|
62
|
+
content=extract_email_body(args.file)
|
|
63
|
+
else:
|
|
64
|
+
with open(args.file, "r", encoding="utf-8") as f:
|
|
65
|
+
content = f.read()
|
|
62
66
|
|
|
63
67
|
tasks = extract_tasks(content)
|
|
64
68
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from email import policy
|
|
2
|
+
from email.parser import BytesParser
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def extract_email_body(file_path: str):
|
|
6
|
+
with open(file_path, "rb") as f:
|
|
7
|
+
msg = BytesParser(policy=policy.default).parse(f)
|
|
8
|
+
|
|
9
|
+
# Prefer plain text body
|
|
10
|
+
if msg.is_multipart():
|
|
11
|
+
|
|
12
|
+
for part in msg.walk():
|
|
13
|
+
|
|
14
|
+
content_type = part.get_content_type()
|
|
15
|
+
|
|
16
|
+
if content_type == "text/plain":
|
|
17
|
+
return part.get_content()
|
|
18
|
+
|
|
19
|
+
else:
|
|
20
|
+
return msg.get_content()
|
|
21
|
+
|
|
22
|
+
return ""
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import dateparser
|
|
3
|
+
import spacy
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
nlp = spacy.load("en_core_web_sm")
|
|
3
7
|
|
|
4
8
|
|
|
5
9
|
ACTION_WORDS = [
|
|
@@ -78,18 +82,20 @@ def extract_due_date(text: str):
|
|
|
78
82
|
return None
|
|
79
83
|
|
|
80
84
|
|
|
81
|
-
def
|
|
82
|
-
|
|
85
|
+
def contains_action_verb(text: str):
|
|
86
|
+
doc = nlp(text)
|
|
83
87
|
|
|
84
|
-
|
|
88
|
+
for token in doc:
|
|
89
|
+
if token.lemma_.lower() in ACTION_WORDS:
|
|
90
|
+
return True
|
|
91
|
+
|
|
92
|
+
return False
|
|
85
93
|
|
|
86
94
|
|
|
87
95
|
def calculate_confidence(text: str, due_date, priority):
|
|
88
96
|
score = 0.4
|
|
89
97
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
if any(word in text_lower for word in ACTION_WORDS):
|
|
98
|
+
if contains_action_verb(text):
|
|
93
99
|
score += 0.25
|
|
94
100
|
|
|
95
101
|
if due_date:
|
|
@@ -106,23 +112,15 @@ def calculate_confidence(text: str, due_date, priority):
|
|
|
106
112
|
return round(min(score, 0.99), 2)
|
|
107
113
|
|
|
108
114
|
|
|
109
|
-
def split_sentences(email_text: str):
|
|
110
|
-
sentences = re.split(r"[.\n]+", email_text)
|
|
111
|
-
|
|
112
|
-
return [
|
|
113
|
-
clean_text(sentence)
|
|
114
|
-
for sentence in sentences
|
|
115
|
-
if clean_text(sentence)
|
|
116
|
-
]
|
|
117
|
-
|
|
118
|
-
|
|
119
115
|
def extract_tasks(email_text: str):
|
|
120
|
-
|
|
116
|
+
doc = nlp(email_text)
|
|
121
117
|
|
|
122
118
|
tasks = []
|
|
123
119
|
seen_titles = set()
|
|
124
120
|
|
|
125
|
-
for
|
|
121
|
+
for sent in doc.sents:
|
|
122
|
+
|
|
123
|
+
sentence = clean_text(sent.text)
|
|
126
124
|
|
|
127
125
|
if should_ignore(sentence):
|
|
128
126
|
continue
|
|
@@ -130,7 +128,7 @@ def extract_tasks(email_text: str):
|
|
|
130
128
|
if len(sentence.split()) < 3:
|
|
131
129
|
continue
|
|
132
130
|
|
|
133
|
-
if not
|
|
131
|
+
if not contains_action_verb(sentence):
|
|
134
132
|
continue
|
|
135
133
|
|
|
136
134
|
title = re.sub(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mail2task
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Extract actionable tasks from emails using Python
|
|
5
5
|
Author: Eby J Kavungal
|
|
6
6
|
Project-URL: Homepage, https://github.com/EbyJK/mail2task
|
|
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
|
|
|
12
12
|
Requires-Python: >=3.9
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
Requires-Dist: dateparser
|
|
15
|
+
Requires-Dist: spacy
|
|
15
16
|
|
|
16
17
|
# mail2task
|
|
17
18
|
|
|
@@ -134,6 +135,23 @@ mail2task sample_email.txt --format markdown
|
|
|
134
135
|
- Priority detection
|
|
135
136
|
- Confidence scoring
|
|
136
137
|
|
|
138
|
+
## NLP Engine
|
|
139
|
+
|
|
140
|
+
mail2task uses spaCy for:
|
|
141
|
+
- sentence segmentation
|
|
142
|
+
- tokenization
|
|
143
|
+
- action verb detection
|
|
144
|
+
- lightweight NLP parsing
|
|
145
|
+
|
|
146
|
+
## `.eml` Email Support
|
|
147
|
+
|
|
148
|
+
mail2task can process raw email files directly.
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
mail2task sample_email.eml --pretty
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
|
|
137
155
|
## Project Structure
|
|
138
156
|
|
|
139
157
|
```text
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|