mail2task 0.4.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mail2task-0.4.0 → mail2task-0.6.0}/PKG-INFO +17 -1
- {mail2task-0.4.0 → mail2task-0.6.0}/README.md +15 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/pyproject.toml +3 -2
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task/extractor.py +50 -22
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task.egg-info/PKG-INFO +17 -1
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task.egg-info/requires.txt +1 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/setup.cfg +0 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task/__init__.py +0 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task/cli.py +0 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task/parser.py +0 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task.egg-info/SOURCES.txt +0 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task.egg-info/dependency_links.txt +0 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task.egg-info/entry_points.txt +0 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/src/mail2task.egg-info/top_level.txt +0 -0
- {mail2task-0.4.0 → mail2task-0.6.0}/tests/test_extractor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mail2task
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Extract actionable tasks from emails using Python
|
|
5
5
|
Author: Eby J Kavungal
|
|
6
6
|
Project-URL: Homepage, https://github.com/EbyJK/mail2task
|
|
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
|
|
|
12
12
|
Requires-Python: >=3.9
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
Requires-Dist: dateparser
|
|
15
|
+
Requires-Dist: spacy
|
|
15
16
|
|
|
16
17
|
# mail2task
|
|
17
18
|
|
|
@@ -126,6 +127,21 @@ mail2task sample_email.txt --format markdown
|
|
|
126
127
|
- Task management systems
|
|
127
128
|
|
|
128
129
|
---
|
|
130
|
+
## NLP Features
|
|
131
|
+
|
|
132
|
+
- Sentence splitting
|
|
133
|
+
- Greeting/signature filtering
|
|
134
|
+
- Due date extraction
|
|
135
|
+
- Priority detection
|
|
136
|
+
- Confidence scoring
|
|
137
|
+
|
|
138
|
+
## NLP Engine
|
|
139
|
+
|
|
140
|
+
mail2task uses spaCy for:
|
|
141
|
+
- sentence segmentation
|
|
142
|
+
- tokenization
|
|
143
|
+
- action verb detection
|
|
144
|
+
- lightweight NLP parsing
|
|
129
145
|
|
|
130
146
|
## Project Structure
|
|
131
147
|
|
|
@@ -111,6 +111,21 @@ mail2task sample_email.txt --format markdown
|
|
|
111
111
|
- Task management systems
|
|
112
112
|
|
|
113
113
|
---
|
|
114
|
+
## NLP Features
|
|
115
|
+
|
|
116
|
+
- Sentence splitting
|
|
117
|
+
- Greeting/signature filtering
|
|
118
|
+
- Due date extraction
|
|
119
|
+
- Priority detection
|
|
120
|
+
- Confidence scoring
|
|
121
|
+
|
|
122
|
+
## NLP Engine
|
|
123
|
+
|
|
124
|
+
mail2task uses spaCy for:
|
|
125
|
+
- sentence segmentation
|
|
126
|
+
- tokenization
|
|
127
|
+
- action verb detection
|
|
128
|
+
- lightweight NLP parsing
|
|
114
129
|
|
|
115
130
|
## Project Structure
|
|
116
131
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "mail2task"
|
|
7
|
-
version = "0.4.0"
|
|
7
|
+
version = "0.6.0"
|
|
8
8
|
description = "Extract actionable tasks from emails using Python"
|
|
9
9
|
authors = [
|
|
10
10
|
{ name="Eby J Kavungal" }
|
|
@@ -13,7 +13,8 @@ readme = "README.md"
|
|
|
13
13
|
requires-python = ">=3.9"
|
|
14
14
|
|
|
15
15
|
dependencies = [
|
|
16
|
-
"dateparser"
|
|
16
|
+
"dateparser",
|
|
17
|
+
"spacy"
|
|
17
18
|
]
|
|
18
19
|
|
|
19
20
|
keywords = ["nlp", "tasks", "email", "automation", "productivity"]
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import dateparser
|
|
3
|
+
import spacy
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
nlp = spacy.load("en_core_web_sm")
|
|
3
7
|
|
|
4
8
|
|
|
5
9
|
ACTION_WORDS = [
|
|
@@ -15,7 +19,10 @@ ACTION_WORDS = [
|
|
|
15
19
|
"email",
|
|
16
20
|
"organize",
|
|
17
21
|
"create",
|
|
18
|
-
"fix"
|
|
22
|
+
"fix",
|
|
23
|
+
"approve",
|
|
24
|
+
"confirm",
|
|
25
|
+
"deliver"
|
|
19
26
|
]
|
|
20
27
|
|
|
21
28
|
|
|
@@ -26,8 +33,30 @@ PRIORITY_KEYWORDS = {
|
|
|
26
33
|
}
|
|
27
34
|
|
|
28
35
|
|
|
36
|
+
IGNORE_PATTERNS = [
|
|
37
|
+
r"^hi\b",
|
|
38
|
+
r"^hello\b",
|
|
39
|
+
r"^thanks\b",
|
|
40
|
+
r"^thank you\b",
|
|
41
|
+
r"^regards\b",
|
|
42
|
+
r"^best\b",
|
|
43
|
+
r"^sincerely\b"
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
|
|
29
47
|
def clean_text(text: str):
|
|
30
|
-
|
|
48
|
+
text = re.sub(r"\s+", " ", text)
|
|
49
|
+
return text.strip()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def should_ignore(text: str):
|
|
53
|
+
text_lower = text.lower()
|
|
54
|
+
|
|
55
|
+
for pattern in IGNORE_PATTERNS:
|
|
56
|
+
if re.search(pattern, text_lower):
|
|
57
|
+
return True
|
|
58
|
+
|
|
59
|
+
return False
|
|
31
60
|
|
|
32
61
|
|
|
33
62
|
def detect_priority(text: str):
|
|
@@ -53,30 +82,28 @@ def extract_due_date(text: str):
|
|
|
53
82
|
return None
|
|
54
83
|
|
|
55
84
|
|
|
56
|
-
def
|
|
57
|
-
|
|
85
|
+
def contains_action_verb(text: str):
|
|
86
|
+
doc = nlp(text)
|
|
87
|
+
|
|
88
|
+
for token in doc:
|
|
89
|
+
if token.lemma_.lower() in ACTION_WORDS:
|
|
90
|
+
return True
|
|
58
91
|
|
|
59
|
-
return
|
|
92
|
+
return False
|
|
60
93
|
|
|
61
94
|
|
|
62
95
|
def calculate_confidence(text: str, due_date, priority):
|
|
63
96
|
score = 0.4
|
|
64
97
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
# Action word boost
|
|
68
|
-
if any(word in text_lower for word in ACTION_WORDS):
|
|
98
|
+
if contains_action_verb(text):
|
|
69
99
|
score += 0.25
|
|
70
100
|
|
|
71
|
-
# Due date boost
|
|
72
101
|
if due_date:
|
|
73
102
|
score += 0.2
|
|
74
103
|
|
|
75
|
-
# Priority boost
|
|
76
104
|
if priority != "normal":
|
|
77
105
|
score += 0.1
|
|
78
106
|
|
|
79
|
-
# Sentence length quality
|
|
80
107
|
word_count = len(text.split())
|
|
81
108
|
|
|
82
109
|
if 4 <= word_count <= 20:
|
|
@@ -86,27 +113,28 @@ def calculate_confidence(text: str, due_date, priority):
|
|
|
86
113
|
|
|
87
114
|
|
|
88
115
|
def extract_tasks(email_text: str):
|
|
89
|
-
|
|
116
|
+
doc = nlp(email_text)
|
|
90
117
|
|
|
91
118
|
tasks = []
|
|
92
119
|
seen_titles = set()
|
|
93
120
|
|
|
94
|
-
for
|
|
95
|
-
|
|
121
|
+
for sent in doc.sents:
|
|
122
|
+
|
|
123
|
+
sentence = clean_text(sent.text)
|
|
96
124
|
|
|
97
|
-
if
|
|
125
|
+
if should_ignore(sentence):
|
|
98
126
|
continue
|
|
99
127
|
|
|
100
|
-
if len(
|
|
128
|
+
if len(sentence.split()) < 3:
|
|
101
129
|
continue
|
|
102
130
|
|
|
103
|
-
if not
|
|
131
|
+
if not contains_action_verb(sentence):
|
|
104
132
|
continue
|
|
105
133
|
|
|
106
134
|
title = re.sub(
|
|
107
135
|
r"\b(please|kindly)\b",
|
|
108
136
|
"",
|
|
109
|
-
|
|
137
|
+
sentence,
|
|
110
138
|
flags=re.IGNORECASE
|
|
111
139
|
).strip()
|
|
112
140
|
|
|
@@ -115,11 +143,11 @@ def extract_tasks(email_text: str):
|
|
|
115
143
|
|
|
116
144
|
seen_titles.add(title.lower())
|
|
117
145
|
|
|
118
|
-
due_date = extract_due_date(
|
|
119
|
-
priority = detect_priority(
|
|
146
|
+
due_date = extract_due_date(sentence)
|
|
147
|
+
priority = detect_priority(sentence)
|
|
120
148
|
|
|
121
149
|
confidence = calculate_confidence(
|
|
122
|
-
|
|
150
|
+
sentence,
|
|
123
151
|
due_date,
|
|
124
152
|
priority
|
|
125
153
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mail2task
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Extract actionable tasks from emails using Python
|
|
5
5
|
Author: Eby J Kavungal
|
|
6
6
|
Project-URL: Homepage, https://github.com/EbyJK/mail2task
|
|
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
|
|
|
12
12
|
Requires-Python: >=3.9
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
Requires-Dist: dateparser
|
|
15
|
+
Requires-Dist: spacy
|
|
15
16
|
|
|
16
17
|
# mail2task
|
|
17
18
|
|
|
@@ -126,6 +127,21 @@ mail2task sample_email.txt --format markdown
|
|
|
126
127
|
- Task management systems
|
|
127
128
|
|
|
128
129
|
---
|
|
130
|
+
## NLP Features
|
|
131
|
+
|
|
132
|
+
- Sentence splitting
|
|
133
|
+
- Greeting/signature filtering
|
|
134
|
+
- Due date extraction
|
|
135
|
+
- Priority detection
|
|
136
|
+
- Confidence scoring
|
|
137
|
+
|
|
138
|
+
## NLP Engine
|
|
139
|
+
|
|
140
|
+
mail2task uses spaCy for:
|
|
141
|
+
- sentence segmentation
|
|
142
|
+
- tokenization
|
|
143
|
+
- action verb detection
|
|
144
|
+
- lightweight NLP parsing
|
|
129
145
|
|
|
130
146
|
## Project Structure
|
|
131
147
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|