mail2task 0.4.0__tar.gz → 0.6.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mail2task
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: Extract actionable tasks from emails using Python
5
5
  Author: Eby J Kavungal
6
6
  Project-URL: Homepage, https://github.com/EbyJK/mail2task
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.9
13
13
  Description-Content-Type: text/markdown
14
14
  Requires-Dist: dateparser
15
+ Requires-Dist: spacy
15
16
 
16
17
  # mail2task
17
18
 
@@ -126,6 +127,21 @@ mail2task sample_email.txt --format markdown
126
127
  - Task management systems
127
128
 
128
129
  ---
130
+ ## NLP Features
131
+
132
+ - Sentence splitting
133
+ - Greeting/signature filtering
134
+ - Due date extraction
135
+ - Priority detection
136
+ - Confidence scoring
137
+
138
+ ## NLP Engine
139
+
140
+ mail2task uses spaCy for:
141
+ - sentence segmentation
142
+ - tokenization
143
+ - action verb detection
144
+ - lightweight NLP parsing
129
145
 
130
146
  ## Project Structure
131
147
 
@@ -111,6 +111,21 @@ mail2task sample_email.txt --format markdown
111
111
  - Task management systems
112
112
 
113
113
  ---
114
+ ## NLP Features
115
+
116
+ - Sentence splitting
117
+ - Greeting/signature filtering
118
+ - Due date extraction
119
+ - Priority detection
120
+ - Confidence scoring
121
+
122
+ ## NLP Engine
123
+
124
+ mail2task uses spaCy for:
125
+ - sentence segmentation
126
+ - tokenization
127
+ - action verb detection
128
+ - lightweight NLP parsing
114
129
 
115
130
  ## Project Structure
116
131
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "mail2task"
7
- version = "0.4.0"
7
+ version = "0.6.0"
8
8
  description = "Extract actionable tasks from emails using Python"
9
9
  authors = [
10
10
  { name="Eby J Kavungal" }
@@ -13,7 +13,8 @@ readme = "README.md"
13
13
  requires-python = ">=3.9"
14
14
 
15
15
  dependencies = [
16
- "dateparser"
16
+ "dateparser",
17
+ "spacy"
17
18
  ]
18
19
 
19
20
  keywords = ["nlp", "tasks", "email", "automation", "productivity"]
@@ -1,5 +1,9 @@
1
1
  import re
2
2
  import dateparser
3
+ import spacy
4
+
5
+
6
+ nlp = spacy.load("en_core_web_sm")
3
7
 
4
8
 
5
9
  ACTION_WORDS = [
@@ -15,7 +19,10 @@ ACTION_WORDS = [
15
19
  "email",
16
20
  "organize",
17
21
  "create",
18
- "fix"
22
+ "fix",
23
+ "approve",
24
+ "confirm",
25
+ "deliver"
19
26
  ]
20
27
 
21
28
 
@@ -26,8 +33,30 @@ PRIORITY_KEYWORDS = {
26
33
  }
27
34
 
28
35
 
36
+ IGNORE_PATTERNS = [
37
+ r"^hi\b",
38
+ r"^hello\b",
39
+ r"^thanks\b",
40
+ r"^thank you\b",
41
+ r"^regards\b",
42
+ r"^best\b",
43
+ r"^sincerely\b"
44
+ ]
45
+
46
+
29
47
  def clean_text(text: str):
30
- return re.sub(r"\s+", " ", text).strip()
48
+ text = re.sub(r"\s+", " ", text)
49
+ return text.strip()
50
+
51
+
52
+ def should_ignore(text: str):
53
+ text_lower = text.lower()
54
+
55
+ for pattern in IGNORE_PATTERNS:
56
+ if re.search(pattern, text_lower):
57
+ return True
58
+
59
+ return False
31
60
 
32
61
 
33
62
  def detect_priority(text: str):
@@ -53,30 +82,28 @@ def extract_due_date(text: str):
53
82
  return None
54
83
 
55
84
 
56
- def is_task_sentence(text: str):
57
- text_lower = text.lower()
85
+ def contains_action_verb(text: str):
86
+ doc = nlp(text)
87
+
88
+ for token in doc:
89
+ if token.lemma_.lower() in ACTION_WORDS:
90
+ return True
58
91
 
59
- return any(word in text_lower for word in ACTION_WORDS)
92
+ return False
60
93
 
61
94
 
62
95
  def calculate_confidence(text: str, due_date, priority):
63
96
  score = 0.4
64
97
 
65
- text_lower = text.lower()
66
-
67
- # Action word boost
68
- if any(word in text_lower for word in ACTION_WORDS):
98
+ if contains_action_verb(text):
69
99
  score += 0.25
70
100
 
71
- # Due date boost
72
101
  if due_date:
73
102
  score += 0.2
74
103
 
75
- # Priority boost
76
104
  if priority != "normal":
77
105
  score += 0.1
78
106
 
79
- # Sentence length quality
80
107
  word_count = len(text.split())
81
108
 
82
109
  if 4 <= word_count <= 20:
@@ -86,27 +113,28 @@ def calculate_confidence(text: str, due_date, priority):
86
113
 
87
114
 
88
115
  def extract_tasks(email_text: str):
89
- lines = email_text.splitlines()
116
+ doc = nlp(email_text)
90
117
 
91
118
  tasks = []
92
119
  seen_titles = set()
93
120
 
94
- for line in lines:
95
- line = clean_text(line)
121
+ for sent in doc.sents:
122
+
123
+ sentence = clean_text(sent.text)
96
124
 
97
- if not line:
125
+ if should_ignore(sentence):
98
126
  continue
99
127
 
100
- if len(line.split()) < 3:
128
+ if len(sentence.split()) < 3:
101
129
  continue
102
130
 
103
- if not is_task_sentence(line):
131
+ if not contains_action_verb(sentence):
104
132
  continue
105
133
 
106
134
  title = re.sub(
107
135
  r"\b(please|kindly)\b",
108
136
  "",
109
- line,
137
+ sentence,
110
138
  flags=re.IGNORECASE
111
139
  ).strip()
112
140
 
@@ -115,11 +143,11 @@ def extract_tasks(email_text: str):
115
143
 
116
144
  seen_titles.add(title.lower())
117
145
 
118
- due_date = extract_due_date(line)
119
- priority = detect_priority(line)
146
+ due_date = extract_due_date(sentence)
147
+ priority = detect_priority(sentence)
120
148
 
121
149
  confidence = calculate_confidence(
122
- line,
150
+ sentence,
123
151
  due_date,
124
152
  priority
125
153
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mail2task
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: Extract actionable tasks from emails using Python
5
5
  Author: Eby J Kavungal
6
6
  Project-URL: Homepage, https://github.com/EbyJK/mail2task
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.9
13
13
  Description-Content-Type: text/markdown
14
14
  Requires-Dist: dateparser
15
+ Requires-Dist: spacy
15
16
 
16
17
  # mail2task
17
18
 
@@ -126,6 +127,21 @@ mail2task sample_email.txt --format markdown
126
127
  - Task management systems
127
128
 
128
129
  ---
130
+ ## NLP Features
131
+
132
+ - Sentence splitting
133
+ - Greeting/signature filtering
134
+ - Due date extraction
135
+ - Priority detection
136
+ - Confidence scoring
137
+
138
+ ## NLP Engine
139
+
140
+ mail2task uses spaCy for:
141
+ - sentence segmentation
142
+ - tokenization
143
+ - action verb detection
144
+ - lightweight NLP parsing
129
145
 
130
146
  ## Project Structure
131
147
 
File without changes