mail2task 0.5.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mail2task
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: Extract actionable tasks from emails using Python
5
5
  Author: Eby J Kavungal
6
6
  Project-URL: Homepage, https://github.com/EbyJK/mail2task
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.9
13
13
  Description-Content-Type: text/markdown
14
14
  Requires-Dist: dateparser
15
+ Requires-Dist: spacy
15
16
 
16
17
  # mail2task
17
18
 
@@ -134,6 +135,23 @@ mail2task sample_email.txt --format markdown
134
135
  - Priority detection
135
136
  - Confidence scoring
136
137
 
138
+ ## NLP Engine
139
+
140
+ mail2task uses spaCy for:
141
+ - sentence segmentation
142
+ - tokenization
143
+ - action verb detection
144
+ - lightweight NLP parsing
145
+
146
+ ## `.eml` Email Support
147
+
148
+ mail2task can process raw email files directly.
149
+
150
+ ```bash
151
+ mail2task sample_email.eml --pretty
152
+ ```
153
+
154
+
137
155
  ## Project Structure
138
156
 
139
157
  ```text
@@ -119,6 +119,23 @@ mail2task sample_email.txt --format markdown
119
119
  - Priority detection
120
120
  - Confidence scoring
121
121
 
122
+ ## NLP Engine
123
+
124
+ mail2task uses spaCy for:
125
+ - sentence segmentation
126
+ - tokenization
127
+ - action verb detection
128
+ - lightweight NLP parsing
129
+
130
+ ## `.eml` Email Support
131
+
132
+ mail2task can process raw email files directly.
133
+
134
+ ```bash
135
+ mail2task sample_email.eml --pretty
136
+ ```
137
+
138
+
122
139
  ## Project Structure
123
140
 
124
141
  ```text
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "mail2task"
7
- version = "0.5.0"
7
+ version = "0.7.0"
8
8
  description = "Extract actionable tasks from emails using Python"
9
9
  authors = [
10
10
  { name="Eby J Kavungal" }
@@ -13,7 +13,8 @@ readme = "README.md"
13
13
  requires-python = ">=3.9"
14
14
 
15
15
  dependencies = [
16
- "dateparser"
16
+ "dateparser",
17
+ "spacy"
17
18
  ]
18
19
 
19
20
  keywords = ["nlp", "tasks", "email", "automation", "productivity"]
@@ -2,6 +2,7 @@ import argparse
2
2
  import json
3
3
  import csv
4
4
  from .extractor import extract_tasks
5
+ from .email_parser import extract_email_body
5
6
 
6
7
 
7
8
  def save_csv(tasks, output_file):
@@ -57,8 +58,11 @@ def main():
57
58
  args = parser.parse_args()
58
59
 
59
60
  try:
60
- with open(args.file, "r", encoding="utf-8") as f:
61
- content = f.read()
61
+ if args.file.endswith(".eml"):
62
+ content=extract_email_body(args.file)
63
+ else:
64
+ with open(args.file, "r", encoding="utf-8") as f:
65
+ content = f.read()
62
66
 
63
67
  tasks = extract_tasks(content)
64
68
 
@@ -0,0 +1,22 @@
1
+ from email import policy
2
+ from email.parser import BytesParser
3
+
4
+
5
def extract_email_body(file_path: str) -> str:
    """Return the plain-text body of the ``.eml`` file at *file_path*.

    The file is parsed in binary mode with ``email.policy.default`` so that
    content decoding is handled by the standard library.

    * Multipart message: the parts are walked in document order and the
      content of the first ``text/plain`` part that is not marked as an
      attachment is returned.  If there is no such part, ``""`` is returned.
    * Single-part message: the content is returned when the main content
      type is ``text``; otherwise ``""`` is returned.

    Args:
        file_path: Path to a raw RFC 5322 email file.

    Returns:
        The decoded body text, or an empty string when no textual body
        could be found.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "rb") as f:
        msg = BytesParser(policy=policy.default).parse(f)

    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() != "text/plain":
                continue
            # A text/plain *attachment* (e.g. notes.txt) is not the body;
            # skipping it keeps attachment text out of task extraction.
            if part.get_content_disposition() == "attachment":
                continue
            return part.get_content()
        return ""

    # For non-text payloads get_content() returns bytes (or another
    # non-str object), which callers expecting text cannot process.
    if msg.get_content_maintype() != "text":
        return ""

    return msg.get_content()
@@ -1,5 +1,9 @@
1
1
  import re
2
2
  import dateparser
3
+ import spacy
4
+
5
+
6
+ nlp = spacy.load("en_core_web_sm")
3
7
 
4
8
 
5
9
  ACTION_WORDS = [
@@ -78,18 +82,20 @@ def extract_due_date(text: str):
78
82
  return None
79
83
 
80
84
 
81
- def is_task_sentence(text: str):
82
- text_lower = text.lower()
85
+ def contains_action_verb(text: str):
86
+ doc = nlp(text)
83
87
 
84
- return any(word in text_lower for word in ACTION_WORDS)
88
+ for token in doc:
89
+ if token.lemma_.lower() in ACTION_WORDS:
90
+ return True
91
+
92
+ return False
85
93
 
86
94
 
87
95
  def calculate_confidence(text: str, due_date, priority):
88
96
  score = 0.4
89
97
 
90
- text_lower = text.lower()
91
-
92
- if any(word in text_lower for word in ACTION_WORDS):
98
+ if contains_action_verb(text):
93
99
  score += 0.25
94
100
 
95
101
  if due_date:
@@ -106,23 +112,15 @@ def calculate_confidence(text: str, due_date, priority):
106
112
  return round(min(score, 0.99), 2)
107
113
 
108
114
 
109
- def split_sentences(email_text: str):
110
- sentences = re.split(r"[.\n]+", email_text)
111
-
112
- return [
113
- clean_text(sentence)
114
- for sentence in sentences
115
- if clean_text(sentence)
116
- ]
117
-
118
-
119
115
  def extract_tasks(email_text: str):
120
- sentences = split_sentences(email_text)
116
+ doc = nlp(email_text)
121
117
 
122
118
  tasks = []
123
119
  seen_titles = set()
124
120
 
125
- for sentence in sentences:
121
+ for sent in doc.sents:
122
+
123
+ sentence = clean_text(sent.text)
126
124
 
127
125
  if should_ignore(sentence):
128
126
  continue
@@ -130,7 +128,7 @@ def extract_tasks(email_text: str):
130
128
  if len(sentence.split()) < 3:
131
129
  continue
132
130
 
133
- if not is_task_sentence(sentence):
131
+ if not contains_action_verb(sentence):
134
132
  continue
135
133
 
136
134
  title = re.sub(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mail2task
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: Extract actionable tasks from emails using Python
5
5
  Author: Eby J Kavungal
6
6
  Project-URL: Homepage, https://github.com/EbyJK/mail2task
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.9
13
13
  Description-Content-Type: text/markdown
14
14
  Requires-Dist: dateparser
15
+ Requires-Dist: spacy
15
16
 
16
17
  # mail2task
17
18
 
@@ -134,6 +135,23 @@ mail2task sample_email.txt --format markdown
134
135
  - Priority detection
135
136
  - Confidence scoring
136
137
 
138
+ ## NLP Engine
139
+
140
+ mail2task uses spaCy for:
141
+ - sentence segmentation
142
+ - tokenization
143
+ - action verb detection
144
+ - lightweight NLP parsing
145
+
146
+ ## `.eml` Email Support
147
+
148
+ mail2task can process raw email files directly.
149
+
150
+ ```bash
151
+ mail2task sample_email.eml --pretty
152
+ ```
153
+
154
+
137
155
  ## Project Structure
138
156
 
139
157
  ```text
@@ -2,6 +2,7 @@ README.md
2
2
  pyproject.toml
3
3
  src/mail2task/__init__.py
4
4
  src/mail2task/cli.py
5
+ src/mail2task/email_parser.py
5
6
  src/mail2task/extractor.py
6
7
  src/mail2task/parser.py
7
8
  src/mail2task.egg-info/PKG-INFO
File without changes