fastemailparser 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastemailparser-0.1.2/MANIFEST.in +2 -0
- {fastemailparser-0.1.0/fastemailparser.egg-info → fastemailparser-0.1.2}/PKG-INFO +1 -1
- fastemailparser-0.1.2/email.h +12 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2/fastemailparser.egg-info}/PKG-INFO +1 -1
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/fastemailparser.egg-info/SOURCES.txt +11 -1
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/setup.py +1 -1
- fastemailparser-0.1.2/src/body.h +9 -0
- fastemailparser-0.1.2/src/buf.h +25 -0
- fastemailparser-0.1.2/src/email_iter.h +16 -0
- fastemailparser-0.1.2/src/headers.h +9 -0
- fastemailparser-0.1.2/src/html.h +30 -0
- fastemailparser-0.1.2/src/mime.h +8 -0
- fastemailparser-0.1.2/src/signature.h +8 -0
- fastemailparser-0.1.2/src/standalone.h +10 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/LICENSE +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/README.md +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/emailparser.c +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/fastemailparser.egg-info/dependency_links.txt +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/fastemailparser.egg-info/top_level.txt +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/setup.cfg +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/body.c +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/email_iter.c +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/headers.c +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/html.c +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/mime.c +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/signature.c +0 -0
- {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/standalone.c +0 -0
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
LICENSE
|
|
2
|
+
MANIFEST.in
|
|
2
3
|
README.md
|
|
4
|
+
email.h
|
|
3
5
|
emailparser.c
|
|
4
6
|
setup.py
|
|
5
7
|
fastemailparser.egg-info/PKG-INFO
|
|
@@ -7,9 +9,17 @@ fastemailparser.egg-info/SOURCES.txt
|
|
|
7
9
|
fastemailparser.egg-info/dependency_links.txt
|
|
8
10
|
fastemailparser.egg-info/top_level.txt
|
|
9
11
|
src/body.c
|
|
12
|
+
src/body.h
|
|
13
|
+
src/buf.h
|
|
10
14
|
src/email_iter.c
|
|
15
|
+
src/email_iter.h
|
|
11
16
|
src/headers.c
|
|
17
|
+
src/headers.h
|
|
12
18
|
src/html.c
|
|
19
|
+
src/html.h
|
|
13
20
|
src/mime.c
|
|
21
|
+
src/mime.h
|
|
14
22
|
src/signature.c
|
|
15
|
-
src/standalone.c
|
|
23
|
+
src/signature.h
|
|
24
|
+
src/standalone.c
|
|
25
|
+
src/standalone.h
|
|
@@ -28,7 +28,7 @@ module = Extension(
|
|
|
28
28
|
|
|
29
29
|
setup(
|
|
30
30
|
name="fastemailparser",
|
|
31
|
-
version="0.1.0",
|
|
31
|
+
version="0.1.2",
|
|
32
32
|
author="Julien Calenge @ Méthode",
|
|
33
33
|
author_email="julien.calenge@methode.dev",
|
|
34
34
|
description="Very fast email parsing tool, split emails, retrieve headers & signatures",
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#ifndef EMAILPARSER_BUF_H
|
|
2
|
+
#define EMAILPARSER_BUF_H
|
|
3
|
+
#include <stdlib.h>
|
|
4
|
+
#include <string.h>
|
|
5
|
+
|
|
6
|
+
/**
 * Growable, NUL-terminated byte buffer.
 *
 * A zero-initialized strbuf_t (buf == NULL, len == 0, cap == 0) is a valid
 * empty buffer: the first sb_push() obtains storage through realloc().
 */
typedef struct {
  char *buf;       /* storage managed with realloc(); NULL until first push */
  size_t len, cap; /* len: bytes stored, excluding the NUL; cap: bytes allocated */
} strbuf_t;

/**
 * Append `n` bytes from `s` to `sb` and keep the buffer NUL-terminated.
 *
 * @param sb  buffer to append to
 * @param s   bytes to append; may be NULL only when n == 0
 * @param n   number of bytes to copy from `s`
 * @return 0 on success; -1 if the required size does not fit in size_t or if
 *         realloc() fails. On failure `sb` is left unchanged.
 */
static inline int sb_push(strbuf_t *sb, const char *s, size_t n) {
  const size_t max_size = (size_t)-1;

  /* len + n + 1 must not wrap around, otherwise the capacity check below
   * would pass spuriously and memcpy would write out of bounds. */
  if (n >= max_size - sb->len)
    return -1;

  size_t need = sb->len + n + 1; /* +1 for the trailing NUL */
  if (need > sb->cap) {
    /* Grow geometrically; fall back to the exact size if doubling overflows. */
    size_t cap = (need > max_size / 2) ? need : need * 2;
    char *tmp = realloc(sb->buf, cap);
    if (!tmp)
      return -1; /* sb->buf is still valid and still owned by sb */
    sb->buf = tmp;
    sb->cap = cap;
  }

  /* memcpy with a NULL source is undefined even for zero bytes. */
  if (n > 0)
    memcpy(sb->buf + sb->len, s, n);
  sb->len += n;
  sb->buf[sb->len] = '\0';
  return 0;
}
|
|
25
|
+
#endif
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#ifndef EMAILPARSER_EMAIL_ITER_H
|
|
2
|
+
#define EMAILPARSER_EMAIL_ITER_H
|
|
3
|
+
#include "email.h"
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
#ifndef SEPARATOR_REGEX
|
|
7
|
+
#define SEPARATOR_REGEX_GEN_EN "(From|Sent|To|Subject|Cc|Bcc) ?( :|:) ?"
|
|
8
|
+
#define SEPARATOR_REGEX_GEN_FR "(De|À|Envoyé|Objet|Cc|Cci) ?( :|:) ?"
|
|
9
|
+
#define SEPARATOR_REGEX_STA_ALL "(De|From) ?( :|:) ?"
|
|
10
|
+
#define SEPARATOR_REGEX_END_ALL "(Objet|Subject) ?( :|:) ?"
|
|
11
|
+
#define SEPARATOR_REGEX SEPARATOR_REGEX_STA_ALL
|
|
12
|
+
#endif
|
|
13
|
+
|
|
14
|
+
email_t *new_email(char *raw);
|
|
15
|
+
bool get_next_val(email_t *email);
|
|
16
|
+
#endif
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
#ifndef EMAILPARSER_HEADERS_H
|
|
2
|
+
#define EMAILPARSER_HEADERS_H
|
|
3
|
+
#define PY_SSIZE_T_CLEAN
|
|
4
|
+
#include <Python.h>
|
|
5
|
+
#include <stddef.h>
|
|
6
|
+
|
|
7
|
+
const char *canonical_key(const char *name, size_t len);
|
|
8
|
+
PyObject *py_parse_headers(PyObject *module, PyObject *args);
|
|
9
|
+
#endif
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#ifndef EMAILPARSER_HTML_H
|
|
2
|
+
#define EMAILPARSER_HTML_H
|
|
3
|
+
#define PY_SSIZE_T_CLEAN
|
|
4
|
+
#include <Python.h>
|
|
5
|
+
#include "buf.h"
|
|
6
|
+
#include <ctype.h>
|
|
7
|
+
#include <libxml/tree.h>
|
|
8
|
+
|
|
9
|
+
int walk_text(xmlNodePtr node, strbuf_t *sb);
|
|
10
|
+
PyObject *segment_to_text(const char *html);
|
|
11
|
+
char *html_to_plain_c(const char *html);
|
|
12
|
+
|
|
13
|
+
/**
 * Heuristically detect HTML markup in `text`.
 *
 * Only a '<' found within the first 512 bytes is considered. A '<' counts as
 * markup when it is followed by '/' (closing tag), or by a tag name (a letter
 * followed by letters/digits, e.g. "div", "h1") that is terminated by '>',
 * '/' or whitespace. A bare '<' as used around email addresses
 * ("<john@example.com>") does not match, because the name is followed by
 * '@' or '.'.
 *
 * @param text  bytes to inspect; need not be NUL-terminated
 * @param len   number of readable bytes in `text`
 * @return 1 if something that looks like a tag is found, 0 otherwise
 */
static inline int looks_like_html(const char *text, size_t len) {
  for (size_t i = 0; i + 1 < len && i < 512; i++) {
    if (text[i] != '<')
      continue;

    unsigned char next = (unsigned char)text[i + 1];
    if (next == '/')
      return 1;
    if (!isalpha(next))
      continue;

    /* Tag names may contain digits after the first letter (h1..h6). */
    size_t j = i + 2;
    while (j < len && isalnum((unsigned char)text[j]))
      j++;
    if (j >= len)
      continue;

    /* Attributes may be separated from the name by any whitespace, not
     * only a plain space (e.g. "<div\nclass=...>"). */
    unsigned char end = (unsigned char)text[j];
    if (end == '>' || end == '/' || isspace(end))
      return 1;
  }
  return 0;
}
|
|
30
|
+
#endif
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#ifndef EMAILPARSER_STANDALONE_H
|
|
2
|
+
#define EMAILPARSER_STANDALONE_H
|
|
3
|
+
#define PY_SSIZE_T_CLEAN
|
|
4
|
+
#include <Python.h>
|
|
5
|
+
#include <stddef.h>
|
|
6
|
+
|
|
7
|
+
char *extract_css(const char *html, size_t html_len);
|
|
8
|
+
PyObject *wrap_standalone(const char *css, size_t css_len, const char *segment,
|
|
9
|
+
size_t seg_len);
|
|
10
|
+
#endif
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fastemailparser-0.1.0 → fastemailparser-0.1.2}/fastemailparser.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|