fastemailparser 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. fastemailparser-0.1.2/MANIFEST.in +2 -0
  2. {fastemailparser-0.1.0/fastemailparser.egg-info → fastemailparser-0.1.2}/PKG-INFO +1 -1
  3. fastemailparser-0.1.2/email.h +12 -0
  4. {fastemailparser-0.1.0 → fastemailparser-0.1.2/fastemailparser.egg-info}/PKG-INFO +1 -1
  5. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/fastemailparser.egg-info/SOURCES.txt +11 -1
  6. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/setup.py +1 -1
  7. fastemailparser-0.1.2/src/body.h +9 -0
  8. fastemailparser-0.1.2/src/buf.h +25 -0
  9. fastemailparser-0.1.2/src/email_iter.h +16 -0
  10. fastemailparser-0.1.2/src/headers.h +9 -0
  11. fastemailparser-0.1.2/src/html.h +30 -0
  12. fastemailparser-0.1.2/src/mime.h +8 -0
  13. fastemailparser-0.1.2/src/signature.h +8 -0
  14. fastemailparser-0.1.2/src/standalone.h +10 -0
  15. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/LICENSE +0 -0
  16. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/README.md +0 -0
  17. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/emailparser.c +0 -0
  18. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/fastemailparser.egg-info/dependency_links.txt +0 -0
  19. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/fastemailparser.egg-info/top_level.txt +0 -0
  20. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/setup.cfg +0 -0
  21. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/body.c +0 -0
  22. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/email_iter.c +0 -0
  23. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/headers.c +0 -0
  24. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/html.c +0 -0
  25. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/mime.c +0 -0
  26. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/signature.c +0 -0
  27. {fastemailparser-0.1.0 → fastemailparser-0.1.2}/src/standalone.c +0 -0
@@ -0,0 +1,2 @@
1
+ include email.h
2
+ recursive-include src *.h
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fastemailparser
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: Very fast email parsing tool, split emails, retrieve headers & signatures
5
5
  Home-page: https://github.com/Methode-dev/EmailParser
6
6
  Author: Julien Calenge @ Méthode
@@ -0,0 +1,12 @@
1
+ #ifndef EMAIL
2
+ #define EMAIL
3
+
4
+ typedef struct email_s {
5
+ int last_index;
6
+ int exhausted;
7
+ int yield_if_empty_chain; /* yield body even if no separator found (single
8
+ email) */
9
+ char *body;
10
+ } email_t;
11
+
12
+ #endif
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fastemailparser
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: Very fast email parsing tool, split emails, retrieve headers & signatures
5
5
  Home-page: https://github.com/Methode-dev/EmailParser
6
6
  Author: Julien Calenge @ Méthode
@@ -1,5 +1,7 @@
1
1
  LICENSE
2
+ MANIFEST.in
2
3
  README.md
4
+ email.h
3
5
  emailparser.c
4
6
  setup.py
5
7
  fastemailparser.egg-info/PKG-INFO
@@ -7,9 +9,17 @@ fastemailparser.egg-info/SOURCES.txt
7
9
  fastemailparser.egg-info/dependency_links.txt
8
10
  fastemailparser.egg-info/top_level.txt
9
11
  src/body.c
12
+ src/body.h
13
+ src/buf.h
10
14
  src/email_iter.c
15
+ src/email_iter.h
11
16
  src/headers.c
17
+ src/headers.h
12
18
  src/html.c
19
+ src/html.h
13
20
  src/mime.c
21
+ src/mime.h
14
22
  src/signature.c
15
- src/standalone.c
23
+ src/signature.h
24
+ src/standalone.c
25
+ src/standalone.h
@@ -28,7 +28,7 @@ module = Extension(
28
28
 
29
29
  setup(
30
30
  name="fastemailparser",
31
- version="0.1.0",
31
+ version="0.1.2",
32
32
  author="Julien Calenge @ Méthode",
33
33
  author_email="julien.calenge@methode.dev",
34
34
  description="Very fast email parsing tool, split emails, retrieve headers & signatures",
@@ -0,0 +1,9 @@
1
+ #ifndef EMAILPARSER_BODY_H
2
+ #define EMAILPARSER_BODY_H
3
+ #define PY_SSIZE_T_CLEAN
4
+ #include <Python.h>
5
+ #include <stddef.h>
6
+
7
+ const char *find_body_start(const char *text, size_t len);
8
+ PyObject *py_extract_body(PyObject *module, PyObject *args);
9
+ #endif
@@ -0,0 +1,25 @@
1
+ #ifndef EMAILPARSER_BUF_H
2
+ #define EMAILPARSER_BUF_H
3
+ #include <stdlib.h>
4
+ #include <string.h>
5
+
6
/*
 * Growable byte buffer whose contents are kept NUL-terminated by sb_push().
 *
 *   buf  heap storage managed with realloc(); NULL is fine while cap is 0,
 *        so a zero-initialised strbuf_t is a valid empty buffer.
 *   len  number of payload bytes stored (the terminator is not counted).
 *   cap  number of bytes allocated at buf.
 */
typedef struct {
  char *buf;
  size_t len, cap;
} strbuf_t;

/*
 * Append the n bytes at s to sb and re-terminate the buffer with '\0'.
 *
 * When the payload plus terminator does not fit in the current capacity the
 * storage is reallocated to twice the required size (or exactly the required
 * size if doubling would not fit in a size_t).
 *
 * s may be NULL only when n is 0. s must not point into sb->buf, because a
 * reallocation may move the storage before the copy happens.
 *
 * Returns 0 on success. Returns -1 if the required size does not fit in a
 * size_t or if realloc() fails; in both cases sb is left unchanged.
 */
static inline int sb_push(strbuf_t *sb, const char *s, size_t n) {
  const size_t size_max = (size_t)-1;

  /* Reject requests where len + n + 1 would wrap around. */
  if (n >= size_max - sb->len) {
    return -1;
  }
  size_t needed = sb->len + n + 1;

  if (needed > sb->cap) {
    /* Grow geometrically, but never let the doubling itself overflow. */
    size_t cap = (needed > size_max / 2) ? needed : needed * 2;
    char *tmp = realloc(sb->buf, cap);
    if (!tmp) {
      return -1;
    }
    sb->buf = tmp;
    sb->cap = cap;
  }

  /* memcpy with a NULL source is undefined even for n == 0, so skip it. */
  if (n > 0) {
    memcpy(sb->buf + sb->len, s, n);
  }
  sb->len += n;
  sb->buf[sb->len] = '\0';
  return 0;
}
25
+ #endif
@@ -0,0 +1,16 @@
1
+ #ifndef EMAILPARSER_EMAIL_ITER_H
2
+ #define EMAILPARSER_EMAIL_ITER_H
3
+ #include "email.h"
4
+ #include <stdbool.h>
5
+
6
+ #ifndef SEPARATOR_REGEX
7
+ #define SEPARATOR_REGEX_GEN_EN "(From|Sent|To|Subject|Cc|Bcc) ?(&nbsp;:|:) ?"
8
+ #define SEPARATOR_REGEX_GEN_FR "(De|À|Envoyé|Objet|Cc|Cci) ?(&nbsp;:|:) ?"
9
+ #define SEPARATOR_REGEX_STA_ALL "(De|From) ?(&nbsp;:|:) ?"
10
+ #define SEPARATOR_REGEX_END_ALL "(Objet|Subject) ?(&nbsp;:|:) ?"
11
+ #define SEPARATOR_REGEX SEPARATOR_REGEX_STA_ALL
12
+ #endif
13
+
14
+ email_t *new_email(char *raw);
15
+ bool get_next_val(email_t *email);
16
+ #endif
@@ -0,0 +1,9 @@
1
+ #ifndef EMAILPARSER_HEADERS_H
2
+ #define EMAILPARSER_HEADERS_H
3
+ #define PY_SSIZE_T_CLEAN
4
+ #include <Python.h>
5
+ #include <stddef.h>
6
+
7
+ const char *canonical_key(const char *name, size_t len);
8
+ PyObject *py_parse_headers(PyObject *module, PyObject *args);
9
+ #endif
@@ -0,0 +1,30 @@
1
+ #ifndef EMAILPARSER_HTML_H
2
+ #define EMAILPARSER_HTML_H
3
+ #define PY_SSIZE_T_CLEAN
4
+ #include <Python.h>
5
+ #include "buf.h"
6
+ #include <ctype.h>
7
+ #include <libxml/tree.h>
8
+
9
+ int walk_text(xmlNodePtr node, strbuf_t *sb);
10
+ PyObject *segment_to_text(const char *html);
11
+ char *html_to_plain_c(const char *html);
12
+
13
/*
 * Heuristically decide whether text contains HTML markup.
 *
 * Every '<' located within the first 512 bytes (and before the final byte)
 * is inspected. The function returns 1 as soon as it sees either
 *   - "</"  (a closing tag), or
 *   - "<name" where name starts with a letter, continues with letters or
 *     digits (so h1..h6 are recognised), and is followed by '>', '/' or
 *     whitespace.
 * A bare '<' as used around e-mail addresses ("<user@example.com>") is not
 * treated as HTML because the name is followed by '@'.
 *
 * text need not be NUL-terminated; only the first len bytes are read.
 * Returns 1 if markup was found, 0 otherwise.
 */
static inline int looks_like_html(const char *text, size_t len) {
  const size_t sniff_limit = 512; /* only sniff the start of the text */

  for (size_t i = 0; i + 1 < len && i < sniff_limit; i++) {
    if (text[i] != '<') {
      continue;
    }
    if (text[i + 1] == '/') {
      return 1;
    }
    if (!isalpha((unsigned char)text[i + 1])) {
      continue;
    }

    /* Skip the rest of the tag name; digits are valid after the first char. */
    size_t j = i + 2;
    while (j < len && isalnum((unsigned char)text[j])) {
      j++;
    }
    if (j < len) {
      unsigned char after = (unsigned char)text[j];
      if (after == '>' || after == '/' || isspace(after)) {
        return 1;
      }
    }
  }
  return 0;
}
30
+ #endif
@@ -0,0 +1,8 @@
1
+ #ifndef EMAILPARSER_MIME_H
2
+ #define EMAILPARSER_MIME_H
3
+ #include <stddef.h>
4
+
5
+ char *decode_qp(const char *in, size_t in_len, size_t *out_len);
6
+ char *skip_mime_headers(char *raw);
7
+ int has_html_mime_part(const char *text, int len);
8
+ #endif
@@ -0,0 +1,8 @@
1
+ #ifndef EMAILPARSER_SIGNATURE_H
2
+ #define EMAILPARSER_SIGNATURE_H
3
+ #define PY_SSIZE_T_CLEAN
4
+ #include <Python.h>
5
+
6
+ PyObject *py_find_signature(PyObject *module, PyObject *args);
7
+ PyObject *py_strip_signature(PyObject *module, PyObject *args);
8
+ #endif
@@ -0,0 +1,10 @@
1
+ #ifndef EMAILPARSER_STANDALONE_H
2
+ #define EMAILPARSER_STANDALONE_H
3
+ #define PY_SSIZE_T_CLEAN
4
+ #include <Python.h>
5
+ #include <stddef.h>
6
+
7
+ char *extract_css(const char *html, size_t html_len);
8
+ PyObject *wrap_standalone(const char *css, size_t css_len, const char *segment,
9
+ size_t seg_len);
10
+ #endif
File without changes