parseur-py 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parseur/__init__.py +31 -0
- parseur/cli.py +346 -0
- parseur/client.py +44 -0
- parseur/config.py +32 -0
- parseur/decorator.py +49 -0
- parseur/document.py +212 -0
- parseur/event.py +28 -0
- parseur/mailbox.py +77 -0
- parseur/utils.py +52 -0
- parseur/webhook.py +122 -0
- parseur_py-0.0.1.dist-info/METADATA +242 -0
- parseur_py-0.0.1.dist-info/RECORD +16 -0
- parseur_py-0.0.1.dist-info/WHEEL +5 -0
- parseur_py-0.0.1.dist-info/entry_points.txt +2 -0
- parseur_py-0.0.1.dist-info/licenses/LICENSE +21 -0
- parseur_py-0.0.1.dist-info/top_level.txt +1 -0
parseur/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from parseur.config import Config
|
|
4
|
+
from parseur.document import Document, DocumentOrderKey
|
|
5
|
+
from parseur.event import ParseurEvent
|
|
6
|
+
from parseur.mailbox import Mailbox, MailboxOrderKey
|
|
7
|
+
from parseur.schemas.document import DocumentStatus
|
|
8
|
+
from parseur.utils import to_json
|
|
9
|
+
from parseur.webhook import Webhook
|
|
10
|
+
|
|
11
|
+
# Names re-exported as the package's public API.
__all__ = [
    "Config",
    "Document",
    "DocumentOrderKey",
    "DocumentStatus",
    "Mailbox",
    "MailboxOrderKey",
    "ParseurEvent",
    "Webhook",
    "to_json",
]


# Endpoint used whenever the config file does not provide its own base URL.
DEFAULT_API_BASE = "https://api.parseur.com"

# Per-user settings file; it is read once, at import time.
CONFIG_PATH = Path.home() / ".parseur.conf"
config = Config(CONFIG_PATH)
config.load()

# Module-level credentials read by the HTTP client on every request.
api_key = config.api_key
api_base = config.api_base or DEFAULT_API_BASE
|
parseur/cli.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
import parseur
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@click.group()
def cli():
    """Parseur CLI - manage Parseur.com from the command line."""
    # Intentionally empty: the group only serves as the root for sub-commands.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@cli.command()
@click.option("--api-key", required=True, help="Your Parseur API key")
@click.option(
    "--api-base", default=parseur.DEFAULT_API_BASE, help="Optional API base URL"
)
def init(api_key, api_base):
    """Initialize the CLI with your API token and optional base URL."""
    # Store both values in the user's config file so later commands can use them.
    settings = parseur.Config(parseur.CONFIG_PATH)
    settings.api_key, settings.api_base = api_key, api_base
    settings.save()
    click.echo(f"✅ Parseur CLI initialized and config saved to {parseur.CONFIG_PATH}")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ------------------------
|
|
35
|
+
# Mailbox commands
|
|
36
|
+
# ------------------------
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@cli.command("list-mailboxes")
@click.option("--search", help="Search string (mailbox name or email prefix)")
@click.option(
    "--order-by",
    type=click.Choice([e.value for e in parseur.MailboxOrderKey]),
    help=(
        "Order by field. Use one of: "
        "name, document_count, template_count, "
        "PARSEDOK_count (processed), PARSEDKO_count (failed), "
        "QUOTAEXC_count (quota exceeded), EXPORTKO_count (export failed)"
    ),
)
@click.option(
    "--descending/--ascending",
    default=False,
    help="Sort descending (default is ascending)",
)
def list_mailboxes(search, order_by, descending):
    """
    List all mailboxes with optional filtering and sorting.
    """
    # Click passes the raw choice string; the library expects the enum member.
    sort_key = None
    if order_by:
        sort_key = parseur.MailboxOrderKey(order_by)

    found = parseur.Mailbox.list(
        search=search, order_by=sort_key, ascending=not descending
    )
    click.echo(parseur.to_json(found))
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@cli.command("get-mailbox")
@click.argument("mailbox_id", type=int)
def get_mailbox(mailbox_id):
    """Get details of a mailbox."""
    # Fetch the mailbox and print it as JSON in one step.
    click.echo(parseur.to_json(parseur.Mailbox.retrieve(mailbox_id)))
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@cli.command("get-mailbox-schema")
@click.argument("mailbox_id", type=int)
def get_mailbox_schema(mailbox_id):
    """Get schema of a mailbox."""
    # Fetch the schema and print it as JSON in one step.
    click.echo(parseur.to_json(parseur.Mailbox.schema(mailbox_id)))
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# ------------------------
|
|
88
|
+
# Document commands
|
|
89
|
+
# ------------------------
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@cli.command("list-documents")
@click.argument("mailbox_id", type=int)
@click.option(
    "--search",
    help="Search string (document id, name, template name, email addresses, metadata)",
)
@click.option(
    "--order-by",
    type=click.Choice([e.value for e in parseur.DocumentOrderKey]),
    help="Order by field (name, created, processed, status)",
)
@click.option(
    "--descending/--ascending",
    default=False,
    help="Sort descending (default is ascending)",
)
@click.option(
    "--received-after",
    type=click.DateTime(formats=["%Y-%m-%d"]),
    help="Filter documents received after this date (YYYY-MM-DD)",
)
@click.option(
    "--received-before",
    type=click.DateTime(formats=["%Y-%m-%d"]),
    help="Filter documents received before this date (YYYY-MM-DD)",
)
@click.option(
    "--with-result",
    is_flag=True,
    help="Include parsed result with each document",
)
def list_documents(
    mailbox_id,
    search,
    order_by,
    descending,
    received_after,
    received_before,
    with_result,
):
    """
    List all documents in a mailbox with optional filtering, sorting, and result inclusion.
    """
    # Click passes the raw choice string; the library expects the enum member.
    sort_key = None
    if order_by:
        sort_key = parseur.DocumentOrderKey(order_by)

    # Collect the keyword filters first so the API call stays on one line.
    filters = {
        "search": search,
        "order_by": sort_key,
        "ascending": not descending,
        "received_after": received_after,
        "received_before": received_before,
        "with_result": with_result,
    }
    documents = parseur.Document.list(mailbox_id=mailbox_id, **filters)
    click.echo(parseur.to_json(documents))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@cli.command("get-document")
@click.argument("document_id", type=str)
def get_document(document_id):
    """Get details of a document."""
    # Fetch the document and print it as JSON in one step.
    click.echo(parseur.to_json(parseur.Document.retrieve(document_id)))
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@cli.command("reprocess-document")
@click.argument("document_id", type=str)
def reprocess_document(document_id):
    """Reprocess a document."""
    # Print whatever the reprocess call returns, as JSON.
    click.echo(parseur.to_json(parseur.Document.reprocess(document_id)))
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@cli.command("skip-document")
@click.argument("document_id", type=str)
def skip_document(document_id):
    """Skip a document."""
    # Print whatever the skip call returns, as JSON.
    click.echo(parseur.to_json(parseur.Document.skip(document_id)))
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@cli.command("copy-document")
@click.argument("document_id", type=str)
@click.argument("target_mailbox_id", type=int)
def copy_document(document_id, target_mailbox_id):
    """Copy a document to another mailbox."""
    # Print the document data returned by the copy call, as JSON.
    duplicate = parseur.Document.copy(document_id, target_mailbox_id)
    rendered = parseur.to_json(duplicate)
    click.echo(rendered)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@cli.command("get-document-logs")
@click.argument("document_id", type=str)
def get_document_logs(document_id):
    """Get logs of a document."""
    # Fetch every log entry and print the list as JSON.
    click.echo(parseur.to_json(parseur.Document.logs(document_id)))
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@cli.command("delete-document")
@click.argument("document_id", type=str)
def delete_document(document_id):
    """Delete a document."""
    parseur.Document.delete(document_id)
    # Reached only if the delete call did not raise.
    confirmation = f"✅ Document {document_id} deleted."
    click.echo(confirmation)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@cli.command("upload-file")
@click.argument("mailbox_id", type=int)
# dir_okay=False makes Click reject a directory with a usage error instead of
# letting it through to open(), which would fail with a raw traceback.
@click.argument("file_path", type=click.Path(exists=True, dir_okay=False))
def upload_file(mailbox_id, file_path):
    """Upload a document file to a mailbox."""
    result = parseur.Document.upload_file(mailbox_id, file_path)
    click.echo(parseur.to_json(result))
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
@cli.command("upload-folder")
@click.argument("mailbox_id", type=int)
@click.argument("folder_path", type=str)
def upload_folder(mailbox_id, folder_path):
    """Upload all files from a glob path."""
    # The library returns a lazy iterable; collect it fully before printing.
    uploads = parseur.Document.upload_folder(mailbox_id, folder_path)
    click.echo(parseur.to_json(list(uploads)))
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
@cli.command("upload-text")
@click.option("--recipient", required=True, help="Mailbox email address")
@click.option("--subject", required=True, help="Subject line for the document")
@click.option("--sender", default=None, help="Sender email (optional)")
@click.option("--body-html", default=None, help="HTML text content")
@click.option("--body-plain", default=None, help="Plain text content")
def upload_text(recipient, subject, sender, body_html, body_plain):
    """Upload text content to a mailbox by email address."""
    # Keyword arguments keep each option paired with the matching parameter.
    created = parseur.Document.upload_text(
        recipient=recipient,
        subject=subject,
        sender=sender,
        body_html=body_html,
        body_plain=body_plain,
    )
    click.echo(parseur.to_json(created))
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# ------------------------
|
|
231
|
+
# Webhook commands
|
|
232
|
+
# ------------------------
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
@cli.command("create-webhook")
@click.option(
    "--event",
    required=True,
    type=click.Choice([e.value for e in parseur.ParseurEvent]),
    help="Event type to listen for",
)
@click.option(
    "--target-url",
    required=True,
    help="The URL to receive webhook POSTs, e.g. https://api.example.com/parseur.",
)
@click.option(
    "--mailbox-id",
    type=int,
    help="Mailbox ID (required for document events).",
)
@click.option(
    "--table-field-id",
    type=str,
    help="Table field ID in 'PF12345' format (required for table events).",
)
@click.option(
    "--header",
    multiple=True,
    type=str,
    help="Custom HTTP header in 'Key:Value' format. Can be used multiple times.",
)
@click.option(
    "--name",
    type=str,
    help="Optional name for the webhook.",
)
def create_webhook(event, target_url, mailbox_id, table_field_id, header, name):
    """
    Create a new custom webhook for your Parseur account.
    """
    # Turn the repeated "Key:Value" options into a dict, splitting on the
    # first colon only so header values may themselves contain colons.
    headers = {}
    for raw_header in header:
        key, separator, value = raw_header.partition(":")
        if not separator:
            # Report on stderr: stdout is where this command prints its JSON.
            click.echo(f"❌ Invalid header format: {raw_header}", err=True)
            sys.exit(1)
        headers[key.strip()] = value.strip()

    event_enum = parseur.ParseurEvent(event)
    result = parseur.Webhook.create(
        event=event_enum,
        target_url=target_url,
        mailbox_id=mailbox_id,
        table_field_id=table_field_id,
        headers=headers or None,  # pass None rather than an empty dict
        name=name,
    )
    click.echo(parseur.to_json(result))
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
@cli.command("get-webhook")
@click.argument("webhook_id", type=int)
def get_webhook(webhook_id):
    """Get details of a webhook."""
    # Fetch the webhook and print it as JSON in one step.
    click.echo(parseur.to_json(parseur.Webhook.retrieve(webhook_id)))
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
@cli.command("delete-webhook")
@click.argument("webhook_id", type=int)
def delete_webhook(webhook_id):
    """
    Delete a registered webhook by its ID.

    This command permanently removes the webhook from your Parseur account.
    """
    parseur.Webhook.delete(webhook_id)
    # Reached only if the delete call did not raise.
    confirmation = f"✅ Webhook {webhook_id} deleted."
    click.echo(confirmation)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
@cli.command("enable-webhook")
@click.argument("mailbox_id", type=int)
@click.argument("webhook_id", type=int)
def enable_webhook(mailbox_id, webhook_id):
    """
    Enable a webhook for the specified mailbox.

    Activates the webhook by adding it to the mailbox.
    """
    # Print whatever the enable call returns, as JSON.
    click.echo(parseur.to_json(parseur.Webhook.enable(mailbox_id, webhook_id)))
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
@cli.command("pause-webhook")
@click.argument("mailbox_id", type=int)
@click.argument("webhook_id", type=int)
def pause_webhook(mailbox_id, webhook_id):
    """
    Pause a webhook for the specified mailbox.

    Removes the webhook from the mailbox without deleting it.
    """
    # Print whatever the pause call returns, as JSON.
    click.echo(parseur.to_json(parseur.Webhook.pause(mailbox_id, webhook_id)))
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
@cli.command("list-webhooks")
def list_webhooks():
    """List all registered webhooks."""
    # Fetch all webhooks and print them as JSON in one step.
    click.echo(parseur.to_json(parseur.Webhook.list()))
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
if __name__ == "__main__":
|
|
346
|
+
cli()
|
parseur/client.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Dict, Generator, Optional
|
|
3
|
+
from urllib.parse import urljoin
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
import parseur
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Client:
    """Stateless HTTP helper for the Parseur REST API.

    The API key and base URL are read from ``parseur.api_key`` and
    ``parseur.api_base`` on every call, so values assigned after import are
    honoured.
    """

    @classmethod
    def auth_headers(cls, json=True) -> Dict[str, str]:
        """Build the headers that authenticate a request.

        :param json: When true, also set ``Content-Type: application/json``.
        :return: Header dictionary containing the ``Authorization`` token.
        :raises ValueError: If no API key is configured.
        """
        if not parseur.api_key:
            raise ValueError("API token is required. Run 'parseur init' first.")
        headers = {"Authorization": f"Token {parseur.api_key}"}
        if json:
            headers["Content-Type"] = "application/json"
        return headers

    @classmethod
    def request(cls, method: str, endpoint: str, **kwargs) -> Any:
        """Send one request and return the decoded JSON body.

        :param method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
        :param endpoint: Path joined onto ``parseur.api_base``.
        :param kwargs: Passed through to ``requests.request`` (``json``,
            ``files``, ``params``, ...).
        :return: The parsed JSON payload, or ``None`` when the response has no
            body (HTTP 204 or empty content).
        :raises ValueError: If no API key is configured.
        :raises requests.HTTPError: On a 4xx/5xx response.
        """
        url = urljoin(parseur.api_base, endpoint)
        logging.debug(f"Request: {method} {url}")
        # Declare a JSON content type only when a JSON body is sent, so that
        # multipart uploads (``files=``) get the content type requests builds.
        headers = cls.auth_headers(json="json" in kwargs)
        response = requests.request(method, url, headers=headers, **kwargs)
        response.raise_for_status()
        # An empty body cannot be decoded; treat it like "no content".
        if response.status_code == 204 or not response.content:
            return None
        return response.json()

    @classmethod
    def paginate(
        cls, endpoint: str, params: Optional[Dict[str, Any]] = None
    ) -> Generator[Dict, None, None]:
        """Yield every item of a paginated listing, following ``next`` links.

        :param endpoint: Path joined onto ``parseur.api_base`` for the first page.
        :param params: Query parameters for the first page.
        :yield: Each entry of the ``results`` array of every page.
        :raises ValueError: If no API key is configured.
        :raises requests.HTTPError: On a 4xx/5xx response.
        """
        url = urljoin(parseur.api_base, endpoint)
        headers = cls.auth_headers()
        while url:
            logging.debug(f"Paginate request: {url}")
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()
            data = response.json()
            for item in data["results"]:
                yield item
            url = data.get("next")
            # NOTE(review): the server-supplied ``next`` link is assumed to
            # carry the full query string already (usual for paginated REST
            # APIs). Re-sending ``params`` would append duplicates of every
            # parameter on each following page, so send them only once.
            params = None
|
parseur/config.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import configparser
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Config:
    """Read and write Parseur settings stored in an INI-style file.

    The file holds a single ``[parseur]`` section with the options
    ``api_key`` and ``api_base``.
    """

    def __init__(self, config_path: str):
        """Remember the file location; nothing is read until :meth:`load`.

        :param config_path: Location of the config file (string or path-like).
        """
        self.config_path = config_path
        self.api_key = None
        self.api_base = None

    def load(self):
        """Populate ``api_key`` and ``api_base`` from the config file.

        A missing file, section or option is not an error: the matching
        attribute is simply set to ``None``. This matters because the package
        loads its configuration at import time, before ``parseur init`` has
        had a chance to create the file.

        :return: The ``ConfigParser`` instance that was read.
        """
        cfg = configparser.ConfigParser()
        if self.config_path:
            # ConfigParser.read() silently skips files that do not exist.
            cfg.read(self.config_path)
        # fallback=None turns NoSectionError/NoOptionError into a None value.
        self.api_key = cfg.get("parseur", "api_key", fallback=None)
        self.api_base = cfg.get("parseur", "api_base", fallback=None)
        return cfg

    def save(self):
        """Write ``api_key`` and ``api_base`` to the config file.

        Other content already present in the file is preserved. Attributes
        that are ``None`` are left out of the file, because ConfigParser only
        accepts string values.

        :raises ValueError: If no config path is set.
        """
        if not self.config_path:
            raise ValueError("Config path must be set before saving.")
        cfg = configparser.ConfigParser()
        cfg.read(self.config_path)
        if not cfg.has_section("parseur"):
            cfg.add_section("parseur")
        for option, value in (("api_key", self.api_key), ("api_base", self.api_base)):
            if value is None:
                # Drop a stale entry instead of failing on a non-string value.
                cfg.remove_option("parseur", option)
            else:
                cfg.set("parseur", option, value)
        with open(self.config_path, "w") as f:
            cfg.write(f)
        logging.info(f"Config saved to {self.config_path}")
|
parseur/decorator.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import logging
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, Callable, Iterable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def rate_limited_batch(batch_size: int = 5, max_per_second: int = 5):
    """
    Decorator for a batch-processing function that takes a list of items and yields results.

    The wrapped function is called once per batch of at most ``batch_size``
    items. Consecutive batches are spaced so that no more than
    ``max_per_second`` items are handed over per second, i.e. batch starts are
    at least ``batch_size / max_per_second`` seconds apart (1 second with the
    defaults).

    The decorated callable must accept ``(cls, items, *args, **kwargs)`` and
    return an iterable; the wrapper is a generator yielding every result.

    :param batch_size: Maximum number of items passed to one call.
    :param max_per_second: Maximum number of items processed per second.
    :raises ValueError: If either argument is not positive.
    """
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if batch_size <= 0 or max_per_second <= 0:
        raise ValueError("batch_size and max_per_second must be positive")

    # Minimum delay between the start of two consecutive batches.
    min_interval = batch_size / max_per_second

    def decorator(func: Callable[..., Iterable[Any]]):
        @functools.wraps(func)
        def wrapper(cls, items: Iterable[Any], *args, **kwargs) -> Iterable[Any]:
            batch = []
            last_start = None  # monotonic start time of the previous batch

            def throttle():
                """Sleep until the next batch may start, then record its start."""
                nonlocal last_start
                if last_start is not None:
                    remaining = min_interval - (time.monotonic() - last_start)
                    if remaining > 0:
                        time.sleep(remaining)
                last_start = time.monotonic()

            for item in items:
                logging.info(f"Processing item: {item}")
                batch.append(item)

                if len(batch) >= batch_size:
                    throttle()
                    yield from func(cls, batch, *args, **kwargs)
                    batch = []

            # Remaining items
            if batch:
                throttle()
                yield from func(cls, batch, *args, **kwargs)

        return wrapper

    return decorator
|
parseur/document.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from glob import iglob
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, Iterable, List, Optional
|
|
7
|
+
|
|
8
|
+
from parseur.client import Client
|
|
9
|
+
from parseur.decorator import rate_limited_batch
|
|
10
|
+
from parseur.schemas.document import DocumentLogSchema, DocumentSchema
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DocumentOrderKey(str, Enum):
    """
    Sort fields accepted when listing documents.

    Pass a member as the ``order_by`` argument of ``Document.iter`` or
    ``Document.list``; its string value is sent as the ``ordering`` query
    parameter.
    """

    NAME = "name"  # document name
    CREATED = "created"  # created/received date
    PROCESSED = "processed"  # processed date
    STATUS = "status"  # document status
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Document:
    """Document resource providing class-based API access."""

    @classmethod
    def from_response(cls, data: Dict) -> Dict:
        """Validate and deserialize a single document dict."""
        return DocumentSchema().load(data)

    @classmethod
    def log_from_response(cls, data: Dict) -> Dict:
        """Validate and deserialize a single document log dict."""
        return DocumentLogSchema().load(data)

    @classmethod
    def iter(
        cls,
        mailbox_id: int,
        *,
        search: Optional[str] = None,
        order_by: Optional[DocumentOrderKey] = None,
        ascending: bool = True,
        received_after: Optional[datetime] = None,
        received_before: Optional[datetime] = None,
        with_result: bool = False,
    ) -> Iterable[Dict]:
        """
        Yield all documents in a mailbox with pagination and filtering.

        :param mailbox_id: The mailbox ID to retrieve documents from.
        :param str search: Search string to filter documents.
            The search query parameter searches the following properties:

            - document id (exact match)
            - document name
            - template name
            - from, to, cc, and bcc email addresses
            - document metadata header

        :param DocumentOrderKey order_by: Enum value specifying the sorting field.
        :param bool ascending: Whether to sort in ascending order (True) or descending order (False).
        :param datetime.datetime received_after: Filter for documents received after this date (converted to UTC YYYY-MM-DD).
            A naive datetime is interpreted as local time by ``astimezone``.
        :param datetime.datetime received_before: Filter for documents received before this date (converted to UTC YYYY-MM-DD).
            A naive datetime is interpreted as local time by ``astimezone``.
        :param bool with_result: Whether to include the parsed result in the returned documents.
        :yield dict: Each yielded dictionary represents a document.
        """
        params = {}

        if search:
            params["search"] = search

        if order_by:
            # A leading "-" requests descending order.
            prefix = "" if ascending else "-"
            params["ordering"] = f"{prefix}{order_by.value}"

        if received_after:
            utc_date = received_after.astimezone(timezone.utc).strftime("%Y-%m-%d")
            params["received_after"] = utc_date
        if received_before:
            utc_date = received_before.astimezone(timezone.utc).strftime("%Y-%m-%d")
            params["received_before"] = utc_date

        if received_after or received_before:
            # The dates above were converted to UTC, so say so explicitly.
            params["tz"] = "UTC"

        if with_result:
            params["with_result"] = "true"

        for raw in Client.paginate(f"/parser/{mailbox_id}/document_set", params=params):
            yield cls.from_response(raw)

    @classmethod
    def list(
        cls,
        mailbox_id: int,
        *,
        search: Optional[str] = None,
        order_by: Optional[DocumentOrderKey] = None,
        ascending: bool = True,
        received_after: Optional[datetime] = None,
        received_before: Optional[datetime] = None,
        with_result: bool = False,
    ) -> List[Dict]:
        """Return all documents of a mailbox as a list (same arguments as :meth:`iter`)."""
        return list(
            cls.iter(
                mailbox_id,
                search=search,
                order_by=order_by,
                ascending=ascending,
                received_after=received_after,
                received_before=received_before,
                with_result=with_result,
            )
        )

    @classmethod
    def retrieve(cls, document_id: str) -> Dict:
        """Retrieve document details, deserialized."""
        raw = Client.request("GET", f"/document/{document_id}")
        return cls.from_response(raw)

    @classmethod
    def reprocess(cls, document_id: str) -> Dict:
        """POST to the document's ``process`` endpoint and return the deserialized document."""
        raw = Client.request("POST", f"/document/{document_id}/process")
        return cls.from_response(raw)

    @classmethod
    def skip(cls, document_id: str) -> Dict:
        """POST to the document's ``skip`` endpoint and return the deserialized document."""
        raw = Client.request("POST", f"/document/{document_id}/skip")
        return cls.from_response(raw)

    @classmethod
    def copy(cls, document_id: str, target_mailbox_id: int) -> Dict:
        """Copy a document into another mailbox and return the deserialized response."""
        raw = Client.request(
            "POST", f"/document/{document_id}/copy/{target_mailbox_id}"
        )
        return cls.from_response(raw)

    @classmethod
    def logs(cls, document_id: str) -> List[Dict]:
        """Return every log entry of a document, deserialized."""
        return [
            cls.log_from_response(raw)
            for raw in Client.paginate(f"/document/{document_id}/log_set")
        ]

    @classmethod
    def delete(cls, document_id: str) -> bool:
        """Delete a document; returns ``True`` when the request did not raise."""
        Client.request("DELETE", f"/document/{document_id}")
        logging.info(f"Deleted document ID: {document_id}")
        return True

    @classmethod
    def upload_file(cls, mailbox_id: int, file_path: str) -> Dict:
        """Upload one file to a mailbox.

        :param mailbox_id: Target mailbox ID.
        :param file_path: Path of the file to send.
        :return: The decoded JSON body returned by the upload endpoint.
        :raises OSError: If the file cannot be opened.
        :raises requests.HTTPError: On a 4xx/5xx response (raised by ``Client.request``).
        """
        with open(file_path, "rb") as file:
            # Client.request already raises on HTTP errors and returns the
            # decoded JSON payload (not a Response object), so its result is
            # returned as-is. The call stays inside the ``with`` block because
            # the file must remain open while the request body is sent.
            return Client.request(
                "POST", f"/parser/{mailbox_id}/upload", files={"file": file}
            )

    @classmethod
    @rate_limited_batch()
    def batch_upload_files(
        cls, file_paths: List[str], mailbox_id: int
    ) -> Iterable[Dict]:
        """Upload several files, rate limited, yielding one result per file.

        A failed upload does not stop the run: it yields
        ``{"file": <path>, "error": <message>}`` instead of raising.
        """
        for file_path in file_paths:
            try:
                yield cls.upload_file(mailbox_id, file_path)
            except Exception as e:
                # Deliberately broad: report the failure and continue with the rest.
                yield {"file": file_path, "error": str(e)}

    @classmethod
    def upload_folder(cls, mailbox_id: int, folder_path: str) -> Iterable[Dict]:
        """Upload every regular file matched by a glob pattern.

        :param mailbox_id: Target mailbox ID.
        :param folder_path: Glob pattern; ``**`` matches recursively.
        :return: Lazy iterable of per-file results (see :meth:`batch_upload_files`).
        """
        paths = (
            str(p) for p in iglob(folder_path, recursive=True) if Path(p).is_file()
        )
        return cls.batch_upload_files(paths, mailbox_id)

    @classmethod
    def upload_text(
        cls,
        recipient: str,
        subject: str,
        sender: Optional[str] = None,
        body_html: Optional[str] = None,
        body_plain: Optional[str] = None,
    ) -> Dict:
        """Send text content to the ``/email`` endpoint.

        :param recipient: Sent as ``recipient``.
        :param subject: Sent as ``subject``.
        :param sender: Sent as ``from`` when given.
        :param body_html: Sent as ``body_html`` when given.
        :param body_plain: Sent as ``body_plain`` when given.
        :return: The decoded JSON body of the response.
        """
        data = {"recipient": recipient, "subject": subject}
        if sender:
            data["from"] = sender
        if body_html:
            data["body_html"] = body_html
        if body_plain:
            data["body_plain"] = body_plain
        logging.info(
            f"Uploading text to Parseur: recipient={recipient}, subject={subject}"
        )
        return Client.request("POST", "/email", json=data)
|
parseur/event.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ParseurEvent(str, Enum):
    """
    Webhook event types that can be registered with Parseur.

    Pass a member when creating a webhook to select the event it listens for.
    """

    # Document-level events
    DOCUMENT_PROCESSED = "document.processed"  # processed successfully
    DOCUMENT_PROCESSED_FLATTENED = "document.processed.flattened"  # processed as flat data
    DOCUMENT_TEMPLATE_NEEDED = "document.template_needed"  # processing failed, template needed
    DOCUMENT_EXPORT_FAILED = "document.export_failed"  # export failed

    # Table-field events
    TABLE_PROCESSED = "table.processed"  # a table field row was processed
    TABLE_PROCESSED_FLATTENED = "table.processed.flattened"  # same, flattened

    def is_table_event(self) -> bool:
        """Return True when this event's value starts with ``table``."""
        event_name = self.value
        return event_name.startswith("table")
|
parseur/mailbox.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Any, Dict, Iterable, List, Optional
|
|
3
|
+
|
|
4
|
+
from parseur.client import Client
|
|
5
|
+
from parseur.schemas.mailbox import MailboxSchema
|
|
6
|
+
from parseur.utils import resolve_absolute_urls
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class MailboxOrderKey(str, Enum):
    """
    Sort fields accepted when listing mailboxes.

    Pass a member as the ``order_by`` argument of ``Mailbox.list()`` or
    ``Mailbox.iter()``; its string value is sent as the ``ordering`` query
    parameter.
    """

    NAME = "name"
    DOCUMENT_COUNT = "document_count"
    TEMPLATE_COUNT = "template_count"
    PARSEDOK_COUNT = "PARSEDOK_count"  # processed
    PARSEDKO_COUNT = "PARSEDKO_count"  # failed
    QUOTAEXC_COUNT = "QUOTAEXC_count"  # quota exceeded
    EXPORTKO_COUNT = "EXPORTKO_count"  # export failed
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Mailbox:
    """Mailbox resource exposing the ``/parser`` endpoints through class methods."""

    @classmethod
    def from_response(cls, data: Dict) -> Dict:
        """
        Turn one raw mailbox payload into its validated form.

        :param data: Raw API response dictionary.
        :return: The schema-loaded mailbox with its download links made absolute.
        """
        validated = MailboxSchema().load(data)
        return resolve_absolute_urls(validated)

    @classmethod
    def iter(
        cls,
        *,
        search: Optional[str] = None,
        order_by: Optional[MailboxOrderKey] = None,
        ascending: bool = True,
    ) -> Iterable[Dict]:
        """
        Lazily yield every mailbox, page by page, optionally filtered and sorted.
        """
        query = {}
        if search:
            query["search"] = search
        if order_by:
            # A leading "-" requests descending order.
            direction = "-" if not ascending else ""
            query["ordering"] = direction + order_by.value
        yield from map(cls.from_response, Client.paginate("/parser", params=query))

    @classmethod
    def list(
        cls,
        *,
        search: Optional[str] = None,
        order_by: Optional[MailboxOrderKey] = None,
        ascending: bool = True,
    ) -> List[Dict[str, Any]]:
        """Collect the output of :meth:`iter` into a list."""
        return [*cls.iter(search=search, order_by=order_by, ascending=ascending)]

    @classmethod
    def retrieve(cls, mailbox_id: int) -> Dict[str, Any]:
        """Fetch one mailbox by ID and return it deserialized."""
        return cls.from_response(Client.request("GET", f"/parser/{mailbox_id}"))

    @classmethod
    def schema(cls, mailbox_id: int) -> Dict[str, Any]:
        """Return the mailbox's schema exactly as the API sends it."""
        endpoint = f"/parser/{mailbox_id}/schema"
        return Client.request("GET", endpoint)
|
parseur/utils.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from datetime import date, datetime
|
|
2
|
+
import json
|
|
3
|
+
from urllib.parse import urljoin
|
|
4
|
+
|
|
5
|
+
import parseur
|
|
6
|
+
|
|
7
|
+
ABSOLUTE_URL_FIELDS = {"csv_download", "json_download", "xls_download"}
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def resolve_absolute_urls(obj):
    """
    Recursively join the download-URL fields of a payload with ``parseur.api_base``.

    Dictionaries are updated in place and returned. Lists are rebuilt as a new
    list of processed items. Any other value is returned unchanged.

    :param obj: A dict, list, or scalar taken from an API payload.
    :return: The processed structure.
    """
    if isinstance(obj, list):
        return [resolve_absolute_urls(entry) for entry in obj]
    if not isinstance(obj, dict):
        return obj
    # Only existing keys are reassigned, so the dict never changes size
    # while it is being iterated.
    for name, value in obj.items():
        if name in ABSOLUTE_URL_FIELDS and value:
            obj[name] = urljoin(parseur.api_base, value)
        else:
            obj[name] = resolve_absolute_urls(value)
    return obj
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ISODateJSONEncoder(json.JSONEncoder):
    """
    JSON encoder that serializes ``datetime`` and ``date`` values via ``isoformat()``.
    """

    def default(self, obj):
        """Return the ISO string for date-like objects; defer everything else to the base class."""
        if not isinstance(obj, (datetime, date)):
            # The base implementation raises TypeError for unsupported types.
            return super().default(obj)
        return obj.isoformat()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def to_json(data, indent=2, sort_keys=True, ensure_ascii=False):
    """
    Render a Python object as a JSON string.

    Encoding is delegated to ``ISODateJSONEncoder``, so ``datetime`` and
    ``date`` values come out as ISO 8601 strings.

    :param data: The data to serialize (dict, list, etc.).
    :param indent: Number of spaces used for indentation. Default is 2.
    :param sort_keys: Whether dictionary keys are sorted. Default is True.
    :param ensure_ascii: Whether non-ASCII characters are escaped. Default is False.
    :return: A JSON-formatted string.
    """
    options = {
        "indent": indent,
        "sort_keys": sort_keys,
        "ensure_ascii": ensure_ascii,
        "cls": ISODateJSONEncoder,
    }
    return json.dumps(data, **options)
|
parseur/webhook.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Dict, Iterable, List, Optional
|
|
3
|
+
|
|
4
|
+
from parseur.event import ParseurEvent
|
|
5
|
+
from parseur.schemas.webhook import WebhookSchema
|
|
6
|
+
from parseur.client import Client
|
|
7
|
+
from parseur.mailbox import Mailbox
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Webhook:
    """Helpers for managing webhooks through the Parseur ``/webhook`` endpoints."""

    @classmethod
    def from_response(cls, data: Dict) -> Dict:
        """
        Deserialize a webhook API response.

        :param data: Raw API response dictionary.
        :return: Deserialized webhook dictionary, as loaded by ``WebhookSchema``.
        """
        return WebhookSchema().load(data)

    @classmethod
    def create(
        cls,
        event: ParseurEvent,
        target_url: str,
        mailbox_id: Optional[int] = None,
        table_field_id: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        name: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Create a new custom webhook for Parseur.

        :param event: Webhook event type (document or table event).
        :param target_url: The URL to send webhook POSTs to.
        :param mailbox_id: Mailbox ID (required for document events).
        :param table_field_id: Table field ID (required for table events, e.g. "PF12345").
        :param headers: Optional custom HTTP headers.
        :param name: Optional custom name for the webhook.
        :return: The created webhook object as a dictionary.
        :raises ValueError: If the identifier required by the event kind is missing.
        """
        body = {
            "event": event.value,
            "target": target_url,
            "category": "CUSTOM",
        }

        if name:
            body["name"] = name
        if headers:
            body["headers"] = headers

        # Table events are attached to a parser field; every other event is
        # attached to a mailbox (sent as "parser").
        if event.is_table_event():
            if not table_field_id:
                raise ValueError("table_field_id is required for table events")
            body["parser_field"] = table_field_id
        else:
            if not mailbox_id:
                raise ValueError("mailbox_id is required for document events")
            body["parser"] = mailbox_id

        raw = Client.request("POST", "/webhook", json=body)
        return cls.from_response(raw)

    @classmethod
    def retrieve(cls, webhook_id: int) -> Dict[str, Any]:
        """
        Retrieve a webhook from the account.

        :param webhook_id: ID of the webhook to retrieve.
        :return: The webhook object as a dictionary.
        """
        raw = Client.request("GET", f"/webhook/{webhook_id}")
        return cls.from_response(raw)

    @classmethod
    def delete(cls, webhook_id: int) -> bool:
        """
        Delete a webhook from the account.

        :param webhook_id: ID of the webhook to delete.
        :return: True once the DELETE request has returned without raising.
        """
        Client.request("DELETE", f"/webhook/{webhook_id}")
        # Lazy %-style arguments: the message is only formatted when emitted.
        logging.info("Deleted webhook ID: %s", webhook_id)
        return True

    @classmethod
    def enable(cls, mailbox_id: int, webhook_id: int) -> Dict[str, Any]:
        """
        Enable an existing webhook for a given mailbox.

        :param mailbox_id: ID of the mailbox.
        :param webhook_id: ID of the webhook to enable.
        :return: The updated mailbox object as a dictionary.
        """
        raw = Client.request("POST", f"/parser/{mailbox_id}/webhook_set/{webhook_id}")
        return Mailbox.from_response(raw)

    @classmethod
    def pause(cls, mailbox_id: int, webhook_id: int) -> Dict[str, Any]:
        """
        Pause (disable) an existing webhook for a given mailbox.

        :param mailbox_id: ID of the mailbox.
        :param webhook_id: ID of the webhook to pause.
        :return: The updated mailbox object as a dictionary.
        """
        raw = Client.request("DELETE", f"/parser/{mailbox_id}/webhook_set/{webhook_id}")
        return Mailbox.from_response(raw)

    @classmethod
    def list(cls) -> List[Dict[str, Any]]:
        """Retrieve all webhooks as a list."""
        return list(cls.iter())

    @classmethod
    def iter(cls) -> Iterable[Dict[str, Any]]:
        """
        Yield all webhooks registered on the account.

        :return: An iterator of deserialized webhook dictionaries.
        """
        # NOTE(review): this iterates the result of a single GET request rather
        # than going through Client.paginate -- confirm that the /webhook
        # endpoint returns the full, unpaginated list.
        for raw in Client.request("GET", "/webhook"):
            yield cls.from_response(raw)
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: parseur-py
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A Python client for the Parseur.com API to manage mailboxes, documents, uploads, and listen for new parsing events.
|
|
5
|
+
Author-email: Parseur Team <admin@parseur.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/parseur/parseur-py
|
|
7
|
+
Project-URL: Repository, https://github.com/parseur/parseur-py
|
|
8
|
+
Project-URL: Issues, https://github.com/parseur/parseur-py/issues
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: click>=8.2.1
|
|
13
|
+
Requires-Dist: requests>=2.31.0
|
|
14
|
+
Requires-Dist: marshmallow>=4.0.0
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# 🤖🧙parseur-py
|
|
18
|
+
|
|
19
|
+
**parseur-py** is a modern Python client for the [Parseur](https://parseur.com) API. It lets you **manage mailboxes, documents, uploads, and webhooks** programmatically or from the command line.
|
|
20
|
+
|
|
21
|
+
Built to help you automate document parsing at scale, parseur-py makes integrating with Parseur fast, easy, and Pythonic.
|
|
22
|
+
|
|
23
|
+
[](https://github.com/parseur/parseur-py)
|
|
24
|
+
[](https://badge.fury.io/py/parseur-py)
|
|
25
|
+
[](https://opensource.org/licenses/MIT)
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## ✨ Features
|
|
30
|
+
|
|
31
|
+
✅ List, search, and sort mailboxes
|
|
32
|
+
✅ Get mailbox details and schema
|
|
33
|
+
✅ List, search, filter, and sort documents
|
|
34
|
+
✅ Upload documents by file or email content
|
|
35
|
+
✅ Reprocess, skip, copy, or delete documents
|
|
36
|
+
✅ Manage custom webhooks for real-time events
|
|
37
|
+
✅ Fully-featured **Command Line Interface (CLI)**
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## 🚀 Quick Start
|
|
42
|
+
|
|
43
|
+
### Install the package
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install parseur-py
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Install the package from source
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install -e .
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Build documentation
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install -r requirements-doc.txt
|
|
59
|
+
cd docs
|
|
60
|
+
make html
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
### Initialize your configuration
|
|
66
|
+
|
|
67
|
+
Store your Parseur API credentials securely:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
parseur init --api-key YOUR_PARSEUR_API_KEY
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Your config is saved (by default) in:
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
~/.parseur.conf
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
### Example usage
|
|
82
|
+
|
|
83
|
+
List all your mailboxes:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
parseur list-mailboxes
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
List documents in a mailbox:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
parseur list-documents 12345
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Upload a file to a mailbox:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
parseur upload-file 12345 ./path/to/document.pdf
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Register a custom webhook:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
parseur create-webhook --event document.processed --target-url https://yourserver.com/webhook --mailbox-id 12345
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## 📜 CLI Commands
|
|
110
|
+
|
|
111
|
+
Run:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
parseur --help
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
for a full list of available commands.
|
|
118
|
+
|
|
119
|
+
### Highlights
|
|
120
|
+
|
|
121
|
+
- **init**: Set your API token and (optional) base URL
|
|
122
|
+
- **list-mailboxes**: Search and sort mailboxes
|
|
123
|
+
- **get-mailbox**: Fetch a mailbox by ID
|
|
124
|
+
- **get-mailbox-schema**: Get the mailbox parsing schema
|
|
125
|
+
- **list-documents**: Advanced document search, filtering, sorting
|
|
126
|
+
- **get-document**: Fetch document details
|
|
127
|
+
- **reprocess-document / skip-document / delete-document**: Document lifecycle operations
|
|
128
|
+
- **upload-file / upload-text**: Upload new documents
|
|
129
|
+
- **create-webhook / get-webhook / list-webhooks / delete-webhook**: Create, get, list, and delete custom webhook integrations.
|
|
130
|
+
- **enable-webhook / pause-webhook**: Activate or pause a webhook for a specific mailbox.
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## 🔎 Advanced Search & Filtering
|
|
135
|
+
|
|
136
|
+
**Mailbox listing supports:**
|
|
137
|
+
|
|
138
|
+
- **Search** by name or email prefix
|
|
139
|
+
- **Sort** by:
|
|
140
|
+
- name
|
|
141
|
+
- document_count
|
|
142
|
+
- template_count
|
|
143
|
+
- PARSEDOK_count (processed)
|
|
144
|
+
- PARSEDKO_count (failed)
|
|
145
|
+
- QUOTAEXC_count (quota exceeded)
|
|
146
|
+
- EXPORTKO_count (export failed)
|
|
147
|
+
|
|
148
|
+
**Document listing supports:**
|
|
149
|
+
|
|
150
|
+
- **Search** in:
|
|
151
|
+
- document ID
|
|
152
|
+
- document name
|
|
153
|
+
- template name
|
|
154
|
+
- email addresses (from, to, cc, bcc)
|
|
155
|
+
- document metadata header
|
|
156
|
+
- **Sort** by:
|
|
157
|
+
- name
|
|
158
|
+
- created (received date)
|
|
159
|
+
- processed date
|
|
160
|
+
- status
|
|
161
|
+
- **Filter** by:
|
|
162
|
+
- received_after / received_before dates
|
|
163
|
+
- **Include** parsed result in response
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## ⚡ Webhooks Support
|
|
168
|
+
|
|
169
|
+
Easily register custom webhooks for events like:
|
|
170
|
+
|
|
171
|
+
- `document.processed`
|
|
172
|
+
- `document.processed.flattened`
|
|
173
|
+
- `document.template_needed`
|
|
174
|
+
- `document.export_failed`
|
|
175
|
+
- `table.processed`
|
|
176
|
+
- `table.processed.flattened`
|
|
177
|
+
|
|
178
|
+
Your webhook endpoint will receive POST notifications with Parseur payloads, enabling real-time integrations with your systems.
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## 🛠️ Configuration
|
|
183
|
+
|
|
184
|
+
Your API token and settings are stored in a simple INI file:
|
|
185
|
+
|
|
186
|
+
```
|
|
187
|
+
[parseur]
|
|
188
|
+
api_token = YOUR_API_KEY
|
|
189
|
+
base_url = https://api.parseur.com
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
You can customize the path by setting `--config-path` in your calls if needed.
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## 🐍 Python Client Usage
|
|
197
|
+
|
|
198
|
+
Beyond the CLI, **parseur-py** is a standard Python library. Example:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
import parseur
|
|
202
|
+
|
|
203
|
+
parseur.api_key = "YOUR_API_KEY"
|
|
204
|
+
|
|
205
|
+
for mailbox in parseur.Mailbox.list():
|
|
206
|
+
print(mailbox["name"])
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## 📖 Documentation
|
|
212
|
+
|
|
213
|
+
- [Parseur Official API Docs](https://help.parseur.com/en/articles/3566128-use-parseur-document-parsing-api)
|
|
214
|
+
- This package mirrors Parseur’s REST API, adding pagination handling, schema support, and convenient CLI commands.
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## 💼 License
|
|
219
|
+
|
|
220
|
+
MIT License
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## 🤝 Contributing
|
|
225
|
+
|
|
226
|
+
We welcome contributions! Please:
|
|
227
|
+
|
|
228
|
+
1. Fork the repo
|
|
229
|
+
2. Create your feature branch (`git checkout -b feature/foo`)
|
|
230
|
+
3. Commit your changes (`git commit -am 'Add foo'`)
|
|
231
|
+
4. Push to the branch (`git push origin feature/foo`)
|
|
232
|
+
5. Open a pull request
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## ✨ Credits
|
|
237
|
+
|
|
238
|
+
Developed with ❤️ by the [Parseur](https://parseur.com) team.
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
*Parseur is the easiest way to automatically extract data from emails and documents. Stop copy-pasting data and automate your workflows!*
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
parseur/__init__.py,sha256=urneWGucdS4jmwh-tb1j1UAnNT4FhKnsTb6NOuVDxEc,717
|
|
2
|
+
parseur/cli.py,sha256=dozg0EOLieG9bCwq2rujcuoW6inuM-MDEPAxQwxGmsQ,9707
|
|
3
|
+
parseur/client.py,sha256=eGFYl_pthbulehh_Csk7sOqOcvf9_HfbJstE0qzKHrE,1497
|
|
4
|
+
parseur/config.py,sha256=qI5HgPH2yg33x6y7sTLX6UUm-LZTfn4ImpR483K4Gxg,1023
|
|
5
|
+
parseur/decorator.py,sha256=yC9D5U6aLj4rTuQw3AUS0_cz78jEGPVvULQi7sWFlOo,1634
|
|
6
|
+
parseur/document.py,sha256=mTZibUkIFlQ85TQZgwDW6XuTONWtxjRSorejhOukyAE,7232
|
|
7
|
+
parseur/event.py,sha256=tNCS70MVuoWOZ3g6BjwnE-4C4iGpK3ts7fCK6-N-FQg,1053
|
|
8
|
+
parseur/mailbox.py,sha256=YmA5bcqm3laccBGlsIgzReF4DSQe4FR6e56PyApY9Nw,2359
|
|
9
|
+
parseur/utils.py,sha256=Mx7az0A2XbCag1yM8dhF_2jREsDxgjZRxBDdZU5RbdY,1658
|
|
10
|
+
parseur/webhook.py,sha256=Krq-xpS6a3aZPs_pf10wTy1INfXGd8dKJK6zb3rLsAk,4088
|
|
11
|
+
parseur_py-0.0.1.dist-info/licenses/LICENSE,sha256=wlaTLxAYcgFP2EDvy5Dvrq5aqKOdh7k16AzS1r2vPZU,1064
|
|
12
|
+
parseur_py-0.0.1.dist-info/METADATA,sha256=ZZ9hdCoTWgp8bPFy6e859Ob5l8FyMU5B3hyFVCjFQRo,5605
|
|
13
|
+
parseur_py-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
14
|
+
parseur_py-0.0.1.dist-info/entry_points.txt,sha256=gcCC05O_4YfWK76ZXNX7s7ChwgriVVSaQ06e1R6AFHU,44
|
|
15
|
+
parseur_py-0.0.1.dist-info/top_level.txt,sha256=yvljW9vjQJA24AGSxgzlUjx2jfkVvzdEzmfF_fD322E,8
|
|
16
|
+
parseur_py-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Parseur
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
parseur
|