document-manager 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- document_manager/__init__.py +21 -0
- document_manager/client/__init__.py +9 -0
- document_manager/client/document_manager.py +632 -0
- document_manager/client/errors.py +25 -0
- document_manager/types/__init__.py +32 -0
- document_manager/types/articles.py +60 -0
- document_manager/types/common.py +9 -0
- document_manager/types/search.py +91 -0
- document_manager/types/users.py +44 -0
- document_manager-0.0.1.dist-info/METADATA +59 -0
- document_manager-0.0.1.dist-info/RECORD +13 -0
- document_manager-0.0.1.dist-info/WHEEL +5 -0
- document_manager-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
API client handler for Document Management System.
|
|
3
|
+
|
|
4
|
+
You can find:
|
|
5
|
+
- The client handler class under the `client` folder.
|
|
6
|
+
- The pydantic models for request/response validation under the `types` folder.
|
|
7
|
+
|
|
8
|
+
Quickstart:
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
|
|
12
|
+
from document_manager.client import DocumentManagerClient
|
|
13
|
+
|
|
14
|
+
client = DocumentManagerClient(
|
|
15
|
+
base_url="API_BASE_URL",
|
|
16
|
+
api_key="USER_API_KEY",
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
print(f"Client initialized: {client.health_check()}")
|
|
20
|
+
```
|
|
21
|
+
"""
|
|
@@ -0,0 +1,632 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module provides a client class to interact with the API
|
|
3
|
+
for managing users, articles, and performing searches.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
from typing import Optional, List
|
|
8
|
+
from urllib.parse import urljoin
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
from pydantic import ValidationError
|
|
12
|
+
|
|
13
|
+
from .errors import (
|
|
14
|
+
DocumentManagerClientError,
|
|
15
|
+
APIAuthenticationError,
|
|
16
|
+
APIValidationError,
|
|
17
|
+
APIConnectionError,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from ..types import (
|
|
21
|
+
ListArticlesResponse,
|
|
22
|
+
IngestRequest,
|
|
23
|
+
ArticleResponse,
|
|
24
|
+
UpdateDoiRequest,
|
|
25
|
+
DeleteArticleRequest,
|
|
26
|
+
DeleteArticleResponse,
|
|
27
|
+
ArticleSearchRequest,
|
|
28
|
+
ArticleSearchResult,
|
|
29
|
+
ChunkSearchRequest,
|
|
30
|
+
ChunkSearchResult,
|
|
31
|
+
SummarizeRequest,
|
|
32
|
+
SummarizeResponse,
|
|
33
|
+
UserRequest,
|
|
34
|
+
UserResponse,
|
|
35
|
+
DeleteUserResponse,
|
|
36
|
+
ListUsersResponse,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Configure module logger
|
|
40
|
+
logger = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class DocumentManagerClient:
|
|
44
|
+
"""
|
|
45
|
+
Client for interacting with the Document Management API.
|
|
46
|
+
|
|
47
|
+
This client provides methods to interact with all API endpoints including
|
|
48
|
+
user management, article operations, and search functionality.
|
|
49
|
+
|
|
50
|
+
Attributes:
|
|
51
|
+
base_url: Base URL of the API (e.g., 'https://myapp.azurewebsites.net/api')
|
|
52
|
+
api_key: User API key for authentication (for regular endpoints)
|
|
53
|
+
function_key: Function key for admin endpoints
|
|
54
|
+
timeout: Request timeout in seconds
|
|
55
|
+
session: Requests session for connection pooling
|
|
56
|
+
|
|
57
|
+
Example:
|
|
58
|
+
>>> client = DocumentManagerClient(
|
|
59
|
+
... base_url='https://myapp.azurewebsites.net/api',
|
|
60
|
+
... api_key='user-api-key-123'
|
|
61
|
+
... )
|
|
62
|
+
>>> articles = client.list_articles()
|
|
63
|
+
>>> print(f"Found {articles.count} articles")
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
def __init__(
    self,
    base_url: str,
    api_key: Optional[str] = None,
    function_key: Optional[str] = None,
    timeout: int = 30,
):
    """Set up the client configuration and open a pooled HTTP session.

    Args:
        base_url: Base URL of the API; trailing slashes are stripped
            before it is stored.
        api_key: User API key, sent as ``x-api-key`` on regular endpoints.
        function_key: Function key, sent as ``x-functions-key`` on admin
            endpoints.
        timeout: Per-request timeout in seconds (default: 30).
    """
    # Credentials are stored as given; they are only checked when headers are built.
    self.api_key, self.function_key = api_key, function_key
    self.timeout = timeout
    self.base_url = base_url.rstrip("/")

    # One session is shared by every request made through this client.
    self.session = requests.Session()

    logger.info(f"Initialized API client for {self.base_url}")
|
|
88
|
+
|
|
89
|
+
def _get_url(self, endpoint: str) -> str:
|
|
90
|
+
"""Construct full URL for an endpoint.
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
endpoint: Endpoint path (e.g., 'articles/list')
|
|
94
|
+
|
|
95
|
+
Returns:
|
|
96
|
+
Full URL for the endpoint
|
|
97
|
+
"""
|
|
98
|
+
return urljoin(self.base_url + "/", endpoint)
|
|
99
|
+
|
|
100
|
+
def _get_headers(self, is_admin: bool = False) -> dict:
|
|
101
|
+
"""Get request headers including authentication.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
is_admin: Whether this is an admin endpoint requiring function key
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
Dictionary of HTTP headers
|
|
108
|
+
|
|
109
|
+
Raises:
|
|
110
|
+
APIAuthenticationError: If required authentication key is missing
|
|
111
|
+
"""
|
|
112
|
+
headers = {
|
|
113
|
+
"Content-Type": "application/json",
|
|
114
|
+
"Accept": "application/json",
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
if is_admin:
|
|
118
|
+
if not self.function_key:
|
|
119
|
+
raise APIAuthenticationError(
|
|
120
|
+
"Function key required for admin endpoints but not provided"
|
|
121
|
+
)
|
|
122
|
+
headers["x-functions-key"] = self.function_key
|
|
123
|
+
else:
|
|
124
|
+
if self.api_key:
|
|
125
|
+
headers["x-api-key"] = self.api_key
|
|
126
|
+
|
|
127
|
+
return headers
|
|
128
|
+
|
|
129
|
+
def _handle_response(self, response: requests.Response, expected_model=None):
    """Turn an HTTP response into a parsed model, raw text, or a client error.

    Args:
        response: HTTP response object.
        expected_model: Pydantic model class to build from the JSON body
            (optional).

    Returns:
        An ``expected_model`` instance when one is given, otherwise the
        response text.

    Raises:
        APIAuthenticationError: If the status code is 401 or 403.
        APIValidationError: If the body does not validate against
            ``expected_model``.
        DocumentManagerClientError: For any other non-OK status, or when
            the body cannot be parsed at all.
    """
    # Authentication failures get their own exception type and default message.
    if response.status_code in (401, 403):
        try:
            error_msg = response.json().get("error", "Authentication failed")
        except Exception:
            # Body is not JSON, or not a JSON object: use the generic message.
            error_msg = "Authentication failed"
        logger.error(f"Authentication error: {error_msg}")
        raise APIAuthenticationError(error_msg)

    # Any other error status: prefer the API's own "error" field.
    if not response.ok:
        try:
            error_msg = response.json().get(
                "error", f"API error: {response.status_code}"
            )
        except Exception:
            # Unparseable body: include the raw text so the cause is not lost.
            error_msg = f"API error: {response.status_code} - {response.text}"

        logger.error(f"API error (status {response.status_code}): {error_msg}")
        raise DocumentManagerClientError(error_msg)

    if not expected_model:
        return response.text

    try:
        return expected_model(**response.json())
    except ValidationError as e:
        logger.error(f"Response validation error: {e}")
        # Chain the cause so the pydantic details stay in the traceback.
        raise APIValidationError(f"Invalid response format: {e}") from e
    except Exception as e:
        # Covers invalid JSON and bodies that are not JSON objects.
        logger.error(f"Failed to parse response: {e}")
        raise DocumentManagerClientError(f"Failed to parse response: {e}") from e
|
|
180
|
+
|
|
181
|
+
def _request(
    self,
    method: str,
    endpoint: str,
    data=None,
    is_admin: bool = False,
    expected_model=None,
):
    """Send one HTTP request to the API and hand the response to the parser.

    Args:
        method: HTTP method (GET, POST, PATCH, DELETE).
        endpoint: API endpoint path, relative to the base URL.
        data: Request body. Objects exposing ``model_dump`` are converted
            with it; anything else is passed to the session's ``json=``
            argument unchanged. ``None`` sends no body.
        is_admin: Whether this is an admin endpoint (function-key auth).
        expected_model: Pydantic model class for response parsing.

    Returns:
        Parsed response model, or the response text when no model is given.

    Raises:
        APIConnectionError: If the request times out or cannot connect.
        APIAuthenticationError: If authentication fails or the function
            key needed for an admin call is missing.
        APIValidationError: If the response fails validation.
        DocumentManagerClientError: For other errors.
    """
    url = self._get_url(endpoint)
    headers = self._get_headers(is_admin=is_admin)

    # Test against None rather than truthiness so that an explicitly
    # passed empty payload (e.g. {}) is still sent as a JSON body.
    json_data = None
    if data is not None:
        json_data = data.model_dump() if hasattr(data, "model_dump") else data

    logger.debug(f"{method} {url}")

    try:
        response = self.session.request(
            method=method,
            url=url,
            headers=headers,
            json=json_data,
            timeout=self.timeout,
        )
        return self._handle_response(response, expected_model=expected_model)

    except requests.exceptions.Timeout as e:
        logger.error(f"Request timeout after {self.timeout}s: {method} {url}")
        raise APIConnectionError(f"Request timeout after {self.timeout}s") from e

    except requests.exceptions.ConnectionError as e:
        logger.error(f"Connection error: {e}")
        raise APIConnectionError(f"Failed to connect to API: {e}") from e

    except DocumentManagerClientError:
        # Already one of our own exceptions (the base class also covers the
        # authentication and validation subclasses): re-raise untouched.
        raise

    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        raise DocumentManagerClientError(f"Unexpected error: {e}") from e
|
|
245
|
+
|
|
246
|
+
# -------------------------------------------------------------------------
|
|
247
|
+
# Health Check
|
|
248
|
+
# -------------------------------------------------------------------------
|
|
249
|
+
|
|
250
|
+
def health_check(self) -> bool:
    """Check whether the API answers ``GET health`` successfully.

    Best-effort probe: every failure is logged as a warning and reported
    as ``False`` instead of being raised.

    Returns:
        True if the request succeeded, False otherwise.
    """
    try:
        # Only success or failure matters; the response body is not used.
        self._request("GET", "health")
    except Exception as e:
        logger.warning(f"Health check failed: {e}")
        return False
    else:
        logger.info("Health check: OK")
        return True
|
|
263
|
+
|
|
264
|
+
# -------------------------------------------------------------------------
|
|
265
|
+
# Article Endpoints
|
|
266
|
+
# -------------------------------------------------------------------------
|
|
267
|
+
|
|
268
|
+
def list_articles(self) -> ListArticlesResponse:
    """Fetch the article list via ``GET articles/list``.

    Returns:
        ListArticlesResponse with articles and count.

    Raises:
        APIAuthenticationError: If authentication fails.
        DocumentManagerClientError: If the request fails.
    """
    logger.info("Listing articles")
    response_model = ListArticlesResponse
    return self._request("GET", "articles/list", expected_model=response_model)
|
|
284
|
+
|
|
285
|
+
def ingest_article(
    self,
    file_name: str,
    pdf_base64: str,
) -> ArticleResponse:
    """Ingest a PDF article via ``POST articles/ingest``.

    Args:
        file_name: Name of the PDF file.
        pdf_base64: Base64-encoded PDF content.

    Returns:
        ArticleResponse with ingestion result.

    Raises:
        APIAuthenticationError: If authentication fails.
        APIValidationError: If the arguments fail ``IngestRequest``
            validation or the response fails validation.
        DocumentManagerClientError: If the request fails.
    """
    logger.info(f"Ingesting article: {file_name}")
    try:
        request = IngestRequest(file_name=file_name, pdf_base64=pdf_base64)
    except ValidationError as e:
        logger.error(f"Request validation error: {e}")
        # Chain the cause so the field-level details stay in the traceback.
        raise APIValidationError(f"Invalid request data: {e}") from e

    return self._request(
        "POST",
        "articles/ingest",
        data=request,
        expected_model=ArticleResponse,
    )
|
|
317
|
+
|
|
318
|
+
def update_article_doi(
    self,
    file_name: str,
    doi: str,
) -> ArticleResponse:
    """Update the DOI of an article via ``PATCH articles/update-doi``.

    Args:
        file_name: Name of the article file.
        doi: New DOI value.

    Returns:
        ArticleResponse with update result.

    Raises:
        APIAuthenticationError: If authentication fails.
        APIValidationError: If the arguments fail ``UpdateDoiRequest``
            validation or the response fails validation.
        DocumentManagerClientError: If the request fails.
    """
    logger.info(f"Updating DOI for article: {file_name}")
    try:
        request = UpdateDoiRequest(file_name=file_name, doi=doi)
    except ValidationError as e:
        logger.error(f"Request validation error: {e}")
        # Chain the cause so the field-level details stay in the traceback.
        raise APIValidationError(f"Invalid request data: {e}") from e

    return self._request(
        "PATCH",
        "articles/update-doi",
        data=request,
        expected_model=ArticleResponse,
    )
|
|
350
|
+
|
|
351
|
+
def delete_article(self, file_name: str) -> DeleteArticleResponse:
    """Delete an article and its chunks via ``DELETE articles/delete``.

    The file name is sent in the JSON request body.

    Args:
        file_name: Name of the article file to delete.

    Returns:
        DeleteArticleResponse with deletion result.

    Raises:
        APIAuthenticationError: If authentication fails.
        APIValidationError: If the argument fails ``DeleteArticleRequest``
            validation or the response fails validation.
        DocumentManagerClientError: If the request fails.
    """
    logger.info(f"Deleting article: {file_name}")
    try:
        request = DeleteArticleRequest(file_name=file_name)
    except ValidationError as e:
        logger.error(f"Request validation error: {e}")
        # Chain the cause so the field-level details stay in the traceback.
        raise APIValidationError(f"Invalid request data: {e}") from e

    return self._request(
        "DELETE",
        "articles/delete",
        data=request,
        expected_model=DeleteArticleResponse,
    )
|
|
378
|
+
|
|
379
|
+
# -------------------------------------------------------------------------
|
|
380
|
+
# Search Endpoints
|
|
381
|
+
# -------------------------------------------------------------------------
|
|
382
|
+
|
|
383
|
+
def search_articles(
    self,
    query: str,
    topk: int = 5,
    min_year: Optional[int] = None,
    max_year: Optional[int] = None,
) -> ArticleSearchResult:
    """Search for articles via ``POST search/article``.

    Args:
        query: Search query string.
        topk: Number of results to return (1-100, default: 5).
        min_year: Minimum publication year filter (optional).
        max_year: Maximum publication year filter (optional).

    Returns:
        ArticleSearchResult with matching articles.

    Raises:
        APIAuthenticationError: If authentication fails.
        APIValidationError: If the arguments fail ``ArticleSearchRequest``
            validation (e.g. ``topk`` outside 1-100) or the response
            fails validation.
        DocumentManagerClientError: If the request fails.
    """
    logger.info(f"Searching articles: query='{query}', topk={topk}")
    try:
        request = ArticleSearchRequest(
            query=query,
            topk=topk,
            min_year=min_year,
            max_year=max_year,
        )
    except ValidationError as e:
        logger.error(f"Request validation error: {e}")
        # Chain the cause so the field-level details stay in the traceback.
        raise APIValidationError(f"Invalid request data: {e}") from e

    return self._request(
        "POST",
        "search/article",
        data=request,
        expected_model=ArticleSearchResult,
    )
|
|
424
|
+
|
|
425
|
+
def search_chunks(
    self,
    query: str,
    topk: int = 5,
    min_year: Optional[int] = None,
    max_year: Optional[int] = None,
    article_ids: Optional[List[str]] = None,
) -> ChunkSearchResult:
    """Search for chunks via ``POST search/chunk``.

    Args:
        query: Search query string.
        topk: Number of results to return (1-100, default: 5).
        min_year: Minimum publication year filter (optional).
        max_year: Maximum publication year filter (optional).
        article_ids: List of article IDs to search within (optional).

    Returns:
        ChunkSearchResult with matching chunks.

    Raises:
        APIAuthenticationError: If authentication fails.
        APIValidationError: If the arguments fail ``ChunkSearchRequest``
            validation (e.g. ``topk`` outside 1-100) or the response
            fails validation.
        DocumentManagerClientError: If the request fails.
    """
    logger.info(f"Searching chunks: query='{query}', topk={topk}")
    try:
        request = ChunkSearchRequest(
            query=query,
            topk=topk,
            min_year=min_year,
            max_year=max_year,
            article_ids=article_ids,
        )
    except ValidationError as e:
        logger.error(f"Request validation error: {e}")
        # Chain the cause so the field-level details stay in the traceback.
        raise APIValidationError(f"Invalid request data: {e}") from e

    return self._request(
        "POST",
        "search/chunk",
        data=request,
        expected_model=ChunkSearchResult,
    )
|
|
469
|
+
|
|
470
|
+
def summarize_articles(
    self,
    query: str,
    article_ids: List[str],
    map_instructions: Optional[str] = None,
    reduce_instructions: Optional[str] = None,
) -> SummarizeResponse:
    """Summarize articles via ``POST search/summarize``.

    Args:
        query: Query/context for summarization.
        article_ids: List of article IDs to summarize.
        map_instructions: Custom map phase instructions (optional).
        reduce_instructions: Custom reduce phase instructions (optional).

    Returns:
        SummarizeResponse with article summaries.

    Raises:
        APIAuthenticationError: If authentication fails.
        APIValidationError: If the arguments fail ``SummarizeRequest``
            validation or the response fails validation.
        DocumentManagerClientError: If the request fails.
    """
    try:
        request = SummarizeRequest(
            query=query,
            article_ids=article_ids,
            map_instructions=map_instructions,
            reduce_instructions=reduce_instructions,
        )
    except ValidationError as e:
        logger.error(f"Request validation error: {e}")
        # Chain the cause so the field-level details stay in the traceback.
        raise APIValidationError(f"Invalid request data: {e}") from e

    # Log only after validation: calling len() on an invalid article_ids
    # (e.g. None) would raise TypeError instead of APIValidationError.
    logger.info(
        f"Summarizing {len(request.article_ids)} articles with query: '{query}'"
    )

    return self._request(
        "POST",
        "search/summarize",
        data=request,
        expected_model=SummarizeResponse,
    )
|
|
511
|
+
|
|
512
|
+
# -------------------------------------------------------------------------
|
|
513
|
+
# User Management Endpoints (Admin)
|
|
514
|
+
# -------------------------------------------------------------------------
|
|
515
|
+
|
|
516
|
+
def list_users(self) -> ListUsersResponse:
    """Fetch all users and their metadata via ``GET users/list`` (ADMIN only).

    Returns:
        ListUsersResponse with users and count.

    Raises:
        APIAuthenticationError: If authentication fails or the function
            key is missing.
        DocumentManagerClientError: If the request fails.
    """
    logger.info("Listing users (admin)")
    # Admin call: authenticated with the function key, not the user API key.
    call_options = {"is_admin": True, "expected_model": ListUsersResponse}
    return self._request("GET", "users/list", **call_options)
|
|
533
|
+
|
|
534
|
+
def create_user(self, user_name: str) -> UserResponse:
    """Create a new user via ``POST users/create`` (ADMIN only).

    Args:
        user_name: Username for the new user.

    Returns:
        UserResponse with user details and API key.

    Raises:
        APIAuthenticationError: If authentication fails or the function
            key is missing.
        APIValidationError: If the argument fails ``UserRequest``
            validation or the response fails validation.
        DocumentManagerClientError: If the request fails
            (e.g., user already exists).
    """
    logger.info(f"Creating user: {user_name} (admin)")
    try:
        request = UserRequest(user_name=user_name)
    except ValidationError as e:
        logger.error(f"Request validation error: {e}")
        # Chain the cause so the field-level details stay in the traceback.
        raise APIValidationError(f"Invalid request data: {e}") from e

    return self._request(
        "POST",
        "users/create",
        data=request,
        is_admin=True,
        expected_model=UserResponse,
    )
|
|
562
|
+
|
|
563
|
+
def delete_user(self, user_name: str) -> DeleteUserResponse:
    """Delete a user and all their data via ``DELETE users/delete`` (ADMIN only).

    Args:
        user_name: Username to delete.

    Returns:
        DeleteUserResponse with deletion statistics.

    Raises:
        APIAuthenticationError: If authentication fails or the function
            key is missing.
        APIValidationError: If the argument fails ``UserRequest``
            validation or the response fails validation.
        DocumentManagerClientError: If the request fails.
    """
    logger.info(f"Deleting user: {user_name} (admin)")
    try:
        request = UserRequest(user_name=user_name)
    except ValidationError as e:
        logger.error(f"Request validation error: {e}")
        # Chain the cause so the field-level details stay in the traceback.
        raise APIValidationError(f"Invalid request data: {e}") from e

    return self._request(
        "DELETE",
        "users/delete",
        data=request,
        is_admin=True,
        expected_model=DeleteUserResponse,
    )
|
|
591
|
+
|
|
592
|
+
def regenerate_user_key(self, user_name: str) -> UserResponse:
    """Regenerate a user's API key via ``POST users/regenerate-key`` (ADMIN only).

    Args:
        user_name: Username to regenerate key for.

    Returns:
        UserResponse with new API key.

    Raises:
        APIAuthenticationError: If authentication fails or the function
            key is missing.
        APIValidationError: If the argument fails ``UserRequest``
            validation or the response fails validation.
        DocumentManagerClientError: If the request fails.
    """
    logger.info(f"Regenerating API key for user: {user_name} (admin)")
    try:
        request = UserRequest(user_name=user_name)
    except ValidationError as e:
        logger.error(f"Request validation error: {e}")
        # Chain the cause so the field-level details stay in the traceback.
        raise APIValidationError(f"Invalid request data: {e}") from e

    return self._request(
        "POST",
        "users/regenerate-key",
        data=request,
        is_admin=True,
        expected_model=UserResponse,
    )
|
|
620
|
+
|
|
621
|
+
def close(self):
    """Close the client session and cleanup resources.

    Logs the shutdown at INFO level, then closes the ``requests`` session
    created in ``__init__``. Also invoked by ``__exit__``.
    """
    logger.info("Closing API client session")
    self.session.close()
|
|
625
|
+
|
|
626
|
+
def __enter__(self):
|
|
627
|
+
"""Context manager entry."""
|
|
628
|
+
return self
|
|
629
|
+
|
|
630
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
    """Context manager exit: close the client.

    The exception arguments are ignored and nothing is returned, so an
    exception raised inside the ``with`` block still propagates.
    """
    self.close()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Document Manager API Client Errors"""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class DocumentManagerClientError(Exception):
    """Root of the client's exception hierarchy.

    The authentication, validation and connection errors all derive from
    this class, so catching it handles every error the client raises.
    """
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class APIAuthenticationError(DocumentManagerClientError):
    """Authentication failed, or a required credential was not configured."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class APIValidationError(DocumentManagerClientError):
    """A request or response failed validation against its model."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class APIConnectionError(DocumentManagerClientError):
    """The API could not be reached (connection failure or timeout)."""
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Pydantic models for request/response validation."""
|
|
2
|
+
|
|
3
|
+
from .articles import (
|
|
4
|
+
ArticleListItem,
|
|
5
|
+
ListArticlesResponse,
|
|
6
|
+
IngestRequest,
|
|
7
|
+
ArticleResponse,
|
|
8
|
+
UpdateDoiRequest,
|
|
9
|
+
DeleteArticleRequest,
|
|
10
|
+
DeleteArticleResponse,
|
|
11
|
+
)
|
|
12
|
+
from .common import (
|
|
13
|
+
ErrorResponse,
|
|
14
|
+
)
|
|
15
|
+
from .search import (
|
|
16
|
+
ArticleSearchRequest,
|
|
17
|
+
ArticleMetadata,
|
|
18
|
+
ArticleSearchResult,
|
|
19
|
+
ChunkSearchRequest,
|
|
20
|
+
ChunkMetadata,
|
|
21
|
+
ChunkSearchResult,
|
|
22
|
+
SummarizeRequest,
|
|
23
|
+
ArticleSummary,
|
|
24
|
+
SummarizeResponse,
|
|
25
|
+
)
|
|
26
|
+
from .users import (
|
|
27
|
+
UserRequest,
|
|
28
|
+
UserResponse,
|
|
29
|
+
DeleteUserResponse,
|
|
30
|
+
UserMetadata,
|
|
31
|
+
ListUsersResponse,
|
|
32
|
+
)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Models for article-related API operations."""
|
|
2
|
+
|
|
3
|
+
from typing import List
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ArticleListItem(BaseModel):
    """Metadata of one article, as carried in ``ListArticlesResponse.articles``.

    Every field is required; none declares a default.
    """

    file_name: str
    authors: str  # a single string, not a list
    publisher: str
    pub_year: int  # presumably the publication year — confirm against the API
    doi: str
    paper_title: str
    abstract: str
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ListArticlesResponse(BaseModel):
    """Response model for listing articles (``GET articles/list``)."""

    articles: List[ArticleListItem]
    count: int  # presumably len(articles); this model does not enforce it
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class IngestRequest(BaseModel):
    """Request model for PDF ingestion.

    Both fields are required (``...``) and must be non-empty strings
    (``min_length=1``).
    """

    file_name: str = Field(..., min_length=1, description="Name of the PDF file")
    pdf_base64: str = Field(..., min_length=1, description="Base64-encoded PDF content")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ArticleResponse(BaseModel):
    """Response model for article operations (ingest, update DOI).

    All four fields are required in the API reply.
    """

    success: bool
    file_name: str
    doi: str
    message: str
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class UpdateDoiRequest(BaseModel):
    """Request model for updating article DOI.

    Unlike ``IngestRequest``, no length constraint is declared, so empty
    strings pass validation here.
    """

    file_name: str
    doi: str
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class DeleteArticleRequest(BaseModel):
    """Request model for deleting an article, identified by its file name."""

    file_name: str  # required; no length constraint is declared
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class DeleteArticleResponse(BaseModel):
    """Response model for article deletion."""

    success: bool
    file_name: str
    chunks_deleted: int  # presumably the number of chunks removed with the article
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Models for search-related API operations."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ArticleSearchRequest(BaseModel):
    """Request model for article search.

    Only ``query`` is required; the year filters default to ``None``.
    """

    query: str
    topk: int = Field(default=5, ge=1, le=100)  # must be within 1..100 inclusive
    min_year: Optional[int] = None
    max_year: Optional[int] = None  # not cross-checked against min_year here
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ArticleMetadata(BaseModel):
    """Metadata of one article, as carried in ``ArticleSearchResult.results``.

    Same fields as ``ArticleListItem`` plus ``article_id``; all are required.
    """

    article_id: str
    file_name: str
    authors: str  # a single string, not a list
    publisher: str
    pub_year: int
    doi: str
    paper_title: str
    abstract: str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ArticleSearchResult(BaseModel):
    """Response model for article search: the matching articles and a count."""

    results: List[ArticleMetadata]
    count: int  # presumably len(results); this model does not enforce it
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ChunkSearchRequest(BaseModel):
    """Request model for chunk search.

    Same fields as ``ArticleSearchRequest`` plus an optional ``article_ids``
    list; only ``query`` is required.
    """

    query: str
    topk: int = Field(default=5, ge=1, le=100)  # must be within 1..100 inclusive
    min_year: Optional[int] = None
    max_year: Optional[int] = None
    article_ids: Optional[List[str]] = None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class ChunkMetadata(BaseModel):
    """One chunk with its metadata, as carried in ``ChunkSearchResult.results``.

    Holds the chunk's own fields together with fields of the article it
    belongs to; all are required.
    """

    chunk_id: str
    article_id: str
    file_name: str
    pub_year: int
    doi: str
    paper_title: str
    chunk_index: int  # NOTE(review): whether indices start at 0 or 1 is not visible here
    section_index: int
    section_title: str
    content: str
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ChunkSearchResult(BaseModel):
    """Response model for chunk search: the matching chunks and a count."""

    results: List[ChunkMetadata]
    count: int  # presumably len(results); this model does not enforce it
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class SummarizeRequest(BaseModel):
    """Request model for article summarization.

    ``query`` and ``article_ids`` are required; both instruction fields
    default to ``None``.
    """

    query: str
    article_ids: List[str]  # an empty list passes validation (no min length declared)
    map_instructions: Optional[str] = None
    reduce_instructions: Optional[str] = None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class ArticleSummary(BaseModel):
    """Summary of one article, as carried in ``SummarizeResponse.results``."""

    success: bool  # per-article flag, separate from the HTTP status of the call
    article_id: str
    title: str
    summary: str
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class SummarizeResponse(BaseModel):
    """Response model for article summarization: one entry per summary, plus a count."""

    results: List[ArticleSummary]
    count: int  # presumably len(results); this model does not enforce it
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Models for user-related API operations."""
|
|
2
|
+
|
|
3
|
+
from typing import List
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class UserRequest(BaseModel):
    """Request model for user operations (create, delete, regenerate key)."""

    user_name: str  # required; no length constraint is declared
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class UserResponse(BaseModel):
    """Response model for user creation and key regeneration."""

    success: bool
    user_name: str
    api_key: str  # presumably the newly issued key — treat as a secret when logging
    message: str
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DeleteUserResponse(BaseModel):
    """Response model for user deletion, including deletion counts."""

    success: bool
    user_name: str
    articles_deleted: int
    chunks_deleted: int
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class UserMetadata(BaseModel):
    """Metadata of one user, as carried in ``ListUsersResponse.users``."""

    user_name: str
    created_at: str  # kept as a plain string; this model does no datetime parsing
    article_count: int
    chunk_count: int
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ListUsersResponse(BaseModel):
    """Response model for listing users (``GET users/list``)."""

    users: List[UserMetadata]
    count: int  # presumably len(users); this model does not enforce it
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: document_manager
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Library to interact with document management API
|
|
5
|
+
Author-email: Gergo Ferenczy <ifj.ferenczy.gergo@gmail.com>
|
|
6
|
+
License: Proprietary
|
|
7
|
+
Keywords: document management,api,library
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: Other/Proprietary License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.12
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: requests
|
|
14
|
+
Requires-Dist: pydantic==2.10.2
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: black; extra == "dev"
|
|
17
|
+
Requires-Dist: isort; extra == "dev"
|
|
18
|
+
Requires-Dist: build; extra == "dev"
|
|
19
|
+
Requires-Dist: twine; extra == "dev"
|
|
20
|
+
|
|
21
|
+
# Document Manager
|
|
22
|
+
|
|
23
|
+
A Python library for interacting with the Document Management API. It provides a client for managing users and articles and for performing full-text search operations.
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install document_manager
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from document_manager.client import DocumentManagerClient
|
|
35
|
+
|
|
36
|
+
client = DocumentManagerClient(
|
|
37
|
+
base_url="https://api.example.com",
|
|
38
|
+
api_key="your-api-key"
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Check API health
|
|
42
|
+
print(client.health_check())
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Features
|
|
46
|
+
|
|
47
|
+
- **User Management**: Create, list, and delete users, and regenerate their API keys
|
|
48
|
+
- **Article Management**: List, ingest, and delete articles, and update their DOIs
|
|
49
|
+
- **Search**: Full-text search on articles and chunks
|
|
50
|
+
- **Summarization**: Generate summaries from articles
|
|
51
|
+
- **Type Safety**: Pydantic models for request/response validation
|
|
52
|
+
|
|
53
|
+
## Documentation
|
|
54
|
+
|
|
55
|
+
See the [client module](src/document_manager/client/) for available methods and [types module](src/document_manager/types/) for data models.
|
|
56
|
+
|
|
57
|
+
## Requirements
|
|
58
|
+
|
|
59
|
+
- Python >= 3.12
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
document_manager/__init__.py,sha256=sn-5nFGl0sEM6mv_AfyerRMeobnmqAlNUMVJMcqd514,430
|
|
2
|
+
document_manager/client/__init__.py,sha256=aU4sRKVIkiXKuVBwrdhyzoZfJJdJ4mNd3E-Wc_9a6L4,242
|
|
3
|
+
document_manager/client/document_manager.py,sha256=RIpRbuM78Mjb9Rp707ELi2Xceo0K1YxPc59wITIbLU4,20759
|
|
4
|
+
document_manager/client/errors.py,sha256=xyWUcpc1kuwYmCb7KijBWTI6r873I8SO_8vmmjZLt18,496
|
|
5
|
+
document_manager/types/__init__.py,sha256=0BeQjGmK8s9BrEII2W29afVpeBNXlBtaxjzispTwesA,637
|
|
6
|
+
document_manager/types/articles.py,sha256=L63hGaoM0uYk2nEbP_0ZrnoCQB_tBmAcukFXq6iarLA,1253
|
|
7
|
+
document_manager/types/common.py,sha256=QfvhBtonAok2daYeoGFNWtRSshZ563NvFUIS2jzyzpc,163
|
|
8
|
+
document_manager/types/search.py,sha256=CCX61uG7JvHjtEgNx0plDKYBD_qErX9wE4Q-67b1cp4,1832
|
|
9
|
+
document_manager/types/users.py,sha256=t9eg7LDwX78sf-8SvOiwWDhFIpvAqT9xF0xzRZzsjAE,849
|
|
10
|
+
document_manager-0.0.1.dist-info/METADATA,sha256=an1phTleSf2AI1Wgq_Skpyqz3EqGr03AR-ME_A2aLEk,1652
|
|
11
|
+
document_manager-0.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
12
|
+
document_manager-0.0.1.dist-info/top_level.txt,sha256=_d2v1HJMIwEZfZoPmTuIKJ-EVXi0Srhvc8Fedt2E-L0,17
|
|
13
|
+
document_manager-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
document_manager
|