putplace 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of putplace might be problematic. Click here for more details.

putplace/database.py ADDED
@@ -0,0 +1,387 @@
1
+ """MongoDB database connection and operations."""
2
+
3
+ import logging
4
+ from typing import Optional
5
+
6
+ from pymongo import AsyncMongoClient
7
+ from pymongo.asynchronous.collection import AsyncCollection
8
+ from pymongo.errors import (
9
+ ConnectionFailure,
10
+ DuplicateKeyError,
11
+ OperationFailure,
12
+ ServerSelectionTimeoutError,
13
+ )
14
+
15
+ from .config import settings
16
+
17
+ logger = logging.getLogger(__name__)  # module-level logger, named after this module
18
+
19
+
20
class MongoDB:
    """MongoDB connection manager.

    Holds one ``AsyncMongoClient`` plus handles to the file-metadata
    collection and the ``users`` collection. Every handle is ``None`` until
    :meth:`connect` succeeds, and is reset to ``None`` by :meth:`close` or by
    a failed connection attempt, so the query methods can consistently raise
    ``RuntimeError("Database not connected")``.
    """

    # Quoted forward references: only type checkers need to resolve them.
    client: Optional["AsyncMongoClient"] = None
    collection: Optional["AsyncCollection"] = None
    users_collection: Optional["AsyncCollection"] = None

    @staticmethod
    def _redact_url(url) -> str:
        """Return *url* with any ``user:password@`` userinfo replaced by ``***@``.

        Connection strings frequently embed credentials; they must never be
        written to logs or exception messages.
        """
        text = str(url)
        scheme, separator, remainder = text.partition("://")
        if not separator:
            return text
        # Reserved characters inside credentials must be percent-encoded, so
        # the first "/" ends the authority and the last "@" ends the userinfo.
        authority, slash, tail = remainder.partition("/")
        _userinfo, at_sign, hosts = authority.rpartition("@")
        if not at_sign:
            return text
        return f"{scheme}://***@{hosts}{slash}{tail}"

    @staticmethod
    def _duplicate_field(error: Exception) -> Optional[str]:
        """Return ``"username"`` or ``"email"`` for a duplicate-key error, else ``None``.

        Prefers the structured ``keyPattern`` from the error details. The
        plain message also contains the duplicated *value*, so substring
        matching on it is only used as a fallback when no ``keyPattern`` is
        available.
        """
        details = getattr(error, "details", None)
        key_pattern = details.get("keyPattern") if details else None
        if key_pattern:
            for field in ("username", "email"):
                if field in key_pattern:
                    return field
            return None

        message = str(error)
        for field in ("username", "email"):
            if field in message:
                return field
        return None

    def _reset_handles(self) -> None:
        """Forget the client and both collection handles."""
        self.client = None
        self.collection = None
        self.users_collection = None

    async def _discard_client(self) -> None:
        """Reset all handles and close the client on a best-effort basis."""
        client = self.client
        self._reset_handles()
        if client is None:
            return
        try:
            await client.close()
        except Exception as exc:
            # Cleanup after a failure must not mask the original error.
            logger.debug(f"Ignoring error while closing MongoDB client: {exc}")

    async def connect(self) -> None:
        """Connect to MongoDB and make sure the required indexes exist.

        On any failure the partially created client is closed and every
        handle is reset to ``None``.

        Raises:
            ConnectionFailure: If unable to connect to MongoDB, if server
                selection times out, or if an unexpected error occurs
            OperationFailure: If authentication or other operation fails
        """
        try:
            logger.info(
                f"Connecting to MongoDB at {self._redact_url(settings.mongodb_url)}"
            )
            self.client = AsyncMongoClient(
                settings.mongodb_url,
                serverSelectionTimeoutMS=5000,  # 5 second timeout
            )

            # Verify connection by pinging the server
            await self.client.admin.command("ping")
            logger.info("Successfully connected to MongoDB")

            db = self.client[settings.mongodb_database]
            self.collection = db[settings.mongodb_collection]
            self.users_collection = db["users"]

            # Indexes backing the file-metadata lookups
            await self.collection.create_index("sha256")
            await self.collection.create_index([("hostname", 1), ("filepath", 1)])
            await self.collection.create_index("uploaded_by_user_id")
            logger.info("File metadata indexes created successfully")

            # Indexes for the API keys collection
            api_keys_collection = db["api_keys"]
            await api_keys_collection.create_index("key_hash", unique=True)
            await api_keys_collection.create_index([("is_active", 1)])
            logger.info("API keys indexes created successfully")

            # Unique indexes for the users collection
            await self.users_collection.create_index("username", unique=True)
            await self.users_collection.create_index("email", unique=True)
            logger.info("Users indexes created successfully")

        # ServerSelectionTimeoutError is a ConnectionFailure subclass, so it
        # has to be handled first.
        except ServerSelectionTimeoutError as e:
            logger.error(f"MongoDB connection timeout: {e}")
            await self._discard_client()
            raise ConnectionFailure(
                f"Could not connect to MongoDB at {self._redact_url(settings.mongodb_url)}"
            ) from e
        except ConnectionFailure as e:
            logger.error(f"MongoDB connection failed: {e}")
            await self._discard_client()
            raise
        except OperationFailure as e:
            logger.error(f"MongoDB operation failed (check authentication): {e}")
            await self._discard_client()
            raise
        except Exception as e:
            logger.error(f"Unexpected error connecting to MongoDB: {e}")
            await self._discard_client()
            raise ConnectionFailure(f"Unexpected error connecting to MongoDB: {e}") from e

    async def close(self) -> None:
        """Close the MongoDB connection and reset all handles.

        Does nothing when no client is open. Errors raised while closing
        propagate, but the handles are cleared either way.
        """
        client = self.client
        if client is None:
            return

        logger.info("Closing MongoDB connection")
        try:
            await client.close()
        finally:
            # Never leave handles pointing at a closed client.
            self._reset_handles()

    async def is_healthy(self) -> bool:
        """Check if database connection is healthy.

        Returns:
            True if database is reachable, False otherwise
        """
        if self.client is None or self.collection is None:
            return False

        try:
            # Ping the database to verify connection
            await self.client.admin.command("ping")
        except Exception as e:
            logger.warning(f"Database health check failed: {e}")
            return False
        return True

    async def insert_file_metadata(self, data: dict) -> str:
        """Insert file metadata into MongoDB.

        Args:
            data: File metadata dictionary (not modified)

        Returns:
            Inserted document ID

        Raises:
            RuntimeError: If database not connected
            ConnectionFailure: If database connection is lost
            OperationFailure: If database operation fails
        """
        if self.collection is None:
            raise RuntimeError("Database not connected")

        try:
            # insert_one adds an _id field to the dict it is given, so hand
            # it a shallow copy and leave the caller's dict untouched.
            result = await self.collection.insert_one(data.copy())
            return str(result.inserted_id)
        except (ConnectionFailure, ServerSelectionTimeoutError) as e:
            logger.error(f"Database connection lost during insert: {e}")
            raise ConnectionFailure("Lost connection to database") from e
        except OperationFailure as e:
            logger.error(f"Database operation failed during insert: {e}")
            raise

    async def find_by_sha256(self, sha256: str) -> Optional[dict]:
        """Find file metadata by SHA256 hash.

        Args:
            sha256: SHA256 hash to search for

        Returns:
            File metadata document or None if not found

        Raises:
            RuntimeError: If database not connected
            ConnectionFailure: If database connection is lost
            OperationFailure: If database operation fails
        """
        if self.collection is None:
            raise RuntimeError("Database not connected")

        try:
            return await self.collection.find_one({"sha256": sha256})
        except (ConnectionFailure, ServerSelectionTimeoutError) as e:
            logger.error(f"Database connection lost during find: {e}")
            raise ConnectionFailure("Lost connection to database") from e
        except OperationFailure as e:
            logger.error(f"Database operation failed during find: {e}")
            raise

    async def has_file_content(self, sha256: str) -> bool:
        """Check if server already has file content for this SHA256.

        Args:
            sha256: SHA256 hash to check

        Returns:
            True if file content exists, False otherwise

        Raises:
            RuntimeError: If database not connected
            ConnectionFailure: If database connection is lost
            OperationFailure: If database operation fails
        """
        if self.collection is None:
            raise RuntimeError("Database not connected")

        try:
            # Any record with this hash that is flagged as uploaded is enough
            result = await self.collection.find_one(
                {"sha256": sha256, "has_file_content": True}
            )
            return result is not None
        except (ConnectionFailure, ServerSelectionTimeoutError) as e:
            logger.error(f"Database connection lost during has_file_content check: {e}")
            raise ConnectionFailure("Lost connection to database") from e
        except OperationFailure as e:
            logger.error(f"Database operation failed during has_file_content check: {e}")
            raise

    async def mark_file_uploaded(self, sha256: str, hostname: str, filepath: str, storage_path: str) -> bool:
        """Mark that file content has been uploaded for a specific metadata record.

        Args:
            sha256: SHA256 hash of the file
            hostname: Hostname where file is located
            filepath: Full path to the file
            storage_path: Full storage path where file is stored (local path or S3 URI)

        Returns:
            True if a matching record was found and updated, False if not found

        Raises:
            RuntimeError: If database not connected
            ConnectionFailure: If database connection is lost
            OperationFailure: If database operation fails
        """
        if self.collection is None:
            raise RuntimeError("Database not connected")

        try:
            from datetime import datetime, timezone

            result = await self.collection.update_one(
                {"sha256": sha256, "hostname": hostname, "filepath": filepath},
                {
                    "$set": {
                        "has_file_content": True,
                        # Aware UTC timestamp; stored as the same instant
                        # the deprecated utcnow() would have produced.
                        "file_uploaded_at": datetime.now(timezone.utc),
                        "storage_path": storage_path,
                    }
                },
            )
            # matched_count answers "was the record found?"; modified_count
            # is 0 when the stored values already equal the new ones.
            return result.matched_count > 0
        except (ConnectionFailure, ServerSelectionTimeoutError) as e:
            logger.error(f"Database connection lost during mark_file_uploaded: {e}")
            raise ConnectionFailure("Lost connection to database") from e
        except OperationFailure as e:
            logger.error(f"Database operation failed during mark_file_uploaded: {e}")
            raise

    async def get_files_by_user(self, user_id: str, limit: int = 100, skip: int = 0) -> list[dict]:
        """Get all files uploaded by a specific user.

        Args:
            user_id: User ID to filter by
            limit: Maximum number of files to return
            skip: Number of files to skip (for pagination)

        Returns:
            List of file metadata documents, newest ``created_at`` first,
            with ``_id`` converted to a string

        Raises:
            RuntimeError: If database not connected
            ConnectionFailure: If database connection is lost
            OperationFailure: If database operation fails
        """
        if self.collection is None:
            raise RuntimeError("Database not connected")

        try:
            cursor = self.collection.find(
                {"uploaded_by_user_id": user_id}
            ).sort("created_at", -1).limit(limit).skip(skip)

            files = []
            async for doc in cursor:
                doc["_id"] = str(doc["_id"])
                files.append(doc)

            return files
        except (ConnectionFailure, ServerSelectionTimeoutError) as e:
            logger.error(f"Database connection lost during get_files_by_user: {e}")
            raise ConnectionFailure("Lost connection to database") from e
        except OperationFailure as e:
            logger.error(f"Database operation failed during get_files_by_user: {e}")
            raise

    async def get_files_by_sha256(self, sha256: str) -> list[dict]:
        """Get all files with a specific SHA256 hash (across all users).

        Args:
            sha256: SHA256 hash to search for

        Returns:
            List of file metadata documents with ``_id`` converted to a
            string. Records with file content come first (earliest upload
            first), then metadata-only records (earliest created first).
            Records lacking the relevant timestamp sort last in their group.

        Raises:
            RuntimeError: If database not connected
            ConnectionFailure: If database connection is lost
            OperationFailure: If database operation fails
        """
        if self.collection is None:
            raise RuntimeError("Database not connected")

        try:
            cursor = self.collection.find({"sha256": sha256})

            files = []
            async for doc in cursor:
                doc["_id"] = str(doc["_id"])
                files.append(doc)

            def sort_key(file: dict) -> tuple:
                if file.get("has_file_content"):
                    group = 0
                    stamp = file.get("file_uploaded_at")
                    if stamp is None:
                        stamp = file.get("created_at")
                else:
                    group = 1
                    stamp = file.get("created_at")
                # The "missing" flag is compared before the timestamp, so a
                # None timestamp is only ever compared with another None
                # (equal) and never with a datetime (TypeError).
                return (group, stamp is None, stamp)

            files.sort(key=sort_key)
            return files

        except (ConnectionFailure, ServerSelectionTimeoutError) as e:
            logger.error(f"Database connection lost during get_files_by_sha256: {e}")
            raise ConnectionFailure("Lost connection to database") from e
        except OperationFailure as e:
            logger.error(f"Database operation failed during get_files_by_sha256: {e}")
            raise

    # User authentication methods

    async def create_user(self, username: str, email: str, hashed_password: str, full_name: Optional[str] = None) -> str:
        """Create a new user.

        Args:
            username: User's username
            email: User's email
            hashed_password: Hashed password
            full_name: User's full name (optional)

        Returns:
            Inserted user document ID

        Raises:
            RuntimeError: If database not connected
            DuplicateKeyError: If username or email already exists
        """
        if self.users_collection is None:
            raise RuntimeError("Database not connected")

        from datetime import datetime, timezone

        user_data = {
            "username": username,
            "email": email,
            "hashed_password": hashed_password,
            "full_name": full_name,
            "is_active": True,
            "created_at": datetime.now(timezone.utc),
        }

        try:
            result = await self.users_collection.insert_one(user_data)
        except DuplicateKeyError as e:
            field = self._duplicate_field(e)
            if field == "username":
                raise DuplicateKeyError("Username already exists") from e
            if field == "email":
                raise DuplicateKeyError("Email already exists") from e
            raise
        return str(result.inserted_id)

    async def get_user_by_username(self, username: str) -> Optional[dict]:
        """Get user by username.

        Args:
            username: Username to search for

        Returns:
            User document or None if not found

        Raises:
            RuntimeError: If database not connected
        """
        if self.users_collection is None:
            raise RuntimeError("Database not connected")

        return await self.users_collection.find_one({"username": username})

    async def get_user_by_email(self, email: str) -> Optional[dict]:
        """Get user by email.

        Args:
            email: Email to search for

        Returns:
            User document or None if not found

        Raises:
            RuntimeError: If database not connected
        """
        if self.users_collection is None:
            raise RuntimeError("Database not connected")

        return await self.users_collection.find_one({"email": email})
384
+
385
+
386
+ # Global database instance, created at import time; it opens no connection until connect() is awaited
387
+ mongodb = MongoDB()