nucliadb-utils 4.0.3.post573__py3-none-any.whl → 4.0.3.post575__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
@@ -1,561 +0,0 @@
- # Copyright (C) 2021 Bosutech XXI S.L.
- #
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
- # For commercial licensing, contact us at info@nuclia.com.
- #
- # AGPL:
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Affero General Public License as
- # published by the Free Software Foundation, either version 3 of the
- # License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- from unittest.mock import ANY, AsyncMock, MagicMock, call, patch
-
- import pytest
- from nucliadb_protos.resources_pb2 import CloudFile
-
- from nucliadb_utils.storages import pg
-
- pytestmark = pytest.mark.asyncio
-
-
- async def iter_result(data):
-     for item in data:
-         yield item
-
-
- @pytest.fixture
- def transaction():
-     yield MagicMock(return_value=AsyncMock())
-
-
- @pytest.fixture
- def connection(transaction):
-     mock = AsyncMock()
-     mock.transaction = MagicMock(return_value=transaction)
-     yield mock
-
-
- @pytest.fixture
- def pool(connection):
-     acquire = MagicMock(return_value=AsyncMock())
-     acquire.return_value.__aenter__.return_value = connection
-     pool = AsyncMock()
-     pool.acquire = acquire
-     with patch(
-         "nucliadb_utils.storages.pg.asyncpg.create_pool", AsyncMock(return_value=pool)
-     ):
-         yield pool
-
-
- @pytest.fixture
- def data_layer(connection):
-     yield pg.PostgresFileDataLayer(connection)
-
-
- @pytest.fixture
- def storage(data_layer, pool):
-     with patch(
-         "nucliadb_utils.storages.pg.PostgresFileDataLayer", return_value=data_layer
-     ):
-         storage = pg.PostgresStorage("dsn")
-         storage.pool = pool
-         yield storage
-
-
- @pytest.fixture
- def storage_field(storage):
-     yield pg.PostgresStorageField(storage, "bucket", "fullkey")
-
-
- @pytest.fixture
- def chunk_info():
-     yield [
-         {
-             "filename": "filename",
-             "size": 5,
-             "content_type": "content_type",
-             "part_id": 0,
-         },
-         {
-             "filename": "filename",
-             "size": 5,
-             "content_type": "content_type",
-             "part_id": 1,
-         },
-         {
-             "filename": "filename",
-             "size": 5,
-             "content_type": "content_type",
-             "part_id": 2,
-         },
-     ]
-
-
- @pytest.fixture
- def chunk_data():
-     yield [
-         {
-             "part_id": 0,
-             "size": 5,
-             "data": b"data1",
-         },
-         {
-             "part_id": 1,
-             "size": 5,
-             "data": b"data2",
-         },
-         {
-             "part_id": 2,
-             "size": 5,
-             "data": b"data3",
-         },
-     ]
-
-
- class TestPostgresFileDataLayer:
-     async def test_delete_kb(self, data_layer: pg.PostgresFileDataLayer, connection):
-         assert await data_layer.delete_kb("test_kb")
-
-         assert connection.execute.call_count == 2
-
-         connection.execute.assert_has_awaits(
-             [
-                 call(ANY, "test_kb"),
-                 call(ANY, "test_kb"),
-             ]
-         )
-
-     async def test_create_file(self, data_layer: pg.PostgresFileDataLayer, connection):
-         await data_layer.create_file(
-             kb_id="kb_id",
-             file_id="file_id",
-             filename="filename",
-             size=1,
-             content_type="content_type",
-         )
-
-         connection.execute.assert_awaited_once_with(
-             ANY,
-             "kb_id",
-             "file_id",
-             "filename",
-             1,
-             "content_type",
-         )
-
-     async def test_delete_file(self, data_layer: pg.PostgresFileDataLayer, connection):
-         await data_layer.delete_file("test_kb", "test_file")
-
-         connection.execute.assert_awaited_with(ANY, "test_kb", "test_file")
-
-     async def test_append_chunk(self, data_layer: pg.PostgresFileDataLayer, connection):
-         await data_layer.append_chunk(kb_id="kb_id", file_id="file_id", data=b"data")
-
-         connection.execute.assert_awaited_once_with(ANY, "kb_id", "file_id", b"data", 4)
-
-     async def test_get_file_info(
-         self, data_layer: pg.PostgresFileDataLayer, connection
-     ):
-         record = {
-             "filename": "filename",
-             "size": 1,
-             "content_type": "content_type",
-             "file_id": "file_id",
-         }
-         connection.fetchrow.return_value = record
-         assert await data_layer.get_file_info(
-             kb_id="kb_id", file_id="file_id"
-         ) == pg.FileInfo(
-             filename=record["filename"],  # type: ignore
-             size=record["size"],  # type: ignore
-             content_type=record["content_type"],  # type: ignore
-             key=record["file_id"],  # type: ignore
-         )
-
-         connection.fetchrow.assert_awaited_once_with(ANY, "kb_id", "file_id")
-
-     async def test_get_file_info_none(
-         self, data_layer: pg.PostgresFileDataLayer, connection
-     ):
-         connection.fetchrow.return_value = None
-         assert await data_layer.get_file_info(kb_id="kb_id", file_id="file_id") is None
-
-     async def test_move(self, data_layer: pg.PostgresFileDataLayer, connection):
-         await data_layer.move(
-             origin_key="origin_key",
-             destination_key="destination_key",
-             origin_kb="origin_kb",
-             destination_kb="destination_kb",
-         )
-
-         assert connection.execute.call_count == 4
-
-         connection.execute.assert_has_awaits(
-             [
-                 call(ANY, "destination_kb", "destination_key"),
-                 call(
-                     ANY, "destination_kb", "destination_key", "origin_kb", "origin_key"
-                 ),
-                 call(ANY, "destination_kb", "destination_key"),
-                 call(
-                     ANY, "destination_kb", "destination_key", "origin_kb", "origin_key"
-                 ),
-             ]
-         )
-
-     async def test_copy(self, data_layer: pg.PostgresFileDataLayer, connection):
-         await data_layer.copy(
-             origin_key="origin_key",
-             destination_key="destination_key",
-             origin_kb="origin_kb",
-             destination_kb="destination_kb",
-         )
-
-         assert connection.execute.call_count == 2
-
-         connection.execute.assert_has_awaits(
-             [
-                 call(
-                     ANY, "destination_kb", "destination_key", "origin_kb", "origin_key"
-                 ),
-                 call(
-                     ANY, "destination_kb", "destination_key", "origin_kb", "origin_key"
-                 ),
-             ]
-         )
-
-     async def test_iterate_kb(self, data_layer: pg.PostgresFileDataLayer, connection):
-         connection.cursor = MagicMock(
-             return_value=iter_result(
-                 [
-                     {
-                         "file_id": "file_id",
-                         "filename": "filename",
-                         "size": 1,
-                         "content_type": "content_type",
-                     },
-                     {
-                         "file_id": "file_id",
-                         "filename": "filename",
-                         "size": 1,
-                         "content_type": "content_type",
-                     },
-                 ]
-             )
-         )
-
-         async for file_info in data_layer.iterate_kb("kb_id", "prefix"):
-             assert file_info == pg.FileInfo(
-                 filename="filename",
-                 size=1,
-                 content_type="content_type",
-                 key="file_id",
-             )
-
-         connection.cursor.assert_called_once_with(ANY, "kb_id", "prefix%")
-
-     async def test_iterate_range(
-         self, data_layer: pg.PostgresFileDataLayer, connection, chunk_info, chunk_data
-     ):
-         connection.fetch.return_value = chunk_info
-         connection.fetchrow.side_effect = chunk_data
-
-         chunks = []
-         async for chunk in data_layer.iterate_range(
-             kb_id="kb_id", file_id="file_id", start=3, end=8
-         ):
-             chunks.append(chunk)
-
-         assert chunks == [b"a1", b"dat"]
-
-     async def test_iterate_range_start_part(
-         self, data_layer: pg.PostgresFileDataLayer, connection, chunk_info, chunk_data
-     ):
-         connection.fetch.return_value = chunk_info
-         connection.fetchrow.side_effect = chunk_data
-
-         chunks = []
-         async for chunk in data_layer.iterate_range(
-             kb_id="kb_id", file_id="file_id", start=0, end=5
-         ):
-             chunks.append(chunk)
-
-         assert chunks == [b"data1"]
-
-     async def test_iterate_range_middle_part(
-         self, data_layer: pg.PostgresFileDataLayer, connection, chunk_info, chunk_data
-     ):
-         connection.fetch.return_value = chunk_info
-         connection.fetchrow.side_effect = chunk_data[1:]
-
-         chunks = []
-         async for chunk in data_layer.iterate_range(
-             kb_id="kb_id", file_id="file_id", start=5, end=10
-         ):
-             chunks.append(chunk)
-
-         assert chunks == [b"data2"]
-
-     async def test_iterate_range_end_part(
-         self, data_layer: pg.PostgresFileDataLayer, connection, chunk_info, chunk_data
-     ):
-         connection.fetch.return_value = chunk_info
-         connection.fetchrow.side_effect = chunk_data[2:]
-
-         chunks = []
-         async for chunk in data_layer.iterate_range(
-             kb_id="kb_id", file_id="file_id", start=10, end=15
-         ):
-             chunks.append(chunk)
-
-         assert chunks == [b"data3"]
-
-     async def test_iterate_range_cross_all(
-         self, data_layer: pg.PostgresFileDataLayer, connection, chunk_info, chunk_data
-     ):
-         connection.fetch.return_value = chunk_info
-         connection.fetchrow.side_effect = chunk_data
-
-         chunks = []
-         async for chunk in data_layer.iterate_range(
-             kb_id="kb_id", file_id="file_id", start=2, end=13
-         ):
-             chunks.append(chunk)
-
-         assert chunks == [b"ta1", b"data2", b"dat"]
-
-
- class TestPostgresStorageField:
-     @pytest.fixture()
-     def field(self):
-         yield CloudFile(uri="uri", bucket_name="bucket_name")
-
-     async def test_move(self, storage_field: pg.PostgresStorageField, connection):
-         await storage_field.move(
-             "origin_uri",
-             "destination_uri",
-             "origin_bucket_name",
-             "destination_bucket_name",
-         )
-
-         assert connection.execute.call_count == 4
-
-         connection.execute.assert_has_awaits(
-             [
-                 call(ANY, "destination_bucket_name", "destination_uri"),
-                 call(
-                     ANY,
-                     "destination_bucket_name",
-                     "destination_uri",
-                     "origin_bucket_name",
-                     "origin_uri",
-                 ),
-                 call(ANY, "destination_bucket_name", "destination_uri"),
-                 call(
-                     ANY,
-                     "destination_bucket_name",
-                     "destination_uri",
-                     "origin_bucket_name",
-                     "origin_uri",
-                 ),
-             ]
-         )
-
-     async def test_copy(self, storage_field: pg.PostgresStorageField, connection):
-         await storage_field.copy(
-             "origin_uri",
-             "destination_uri",
-             "origin_bucket_name",
-             "destination_bucket_name",
-         )
-
-         assert connection.execute.call_count == 2
-
-         connection.execute.assert_has_awaits(
-             [
-                 call(
-                     ANY,
-                     "destination_bucket_name",
-                     "destination_uri",
-                     "origin_bucket_name",
-                     "origin_uri",
-                 ),
-                 call(
-                     ANY,
-                     "destination_bucket_name",
-                     "destination_uri",
-                     "origin_bucket_name",
-                     "origin_uri",
-                 ),
-             ]
-         )
-
-     async def test_iter_data(
-         self,
-         storage_field: pg.PostgresStorageField,
-         connection,
-         chunk_info,
-         chunk_data,
-         field,
-     ):
-         storage_field.field = field
-         connection.fetch.return_value = chunk_info
-         connection.fetchrow.side_effect = chunk_data
-
-         chunks = []
-         async for chunk in storage_field.iter_data():
-             chunks.append(chunk)
-
-         assert chunks == [b"data1", b"data2", b"data3"]
-
-     async def test_read_range(
-         self,
-         storage_field: pg.PostgresStorageField,
-         connection,
-         chunk_info,
-         chunk_data,
-         field,
-     ):
-         storage_field.field = field
-         connection.fetch.return_value = chunk_info
-         connection.fetchrow.side_effect = chunk_data
-
-         chunks = []
-         async for chunk in storage_field.read_range(0, 15):
-             chunks.append(chunk)
-
-         assert chunks == [b"data1", b"data2", b"data3"]
-
-     async def test_start(
-         self,
-         storage_field: pg.PostgresStorageField,
-         connection,
-         field,
-     ):
-         field.upload_uri = "upload_uri"
-         storage_field.field = field
-
-         new_field = await storage_field.start(field)
-         assert new_field.upload_uri != "upload_uri"
-
-         assert connection.execute.call_count == 3
-
-     async def test_append(
-         self,
-         storage_field: pg.PostgresStorageField,
-         connection,
-         field,
-     ):
-         field.upload_uri = "upload_uri"
-         storage_field.field = field
-
-         await storage_field.append(field, iter_result([b"test1", b"test2"]))
-
-         assert field.offset == 10
-
-         assert connection.execute.call_count == 2
-
-     async def test_finish(
-         self,
-         storage_field: pg.PostgresStorageField,
-         connection,
-         field,
-     ):
-         field.upload_uri = "upload_uri"
-         field.old_uri = "old_uri"
-         storage_field.field = field
-
-         await storage_field.finish()
-         assert field.uri == storage_field.key
-
-         assert connection.execute.call_count == 6
-
-     async def test_upload(
-         self,
-         storage_field: pg.PostgresStorageField,
-         connection,
-         field,
-     ):
-         field.upload_uri = "upload_uri"
-         storage_field.field = field
-
-         await storage_field.upload(iter_result([b"test1", b"test2"]), field)
-
-         assert connection.execute.call_count == 9
-
-
- class TestPostgresStorage:
-     async def test_initialize(self, storage: pg.PostgresStorage, pool, connection):
-         await storage.initialize()
-
-         assert pool.acquire.call_count == 1
-         assert connection.execute.call_count == 1
-
-     async def test_finalize(self, storage: pg.PostgresStorage, pool):
-         await storage.finalize()
-
-         pool.close.assert_called_once()
-
-     def test_get_bucket_name(self, storage: pg.PostgresStorage):
-         assert storage.get_bucket_name("bucket_name") == "bucket_name"
-
-     async def test_create_kb(self, storage: pg.PostgresStorage):
-         assert await storage.create_kb("kb_id")
-
-     async def test_delete_kb(self, storage: pg.PostgresStorage, connection):
-         assert await storage.delete_kb("kb_id") == (True, False)
-         connection.execute.assert_has_awaits(
-             [
-                 call(ANY, "kb_id"),
-                 call(ANY, "kb_id"),
-             ]
-         )
-
-     async def test_delete_upload(self, storage: pg.PostgresStorage, connection):
-         await storage.delete_upload("file_id", "kb_id")
-         connection.execute.assert_awaited_with(ANY, "kb_id", "file_id")
-
-     async def test_iterate_objects(self, storage: pg.PostgresStorage, connection):
-         connection.cursor = MagicMock(
-             return_value=iter_result(
-                 [
-                     {
-                         "file_id": "file_id1",
-                         "filename": "filename",
-                         "size": 1,
-                         "content_type": "content_type",
-                     },
-                     {
-                         "file_id": "file_id2",
-                         "filename": "filename",
-                         "size": 1,
-                         "content_type": "content_type",
-                     },
-                 ]
-             )
-         )
-
-         object_names = []
-         async for object_info in storage.iterate_objects("kb_id", "file_id"):
-             object_names.append(object_info.name)
-         assert object_names == ["file_id1", "file_id2"]
-
-     async def test_download(
-         self, storage: pg.PostgresStorage, connection, chunk_info, chunk_data
-     ):
-         connection.fetch.return_value = chunk_info
-         connection.fetchrow.side_effect = chunk_data
-
-         chunks = []
-         async for chunk in storage.download("kb_id", "file_id"):
-             chunks.append(chunk)
-
-         assert chunks == [b"data1", b"data2", b"data3"]