crawlee 1.0.3b5__py3-none-any.whl → 1.0.3b7__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. Click here for more details.
- crawlee/_utils/sitemap.py +1 -1
- crawlee/storage_clients/_sql/_request_queue_client.py +10 -2
- {crawlee-1.0.3b5.dist-info → crawlee-1.0.3b7.dist-info}/METADATA +1 -1
- {crawlee-1.0.3b5.dist-info → crawlee-1.0.3b7.dist-info}/RECORD +7 -7
- {crawlee-1.0.3b5.dist-info → crawlee-1.0.3b7.dist-info}/WHEEL +0 -0
- {crawlee-1.0.3b5.dist-info → crawlee-1.0.3b7.dist-info}/entry_points.txt +0 -0
- {crawlee-1.0.3b5.dist-info → crawlee-1.0.3b7.dist-info}/licenses/LICENSE +0 -0
crawlee/_utils/sitemap.py
CHANGED
|
@@ -335,7 +335,7 @@ async def _fetch_and_process_sitemap(
|
|
|
335
335
|
# Check if the first chunk is a valid gzip header
|
|
336
336
|
if first_chunk and raw_chunk.startswith(b'\x1f\x8b'):
|
|
337
337
|
decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16)
|
|
338
|
-
|
|
338
|
+
first_chunk = False
|
|
339
339
|
|
|
340
340
|
chunk = decompressor.decompress(raw_chunk) if decompressor else raw_chunk
|
|
341
341
|
text_chunk = decoder.decode(chunk)
|
|
@@ -546,12 +546,20 @@ class SqlRequestQueueClient(RequestQueueClient, SqlClientMixin):
|
|
|
546
546
|
block_until = now + timedelta(seconds=self._BLOCK_REQUEST_TIME)
|
|
547
547
|
# Extend blocking for forefront request, it is considered blocked by the current client.
|
|
548
548
|
stmt = stmt.values(
|
|
549
|
-
sequence_number=new_sequence,
|
|
549
|
+
sequence_number=new_sequence,
|
|
550
|
+
time_blocked_until=block_until,
|
|
551
|
+
client_key=self.client_key,
|
|
552
|
+
data=request.model_dump_json(),
|
|
550
553
|
)
|
|
551
554
|
else:
|
|
552
555
|
new_sequence = state.sequence_counter
|
|
553
556
|
state.sequence_counter += 1
|
|
554
|
-
stmt = stmt.values(
|
|
557
|
+
stmt = stmt.values(
|
|
558
|
+
sequence_number=new_sequence,
|
|
559
|
+
time_blocked_until=None,
|
|
560
|
+
client_key=None,
|
|
561
|
+
data=request.model_dump_json(),
|
|
562
|
+
)
|
|
555
563
|
|
|
556
564
|
result = await session.execute(stmt)
|
|
557
565
|
result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
|
|
@@ -33,7 +33,7 @@ crawlee/_utils/recoverable_state.py,sha256=_88kOEDDRg1lr6RWs7NNDku6NNRlg7zuzUOoU
|
|
|
33
33
|
crawlee/_utils/recurring_task.py,sha256=sA0n4Cf9pYLQyBD9PZ7QbR6m6KphlbkACaT2GdbLfs4,1757
|
|
34
34
|
crawlee/_utils/requests.py,sha256=yOjai7bHR9_duPJ0ck-L76y9AnKZr49JBfSOQv9kvJc,5048
|
|
35
35
|
crawlee/_utils/robots.py,sha256=k3Yi2OfKT0H04MPkP-OBGGV7fEePgOqb60awltjMYWY,4346
|
|
36
|
-
crawlee/_utils/sitemap.py,sha256=
|
|
36
|
+
crawlee/_utils/sitemap.py,sha256=UI9EJiFiyFvV5_flVUtdsEVz8ZsJeRERPtcx8ZsqjTU,16632
|
|
37
37
|
crawlee/_utils/system.py,sha256=tA8AP__9vsJ9OTLTnAYAKkxc8U5-IEna0N_hqYBybUo,4294
|
|
38
38
|
crawlee/_utils/time.py,sha256=WK17P939r65dLz2rWvL59OEJoxgzdinw-ND9WuG4DuU,2353
|
|
39
39
|
crawlee/_utils/try_import.py,sha256=QI_58ifc2l0Rxehzu6xcofQrRAVeLzZuBTTTHttLl8s,1310
|
|
@@ -176,7 +176,7 @@ crawlee/storage_clients/_sql/_client_mixin.py,sha256=U9ThDUuRbT5JDtCFlBurhZIs1Ay
|
|
|
176
176
|
crawlee/storage_clients/_sql/_dataset_client.py,sha256=tiJVvOPZgc7cy4kGfWnun-g2TJMHMdaLnoqns5Sl6ek,10203
|
|
177
177
|
crawlee/storage_clients/_sql/_db_models.py,sha256=Gs4MS1YL0gWaUfNReVKJUXsqbU_d5jxiyvZ0sFxAV2A,9845
|
|
178
178
|
crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=LnVLWhOjo4LdvtCac4fwuf__DgEQjlqSxz8KkjY3Qx4,11311
|
|
179
|
-
crawlee/storage_clients/_sql/_request_queue_client.py,sha256=
|
|
179
|
+
crawlee/storage_clients/_sql/_request_queue_client.py,sha256=OlvAOwEoYY5f4NO7BdhLFRT_i_E3YzJDb_ptKKK2huY,29478
|
|
180
180
|
crawlee/storage_clients/_sql/_storage_client.py,sha256=ITtMpwfotIW4SZjO4rycB5wfMKaqTAJgMvzcUZxckrk,10905
|
|
181
181
|
crawlee/storage_clients/_sql/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
182
|
crawlee/storages/__init__.py,sha256=wc2eioyCKAAYrg4N7cshpjC-UbE23OzGar9nK_kteSY,186
|
|
@@ -187,8 +187,8 @@ crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKp
|
|
|
187
187
|
crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
|
|
188
188
|
crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
|
|
189
189
|
crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
190
|
-
crawlee-1.0.
|
|
191
|
-
crawlee-1.0.
|
|
192
|
-
crawlee-1.0.
|
|
193
|
-
crawlee-1.0.
|
|
194
|
-
crawlee-1.0.
|
|
190
|
+
crawlee-1.0.3b7.dist-info/METADATA,sha256=OoJsB59i-P_GmyPCh6uiSyj-CBfUDISaQ0J-IzJNKi0,29314
|
|
191
|
+
crawlee-1.0.3b7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
192
|
+
crawlee-1.0.3b7.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
|
|
193
|
+
crawlee-1.0.3b7.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
194
|
+
crawlee-1.0.3b7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|