apify 3.4.2b4__tar.gz → 3.4.2b5__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. {apify-3.4.2b4 → apify-3.4.2b5}/CHANGELOG.md +5 -0
  2. {apify-3.4.2b4 → apify-3.4.2b5}/PKG-INFO +1 -1
  3. {apify-3.4.2b4 → apify-3.4.2b5}/pyproject.toml +1 -1
  4. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_charging.py +70 -24
  5. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/events/_types.py +57 -0
  6. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/_async_thread.py +14 -7
  7. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/middlewares/apify_proxy.py +2 -3
  8. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/scheduler.py +8 -1
  9. {apify-3.4.2b4 → apify-3.4.2b5}/.gitignore +0 -0
  10. {apify-3.4.2b4 → apify-3.4.2b5}/CONTRIBUTING.md +0 -0
  11. {apify-3.4.2b4 → apify-3.4.2b5}/LICENSE +0 -0
  12. {apify-3.4.2b4 → apify-3.4.2b5}/README.md +0 -0
  13. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/__init__.py +0 -0
  14. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_actor.py +0 -0
  15. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_configuration.py +0 -0
  16. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_consts.py +0 -0
  17. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_crypto.py +0 -0
  18. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_proxy_configuration.py +0 -0
  19. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_utils.py +0 -0
  20. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_webhook.py +0 -0
  21. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/events/__init__.py +0 -0
  22. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/events/_apify_event_manager.py +0 -0
  23. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/events/py.typed +0 -0
  24. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/log.py +0 -0
  25. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/py.typed +0 -0
  26. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/request_loaders/__init__.py +0 -0
  27. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/request_loaders/_apify_request_list.py +0 -0
  28. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/request_loaders/py.typed +0 -0
  29. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/__init__.py +0 -0
  30. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/_actor_runner.py +0 -0
  31. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/_logging_config.py +0 -0
  32. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/extensions/__init__.py +0 -0
  33. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/extensions/_httpcache.py +0 -0
  34. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/middlewares/__init__.py +0 -0
  35. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/middlewares/py.typed +0 -0
  36. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/pipelines/__init__.py +0 -0
  37. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
  38. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/pipelines/py.typed +0 -0
  39. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/py.typed +0 -0
  40. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/requests.py +0 -0
  41. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/utils.py +0 -0
  42. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/__init__.py +0 -0
  43. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/__init__.py +0 -0
  44. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_alias_resolving.py +0 -0
  45. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_api_client_creation.py +0 -0
  46. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_dataset_client.py +0 -0
  47. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_key_value_store_client.py +0 -0
  48. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_models.py +0 -0
  49. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_request_queue_client.py +0 -0
  50. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_request_queue_shared_client.py +0 -0
  51. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_request_queue_single_client.py +0 -0
  52. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_storage_client.py +0 -0
  53. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_utils.py +0 -0
  54. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/py.typed +0 -0
  55. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_file_system/__init__.py +0 -0
  56. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_file_system/_dataset_client.py +0 -0
  57. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_file_system/_key_value_store_client.py +0 -0
  58. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
  59. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_ppe_dataset_mixin.py +0 -0
  60. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_smart_apify/__init__.py +0 -0
  61. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_smart_apify/_storage_client.py +0 -0
  62. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/py.typed +0 -0
  63. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storages/__init__.py +0 -0
  64. {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storages/py.typed +0 -0
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
5
5
  <!-- git-cliff-unreleased-start -->
6
6
  ## 3.4.2 - **not yet released**
7
7
 
8
+ ### 🐛 Bug Fixes
9
+
10
+ - **scrapy:** Correct proxy middleware exception log and import ([#953](https://github.com/apify/apify-sdk-python/pull/953)) ([5bd6eb9](https://github.com/apify/apify-sdk-python/commit/5bd6eb9843d90844cec083372e932413bceedec9)) by [@vdusek](https://github.com/vdusek)
11
+ - **scrapy:** Skip a request that fails to convert instead of crashing the run ([#952](https://github.com/apify/apify-sdk-python/pull/952)) ([db9444f](https://github.com/apify/apify-sdk-python/commit/db9444faeb0158c29aa394121cf733ff2e843f28)) by [@vdusek](https://github.com/vdusek)
12
+
8
13
  ### 🚜 Refactor
9
14
 
10
15
  - [**breaking**] Remove deprecated APIs ([#918](https://github.com/apify/apify-sdk-python/pull/918)) ([3e5728d](https://github.com/apify/apify-sdk-python/commit/3e5728d94cb8fd879d5a76e33a03d55792d835d5)) by [@vdusek](https://github.com/vdusek), closes [#635](https://github.com/apify/apify-sdk-python/issues/635)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apify
3
- Version: 3.4.2b4
3
+ Version: 3.4.2b5
4
4
  Summary: Apify SDK for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "apify"
7
- version = "3.4.2b4"
7
+ version = "3.4.2b5"
8
8
  description = "Apify SDK for Python"
9
9
  authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
10
10
  license = { file = "LICENSE" }
@@ -7,7 +7,7 @@ from datetime import UTC, datetime
7
7
  from decimal import Decimal
8
8
  from typing import TYPE_CHECKING, Annotated, Literal, Protocol, TypedDict
9
9
 
10
- from pydantic import BaseModel, ConfigDict, Field
10
+ from pydantic import Field
11
11
 
12
12
  import apify_client._models as _client_models
13
13
  from apify_client._models import ActorChargeEvent as ClientActorChargeEvent
@@ -28,14 +28,17 @@ if TYPE_CHECKING:
28
28
 
29
29
  from apify._configuration import Configuration
30
30
 
31
- PricingModel = Literal['PAY_PER_EVENT', 'PRICE_PER_DATASET_ITEM', 'FLAT_PRICE_PER_MONTH', 'FREE']
32
- """Pricing model for an Actor."""
31
+ charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_manager_ctx', default=None)
32
+ """Holds the current `ChargingManager` instance, if any.
33
+
34
+ Allows PPE-aware dataset clients to access the charging manager without needing to pass it explicitly.
35
+ """
33
36
 
34
37
  DEFAULT_DATASET_ITEM_EVENT = 'apify-default-dataset-item'
38
+ """Name of the synthetic event charged for each item pushed to the default dataset."""
35
39
 
36
- # Context variable to hold the current `ChargingManager` instance, if any. This allows PPE-aware dataset clients to
37
- # access the charging manager without needing to pass it explicitly.
38
- charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_manager_ctx', default=None)
40
+ PricingModel = Literal['PAY_PER_EVENT', 'PRICE_PER_DATASET_ITEM', 'FLAT_PRICE_PER_MONTH', 'FREE']
41
+ """Pricing model for an Actor."""
39
42
 
40
43
  _ensure_context = ensure_context('active')
41
44
 
@@ -49,48 +52,91 @@ _ensure_context = ensure_context('active')
49
52
  # `apify-client` instance) flows through the same code paths without conversion.
50
53
 
51
54
 
52
- class _RelaxedPricingMetadata(BaseModel):
53
- """Mixin relaxing the `CommonActorPricingInfo` metadata fields the platform env var omits."""
54
-
55
- model_config = ConfigDict(populate_by_name=True, extra='allow')
56
-
57
- apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
58
- created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
59
- started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
60
-
61
-
62
55
  @docs_group('Charging')
63
56
  class ActorChargeEvent(ClientActorChargeEvent):
64
- # `event_description` is required in apify-client but omitted from the env var.
57
+ """Definition of a single chargeable event in the pay-per-event pricing model."""
58
+
65
59
  event_description: Annotated[str | None, Field(alias='eventDescription')] = None
60
+ """Human-readable description of the event.
61
+
62
+ Required in apify-client but omitted from the env var, so it is relaxed to optional.
63
+ """
66
64
 
67
65
 
68
66
  @docs_group('Charging')
69
67
  class PricingPerEvent(ClientPricingPerEvent):
68
+ """Pay-per-event pricing details - the chargeable events and their prices."""
69
+
70
70
  actor_charge_events: Annotated[dict[str, ActorChargeEvent] | None, Field(alias='actorChargeEvents')] = None
71
+ """Mapping of event name to its charge definition."""
71
72
 
72
73
 
73
74
  @docs_group('Charging')
74
- class FreeActorPricingInfo(_RelaxedPricingMetadata, ClientFree):
75
- pass
75
+ class FreeActorPricingInfo(ClientFree):
76
+ """Pricing info for an Actor offered free of charge."""
77
+
78
+ apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
79
+ """Apify's margin on the price, as a percentage."""
80
+
81
+ created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
82
+ """Timestamp when this pricing info was created."""
83
+
84
+ started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
85
+ """Timestamp when this pricing became effective."""
76
86
 
77
87
 
78
88
  @docs_group('Charging')
79
- class FlatPricePerMonthActorPricingInfo(_RelaxedPricingMetadata, ClientFlatPricePerMonth):
89
+ class FlatPricePerMonthActorPricingInfo(ClientFlatPricePerMonth):
90
+ """Pricing info for an Actor billed at a flat monthly price."""
91
+
92
+ apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
93
+ """Apify's margin on the price, as a percentage."""
94
+
95
+ created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
96
+ """Timestamp when this pricing info was created."""
97
+
98
+ started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
99
+ """Timestamp when this pricing became effective."""
100
+
80
101
  trial_minutes: Annotated[int | None, Field(alias='trialMinutes')] = None
102
+ """Length of the free trial period, in minutes."""
103
+
81
104
  price_per_unit_usd: Annotated[float | None, Field(alias='pricePerUnitUsd')] = None
105
+ """Price per unit, in USD."""
82
106
 
83
107
 
84
108
  @docs_group('Charging')
85
- class PricePerDatasetItemActorPricingInfo(_RelaxedPricingMetadata, ClientPricePerDatasetItem):
109
+ class PricePerDatasetItemActorPricingInfo(ClientPricePerDatasetItem):
110
+ """Pricing info for an Actor billed per dataset item produced."""
111
+
112
+ apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
113
+ """Apify's margin on the price, as a percentage."""
114
+
115
+ created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
116
+ """Timestamp when this pricing info was created."""
117
+
118
+ started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
119
+ """Timestamp when this pricing became effective."""
120
+
86
121
  unit_name: Annotated[str | None, Field(alias='unitName')] = None
87
- # `price_per_unit_usd` is already optional in apify-client - inherited.
122
+ """Name of the billed unit."""
88
123
 
89
124
 
90
125
  @docs_group('Charging')
91
- class PayPerEventActorPricingInfo(_RelaxedPricingMetadata, ClientPayPerEvent):
92
- # Re-typed to the relaxed element so an omitted `eventDescription` validates; the field stays required.
126
+ class PayPerEventActorPricingInfo(ClientPayPerEvent):
127
+ """Pricing info for an Actor billed per charged event."""
128
+
129
+ apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
130
+ """Apify's margin on the price, as a percentage."""
131
+
132
+ created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
133
+ """Timestamp when this pricing info was created."""
134
+
135
+ started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
136
+ """Timestamp when this pricing became effective."""
137
+
93
138
  pricing_per_event: Annotated[PricingPerEvent, Field(alias='pricingPerEvent')]
139
+ """The pay-per-event pricing details."""
94
140
 
95
141
 
96
142
  ActorPricingInfoModel = ClientFree | ClientFlatPricePerMonth | ClientPricePerDatasetItem | ClientPayPerEvent
@@ -27,14 +27,31 @@ This is the Apify-specific subset of [`Event`][crawlee.events.Event] — for the
27
27
 
28
28
  @docs_group('Event data')
29
29
  class SystemInfoEventData(BaseModel):
30
+ """Resource usage metrics carried by a `systemInfo` event."""
31
+
30
32
  mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')]
33
+ """Average memory usage over the measured interval, in bytes."""
34
+
31
35
  mem_current_bytes: Annotated[float, Field(alias='memCurrentBytes')]
36
+ """Current memory usage, in bytes."""
37
+
32
38
  mem_max_bytes: Annotated[float, Field(alias='memMaxBytes')]
39
+ """Peak memory usage observed so far, in bytes."""
40
+
33
41
  cpu_avg_usage: Annotated[float, Field(alias='cpuAvgUsage')]
42
+ """Average CPU usage over the measured interval, in percent."""
43
+
34
44
  cpu_max_usage: Annotated[float, Field(alias='cpuMaxUsage')]
45
+ """Peak CPU usage observed so far, in percent."""
46
+
35
47
  cpu_current_usage: Annotated[float, Field(alias='cpuCurrentUsage')]
48
+ """Current CPU usage, in percent."""
49
+
36
50
  is_cpu_overloaded: Annotated[bool, Field(alias='isCpuOverloaded')]
51
+ """Whether the CPU is currently overloaded."""
52
+
37
53
  created_at: Annotated[datetime, Field(alias='createdAt')]
54
+ """Timestamp when the metrics were collected."""
38
55
 
39
56
  def to_crawlee_format(self, dedicated_cpus: float) -> EventSystemInfoData:
40
57
  return EventSystemInfoData.model_validate(
@@ -54,36 +71,63 @@ class SystemInfoEventData(BaseModel):
54
71
 
55
72
  @docs_group('Events')
56
73
  class PersistStateEvent(BaseModel):
74
+ """A `persistState` event instructing the Actor to persist its state."""
75
+
57
76
  name: Literal[Event.PERSIST_STATE]
77
+ """The event name."""
78
+
58
79
  data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
80
+ """The event payload."""
59
81
 
60
82
 
61
83
  @docs_group('Events')
62
84
  class SystemInfoEvent(BaseModel):
85
+ """A `systemInfo` event carrying the Actor's resource usage metrics."""
86
+
63
87
  name: Literal[Event.SYSTEM_INFO]
88
+ """The event name."""
89
+
64
90
  data: SystemInfoEventData
91
+ """The event payload."""
65
92
 
66
93
 
67
94
  @docs_group('Events')
68
95
  class MigratingEvent(BaseModel):
96
+ """A `migrating` event signalling the Actor is about to be migrated to another host."""
97
+
69
98
  name: Literal[Event.MIGRATING]
99
+ """The event name."""
100
+
70
101
  data: Annotated[EventMigratingData, Field(default_factory=EventMigratingData)]
102
+ """The event payload."""
71
103
 
72
104
 
73
105
  @docs_group('Events')
74
106
  class AbortingEvent(BaseModel):
107
+ """An `aborting` event signalling the Actor run is being aborted."""
108
+
75
109
  name: Literal[Event.ABORTING]
110
+ """The event name."""
111
+
76
112
  data: Annotated[EventAbortingData, Field(default_factory=EventAbortingData)]
113
+ """The event payload."""
77
114
 
78
115
 
79
116
  @docs_group('Events')
80
117
  class ExitEvent(BaseModel):
118
+ """An `exit` event signalling the Actor process is about to exit."""
119
+
81
120
  name: Literal[Event.EXIT]
121
+ """The event name."""
122
+
82
123
  data: Annotated[EventExitData, Field(default_factory=EventExitData)]
124
+ """The event payload."""
83
125
 
84
126
 
85
127
  @docs_group('Events')
86
128
  class EventWithoutData(BaseModel):
129
+ """A framework-level event that carries no payload (e.g. browser and page lifecycle events)."""
130
+
87
131
  name: Literal[
88
132
  Event.SESSION_RETIRED,
89
133
  Event.BROWSER_LAUNCHED,
@@ -92,19 +136,32 @@ class EventWithoutData(BaseModel):
92
136
  Event.PAGE_CREATED,
93
137
  Event.PAGE_CLOSED,
94
138
  ]
139
+ """The event name."""
140
+
95
141
  data: Any = None
142
+ """The event payload, always empty for this event."""
96
143
 
97
144
 
98
145
  @docs_group('Events')
99
146
  class DeprecatedEvent(BaseModel):
147
+ """A deprecated event kept for backward compatibility (e.g. `cpuInfo`)."""
148
+
100
149
  name: Literal['cpuInfo']
150
+ """The event name."""
151
+
101
152
  data: Annotated[dict[str, Any], Field(default_factory=dict)]
153
+ """The event payload."""
102
154
 
103
155
 
104
156
  @docs_group('Events')
105
157
  class UnknownEvent(BaseModel):
158
+ """A fallback for any event whose name is not recognized by the SDK."""
159
+
106
160
  name: str
161
+ """The event name."""
162
+
107
163
  data: Annotated[dict[str, Any], Field(default_factory=dict)]
164
+ """The event payload."""
108
165
 
109
166
 
110
167
  EventMessage = PersistStateEvent | SystemInfoEvent | MigratingEvent | AbortingEvent | ExitEvent | EventWithoutData
@@ -5,7 +5,7 @@ import threading
5
5
  from concurrent import futures
6
6
  from datetime import timedelta
7
7
  from logging import getLogger
8
- from typing import TYPE_CHECKING, Any
8
+ from typing import TYPE_CHECKING, Any, Literal
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from collections.abc import Coroutine
@@ -14,13 +14,16 @@ logger = getLogger(__name__)
14
14
 
15
15
 
16
16
  class AsyncThread:
17
- """Class for running an asyncio event loop in a separate thread.
17
+ """Run an asyncio event loop in a dedicated background thread.
18
18
 
19
- This allows running asynchronous coroutines from synchronous code by executingthem on an event loop
20
- that runs in its own dedicated thread.
19
+ This lets synchronous Scrapy callbacks drive asynchronous Apify and Crawlee coroutines. The
20
+ scheduler and the HTTP cache storage each own their own `AsyncThread`, so the request queue and
21
+ the key-value store never share an event loop; they only share the read-only global
22
+ `Configuration`. A single shared loop would also work but would couple their lifecycles.
21
23
  """
22
24
 
23
- def __init__(self) -> None:
25
+ def __init__(self, default_timeout: timedelta = timedelta(seconds=60)) -> None:
26
+ self._default_timeout = default_timeout
24
27
  self._eventloop = asyncio.new_event_loop()
25
28
 
26
29
  # Start the event loop in a dedicated daemon thread.
@@ -33,7 +36,7 @@ class AsyncThread:
33
36
  def run_coro(
34
37
  self,
35
38
  coro: Coroutine,
36
- timeout: timedelta = timedelta(seconds=60),
39
+ timeout: timedelta | Literal['default'] = 'default',
37
40
  ) -> Any:
38
41
  """Run a coroutine on an event loop running in a separate thread.
39
42
 
@@ -42,7 +45,8 @@ class AsyncThread:
42
45
 
43
46
  Args:
44
47
  coro: The coroutine to run.
45
- timeout: The maximum number of seconds to wait for the coroutine to finish.
48
+ timeout: The maximum time to wait for the coroutine to finish. Pass `'default'` to use the
49
+ `default_timeout` passed to the constructor.
46
50
 
47
51
  Returns:
48
52
  The result returned by the coroutine.
@@ -52,6 +56,9 @@ class AsyncThread:
52
56
  TimeoutError: If the coroutine does not complete within the timeout.
53
57
  Exception: Any exception raised during coroutine execution.
54
58
  """
59
+ if timeout == 'default':
60
+ timeout = self._default_timeout
61
+
55
62
  if not self._eventloop.is_running():
56
63
  raise RuntimeError(f'The coroutine {coro} cannot be executed because the event loop is not running.')
57
64
 
@@ -7,7 +7,7 @@ from scrapy.core.downloader.handlers.http11 import TunnelError
7
7
  from scrapy.exceptions import NotConfigured
8
8
 
9
9
  from apify import Actor, ProxyConfiguration
10
- from apify.scrapy import get_basic_auth_header
10
+ from apify.scrapy.utils import get_basic_auth_header
11
11
 
12
12
  if TYPE_CHECKING:
13
13
  from scrapy import Request, Spider
@@ -30,7 +30,6 @@ class ApifyHttpProxyMiddleware:
30
30
 
31
31
  Args:
32
32
  proxy_settings: Dictionary containing proxy settings, provided by the Actor input.
33
- auth_encoding: Encoding for basic authentication (default is 'latin-1').
34
33
  """
35
34
  self._proxy_settings = proxy_settings
36
35
  self._proxy_cfg_internal: ProxyConfiguration | None = None
@@ -111,7 +110,7 @@ class ApifyHttpProxyMiddleware:
111
110
  if isinstance(exception, TunnelError):
112
111
  Actor.log.warning(
113
112
  f'ApifyHttpProxyMiddleware: TunnelError occurred for request="{request}", '
114
- 'reason="{exception}", skipping...'
113
+ f'reason="{exception}", skipping...'
115
114
  )
116
115
 
117
116
  async def _get_new_proxy_url(self) -> ParseResult:
@@ -170,6 +170,13 @@ class ApifyScheduler(BaseScheduler):
170
170
  traceback.print_exc()
171
171
  raise
172
172
 
173
- scrapy_request = to_scrapy_request(apify_request, spider=self.spider)
173
+ # Reconstruct the Scrapy request. A malformed queue entry must not crash the whole run: it
174
+ # has already been marked handled above, so log it and skip it instead of propagating.
175
+ try:
176
+ scrapy_request = to_scrapy_request(apify_request, spider=self.spider)
177
+ except Exception:
178
+ logger.exception(f'Failed to convert Apify request {apify_request} to a Scrapy request; skipping it.')
179
+ return None
180
+
174
181
  logger.debug(f'Converted to scrapy_request: {scrapy_request}')
175
182
  return scrapy_request
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes