crawlee 0.6.13b17__py3-none-any.whl → 1.1.2b7__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Potentially problematic release: this version of crawlee might be problematic.

Files changed (102):
  1. crawlee/_autoscaling/snapshotter.py +1 -1
  2. crawlee/_request.py +35 -33
  3. crawlee/_service_locator.py +44 -24
  4. crawlee/_types.py +106 -34
  5. crawlee/_utils/context.py +2 -2
  6. crawlee/_utils/file.py +7 -0
  7. crawlee/_utils/raise_if_too_many_kwargs.py +12 -0
  8. crawlee/_utils/recoverable_state.py +32 -8
  9. crawlee/_utils/recurring_task.py +17 -1
  10. crawlee/_utils/requests.py +0 -26
  11. crawlee/_utils/robots.py +17 -5
  12. crawlee/_utils/sitemap.py +4 -2
  13. crawlee/_utils/system.py +3 -3
  14. crawlee/_utils/time.py +120 -0
  15. crawlee/_utils/urls.py +9 -2
  16. crawlee/browsers/_browser_pool.py +4 -1
  17. crawlee/browsers/_playwright_browser_controller.py +21 -15
  18. crawlee/browsers/_playwright_browser_plugin.py +17 -3
  19. crawlee/browsers/_types.py +1 -1
  20. crawlee/configuration.py +2 -0
  21. crawlee/crawlers/__init__.py +2 -1
  22. crawlee/crawlers/_abstract_http/__init__.py +2 -1
  23. crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +50 -12
  24. crawlee/crawlers/_abstract_http/_abstract_http_parser.py +1 -1
  25. crawlee/crawlers/_abstract_http/_http_crawling_context.py +1 -1
  26. crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +39 -15
  27. crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +1 -1
  28. crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +8 -3
  29. crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +1 -1
  30. crawlee/crawlers/_basic/_basic_crawler.py +219 -126
  31. crawlee/crawlers/_basic/_logging_utils.py +5 -1
  32. crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +2 -2
  33. crawlee/crawlers/_parsel/_parsel_crawler.py +2 -2
  34. crawlee/crawlers/_playwright/_playwright_crawler.py +60 -11
  35. crawlee/crawlers/_playwright/_playwright_http_client.py +7 -1
  36. crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +4 -1
  37. crawlee/crawlers/_playwright/_types.py +12 -2
  38. crawlee/events/_event_manager.py +4 -4
  39. crawlee/events/_types.py +6 -6
  40. crawlee/fingerprint_suite/_fingerprint_generator.py +3 -0
  41. crawlee/fingerprint_suite/_header_generator.py +2 -2
  42. crawlee/fingerprint_suite/_types.py +2 -2
  43. crawlee/http_clients/_base.py +4 -0
  44. crawlee/http_clients/_curl_impersonate.py +12 -0
  45. crawlee/http_clients/_httpx.py +16 -6
  46. crawlee/http_clients/_impit.py +25 -10
  47. crawlee/otel/crawler_instrumentor.py +3 -3
  48. crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +2 -2
  49. crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +3 -0
  50. crawlee/request_loaders/_request_list.py +3 -3
  51. crawlee/request_loaders/_request_loader.py +5 -1
  52. crawlee/request_loaders/_sitemap_request_loader.py +248 -50
  53. crawlee/sessions/_models.py +2 -2
  54. crawlee/sessions/_session_pool.py +1 -1
  55. crawlee/statistics/_error_snapshotter.py +1 -1
  56. crawlee/statistics/_models.py +43 -4
  57. crawlee/statistics/_statistics.py +24 -33
  58. crawlee/storage_clients/__init__.py +16 -0
  59. crawlee/storage_clients/_base/_request_queue_client.py +2 -2
  60. crawlee/storage_clients/_base/_storage_client.py +13 -0
  61. crawlee/storage_clients/_file_system/_dataset_client.py +29 -27
  62. crawlee/storage_clients/_file_system/_key_value_store_client.py +30 -26
  63. crawlee/storage_clients/_file_system/_request_queue_client.py +169 -153
  64. crawlee/storage_clients/_file_system/_storage_client.py +16 -3
  65. crawlee/storage_clients/_file_system/_utils.py +0 -0
  66. crawlee/storage_clients/_memory/_dataset_client.py +16 -4
  67. crawlee/storage_clients/_memory/_key_value_store_client.py +16 -4
  68. crawlee/storage_clients/_memory/_request_queue_client.py +55 -36
  69. crawlee/storage_clients/_memory/_storage_client.py +6 -3
  70. crawlee/storage_clients/_redis/__init__.py +6 -0
  71. crawlee/storage_clients/_redis/_client_mixin.py +295 -0
  72. crawlee/storage_clients/_redis/_dataset_client.py +325 -0
  73. crawlee/storage_clients/_redis/_key_value_store_client.py +264 -0
  74. crawlee/storage_clients/_redis/_request_queue_client.py +586 -0
  75. crawlee/storage_clients/_redis/_storage_client.py +146 -0
  76. crawlee/storage_clients/_redis/_utils.py +23 -0
  77. crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
  78. crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
  79. crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
  80. crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
  81. crawlee/storage_clients/_redis/py.typed +0 -0
  82. crawlee/storage_clients/_sql/__init__.py +6 -0
  83. crawlee/storage_clients/_sql/_client_mixin.py +385 -0
  84. crawlee/storage_clients/_sql/_dataset_client.py +310 -0
  85. crawlee/storage_clients/_sql/_db_models.py +268 -0
  86. crawlee/storage_clients/_sql/_key_value_store_client.py +300 -0
  87. crawlee/storage_clients/_sql/_request_queue_client.py +720 -0
  88. crawlee/storage_clients/_sql/_storage_client.py +282 -0
  89. crawlee/storage_clients/_sql/py.typed +0 -0
  90. crawlee/storage_clients/models.py +13 -11
  91. crawlee/storages/_base.py +5 -1
  92. crawlee/storages/_dataset.py +12 -2
  93. crawlee/storages/_key_value_store.py +17 -4
  94. crawlee/storages/_request_queue.py +13 -5
  95. crawlee/storages/_storage_instance_manager.py +133 -71
  96. crawlee/storages/_utils.py +11 -0
  97. {crawlee-0.6.13b17.dist-info → crawlee-1.1.2b7.dist-info}/METADATA +18 -6
  98. {crawlee-0.6.13b17.dist-info → crawlee-1.1.2b7.dist-info}/RECORD +101 -78
  99. {crawlee-0.6.13b17.dist-info → crawlee-1.1.2b7.dist-info}/WHEEL +1 -1
  100. crawlee/_utils/measure_time.py +0 -31
  101. {crawlee-0.6.13b17.dist-info → crawlee-1.1.2b7.dist-info}/entry_points.txt +0 -0
  102. {crawlee-0.6.13b17.dist-info → crawlee-1.1.2b7.dist-info}/licenses/LICENSE +0 -0

crawlee/crawlers/_abstract_http/_abstract_http_crawler.py

@@ -3,14 +3,16 @@ from __future__ import annotations
  import asyncio
  import logging
  from abc import ABC
+ from datetime import timedelta
  from typing import TYPE_CHECKING, Any, Generic

  from more_itertools import partition
  from pydantic import ValidationError
- from typing_extensions import TypeVar
+ from typing_extensions import NotRequired, TypeVar

  from crawlee._request import Request, RequestOptions
  from crawlee._utils.docs import docs_group
+ from crawlee._utils.time import SharedTimeout
  from crawlee._utils.urls import to_absolute_url_iterator
  from crawlee.crawlers._basic import BasicCrawler, BasicCrawlerOptions, ContextPipeline
  from crawlee.errors import SessionError
@@ -32,9 +34,24 @@ TCrawlingContext = TypeVar('TCrawlingContext', bound=ParsedHttpCrawlingContext)
  TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)


+ class HttpCrawlerOptions(
+     BasicCrawlerOptions[TCrawlingContext, TStatisticsState],
+     Generic[TCrawlingContext, TStatisticsState],
+ ):
+     """Arguments for the `AbstractHttpCrawler` constructor.
+
+     It is intended for typing forwarded `__init__` arguments in the subclasses.
+     """
+
+     navigation_timeout: NotRequired[timedelta | None]
+     """Timeout for the HTTP request."""
+
+
  @docs_group('Crawlers')
  class AbstractHttpCrawler(
-     Generic[TCrawlingContext, TParseResult, TSelectResult], BasicCrawler[TCrawlingContext, StatisticsState], ABC
+     BasicCrawler[TCrawlingContext, StatisticsState],
+     ABC,
+     Generic[TCrawlingContext, TParseResult, TSelectResult],
  ):
      """A web crawler for performing HTTP requests.

@@ -54,10 +71,13 @@ class AbstractHttpCrawler(
          self,
          *,
          parser: AbstractHttpParser[TParseResult, TSelectResult],
+         navigation_timeout: timedelta | None = None,
          **kwargs: Unpack[BasicCrawlerOptions[TCrawlingContext, StatisticsState]],
      ) -> None:
          self._parser = parser
+         self._navigation_timeout = navigation_timeout or timedelta(minutes=1)
          self._pre_navigation_hooks: list[Callable[[BasicCrawlingContext], Awaitable[None]]] = []
+         self._shared_navigation_timeouts: dict[int, SharedTimeout] = {}

          if '_context_pipeline' not in kwargs:
              raise ValueError(
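The constructor now accepts a `navigation_timeout` (defaulting to one minute) that caps the pre-navigation hooks plus the HTTP request. A hedged usage sketch, assuming the concrete crawlers built on `AbstractHttpCrawler` (for example `ParselCrawler`, whose file also changed in this release) forward the new `HttpCrawlerOptions` keyword:

from datetime import timedelta

from crawlee.crawlers import ParselCrawler

# Assumption: ParselCrawler forwards HttpCrawlerOptions (including navigation_timeout)
# to AbstractHttpCrawler. When omitted, the one-minute default applies.
crawler = ParselCrawler(navigation_timeout=timedelta(seconds=30))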
@@ -110,9 +130,17 @@ class AbstractHttpCrawler(
      async def _execute_pre_navigation_hooks(
          self, context: BasicCrawlingContext
      ) -> AsyncGenerator[BasicCrawlingContext, None]:
-         for hook in self._pre_navigation_hooks:
-             await hook(context)
-         yield context
+         context_id = id(context)
+         self._shared_navigation_timeouts[context_id] = SharedTimeout(self._navigation_timeout)
+
+         try:
+             for hook in self._pre_navigation_hooks:
+                 async with self._shared_navigation_timeouts[context_id]:
+                     await hook(context)
+
+             yield context
+         finally:
+             self._shared_navigation_timeouts.pop(context_id, None)

      async def _parse_http_response(
          self, context: HttpCrawlingContext
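The hooks now draw on a single `SharedTimeout` budget per context, created before the first hook runs and removed once the context leaves the pipeline. The snippet below is an illustrative sketch of the shared-deadline idea only, not the actual implementation added in crawlee/_utils/time.py:

import asyncio
from datetime import timedelta


class SharedDeadlineSketch:
    """Illustrative only: one time budget consumed across several async blocks."""

    def __init__(self, total: timedelta) -> None:
        self._remaining = total.total_seconds()
        self._started = 0.0

    async def __aenter__(self) -> timedelta:
        self._started = asyncio.get_running_loop().time()
        # Expose the remaining budget to the caller (used as `as remaining_timeout` below).
        return timedelta(seconds=self._remaining)

    async def __aexit__(self, *exc_info: object) -> None:
        elapsed = asyncio.get_running_loop().time() - self._started
        self._remaining = max(0.0, self._remaining - elapsed)
        # A real implementation would also cancel work once the budget is exhausted.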
@@ -165,7 +193,15 @@ class AbstractHttpCrawler(
          kwargs.setdefault('strategy', 'same-hostname')

          links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
-         links_iterator = to_absolute_url_iterator(context.request.loaded_url or context.request.url, links_iterator)
+
+         # Get base URL from <base> tag if present
+         extracted_base_urls = list(self._parser.find_links(parsed_content, 'base[href]'))
+         base_url: str = (
+             str(extracted_base_urls[0])
+             if extracted_base_urls
+             else context.request.loaded_url or context.request.url
+         )
+         links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)

          if robots_txt_file:
              skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
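Link extraction now prefers a document's base[href] value (when present) over the loaded URL when resolving relative links. A quick standard-library illustration of why that changes the resulting absolute URLs:

from urllib.parse import urljoin

# Without a <base> tag, relative links resolve against the page URL...
print(urljoin('https://example.com/catalog/list', 'item/1'))
# -> https://example.com/catalog/item/1

# ...but with <base href="https://cdn.example.com/app/">, they resolve against the base URL.
print(urljoin('https://cdn.example.com/app/', 'item/1'))
# -> https://cdn.example.com/app/item/1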
@@ -212,12 +248,14 @@ class AbstractHttpCrawler(
          Yields:
              The original crawling context enhanced by HTTP response.
          """
-         result = await self._http_client.crawl(
-             request=context.request,
-             session=context.session,
-             proxy_info=context.proxy_info,
-             statistics=self._statistics,
-         )
+         async with self._shared_navigation_timeouts[id(context)] as remaining_timeout:
+             result = await self._http_client.crawl(
+                 request=context.request,
+                 session=context.session,
+                 proxy_info=context.proxy_info,
+                 statistics=self._statistics,
+                 timeout=remaining_timeout,
+             )

          yield HttpCrawlingContext.from_basic_crawling_context(context=context, http_response=result.http_response)


crawlee/crawlers/_abstract_http/_abstract_http_parser.py

@@ -16,7 +16,7 @@ if TYPE_CHECKING:


  @docs_group('HTTP parsers')
- class AbstractHttpParser(Generic[TParseResult, TSelectResult], ABC):
+ class AbstractHttpParser(ABC, Generic[TParseResult, TSelectResult]):
      """Parser used for parsing HTTP response and inspecting parsed result to find links or detect blocking."""

      @abstractmethod

crawlee/crawlers/_abstract_http/_http_crawling_context.py

@@ -31,7 +31,7 @@ class HttpCrawlingContext(BasicCrawlingContext, HttpCrawlingResult):

  @dataclass(frozen=True)
  @docs_group('Crawling contexts')
- class ParsedHttpCrawlingContext(Generic[TParseResult], HttpCrawlingContext):
+ class ParsedHttpCrawlingContext(HttpCrawlingContext, Generic[TParseResult]):
      """The crawling context used by `AbstractHttpCrawler`.

      It provides access to key objects as well as utility functions for handling crawling tasks.

crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py

@@ -12,7 +12,7 @@ from bs4 import BeautifulSoup, Tag
  from parsel import Selector
  from typing_extensions import Self, TypeVar, override

- from crawlee._types import BasicCrawlingContext, JsonSerializable, RequestHandlerRunResult
+ from crawlee._types import BasicCrawlingContext, ConcurrencySettings, JsonSerializable, RequestHandlerRunResult
  from crawlee._utils.docs import docs_group
  from crawlee._utils.wait import wait_for
  from crawlee.crawlers import (
@@ -71,7 +71,6 @@ class _NonPersistentStatistics(Statistics):
      async def __aenter__(self) -> Self:
          self._active = True
          await self._state.initialize()
-         self._after_initialize()
          return self

      async def __aexit__(
@@ -85,8 +84,8 @@

  @docs_group('Crawlers')
  class AdaptivePlaywrightCrawler(
-     Generic[TStaticCrawlingContext, TStaticParseResult, TStaticSelectResult],
      BasicCrawler[AdaptivePlaywrightCrawlingContext, AdaptivePlaywrightCrawlerStatisticState],
+     Generic[TStaticCrawlingContext, TStaticParseResult, TStaticSelectResult],
  ):
      """An adaptive web crawler capable of using both static HTTP request based crawling and browser based crawling.

@@ -149,15 +148,15 @@ class AdaptivePlaywrightCrawler(
                  non-default configuration.
              kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
          """
-         # Some sub crawler kwargs are internally modified. Prepare copies.
-         basic_crawler_kwargs_for_static_crawler = deepcopy(kwargs)
-         basic_crawler_kwargs_for_pw_crawler = deepcopy(kwargs)
-
          # Adaptive crawling related.
          self.rendering_type_predictor = rendering_type_predictor or DefaultRenderingTypePredictor()
          self.result_checker = result_checker or (lambda _: True)
          self.result_comparator = result_comparator or create_default_comparator(result_checker)

+         # Set default concurrency settings for browser crawlers if not provided
+         if 'concurrency_settings' not in kwargs or kwargs['concurrency_settings'] is None:
+             kwargs['concurrency_settings'] = ConcurrencySettings(desired_concurrency=1)
+
          super().__init__(statistics=statistics, **kwargs)

          # Sub crawlers related.
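Because the adaptive crawler drives a real browser, it now defaults to `ConcurrencySettings(desired_concurrency=1)` when the caller provides no settings; explicit settings still win. A hedged sketch (it assumes the top-level `ConcurrencySettings` export and the `with_parsel_static_parser` factory, which is the usual way to construct this crawler):

from crawlee import ConcurrencySettings
from crawlee.crawlers import AdaptivePlaywrightCrawler

# Explicit settings override the new desired_concurrency=1 default.
crawler = AdaptivePlaywrightCrawler.with_parsel_static_parser(
    concurrency_settings=ConcurrencySettings(desired_concurrency=4),
)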
@@ -166,11 +165,11 @@
          # Each sub crawler will use custom logger .
          static_logger = getLogger('Subcrawler_static')
          static_logger.setLevel(logging.ERROR)
-         basic_crawler_kwargs_for_static_crawler['_logger'] = static_logger
+         basic_crawler_kwargs_for_static_crawler: _BasicCrawlerOptions = {'_logger': static_logger, **kwargs}

          pw_logger = getLogger('Subcrawler_playwright')
          pw_logger.setLevel(logging.ERROR)
-         basic_crawler_kwargs_for_pw_crawler['_logger'] = pw_logger
+         basic_crawler_kwargs_for_pw_crawler: _BasicCrawlerOptions = {'_logger': pw_logger, **kwargs}

          # Initialize sub crawlers to create their pipelines.
          static_crawler_class = AbstractHttpCrawler.create_parsed_http_crawler_class(static_parser=static_parser)
@@ -315,7 +314,7 @@
                  ),
                  logger=self._logger,
              )
-             return SubCrawlerRun(result=result)
+             return SubCrawlerRun(result=result, run_context=context_linked_to_result)
          except Exception as e:
              return SubCrawlerRun(exception=e)

@@ -371,7 +370,8 @@
              self.track_http_only_request_handler_runs()

          static_run = await self._crawl_one(rendering_type='static', context=context)
-         if static_run.result and self.result_checker(static_run.result):
+         if static_run.result and static_run.run_context and self.result_checker(static_run.result):
+             self._update_context_from_copy(context, static_run.run_context)
              self._context_result_map[context] = static_run.result
              return
          if static_run.exception:
@@ -402,13 +402,10 @@
          if pw_run.exception is not None:
              raise pw_run.exception

-         if pw_run.result:
-             self._context_result_map[context] = pw_run.result
-
+         if pw_run.result and pw_run.run_context:
              if should_detect_rendering_type:
                  detection_result: RenderingType
                  static_run = await self._crawl_one('static', context=context, state=old_state_copy)
-
                  if static_run.result and self.result_comparator(static_run.result, pw_run.result):
                      detection_result = 'static'
                  else:
@@ -417,6 +414,9 @@
                  context.log.debug(f'Detected rendering type {detection_result} for {context.request.url}')
                  self.rendering_type_predictor.store_result(context.request, detection_result)

+             self._update_context_from_copy(context, pw_run.run_context)
+             self._context_result_map[context] = pw_run.result
+
      def pre_navigation_hook(
          self,
          hook: Callable[[AdaptivePlaywrightPreNavCrawlingContext], Awaitable[None]] | None = None,
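Pre-navigation hooks remain the place to tweak per-request behaviour (this release also exposes `goto_options` on the pre-navigation context, see below). A hedged registration sketch, assuming `AdaptivePlaywrightPreNavCrawlingContext` is exported from `crawlee.crawlers`:

from crawlee.crawlers import AdaptivePlaywrightCrawler, AdaptivePlaywrightPreNavCrawlingContext

crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser()


async def log_navigation(context: AdaptivePlaywrightPreNavCrawlingContext) -> None:
    # Runs before every navigation, for both the static and the Playwright sub-crawler.
    context.log.info(f'Navigating to {context.request.url}')


crawler.pre_navigation_hook(log_navigation)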
@@ -451,8 +451,32 @@
      def track_rendering_type_mispredictions(self) -> None:
          self.statistics.state.rendering_type_mispredictions += 1

+     def _update_context_from_copy(self, context: BasicCrawlingContext, context_copy: BasicCrawlingContext) -> None:
+         """Update mutable fields of `context` from `context_copy`.
+
+         Uses object.__setattr__ to bypass frozen dataclass restrictions,
+         allowing state synchronization after isolated crawler execution.
+         """
+         updating_attributes = {
+             'request': ('headers', 'user_data'),
+             'session': ('_user_data', '_usage_count', '_error_score', '_cookies'),
+         }
+
+         for attr, sub_attrs in updating_attributes.items():
+             original_sub_obj = getattr(context, attr)
+             copy_sub_obj = getattr(context_copy, attr)
+
+             # Check that both sub objects are not None
+             if original_sub_obj is None or copy_sub_obj is None:
+                 continue
+
+             for sub_attr in sub_attrs:
+                 new_value = getattr(copy_sub_obj, sub_attr)
+                 object.__setattr__(original_sub_obj, sub_attr, new_value)
+

  @dataclass(frozen=True)
  class SubCrawlerRun:
      result: RequestHandlerRunResult | None = None
      exception: Exception | None = None
+     run_context: BasicCrawlingContext | None = None
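The new `_update_context_from_copy` helper copies mutable request and session state from the sub-crawler's isolated context back onto the shared one. It relies on the standard `object.__setattr__` escape hatch for frozen dataclasses, shown here in isolation:

from dataclasses import dataclass


@dataclass(frozen=True)
class Point:
    x: int


p = Point(x=1)
# `p.x = 2` would raise FrozenInstanceError; object.__setattr__ bypasses the frozen
# check, which is what the helper above does for request/session sub-fields.
object.__setattr__(p, 'x', 2)
print(p.x)  # 2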

crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py

@@ -12,7 +12,7 @@ from crawlee.statistics import StatisticsState
  class AdaptivePlaywrightCrawlerStatisticState(StatisticsState):
      """Statistic data about a crawler run with additional information related to adaptive crawling."""

-     model_config = ConfigDict(populate_by_name=True, ser_json_inf_nan='constants')
+     model_config = ConfigDict(validate_by_name=True, validate_by_alias=True, ser_json_inf_nan='constants')

      http_only_request_handler_runs: Annotated[int, Field(alias='http_only_request_handler_runs')] = 0
      """Number representing how many times static http based crawling was used."""

crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py

@@ -17,7 +17,7 @@ if TYPE_CHECKING:
      from playwright.async_api import Page, Response
      from typing_extensions import Self

-     from crawlee.crawlers._playwright._types import BlockRequestsFunction
+     from crawlee.crawlers._playwright._types import BlockRequestsFunction, GotoOptions


  TStaticParseResult = TypeVar('TStaticParseResult')
@@ -31,7 +31,8 @@ class AdaptiveContextError(RuntimeError):
  @dataclass(frozen=True)
  @docs_group('Crawling contexts')
  class AdaptivePlaywrightCrawlingContext(
-     Generic[TStaticParseResult, TStaticSelectResult], ParsedHttpCrawlingContext[TStaticParseResult]
+     ParsedHttpCrawlingContext[TStaticParseResult],
+     Generic[TStaticParseResult, TStaticSelectResult],
  ):
      _static_parser: AbstractHttpParser[TStaticParseResult, TStaticSelectResult]
      """The crawling context used by `AdaptivePlaywrightCrawler`.
@@ -189,8 +190,9 @@
          http_response = await PlaywrightHttpResponse.from_playwright_response(
              response=context.response, protocol=protocol_guess or ''
          )
-         # block_requests is useful only on pre-navigation contexts. It is useless here.
+         # block_requests and goto_options are useful only on pre-navigation contexts. It is useless here.
          context_kwargs.pop('block_requests')
+         context_kwargs.pop('goto_options')
          return cls(
              parsed_content=await parser.parse(http_response),
              http_response=http_response,
@@ -211,6 +213,9 @@
      block_requests: BlockRequestsFunction | None = None
      """Blocks network requests matching specified URL patterns."""

+     goto_options: GotoOptions | None = None
+     """Additional options to pass to Playwright's `Page.goto()` method. The `timeout` option is not supported."""
+
      @property
      def page(self) -> Page:
          """The Playwright `Page` object for the current page.

crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py

@@ -32,7 +32,7 @@ FeatureVector = tuple[float, float]


  class RenderingTypePredictorState(BaseModel):
-     model_config = ConfigDict(populate_by_name=True)
+     model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)

      model: Annotated[
          LogisticRegression,