dateparser 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dateparser/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "1.2.1"
1
+ __version__ = "1.2.2"
2
2
 
3
3
  from .conf import apply_settings
4
4
  from .date import DateDataParser
dateparser/data/dateparser_tz_cache.pkl ADDED
Binary file
dateparser/parser.py CHANGED
@@ -399,8 +399,8 @@ class _parser:
399
399
  return datetime(**params)
400
400
  except ValueError as e:
401
401
  error_text = e.__str__()
402
- error_msgs = ["day is out of range", "day must be in"]
403
- if error_msgs[0] in error_text or error_msgs[1] in error_text:
402
+ error_msgs = ["day is out of range", "day must be in", "must be in range"]
403
+ if any(msg in error_text for msg in error_msgs):
404
404
  if not (self._token_day or hasattr(self, "_token_weekday")):
405
405
  # if day is not available put last day of the month
406
406
  params["day"] = get_last_day_of_month(
@@ -512,6 +512,10 @@ class _parser:
512
512
 
513
513
  dateobj = dateobj + delta
514
514
 
515
+ # set the token_month here so that it is not subsequently
516
+ # altered by _correct_for_month
517
+ self._token_month = dateobj.month
518
+
515
519
  # NOTE: If this assert fires, self.now needs to be made offset-aware in a similar
516
520
  # way that dateobj is temporarily made offset-aware.
517
521
  assert not (self.now.tzinfo is None and dateobj.tzinfo is not None), (
dateparser/search/__init__.py CHANGED
@@ -57,6 +57,8 @@ def search_dates(
57
57
  ('on May 6th 2004', datetime.datetime(2004, 5, 6, 0, 0))]
58
58
 
59
59
  """
60
+ text = _search_with_detection.preprocess_text(text, languages)
61
+
60
62
  result = _search_with_detection.search_dates(
61
63
  text=text,
62
64
  languages=languages,
dateparser/search/search.py CHANGED
@@ -295,3 +295,10 @@ class DateSearchWithDetection:
295
295
  language_shortname, text, settings=settings
296
296
  ),
297
297
  }
298
+
299
+ def preprocess_text(self, text, languages):
300
+ """Preprocess text to handle language-specific quirks."""
301
+ if languages and "ru" in languages:
302
+ # Replace "с" (from) before numbers with a placeholder
303
+ text = re.sub(r"\bс\s+(?=\d)", "[FROM] ", text)
304
+ return text
dateparser/timezone_parser.py CHANGED
@@ -1,4 +1,8 @@
1
+ import os
2
+ import pickle
3
+ import zlib
1
4
  from datetime import datetime, timedelta, timezone, tzinfo
5
+ from pathlib import Path
2
6
 
3
7
  import regex as re
4
8
 
@@ -84,8 +88,47 @@ def get_local_tz_offset():
84
88
  return offset
85
89
 
86
90
 
87
- _search_regex_parts = []
88
- _tz_offsets = list(build_tz_offsets(_search_regex_parts))
89
- _search_regex = re.compile("|".join(_search_regex_parts))
90
- _search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
91
91
  local_tz_offset = get_local_tz_offset()
92
+
93
+ _tz_offsets = None
94
+ _search_regex = None
95
+ _search_regex_ignorecase = None
96
+
97
+
98
+ def _load_offsets(cache_path, current_hash):
99
+ global _tz_offsets, _search_regex, _search_regex_ignorecase
100
+
101
+ try:
102
+ with open(cache_path, mode="rb") as file:
103
+ (
104
+ serialized_hash,
105
+ _tz_offsets,
106
+ _search_regex,
107
+ _search_regex_ignorecase,
108
+ ) = pickle.load(file)
109
+ if current_hash is None or current_hash == serialized_hash:
110
+ return
111
+ except (FileNotFoundError, ValueError, TypeError):
112
+ pass
113
+
114
+ _search_regex_parts = []
115
+ _tz_offsets = list(build_tz_offsets(_search_regex_parts))
116
+ _search_regex = re.compile("|".join(_search_regex_parts))
117
+ _search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
118
+
119
+ with open(cache_path, mode="wb") as file:
120
+ pickle.dump(
121
+ (current_hash, _tz_offsets, _search_regex, _search_regex_ignorecase),
122
+ file,
123
+ protocol=5,
124
+ )
125
+
126
+
127
+ CACHE_PATH = Path(__file__).parent.joinpath("data", "dateparser_tz_cache.pkl")
128
+
129
+ if "BUILD_TZ_CACHE" in os.environ:
130
+ current_hash = zlib.crc32(str(timezone_info_list).encode("utf-8"))
131
+ else:
132
+ current_hash = None
133
+
134
+ _load_offsets(CACHE_PATH, current_hash)
dateparser-1.2.1.dist-info/METADATA → dateparser-1.2.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: dateparser
3
- Version: 1.2.1
3
+ Version: 1.2.2
4
4
  Summary: Date parsing library designed to parse dates from HTML pages
5
5
  Home-page: https://github.com/scrapinghub/dateparser
6
6
  Author: Scrapinghub
@@ -24,7 +24,7 @@ License-File: LICENSE
24
24
  License-File: AUTHORS.rst
25
25
  Requires-Dist: python-dateutil>=2.7.0
26
26
  Requires-Dist: pytz>=2024.2
27
- Requires-Dist: regex!=2019.02.19,!=2021.8.27,>=2015.06.24
27
+ Requires-Dist: regex>=2024.9.11
28
28
  Requires-Dist: tzlocal>=0.2
29
29
  Provides-Extra: calendars
30
30
  Requires-Dist: convertdate>=2.2.1; extra == "calendars"
@@ -41,6 +41,7 @@ Dynamic: description
41
41
  Dynamic: home-page
42
42
  Dynamic: keywords
43
43
  Dynamic: license
44
+ Dynamic: license-file
44
45
  Dynamic: project-url
45
46
  Dynamic: provides-extra
46
47
  Dynamic: requires-dist
@@ -318,6 +319,19 @@ To be able to use them you need to install the `calendar` extra by typing:
318
319
  History
319
320
  =======
320
321
 
322
+ 1.2.2 (2025-06-26)
323
+ ------------------
324
+
325
+ Fixes:
326
+
327
+ - Handle the Russian preposition “с” (#1261)
328
+ - Fix weekday search (#1274)
329
+
330
+ Improvements:
331
+
332
+ - Add Python 3.14 support (#1273)
333
+ - Cache timezone offsets to improve import time (#1250)
334
+
321
335
  1.2.1 (2025-02-05)
322
336
  ------------------
323
337
 
dateparser-1.2.1.dist-info/RECORD → dateparser-1.2.2.dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
1
- dateparser/__init__.py,sha256=3ajvpKU-xlhD0gF4Xp1bqU3PGTrMFLfYBlua29tRM1Q,2739
1
+ dateparser/__init__.py,sha256=Wfhc4HQ6pB00SH9DjixNnmfukzzhl-Bsoo-S-sshegk,2739
2
2
  dateparser/conf.py,sha256=ynrRajwQ6dtgbSm9T0plfxQEYEarHo6Qsr76ISInX-4,8467
3
3
  dateparser/date.py,sha256=AluFz3-uzuRL6Y1cF5uFgjpcxEmTspJAHGMdIR1oBzE,21763
4
4
  dateparser/date_parser.py,sha256=LPMD1La3fUTyfa17i0Ng-mii6ucWIBme-zJJY3es-Ug,1782
5
5
  dateparser/freshness_date_parser.py,sha256=VgFAxveZqRDXCHTkQzTab7jx197btjv-YFcSOeZwOAY,5081
6
- dateparser/parser.py,sha256=_PiRb9uoIQNcnymsd0KRO6ZO5M_82Aa418rwp-PHGKU,25838
7
- dateparser/timezone_parser.py,sha256=98cWyyBl8D71Ry-cAVgB0pa935NPHZQiP451YLvK_zU,2913
6
+ dateparser/parser.py,sha256=KDde8HlyD5wFERiT8ABrIxTakLsEE7VYA4Kif6VWRbQ,26005
7
+ dateparser/timezone_parser.py,sha256=MoMTAH__hkeaoZQ6VaZhJpUPxd-yA9QTczm-mmHztKc,4042
8
8
  dateparser/timezones.py,sha256=gtJhZDCy4sw37MpUogWk4LHKmn8DpG5caO3LIaCC264,13981
9
9
  dateparser/calendars/__init__.py,sha256=Gf9qLl9xbCFhmuWtuakkaHy_vSuOLOoo1kr33xJGkIY,4323
10
10
  dateparser/calendars/hijri.py,sha256=J9IUHs162UslQ4r5OPIr-BgmiyY3_4kj_z1wOIlB57E,168
@@ -16,6 +16,7 @@ dateparser/custom_language_detection/fasttext.py,sha256=uuNN8JlKcUSi5pqUrnywPRrq
16
16
  dateparser/custom_language_detection/langdetect.py,sha256=lxSrW6bpB3f4_YaF3nnyi6xmSsvwsnmBmOelHQGKeck,1195
17
17
  dateparser/custom_language_detection/language_mapping.py,sha256=HeZLw65ghqXmjLaE8KEOm4BZedkFVrX7dvC_FynXWtk,557
18
18
  dateparser/data/__init__.py,sha256=20PfpbkZv8Q9LU6p0T4RA0YNq8oQykhvWThKPhYgp6Y,115
19
+ dateparser/data/dateparser_tz_cache.pkl,sha256=wXxZFkophi11wLJJdVEgUgLpcqf1GS5SAA89e8Mg0-I,134536
19
20
  dateparser/data/languages_info.py,sha256=BPHjASz2MUvgc7kF2uXrtYbcj823kjLxrR8X_wLiAEc,13406
20
21
  dateparser/data/date_translation_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
22
  dateparser/data/date_translation_data/af.py,sha256=LR5yRcwOJ9_QWn5EH_jwfAhwNGtnf4RFhdNoVHqjd7U,4588
@@ -228,12 +229,14 @@ dateparser/languages/dictionary.py,sha256=KiSswl08OsBqW-UiBeST2ff-BtqZyH-sNy1FAs
228
229
  dateparser/languages/loader.py,sha256=gTyyVEX8ppeVEQ1SrIjjxwSq8hpTI5HTnS_cyVotegc,7323
229
230
  dateparser/languages/locale.py,sha256=3T5VIzS5WHpcs7fScKW-rQiW4refCBrp-BxyXzDYlio,24631
230
231
  dateparser/languages/validation.py,sha256=gtLpxvh3XI_nzryBZ8NA6QL8YLcT6HV1j81dV3nDxY4,16750
231
- dateparser/search/__init__.py,sha256=__laR2Q19meiPpZLILINZMePGRG4DmvOoc2tOncU9ZM,2841
232
+ dateparser/search/__init__.py,sha256=4AixzJu9YID1vzNSKx8uJ0cyKNZGvO8BzMN6Jpoci84,2909
232
233
  dateparser/search/detection.py,sha256=YAnTYbM18FGZV2pxn6lNDbfoiQttzM52ZDgOhlV87II,2695
233
- dateparser/search/search.py,sha256=-c9LIBUnTPRfchEoFrcVeu5JVJBkMsl1jGueB3FhKKY,11846
234
+ dateparser/search/search.py,sha256=v9eH-xwZBu7b4yGsMiwLBBBh8C1l0IS4hmdvd7Tv2V8,12154
234
235
  dateparser/search/text_detection.py,sha256=SSQUOr5V3Qm946Tiz7hNWcpYJtcyP9Bxayxec0hC7zc,3238
235
236
  dateparser/utils/__init__.py,sha256=X5ssdQuo60F_SnVcKpt-a2ehvPClMaX2pQ2vwGWGSac,7234
236
237
  dateparser/utils/strptime.py,sha256=tnPJ_C4wKZAGwW03z5-XHy5ykZ3lP6RLpw6QR-jT-f0,2853
238
+ dateparser-1.2.2.dist-info/licenses/AUTHORS.rst,sha256=xyYeT2AGKVrsHYxoOxgDE8XmSK7VNtJNqFzw25JLE2g,711
239
+ dateparser-1.2.2.dist-info/licenses/LICENSE,sha256=t122Vbt6QqjoVh0NuHMVi7naudRoryRAAPAtfdg-l_Q,1468
237
240
  dateparser_cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
238
241
  dateparser_cli/cli.py,sha256=pZSVwZO_N1AfPJfbbTHgvEnV8kmvyKmd6YQLYT3epis,991
239
242
  dateparser_cli/exceptions.py,sha256=ifwai5On6opuhUpBHMoz0iFkybtZg7m3gHHnxtOEoEk,58
@@ -247,10 +250,8 @@ dateparser_scripts/order_languages.py,sha256=BkYmAqFvoMvj3I7xcjeutz0Bx5VQQky68Y5
247
250
  dateparser_scripts/update_supported_languages_and_locales.py,sha256=BaHzzCiLKvkhKcal44CyedZNIUrBrI4fm0LxHicRJ8g,1460
248
251
  dateparser_scripts/utils.py,sha256=Uw4HbgwbKYuUeztUw9OQqtzPtjmBxBBqGytLXjtSia8,2579
249
252
  dateparser_scripts/write_complete_data.py,sha256=w1pExoB7Z5oN02pSMvkzN2K-kQ_5UbehAdvTZ_bYUCQ,4540
250
- dateparser-1.2.1.dist-info/AUTHORS.rst,sha256=xyYeT2AGKVrsHYxoOxgDE8XmSK7VNtJNqFzw25JLE2g,711
251
- dateparser-1.2.1.dist-info/LICENSE,sha256=t122Vbt6QqjoVh0NuHMVi7naudRoryRAAPAtfdg-l_Q,1468
252
- dateparser-1.2.1.dist-info/METADATA,sha256=xBE9ekzZoqtRzII8t2YytRzToJ0qJ3suxF0qjBJPyEQ,29406
253
- dateparser-1.2.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
254
- dateparser-1.2.1.dist-info/entry_points.txt,sha256=QPwc8kOjbaaxJdyQXiix9sGbUjC_G_Ga5LgJ_11KKAs,68
255
- dateparser-1.2.1.dist-info/top_level.txt,sha256=LujVBIKC69tvws1XkgyOFRDjEEd-E1SjAirYrhEbqn8,61
256
- dateparser-1.2.1.dist-info/RECORD,,
253
+ dateparser-1.2.2.dist-info/METADATA,sha256=Ji6LM-TkErOhjKfv5MfYuayFv8C-JBsOa0BfjRv0Tks,29635
254
+ dateparser-1.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
255
+ dateparser-1.2.2.dist-info/entry_points.txt,sha256=QPwc8kOjbaaxJdyQXiix9sGbUjC_G_Ga5LgJ_11KKAs,68
256
+ dateparser-1.2.2.dist-info/top_level.txt,sha256=LujVBIKC69tvws1XkgyOFRDjEEd-E1SjAirYrhEbqn8,61
257
+ dateparser-1.2.2.dist-info/RECORD,,
dateparser-1.2.1.dist-info/WHEEL → dateparser-1.2.2.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5