dateparser 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dateparser/__init__.py +1 -1
- dateparser/data/dateparser_tz_cache.pkl +0 -0
- dateparser/parser.py +6 -2
- dateparser/search/__init__.py +2 -0
- dateparser/search/search.py +7 -0
- dateparser/timezone_parser.py +47 -4
- {dateparser-1.2.1.dist-info → dateparser-1.2.2.dist-info}/METADATA +17 -3
- {dateparser-1.2.1.dist-info → dateparser-1.2.2.dist-info}/RECORD +13 -12
- {dateparser-1.2.1.dist-info → dateparser-1.2.2.dist-info}/WHEEL +1 -1
- {dateparser-1.2.1.dist-info → dateparser-1.2.2.dist-info}/entry_points.txt +0 -0
- {dateparser-1.2.1.dist-info → dateparser-1.2.2.dist-info/licenses}/AUTHORS.rst +0 -0
- {dateparser-1.2.1.dist-info → dateparser-1.2.2.dist-info/licenses}/LICENSE +0 -0
- {dateparser-1.2.1.dist-info → dateparser-1.2.2.dist-info}/top_level.txt +0 -0
dateparser/__init__.py
CHANGED
|
Binary file
|
dateparser/parser.py
CHANGED
|
@@ -399,8 +399,8 @@ class _parser:
|
|
|
399
399
|
return datetime(**params)
|
|
400
400
|
except ValueError as e:
|
|
401
401
|
error_text = e.__str__()
|
|
402
|
-
error_msgs = ["day is out of range", "day must be in"]
|
|
403
|
-
if
|
|
402
|
+
error_msgs = ["day is out of range", "day must be in", "must be in range"]
|
|
403
|
+
if any(msg in error_text for msg in error_msgs):
|
|
404
404
|
if not (self._token_day or hasattr(self, "_token_weekday")):
|
|
405
405
|
# if day is not available put last day of the month
|
|
406
406
|
params["day"] = get_last_day_of_month(
|
|
@@ -512,6 +512,10 @@ class _parser:
|
|
|
512
512
|
|
|
513
513
|
dateobj = dateobj + delta
|
|
514
514
|
|
|
515
|
+
# set the token_month here so that it is not subsequently
|
|
516
|
+
# altered by _correct_for_month
|
|
517
|
+
self._token_month = dateobj.month
|
|
518
|
+
|
|
515
519
|
# NOTE: If this assert fires, self.now needs to be made offset-aware in a similar
|
|
516
520
|
# way that dateobj is temporarily made offset-aware.
|
|
517
521
|
assert not (self.now.tzinfo is None and dateobj.tzinfo is not None), (
|
dateparser/search/__init__.py
CHANGED
dateparser/search/search.py
CHANGED
|
@@ -295,3 +295,10 @@ class DateSearchWithDetection:
|
|
|
295
295
|
language_shortname, text, settings=settings
|
|
296
296
|
),
|
|
297
297
|
}
|
|
298
|
+
|
|
299
|
+
def preprocess_text(self, text, languages):
|
|
300
|
+
"""Preprocess text to handle language-specific quirks."""
|
|
301
|
+
if languages and "ru" in languages:
|
|
302
|
+
# Replace "с" (from) before numbers with a placeholder
|
|
303
|
+
text = re.sub(r"\bс\s+(?=\d)", "[FROM] ", text)
|
|
304
|
+
return text
|
dateparser/timezone_parser.py
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pickle
|
|
3
|
+
import zlib
|
|
1
4
|
from datetime import datetime, timedelta, timezone, tzinfo
|
|
5
|
+
from pathlib import Path
|
|
2
6
|
|
|
3
7
|
import regex as re
|
|
4
8
|
|
|
@@ -84,8 +88,47 @@ def get_local_tz_offset():
|
|
|
84
88
|
return offset
|
|
85
89
|
|
|
86
90
|
|
|
87
|
-
_search_regex_parts = []
|
|
88
|
-
_tz_offsets = list(build_tz_offsets(_search_regex_parts))
|
|
89
|
-
_search_regex = re.compile("|".join(_search_regex_parts))
|
|
90
|
-
_search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
|
|
91
91
|
local_tz_offset = get_local_tz_offset()
|
|
92
|
+
|
|
93
|
+
_tz_offsets = None
|
|
94
|
+
_search_regex = None
|
|
95
|
+
_search_regex_ignorecase = None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _load_offsets(cache_path, current_hash):
|
|
99
|
+
global _tz_offsets, _search_regex, _search_regex_ignorecase
|
|
100
|
+
|
|
101
|
+
try:
|
|
102
|
+
with open(cache_path, mode="rb") as file:
|
|
103
|
+
(
|
|
104
|
+
serialized_hash,
|
|
105
|
+
_tz_offsets,
|
|
106
|
+
_search_regex,
|
|
107
|
+
_search_regex_ignorecase,
|
|
108
|
+
) = pickle.load(file)
|
|
109
|
+
if current_hash is None or current_hash == serialized_hash:
|
|
110
|
+
return
|
|
111
|
+
except (FileNotFoundError, ValueError, TypeError):
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
_search_regex_parts = []
|
|
115
|
+
_tz_offsets = list(build_tz_offsets(_search_regex_parts))
|
|
116
|
+
_search_regex = re.compile("|".join(_search_regex_parts))
|
|
117
|
+
_search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
|
|
118
|
+
|
|
119
|
+
with open(cache_path, mode="wb") as file:
|
|
120
|
+
pickle.dump(
|
|
121
|
+
(current_hash, _tz_offsets, _search_regex, _search_regex_ignorecase),
|
|
122
|
+
file,
|
|
123
|
+
protocol=5,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
CACHE_PATH = Path(__file__).parent.joinpath("data", "dateparser_tz_cache.pkl")
|
|
128
|
+
|
|
129
|
+
if "BUILD_TZ_CACHE" in os.environ:
|
|
130
|
+
current_hash = zlib.crc32(str(timezone_info_list).encode("utf-8"))
|
|
131
|
+
else:
|
|
132
|
+
current_hash = None
|
|
133
|
+
|
|
134
|
+
_load_offsets(CACHE_PATH, current_hash)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: dateparser
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.2.2
|
|
4
4
|
Summary: Date parsing library designed to parse dates from HTML pages
|
|
5
5
|
Home-page: https://github.com/scrapinghub/dateparser
|
|
6
6
|
Author: Scrapinghub
|
|
@@ -24,7 +24,7 @@ License-File: LICENSE
|
|
|
24
24
|
License-File: AUTHORS.rst
|
|
25
25
|
Requires-Dist: python-dateutil>=2.7.0
|
|
26
26
|
Requires-Dist: pytz>=2024.2
|
|
27
|
-
Requires-Dist: regex
|
|
27
|
+
Requires-Dist: regex>=2024.9.11
|
|
28
28
|
Requires-Dist: tzlocal>=0.2
|
|
29
29
|
Provides-Extra: calendars
|
|
30
30
|
Requires-Dist: convertdate>=2.2.1; extra == "calendars"
|
|
@@ -41,6 +41,7 @@ Dynamic: description
|
|
|
41
41
|
Dynamic: home-page
|
|
42
42
|
Dynamic: keywords
|
|
43
43
|
Dynamic: license
|
|
44
|
+
Dynamic: license-file
|
|
44
45
|
Dynamic: project-url
|
|
45
46
|
Dynamic: provides-extra
|
|
46
47
|
Dynamic: requires-dist
|
|
@@ -318,6 +319,19 @@ To be able to use them you need to install the `calendar` extra by typing:
|
|
|
318
319
|
History
|
|
319
320
|
=======
|
|
320
321
|
|
|
322
|
+
1.2.2 (2025-06-26)
|
|
323
|
+
------------------
|
|
324
|
+
|
|
325
|
+
Fixes:
|
|
326
|
+
|
|
327
|
+
- Handle the Russian preposition “с” (#1261)
|
|
328
|
+
- Fix weekday search (#1274)
|
|
329
|
+
|
|
330
|
+
Improvements:
|
|
331
|
+
|
|
332
|
+
- Add Python 3.14 support (#1273)
|
|
333
|
+
- Cache timezone offsets to improve import time (#1250)
|
|
334
|
+
|
|
321
335
|
1.2.1 (2025-02-05)
|
|
322
336
|
------------------
|
|
323
337
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
dateparser/__init__.py,sha256=
|
|
1
|
+
dateparser/__init__.py,sha256=Wfhc4HQ6pB00SH9DjixNnmfukzzhl-Bsoo-S-sshegk,2739
|
|
2
2
|
dateparser/conf.py,sha256=ynrRajwQ6dtgbSm9T0plfxQEYEarHo6Qsr76ISInX-4,8467
|
|
3
3
|
dateparser/date.py,sha256=AluFz3-uzuRL6Y1cF5uFgjpcxEmTspJAHGMdIR1oBzE,21763
|
|
4
4
|
dateparser/date_parser.py,sha256=LPMD1La3fUTyfa17i0Ng-mii6ucWIBme-zJJY3es-Ug,1782
|
|
5
5
|
dateparser/freshness_date_parser.py,sha256=VgFAxveZqRDXCHTkQzTab7jx197btjv-YFcSOeZwOAY,5081
|
|
6
|
-
dateparser/parser.py,sha256=
|
|
7
|
-
dateparser/timezone_parser.py,sha256=
|
|
6
|
+
dateparser/parser.py,sha256=KDde8HlyD5wFERiT8ABrIxTakLsEE7VYA4Kif6VWRbQ,26005
|
|
7
|
+
dateparser/timezone_parser.py,sha256=MoMTAH__hkeaoZQ6VaZhJpUPxd-yA9QTczm-mmHztKc,4042
|
|
8
8
|
dateparser/timezones.py,sha256=gtJhZDCy4sw37MpUogWk4LHKmn8DpG5caO3LIaCC264,13981
|
|
9
9
|
dateparser/calendars/__init__.py,sha256=Gf9qLl9xbCFhmuWtuakkaHy_vSuOLOoo1kr33xJGkIY,4323
|
|
10
10
|
dateparser/calendars/hijri.py,sha256=J9IUHs162UslQ4r5OPIr-BgmiyY3_4kj_z1wOIlB57E,168
|
|
@@ -16,6 +16,7 @@ dateparser/custom_language_detection/fasttext.py,sha256=uuNN8JlKcUSi5pqUrnywPRrq
|
|
|
16
16
|
dateparser/custom_language_detection/langdetect.py,sha256=lxSrW6bpB3f4_YaF3nnyi6xmSsvwsnmBmOelHQGKeck,1195
|
|
17
17
|
dateparser/custom_language_detection/language_mapping.py,sha256=HeZLw65ghqXmjLaE8KEOm4BZedkFVrX7dvC_FynXWtk,557
|
|
18
18
|
dateparser/data/__init__.py,sha256=20PfpbkZv8Q9LU6p0T4RA0YNq8oQykhvWThKPhYgp6Y,115
|
|
19
|
+
dateparser/data/dateparser_tz_cache.pkl,sha256=wXxZFkophi11wLJJdVEgUgLpcqf1GS5SAA89e8Mg0-I,134536
|
|
19
20
|
dateparser/data/languages_info.py,sha256=BPHjASz2MUvgc7kF2uXrtYbcj823kjLxrR8X_wLiAEc,13406
|
|
20
21
|
dateparser/data/date_translation_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
22
|
dateparser/data/date_translation_data/af.py,sha256=LR5yRcwOJ9_QWn5EH_jwfAhwNGtnf4RFhdNoVHqjd7U,4588
|
|
@@ -228,12 +229,14 @@ dateparser/languages/dictionary.py,sha256=KiSswl08OsBqW-UiBeST2ff-BtqZyH-sNy1FAs
|
|
|
228
229
|
dateparser/languages/loader.py,sha256=gTyyVEX8ppeVEQ1SrIjjxwSq8hpTI5HTnS_cyVotegc,7323
|
|
229
230
|
dateparser/languages/locale.py,sha256=3T5VIzS5WHpcs7fScKW-rQiW4refCBrp-BxyXzDYlio,24631
|
|
230
231
|
dateparser/languages/validation.py,sha256=gtLpxvh3XI_nzryBZ8NA6QL8YLcT6HV1j81dV3nDxY4,16750
|
|
231
|
-
dateparser/search/__init__.py,sha256=
|
|
232
|
+
dateparser/search/__init__.py,sha256=4AixzJu9YID1vzNSKx8uJ0cyKNZGvO8BzMN6Jpoci84,2909
|
|
232
233
|
dateparser/search/detection.py,sha256=YAnTYbM18FGZV2pxn6lNDbfoiQttzM52ZDgOhlV87II,2695
|
|
233
|
-
dateparser/search/search.py,sha256
|
|
234
|
+
dateparser/search/search.py,sha256=v9eH-xwZBu7b4yGsMiwLBBBh8C1l0IS4hmdvd7Tv2V8,12154
|
|
234
235
|
dateparser/search/text_detection.py,sha256=SSQUOr5V3Qm946Tiz7hNWcpYJtcyP9Bxayxec0hC7zc,3238
|
|
235
236
|
dateparser/utils/__init__.py,sha256=X5ssdQuo60F_SnVcKpt-a2ehvPClMaX2pQ2vwGWGSac,7234
|
|
236
237
|
dateparser/utils/strptime.py,sha256=tnPJ_C4wKZAGwW03z5-XHy5ykZ3lP6RLpw6QR-jT-f0,2853
|
|
238
|
+
dateparser-1.2.2.dist-info/licenses/AUTHORS.rst,sha256=xyYeT2AGKVrsHYxoOxgDE8XmSK7VNtJNqFzw25JLE2g,711
|
|
239
|
+
dateparser-1.2.2.dist-info/licenses/LICENSE,sha256=t122Vbt6QqjoVh0NuHMVi7naudRoryRAAPAtfdg-l_Q,1468
|
|
237
240
|
dateparser_cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
238
241
|
dateparser_cli/cli.py,sha256=pZSVwZO_N1AfPJfbbTHgvEnV8kmvyKmd6YQLYT3epis,991
|
|
239
242
|
dateparser_cli/exceptions.py,sha256=ifwai5On6opuhUpBHMoz0iFkybtZg7m3gHHnxtOEoEk,58
|
|
@@ -247,10 +250,8 @@ dateparser_scripts/order_languages.py,sha256=BkYmAqFvoMvj3I7xcjeutz0Bx5VQQky68Y5
|
|
|
247
250
|
dateparser_scripts/update_supported_languages_and_locales.py,sha256=BaHzzCiLKvkhKcal44CyedZNIUrBrI4fm0LxHicRJ8g,1460
|
|
248
251
|
dateparser_scripts/utils.py,sha256=Uw4HbgwbKYuUeztUw9OQqtzPtjmBxBBqGytLXjtSia8,2579
|
|
249
252
|
dateparser_scripts/write_complete_data.py,sha256=w1pExoB7Z5oN02pSMvkzN2K-kQ_5UbehAdvTZ_bYUCQ,4540
|
|
250
|
-
dateparser-1.2.
|
|
251
|
-
dateparser-1.2.
|
|
252
|
-
dateparser-1.2.
|
|
253
|
-
dateparser-1.2.
|
|
254
|
-
dateparser-1.2.
|
|
255
|
-
dateparser-1.2.1.dist-info/top_level.txt,sha256=LujVBIKC69tvws1XkgyOFRDjEEd-E1SjAirYrhEbqn8,61
|
|
256
|
-
dateparser-1.2.1.dist-info/RECORD,,
|
|
253
|
+
dateparser-1.2.2.dist-info/METADATA,sha256=Ji6LM-TkErOhjKfv5MfYuayFv8C-JBsOa0BfjRv0Tks,29635
|
|
254
|
+
dateparser-1.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
255
|
+
dateparser-1.2.2.dist-info/entry_points.txt,sha256=QPwc8kOjbaaxJdyQXiix9sGbUjC_G_Ga5LgJ_11KKAs,68
|
|
256
|
+
dateparser-1.2.2.dist-info/top_level.txt,sha256=LujVBIKC69tvws1XkgyOFRDjEEd-E1SjAirYrhEbqn8,61
|
|
257
|
+
dateparser-1.2.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|