dateparser 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dateparser/__init__.py +1 -1
- dateparser/conf.py +11 -0
- dateparser/data/date_translation_data/en.py +15 -6
- dateparser/data/date_translation_data/fi.py +1 -0
- dateparser/data/date_translation_data/ru.py +56 -23
- dateparser/date.py +2 -1
- dateparser/freshness_date_parser.py +35 -9
- dateparser/languages/dictionary.py +12 -1
- dateparser/languages/locale.py +28 -1
- dateparser/search/search.py +18 -1
- dateparser/utils/strptime.py +28 -1
- dateparser/utils/time_spans.py +152 -0
- {dateparser-1.2.2.dist-info → dateparser-1.3.0.dist-info}/METADATA +40 -4
- {dateparser-1.2.2.dist-info → dateparser-1.3.0.dist-info}/RECORD +21 -20
- {dateparser-1.2.2.dist-info → dateparser-1.3.0.dist-info}/WHEEL +1 -1
- dateparser_data/settings.py +4 -0
- dateparser_scripts/write_complete_data.py +5 -3
- {dateparser-1.2.2.dist-info → dateparser-1.3.0.dist-info}/entry_points.txt +0 -0
- {dateparser-1.2.2.dist-info → dateparser-1.3.0.dist-info}/licenses/AUTHORS.rst +0 -0
- {dateparser-1.2.2.dist-info → dateparser-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {dateparser-1.2.2.dist-info → dateparser-1.3.0.dist-info}/top_level.txt +0 -0
dateparser/__init__.py
CHANGED
dateparser/conf.py
CHANGED
|
@@ -27,6 +27,9 @@ class Settings:
|
|
|
27
27
|
* `SKIP_TOKENS`
|
|
28
28
|
* `NORMALIZE`
|
|
29
29
|
* `RETURN_TIME_AS_PERIOD`
|
|
30
|
+
* `RETURN_TIME_SPAN`
|
|
31
|
+
* `DEFAULT_START_OF_WEEK`
|
|
32
|
+
* `DEFAULT_DAYS_IN_MONTH`
|
|
30
33
|
* `PARSERS`
|
|
31
34
|
* `DEFAULT_LANGUAGES`
|
|
32
35
|
* `LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD`
|
|
@@ -229,6 +232,14 @@ def check_settings(settings):
|
|
|
229
232
|
"CACHE_SIZE_LIMIT": {
|
|
230
233
|
"type": int,
|
|
231
234
|
},
|
|
235
|
+
"RETURN_TIME_SPAN": {"type": bool},
|
|
236
|
+
"DEFAULT_START_OF_WEEK": {
|
|
237
|
+
"values": ("monday", "sunday"),
|
|
238
|
+
"type": str,
|
|
239
|
+
},
|
|
240
|
+
"DEFAULT_DAYS_IN_MONTH": {
|
|
241
|
+
"type": int,
|
|
242
|
+
},
|
|
232
243
|
}
|
|
233
244
|
|
|
234
245
|
modified_settings = settings._mod_settings # check only modified settings
|
|
@@ -51,32 +51,39 @@ info = {
|
|
|
51
51
|
],
|
|
52
52
|
"monday": [
|
|
53
53
|
"mon",
|
|
54
|
-
"monday"
|
|
54
|
+
"monday",
|
|
55
|
+
"mo"
|
|
55
56
|
],
|
|
56
57
|
"tuesday": [
|
|
57
58
|
"tue",
|
|
58
59
|
"tuesday",
|
|
60
|
+
"tu",
|
|
59
61
|
"Tues"
|
|
60
62
|
],
|
|
61
63
|
"wednesday": [
|
|
62
64
|
"wed",
|
|
63
|
-
"wednesday"
|
|
65
|
+
"wednesday",
|
|
66
|
+
"we"
|
|
64
67
|
],
|
|
65
68
|
"thursday": [
|
|
66
69
|
"thu",
|
|
67
|
-
"thursday"
|
|
70
|
+
"thursday",
|
|
71
|
+
"th"
|
|
68
72
|
],
|
|
69
73
|
"friday": [
|
|
70
74
|
"fri",
|
|
71
|
-
"friday"
|
|
75
|
+
"friday",
|
|
76
|
+
"fr"
|
|
72
77
|
],
|
|
73
78
|
"saturday": [
|
|
74
79
|
"sat",
|
|
75
|
-
"saturday"
|
|
80
|
+
"saturday",
|
|
81
|
+
"sa"
|
|
76
82
|
],
|
|
77
83
|
"sunday": [
|
|
78
84
|
"sun",
|
|
79
|
-
"sunday"
|
|
85
|
+
"sunday",
|
|
86
|
+
"su"
|
|
80
87
|
],
|
|
81
88
|
"am": [
|
|
82
89
|
"am"
|
|
@@ -258,6 +265,8 @@ info = {
|
|
|
258
265
|
"in \\1 week": [
|
|
259
266
|
"in (\\d+[.,]?\\d*) week",
|
|
260
267
|
"in (\\d+[.,]?\\d*) weeks",
|
|
268
|
+
"in (\\d+[.,]?\\d*) weeks time",
|
|
269
|
+
"in (\\d+[.,]?\\d*) weeks' time",
|
|
261
270
|
"in (\\d+[.,]?\\d*) wk"
|
|
262
271
|
],
|
|
263
272
|
"in \\1 year": [
|
|
@@ -340,73 +340,91 @@ info = {
|
|
|
340
340
|
],
|
|
341
341
|
"simplifications": [
|
|
342
342
|
{
|
|
343
|
-
"од(
|
|
343
|
+
"од(ин|на|ну|ни|ной|ною|но|ного|ному|ним|нем)": "1"
|
|
344
344
|
},
|
|
345
345
|
{
|
|
346
|
-
"
|
|
346
|
+
"перв(ой|ого|ому|ым|ом|ая|ую|ое|ые|ых|ыми)": "1"
|
|
347
347
|
},
|
|
348
348
|
{
|
|
349
|
-
"
|
|
349
|
+
"дв(а|е|ух|ум|умя|ое)": "2"
|
|
350
350
|
},
|
|
351
351
|
{
|
|
352
|
-
"
|
|
352
|
+
"пар(а|ы|е|у|ой|ою|ам|ами|ах)": "2"
|
|
353
353
|
},
|
|
354
354
|
{
|
|
355
|
-
"
|
|
355
|
+
"втор(ой|ого|ому|ым|ом|ая|ую|ое|ые|ых|ыми)": "2"
|
|
356
356
|
},
|
|
357
357
|
{
|
|
358
|
-
"
|
|
358
|
+
"тр(и|ёх|ем|ём|емя|етье)": "3"
|
|
359
359
|
},
|
|
360
360
|
{
|
|
361
|
-
"
|
|
361
|
+
"трети(й|его|ему|им|ем|я|ей|ею|ую|е|и|их|ими)": "3"
|
|
362
362
|
},
|
|
363
363
|
{
|
|
364
|
-
"
|
|
364
|
+
"четыр(е|ёх|ем|ём|ьмя)": "4"
|
|
365
365
|
},
|
|
366
366
|
{
|
|
367
|
-
"
|
|
367
|
+
"четвёрт(ый|ого|ому|ым|ом|ая|ой|ою|ую|ое|ые|ых|ыми)": "4"
|
|
368
368
|
},
|
|
369
369
|
{
|
|
370
|
-
"
|
|
370
|
+
"четверт(ый|ого|ому|ым|ом|ая|ой|ою|ую|ое|ые|ых|ыми)": "4"
|
|
371
371
|
},
|
|
372
372
|
{
|
|
373
|
-
"
|
|
373
|
+
"пят(ь|и|ью)": "5"
|
|
374
374
|
},
|
|
375
375
|
{
|
|
376
|
-
"
|
|
376
|
+
"пят(ый|ого|ому|ым|ом|ая|ой|ою|ую|ое|ые|ых|ыми)": "5"
|
|
377
377
|
},
|
|
378
378
|
{
|
|
379
|
-
"
|
|
379
|
+
"шест(ь|и|ью)": "6"
|
|
380
380
|
},
|
|
381
381
|
{
|
|
382
|
-
"
|
|
382
|
+
"шест(ый|ого|ому|ым|ом|ая|ой|ою|ую|ое|ые|ых|ыми)": "6"
|
|
383
383
|
},
|
|
384
384
|
{
|
|
385
|
-
"
|
|
385
|
+
"сем(ь|и|ью)": "7"
|
|
386
386
|
},
|
|
387
387
|
{
|
|
388
|
-
"
|
|
388
|
+
"седьм(ой|ого|ому|ым|ом|ая|ой|ою|ую|ое|ые|ых|ыми)": "7"
|
|
389
389
|
},
|
|
390
390
|
{
|
|
391
|
-
"
|
|
391
|
+
"восьм(и|ью|ьею)|восем(ь|ью)": "8"
|
|
392
392
|
},
|
|
393
393
|
{
|
|
394
|
-
"
|
|
394
|
+
"восьм(ой|ого|ому|ым|ом|ая|ой|ою|ую|ое|ые|ых|ыми)": "8"
|
|
395
395
|
},
|
|
396
396
|
{
|
|
397
|
-
"
|
|
397
|
+
"девят(ь|и|ью)": "9"
|
|
398
398
|
},
|
|
399
399
|
{
|
|
400
|
-
"
|
|
400
|
+
"девят(ый|ого|ому|ым|ом|ая|ой|ою|ую|ое|ые|ых|ыми)": "9"
|
|
401
401
|
},
|
|
402
402
|
{
|
|
403
|
-
"
|
|
403
|
+
"десять": "10"
|
|
404
404
|
},
|
|
405
405
|
{
|
|
406
|
-
"
|
|
406
|
+
"одиннадцать": "11"
|
|
407
407
|
},
|
|
408
408
|
{
|
|
409
|
-
"
|
|
409
|
+
"двенадцать": "12"
|
|
410
|
+
},
|
|
411
|
+
{
|
|
412
|
+
"пятнадцать": "15"
|
|
413
|
+
},
|
|
414
|
+
{
|
|
415
|
+
"двадцат(ь|ое)": "20"
|
|
416
|
+
},
|
|
417
|
+
{
|
|
418
|
+
"тридцат(ь|ое)": "30"
|
|
419
|
+
},
|
|
420
|
+
{
|
|
421
|
+
"соро(к|ка|ковое)": "40"
|
|
422
|
+
},
|
|
423
|
+
{
|
|
424
|
+
"пятьдесят": "50"
|
|
425
|
+
},
|
|
426
|
+
{
|
|
427
|
+
"пятидесятое": "50"
|
|
410
428
|
},
|
|
411
429
|
{
|
|
412
430
|
"((?<=(через|спустя|в течение)\\s+)секунд[уы]|(?<=[^\\d]\\s+|^)секунду(?=(\\s+назад)))": "1 секунду"
|
|
@@ -426,12 +444,27 @@ info = {
|
|
|
426
444
|
{
|
|
427
445
|
"((?<=(через|спустя|в течение)\\s+)недел[юи]|(?<=[^\\d]\\s+|^)неделю(?=(\\s+назад)))": "1 неделю"
|
|
428
446
|
},
|
|
447
|
+
{
|
|
448
|
+
"полгода": "6 месяцев"
|
|
449
|
+
},
|
|
429
450
|
{
|
|
430
451
|
"((?<=(через|спустя|в течение)\\s+)месяца?|(?<=[^\\d]\\s+|^)месяц(?=(\\s+назад)))": "1 месяц"
|
|
431
452
|
},
|
|
432
453
|
{
|
|
433
454
|
"((?<=(через|спустя|в течение)\\s+)года?|(?<=[^\\d]\\s+|^)год(?=(\\s+назад)))": "1 год"
|
|
434
455
|
},
|
|
456
|
+
{
|
|
457
|
+
"полтора года": "18 месяцев"
|
|
458
|
+
},
|
|
459
|
+
{
|
|
460
|
+
"полчаса": "30 минут"
|
|
461
|
+
},
|
|
462
|
+
{
|
|
463
|
+
"несколько секунд": "44 секунды"
|
|
464
|
+
},
|
|
465
|
+
{
|
|
466
|
+
"полтора часа": "90 минут"
|
|
467
|
+
},
|
|
435
468
|
{
|
|
436
469
|
"(\\d{3,}1)\\s*год\\s*$": "\\1"
|
|
437
470
|
},
|
dateparser/date.py
CHANGED
|
@@ -19,6 +19,7 @@ from dateparser.utils import (
|
|
|
19
19
|
set_correct_day_from_settings,
|
|
20
20
|
set_correct_month_from_settings,
|
|
21
21
|
)
|
|
22
|
+
from dateparser.utils.strptime import strptime as patched_strptime
|
|
22
23
|
|
|
23
24
|
APOSTROPHE_LOOK_ALIKE_CHARS = [
|
|
24
25
|
"\N{RIGHT SINGLE QUOTATION MARK}", # '\u2019'
|
|
@@ -182,7 +183,7 @@ def parse_with_formats(date_string, date_formats, settings):
|
|
|
182
183
|
period = "day"
|
|
183
184
|
for date_format in date_formats:
|
|
184
185
|
try:
|
|
185
|
-
date_obj =
|
|
186
|
+
date_obj = patched_strptime(date_string, date_format)
|
|
186
187
|
except ValueError:
|
|
187
188
|
continue
|
|
188
189
|
else:
|
|
@@ -10,7 +10,7 @@ from .parser import time_parser
|
|
|
10
10
|
from .timezone_parser import pop_tz_offset_from_string
|
|
11
11
|
|
|
12
12
|
_UNITS = r"decade|year|month|week|day|hour|minute|second"
|
|
13
|
-
PATTERN = re.compile(r"(
|
|
13
|
+
PATTERN = re.compile(r"([+-]?\d+[.,]?\d*)\s*(%s)\b" % _UNITS, re.I | re.S | re.U)
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class FreshnessDateDataParser:
|
|
@@ -112,7 +112,13 @@ class FreshnessDateDataParser:
|
|
|
112
112
|
if not self._are_all_words_units(date_string):
|
|
113
113
|
return None, None
|
|
114
114
|
|
|
115
|
-
|
|
115
|
+
result = self.get_kwargs(date_string)
|
|
116
|
+
if isinstance(result, tuple):
|
|
117
|
+
kwargs, explicit_signs = result
|
|
118
|
+
else:
|
|
119
|
+
kwargs = result
|
|
120
|
+
explicit_signs = {}
|
|
121
|
+
|
|
116
122
|
if not kwargs:
|
|
117
123
|
return None, None
|
|
118
124
|
period = "day"
|
|
@@ -121,16 +127,27 @@ class FreshnessDateDataParser:
|
|
|
121
127
|
if k in kwargs:
|
|
122
128
|
period = k[:-1]
|
|
123
129
|
break
|
|
124
|
-
td = relativedelta(**kwargs)
|
|
125
130
|
|
|
126
|
-
|
|
131
|
+
going_forward = (
|
|
127
132
|
re.search(r"\bin\b", date_string)
|
|
128
133
|
or re.search(r"\bfuture\b", prefer_dates_from)
|
|
129
134
|
and not re.search(r"\bago\b", date_string)
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
adjusted_kwargs = {}
|
|
138
|
+
for key, value in kwargs.items():
|
|
139
|
+
if explicit_signs.get(key, False):
|
|
140
|
+
adjusted_kwargs[key] = value
|
|
141
|
+
else:
|
|
142
|
+
if going_forward:
|
|
143
|
+
adjusted_kwargs[key] = value
|
|
144
|
+
else:
|
|
145
|
+
adjusted_kwargs[key] = -value
|
|
146
|
+
|
|
147
|
+
td = relativedelta(**adjusted_kwargs)
|
|
148
|
+
|
|
149
|
+
date = now + td
|
|
150
|
+
|
|
134
151
|
return date, period
|
|
135
152
|
|
|
136
153
|
def get_kwargs(self, date_string):
|
|
@@ -139,12 +156,21 @@ class FreshnessDateDataParser:
|
|
|
139
156
|
return {}
|
|
140
157
|
|
|
141
158
|
kwargs = {}
|
|
159
|
+
explicit_signs = {}
|
|
160
|
+
|
|
142
161
|
for num, unit in m:
|
|
162
|
+
has_explicit_sign = num.startswith("+") or num.startswith("-")
|
|
163
|
+
explicit_signs[unit + "s"] = has_explicit_sign
|
|
143
164
|
kwargs[unit + "s"] = float(num.replace(",", "."))
|
|
165
|
+
|
|
144
166
|
if "decades" in kwargs:
|
|
145
167
|
kwargs["years"] = 10 * kwargs["decades"] + kwargs.get("years", 0)
|
|
168
|
+
if "decades" in explicit_signs:
|
|
169
|
+
explicit_signs["years"] = explicit_signs["decades"]
|
|
146
170
|
del kwargs["decades"]
|
|
147
|
-
|
|
171
|
+
explicit_signs.pop("decades", None)
|
|
172
|
+
|
|
173
|
+
return kwargs, explicit_signs
|
|
148
174
|
|
|
149
175
|
def get_date_data(self, date_string, settings=None):
|
|
150
176
|
from dateparser.date import DateData
|
|
@@ -201,14 +201,25 @@ class Dictionary:
|
|
|
201
201
|
curr_split = (
|
|
202
202
|
[known] if self._should_capture(known, keep_formatting) else []
|
|
203
203
|
)
|
|
204
|
+
|
|
204
205
|
if unparsed and self._should_capture(unparsed, keep_formatting):
|
|
205
206
|
curr_split = (
|
|
206
207
|
self._split_by_numerals(unparsed, keep_formatting) + curr_split
|
|
207
208
|
)
|
|
209
|
+
|
|
208
210
|
if unknown:
|
|
209
211
|
string = unknown if string != unknown else ""
|
|
210
212
|
|
|
211
|
-
|
|
213
|
+
for token in curr_split:
|
|
214
|
+
if (
|
|
215
|
+
splitted
|
|
216
|
+
and splitted[-1].isdigit()
|
|
217
|
+
and token in {"st", "nd", "rd", "th"}
|
|
218
|
+
):
|
|
219
|
+
continue
|
|
220
|
+
|
|
221
|
+
splitted.append(token)
|
|
222
|
+
|
|
212
223
|
return splitted
|
|
213
224
|
|
|
214
225
|
def _split_by_numerals(self, string, keep_formatting):
|
dateparser/languages/locale.py
CHANGED
|
@@ -144,7 +144,6 @@ class Locale:
|
|
|
144
144
|
date_string_tokens[i] = dictionary[word] or fallback
|
|
145
145
|
if "in" in date_string_tokens:
|
|
146
146
|
date_string_tokens = self._clear_future_words(date_string_tokens)
|
|
147
|
-
|
|
148
147
|
return self._join(
|
|
149
148
|
list(filter(bool, date_string_tokens)),
|
|
150
149
|
separator="" if keep_formatting else " ",
|
|
@@ -426,11 +425,39 @@ class Locale:
|
|
|
426
425
|
def _simplify(self, date_string, settings=None):
|
|
427
426
|
date_string = date_string.lower()
|
|
428
427
|
simplifications = self._get_simplifications(settings=settings)
|
|
428
|
+
|
|
429
|
+
if self.info.get("name") == "ru":
|
|
430
|
+
date_string = self._process_russian_compound_ordinals(
|
|
431
|
+
date_string, simplifications
|
|
432
|
+
)
|
|
433
|
+
else:
|
|
434
|
+
date_string = self._apply_simplifications(date_string, simplifications)
|
|
435
|
+
|
|
436
|
+
return date_string
|
|
437
|
+
|
|
438
|
+
def _apply_simplifications(self, date_string, simplifications):
|
|
429
439
|
for simplification in simplifications:
|
|
430
440
|
pattern, replacement = list(simplification.items())[0]
|
|
431
441
|
date_string = pattern.sub(replacement, date_string).lower()
|
|
432
442
|
return date_string
|
|
433
443
|
|
|
444
|
+
def _process_russian_compound_ordinals(self, date_string, simplifications):
|
|
445
|
+
"""Process Russian compound ordinals mathematically (двадцать + первое = 21)."""
|
|
446
|
+
date_string = self._apply_simplifications(date_string, simplifications)
|
|
447
|
+
|
|
448
|
+
def replace_number_pairs(match):
|
|
449
|
+
first_num = int(match.group(1))
|
|
450
|
+
second_num = int(match.group(2))
|
|
451
|
+
result = first_num + second_num
|
|
452
|
+
if 1 <= result <= 31 and first_num in [20, 30] and 1 <= second_num <= 9:
|
|
453
|
+
return str(result)
|
|
454
|
+
return match.group(0)
|
|
455
|
+
|
|
456
|
+
number_pair_pattern = r"\b(\d+)\s+(\d+)\b"
|
|
457
|
+
date_string = re.sub(number_pair_pattern, replace_number_pairs, date_string)
|
|
458
|
+
|
|
459
|
+
return date_string
|
|
460
|
+
|
|
434
461
|
def _get_simplifications(self, settings=None):
|
|
435
462
|
no_word_spacing = eval(self.info.get("no_word_spacing", "False"))
|
|
436
463
|
if settings.NORMALIZE:
|
dateparser/search/search.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from collections.abc import Set
|
|
2
|
+
from datetime import datetime
|
|
2
3
|
|
|
3
4
|
import regex as re
|
|
4
5
|
|
|
@@ -7,6 +8,7 @@ from dateparser.custom_language_detection.language_mapping import map_languages
|
|
|
7
8
|
from dateparser.date import DateDataParser
|
|
8
9
|
from dateparser.languages.loader import LocaleDataLoader
|
|
9
10
|
from dateparser.search.text_detection import FullTextLanguageDetector
|
|
11
|
+
from dateparser.utils.time_spans import detect_time_span, generate_time_span
|
|
10
12
|
|
|
11
13
|
RELATIVE_REG = re.compile("(ago|in|from now|tomorrow|today|yesterday)")
|
|
12
14
|
|
|
@@ -185,8 +187,23 @@ class _ExactLanguageSearch:
|
|
|
185
187
|
translated=translated,
|
|
186
188
|
settings=settings,
|
|
187
189
|
)
|
|
190
|
+
|
|
191
|
+
results = list(zip(substrings, [i[0]["date_obj"] for i in parsed]))
|
|
192
|
+
|
|
193
|
+
if getattr(settings, "RETURN_TIME_SPAN", False):
|
|
194
|
+
span_info = detect_time_span(text)
|
|
195
|
+
if span_info:
|
|
196
|
+
base_date = getattr(settings, "RELATIVE_BASE", None) or datetime.now()
|
|
197
|
+
start_date, end_date = generate_time_span(
|
|
198
|
+
span_info, base_date, settings
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
matched_text = span_info["matched_text"]
|
|
202
|
+
results.append((matched_text + " (start)", start_date))
|
|
203
|
+
results.append((matched_text + " (end)", end_date))
|
|
204
|
+
|
|
188
205
|
parser._settings = Settings()
|
|
189
|
-
return
|
|
206
|
+
return results
|
|
190
207
|
|
|
191
208
|
|
|
192
209
|
class DateSearchWithDetection:
|
dateparser/utils/strptime.py
CHANGED
|
@@ -90,7 +90,34 @@ def patch_strptime():
|
|
|
90
90
|
__strptime = patch_strptime()
|
|
91
91
|
|
|
92
92
|
|
|
93
|
-
def
|
|
93
|
+
def _prepare_format(date_string: str, og_format: str) -> tuple[str, str]:
|
|
94
|
+
# Adapted from std lib: https://github.com/python/cpython/blob/e34a5e33049ce845de646cf24a498766a2da3586/Lib/_strptime.py#L448
|
|
95
|
+
format = re.sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", og_format)
|
|
96
|
+
format = re.sub(r"\s+", r"\\s+", format)
|
|
97
|
+
format = re.sub(r"'", "['\u02bc]", format)
|
|
98
|
+
year_in_format = False
|
|
99
|
+
day_of_month_in_format = False
|
|
100
|
+
|
|
101
|
+
def repl(m: re.Match[str]) -> str:
|
|
102
|
+
format_char = m[1]
|
|
103
|
+
if format_char in ("Y", "y", "G"):
|
|
104
|
+
nonlocal year_in_format
|
|
105
|
+
year_in_format = True
|
|
106
|
+
elif format_char in ("d",):
|
|
107
|
+
nonlocal day_of_month_in_format
|
|
108
|
+
day_of_month_in_format = True
|
|
109
|
+
|
|
110
|
+
return ""
|
|
111
|
+
|
|
112
|
+
_ = re.sub(r"%[-_0^#]*[0-9]*([OE]?\\?.?)", repl, format)
|
|
113
|
+
if day_of_month_in_format and not year_in_format:
|
|
114
|
+
current_year = datetime.today().year
|
|
115
|
+
return f"{current_year} {date_string}", f"%Y {og_format}"
|
|
116
|
+
return date_string, og_format
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def strptime(date_string: str, format: str) -> datetime:
|
|
120
|
+
date_string, format = _prepare_format(date_string, format)
|
|
94
121
|
obj = datetime(*__strptime(date_string, format)[:-3])
|
|
95
122
|
|
|
96
123
|
if "%f" in format:
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utilities for handling time spans and date ranges.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
from dateutil.relativedelta import relativedelta
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_week_start(date, start_of_week="monday"):
|
|
11
|
+
"""Get the start of the week for a given date."""
|
|
12
|
+
if start_of_week == "monday":
|
|
13
|
+
days_back = date.weekday()
|
|
14
|
+
else: # sunday
|
|
15
|
+
days_back = (date.weekday() + 1) % 7
|
|
16
|
+
|
|
17
|
+
return date - timedelta(days=days_back)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_week_end(date, start_of_week="monday"):
|
|
21
|
+
"""Get the end of the week for a given date."""
|
|
22
|
+
week_start = get_week_start(date, start_of_week)
|
|
23
|
+
return week_start + timedelta(days=6)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def detect_time_span(text):
|
|
27
|
+
"""Detect time span expressions in text and return span information."""
|
|
28
|
+
span_patterns = [
|
|
29
|
+
{
|
|
30
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:past|last|previous)\s+month\b",
|
|
31
|
+
"type": "month",
|
|
32
|
+
"direction": "past",
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:past|last|previous)\s+week\b",
|
|
36
|
+
"type": "week",
|
|
37
|
+
"direction": "past",
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:past|last|previous)\s+(\d+)\s+days?\b",
|
|
41
|
+
"type": "days",
|
|
42
|
+
"direction": "past",
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:past|last|previous)\s+(\d+)\s+weeks?\b",
|
|
46
|
+
"type": "weeks",
|
|
47
|
+
"direction": "past",
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:past|last|previous)\s+(\d+)\s+months?\b",
|
|
51
|
+
"type": "months",
|
|
52
|
+
"direction": "past",
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:next|coming|following)\s+month\b",
|
|
56
|
+
"type": "month",
|
|
57
|
+
"direction": "future",
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:next|coming|following)\s+week\b",
|
|
61
|
+
"type": "week",
|
|
62
|
+
"direction": "future",
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:next|coming|following)\s+(\d+)\s+days?\b",
|
|
66
|
+
"type": "days",
|
|
67
|
+
"direction": "future",
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:next|coming|following)\s+(\d+)\s+weeks?\b",
|
|
71
|
+
"type": "weeks",
|
|
72
|
+
"direction": "future",
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
"pattern": r"\b(?:for\s+the\s+|during\s+the\s+|in\s+the\s+)?(?:next|coming|following)\s+(\d+)\s+months?\b",
|
|
76
|
+
"type": "months",
|
|
77
|
+
"direction": "future",
|
|
78
|
+
},
|
|
79
|
+
]
|
|
80
|
+
|
|
81
|
+
for pattern_info in span_patterns:
|
|
82
|
+
match = re.search(pattern_info["pattern"], text, re.IGNORECASE)
|
|
83
|
+
if match:
|
|
84
|
+
result = {
|
|
85
|
+
"type": pattern_info["type"],
|
|
86
|
+
"direction": pattern_info["direction"],
|
|
87
|
+
"matched_text": match.group(0),
|
|
88
|
+
"start_pos": match.start(),
|
|
89
|
+
"end_pos": match.end(),
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
if match.groups():
|
|
93
|
+
result["number"] = int(match.group(1))
|
|
94
|
+
|
|
95
|
+
return result
|
|
96
|
+
|
|
97
|
+
return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def generate_time_span(span_info, base_date=None, settings=None):
|
|
101
|
+
"""Generate start and end dates for a time span."""
|
|
102
|
+
if base_date is None:
|
|
103
|
+
base_date = datetime.now()
|
|
104
|
+
|
|
105
|
+
if settings is None:
|
|
106
|
+
start_of_week = "monday"
|
|
107
|
+
days_in_month = 30
|
|
108
|
+
else:
|
|
109
|
+
start_of_week = getattr(settings, "DEFAULT_START_OF_WEEK", "monday")
|
|
110
|
+
days_in_month = getattr(settings, "DEFAULT_DAYS_IN_MONTH", 30)
|
|
111
|
+
|
|
112
|
+
span_type = span_info["type"]
|
|
113
|
+
direction = span_info["direction"]
|
|
114
|
+
number = span_info.get("number", 1)
|
|
115
|
+
|
|
116
|
+
if direction == "past":
|
|
117
|
+
end_date = base_date
|
|
118
|
+
|
|
119
|
+
if span_type == "month":
|
|
120
|
+
start_date = end_date - relativedelta(days=days_in_month)
|
|
121
|
+
elif span_type == "week":
|
|
122
|
+
week_start = get_week_start(end_date, start_of_week)
|
|
123
|
+
start_date = week_start - timedelta(days=7)
|
|
124
|
+
end_date = week_start - timedelta(days=1)
|
|
125
|
+
elif span_type == "days":
|
|
126
|
+
start_date = end_date - timedelta(days=number)
|
|
127
|
+
elif span_type == "weeks":
|
|
128
|
+
start_date = end_date - timedelta(weeks=number)
|
|
129
|
+
elif span_type == "months":
|
|
130
|
+
start_date = end_date - relativedelta(months=number)
|
|
131
|
+
else:
|
|
132
|
+
start_date = end_date - timedelta(days=1)
|
|
133
|
+
|
|
134
|
+
else:
|
|
135
|
+
start_date = base_date
|
|
136
|
+
|
|
137
|
+
if span_type == "month":
|
|
138
|
+
end_date = start_date + relativedelta(days=days_in_month)
|
|
139
|
+
elif span_type == "week":
|
|
140
|
+
week_start = get_week_start(start_date, start_of_week)
|
|
141
|
+
start_date = week_start + timedelta(days=7)
|
|
142
|
+
end_date = start_date + timedelta(days=6)
|
|
143
|
+
elif span_type == "days":
|
|
144
|
+
end_date = start_date + timedelta(days=number)
|
|
145
|
+
elif span_type == "weeks":
|
|
146
|
+
end_date = start_date + timedelta(weeks=number)
|
|
147
|
+
elif span_type == "months":
|
|
148
|
+
end_date = start_date + relativedelta(months=number)
|
|
149
|
+
else:
|
|
150
|
+
end_date = start_date + timedelta(days=1)
|
|
151
|
+
|
|
152
|
+
return (start_date, end_date)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dateparser
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Date parsing library designed to parse dates from HTML pages
|
|
5
5
|
Home-page: https://github.com/scrapinghub/dateparser
|
|
6
6
|
Author: Scrapinghub
|
|
@@ -13,13 +13,13 @@ Classifier: Intended Audience :: Developers
|
|
|
13
13
|
Classifier: License :: OSI Approved :: BSD License
|
|
14
14
|
Classifier: Natural Language :: English
|
|
15
15
|
Classifier: Programming Language :: Python :: 3
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
21
21
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
22
|
-
Requires-Python: >=3.
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
23
|
License-File: LICENSE
|
|
24
24
|
License-File: AUTHORS.rst
|
|
25
25
|
Requires-Dist: python-dateutil>=2.7.0
|
|
@@ -31,7 +31,7 @@ Requires-Dist: convertdate>=2.2.1; extra == "calendars"
|
|
|
31
31
|
Requires-Dist: hijridate; extra == "calendars"
|
|
32
32
|
Provides-Extra: fasttext
|
|
33
33
|
Requires-Dist: fasttext>=0.9.1; extra == "fasttext"
|
|
34
|
-
Requires-Dist: numpy<2,>=1.
|
|
34
|
+
Requires-Dist: numpy<2,>=1.22.0; extra == "fasttext"
|
|
35
35
|
Provides-Extra: langdetect
|
|
36
36
|
Requires-Dist: langdetect>=1.0.0; extra == "langdetect"
|
|
37
37
|
Dynamic: author
|
|
@@ -261,6 +261,17 @@ You can extract dates from longer strings of text. They are returned as list of
|
|
|
261
261
|
|
|
262
262
|
:noindex:
|
|
263
263
|
|
|
264
|
+
Time Span Detection
|
|
265
|
+
-------------------
|
|
266
|
+
|
|
267
|
+
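The `search_dates` function can detect time spans from expressions like "past month", "last week", etc. When the `RETURN_TIME_SPAN` setting is enabled, it appends the start and end dates of the detected period to the results as two entries, labelled with the matched text followed by " (start)" and " (end)".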
|
|
268
|
+
|
|
269
|
+
.. code-block:: python
|
|
270
|
+
|
|
271
|
+
>>> search_dates("Messages from the past month", settings={'RETURN_TIME_SPAN': True})
|
|
272
|
+
[('past month (start)', datetime.datetime(2024, 11, 7, 0, 0)),
|
|
273
|
+
('past month (end)', datetime.datetime(2024, 12, 7, 23, 59, 59, 999999))]
|
|
274
|
+
|
|
264
275
|
Advanced Usage
|
|
265
276
|
==============
|
|
266
277
|
If you need more control over what is being parsed, check the `settings` section as well as the `using-datedataparser` section.
|
|
@@ -319,6 +330,31 @@ To be able to use them you need to install the `calendar` extra by typing:
|
|
|
319
330
|
History
|
|
320
331
|
=======
|
|
321
332
|
|
|
333
|
+
1.3.0 (2026-02-04)
|
|
334
|
+
------------------
|
|
335
|
+
|
|
336
|
+
Dropped Python 3.9 support. (#1296)
|
|
337
|
+
|
|
338
|
+
New features:
|
|
339
|
+
|
|
340
|
+
- ``search_dates()`` can now detect time spans from expressions like “past
|
|
341
|
+
month”, “last week”, etc. For details, see the “Time Span Detection” section
|
|
342
|
+
and the ``RETURN_TIME_SPAN``, ``DEFAULT_START_OF_WEEK`` and
|
|
343
|
+
``DEFAULT_DAYS_IN_MONTH`` settings in the documentation. (#1284)
|
|
344
|
+
|
|
345
|
+
Fixes:
|
|
346
|
+
|
|
347
|
+
- Assume the current year if not specified (#1288)
|
|
348
|
+
- Support expressions like “yesterday +1h” (#1303)
|
|
349
|
+
- English: Support most 2-letter day-of-the-week names (#1214)
|
|
350
|
+
- English: Support “in N weeks' time” (#1283)
|
|
351
|
+
- Finnish: Support dates with “klo” (#1301)
|
|
352
|
+
- Russian: Support compound ordinals (#1280)
|
|
353
|
+
|
|
354
|
+
Cleanups and internal improvements:
|
|
355
|
+
|
|
356
|
+
- Fixed year expectation issues in tests. (#1294)
|
|
357
|
+
|
|
322
358
|
1.2.2 (2025-06-26)
|
|
323
359
|
------------------
|
|
324
360
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
dateparser/__init__.py,sha256=
|
|
2
|
-
dateparser/conf.py,sha256=
|
|
3
|
-
dateparser/date.py,sha256=
|
|
1
|
+
dateparser/__init__.py,sha256=E93qThUWMzU0u_HUYzn6LIueEYKO_KmqGbX27D3xz_s,2739
|
|
2
|
+
dateparser/conf.py,sha256=bpJiegIAnBPXoPhtEe5GN2Vf6XgWiqyJ76WPYGL_DCs,8782
|
|
3
|
+
dateparser/date.py,sha256=DZZZ04Csp5psRxVA3Pmu7BEyTV5549IxSi-bnDEv4wc,21829
|
|
4
4
|
dateparser/date_parser.py,sha256=LPMD1La3fUTyfa17i0Ng-mii6ucWIBme-zJJY3es-Ug,1782
|
|
5
|
-
dateparser/freshness_date_parser.py,sha256=
|
|
5
|
+
dateparser/freshness_date_parser.py,sha256=0E8zbvHt_zA97Fdet3HUHk8tXtVEzBH-f0Yi_Vt-Z44,5899
|
|
6
6
|
dateparser/parser.py,sha256=KDde8HlyD5wFERiT8ABrIxTakLsEE7VYA4Kif6VWRbQ,26005
|
|
7
7
|
dateparser/timezone_parser.py,sha256=MoMTAH__hkeaoZQ6VaZhJpUPxd-yA9QTczm-mmHztKc,4042
|
|
8
8
|
dateparser/timezones.py,sha256=gtJhZDCy4sw37MpUogWk4LHKmn8DpG5caO3LIaCC264,13981
|
|
@@ -61,7 +61,7 @@ dateparser/data/date_translation_data/dz.py,sha256=Tnpiobh5Z0B-ypBu8iwFnoKETeDbi
|
|
|
61
61
|
dateparser/data/date_translation_data/ebu.py,sha256=urgcEcGO18BZI9yL9VoNvrpnPiIwzpdp2Lu_gaULCpg,2708
|
|
62
62
|
dateparser/data/date_translation_data/ee.py,sha256=9iFh_krQjTdG7wHb0F-mHV1M9tNIVY_ZvXFElo8JsTM,4693
|
|
63
63
|
dateparser/data/date_translation_data/el.py,sha256=ZLpqqIxH1dHAh9R74Nc1u2fFj076H92VpbnoBSaFH5g,6726
|
|
64
|
-
dateparser/data/date_translation_data/en.py,sha256=
|
|
64
|
+
dateparser/data/date_translation_data/en.py,sha256=xEwkpuITw596WymLZVJgHW4TKjzpY2xs2hMZBhTh-Uk,18481
|
|
65
65
|
dateparser/data/date_translation_data/eo.py,sha256=zRsL-WwsirzvOa1egdLX2eYSJcBLLSqf9_kFHRCq4iM,2555
|
|
66
66
|
dateparser/data/date_translation_data/es.py,sha256=jHUrQVXJJr5aJDEJ2_m14QEwjgXRIW8PyOjahBcGMg4,10564
|
|
67
67
|
dateparser/data/date_translation_data/et.py,sha256=32v713IxqOn6tylqQM2PF533cEIB8WdaDvhXj7NmWQk,4471
|
|
@@ -69,7 +69,7 @@ dateparser/data/date_translation_data/eu.py,sha256=q2ozI0EjZ15OgL9XDZwSK3SBzuLGZ
|
|
|
69
69
|
dateparser/data/date_translation_data/ewo.py,sha256=91HMITP1VhKEOkCcUlcH_IoQId_1N1ubToPKHAUaI34,2738
|
|
70
70
|
dateparser/data/date_translation_data/fa.py,sha256=ubYX0IR-kNqg2FcfbwU-fk9MZcm-_aSULd1GzEX_fdo,5315
|
|
71
71
|
dateparser/data/date_translation_data/ff.py,sha256=gm3_z4qoVSMAOMVJFQPyu_WscLmuj89t4e6lfD_UrkU,2750
|
|
72
|
-
dateparser/data/date_translation_data/fi.py,sha256=
|
|
72
|
+
dateparser/data/date_translation_data/fi.py,sha256=gh5OM8sBw3AoocrXINweOJzQp11CM39FHjKIqyA-2uU,7134
|
|
73
73
|
dateparser/data/date_translation_data/fil.py,sha256=rEf4yjFoKDDZUGjEXfscBiA51D_P30KLHgs2lHdlE2M,4160
|
|
74
74
|
dateparser/data/date_translation_data/fo.py,sha256=q0UZ1nf0E9r0LNeZxh6YmeBVFNQ3lnQ1_W-J1mjuCvo,5163
|
|
75
75
|
dateparser/data/date_translation_data/fr.py,sha256=CrfovpWkbm24zo4Cx6mBTG10rMg570Vldf22BGFbV5U,10219
|
|
@@ -169,7 +169,7 @@ dateparser/data/date_translation_data/rm.py,sha256=XR9vQVtjdmYAO3CF1HZ3geg9ss9w8
|
|
|
169
169
|
dateparser/data/date_translation_data/rn.py,sha256=m8C0_pWqKSfSP6cEX7W7qFLDRBT0_3CMBpjihCmj878,2636
|
|
170
170
|
dateparser/data/date_translation_data/ro.py,sha256=BmIeWoTCSj2_Jm7tSSW576KihgeKL4ZbLYDm3c8VXtw,5407
|
|
171
171
|
dateparser/data/date_translation_data/rof.py,sha256=6P8K_OTRvB3l_uJNPbqMDhcugd9-04LOixoJCEmFCSg,2501
|
|
172
|
-
dateparser/data/date_translation_data/ru.py,sha256=
|
|
172
|
+
dateparser/data/date_translation_data/ru.py,sha256=Mn08V6pMrwrtp7XMFUATHJR92Nj5nSJ-kpbSof5OFQ4,12205
|
|
173
173
|
dateparser/data/date_translation_data/rw.py,sha256=WcS5199LBbHjaewoQunQg-4fj8tlzuAw0yUkzQ4ciS0,2597
|
|
174
174
|
dateparser/data/date_translation_data/rwk.py,sha256=iom2uEfZDe-UORTeggcIyTlaGH7JR0EDkZjD_eTfuvc,2551
|
|
175
175
|
dateparser/data/date_translation_data/sah.py,sha256=xLH9m4W8DT7-HmS8LW0WS40yIRg-aV5pcU3lvdW2sCA,4536
|
|
@@ -225,33 +225,34 @@ dateparser/data/date_translation_data/zh-Hant.py,sha256=3QH9WURGtqTWG4E6sUlCogoa
|
|
|
225
225
|
dateparser/data/date_translation_data/zh.py,sha256=jTbrYZtOGx19fPgkH-3ccYUkdb-pqaayNwr6R5SB8FE,5318
|
|
226
226
|
dateparser/data/date_translation_data/zu.py,sha256=blFaRxCN2KVqzAi_7NP5cf_Xz_gAX4YqAOkn1nyZDBg,3467
|
|
227
227
|
dateparser/languages/__init__.py,sha256=iQ4o11LbvlawSaAzPHRuavXqDmzPdTmSgI_YwUTgQ9A,62
|
|
228
|
-
dateparser/languages/dictionary.py,sha256=
|
|
228
|
+
dateparser/languages/dictionary.py,sha256=h7Sp_lYYuVdMCeSdz7fkmav8trRFQueQtSiS4kHt3xk,12118
|
|
229
229
|
dateparser/languages/loader.py,sha256=gTyyVEX8ppeVEQ1SrIjjxwSq8hpTI5HTnS_cyVotegc,7323
|
|
230
|
-
dateparser/languages/locale.py,sha256=
|
|
230
|
+
dateparser/languages/locale.py,sha256=khMelzkzLcTH7yF01W9ksJoIPcm72IBvhUTVN5yQE70,25752
|
|
231
231
|
dateparser/languages/validation.py,sha256=gtLpxvh3XI_nzryBZ8NA6QL8YLcT6HV1j81dV3nDxY4,16750
|
|
232
232
|
dateparser/search/__init__.py,sha256=4AixzJu9YID1vzNSKx8uJ0cyKNZGvO8BzMN6Jpoci84,2909
|
|
233
233
|
dateparser/search/detection.py,sha256=YAnTYbM18FGZV2pxn6lNDbfoiQttzM52ZDgOhlV87II,2695
|
|
234
|
-
dateparser/search/search.py,sha256=
|
|
234
|
+
dateparser/search/search.py,sha256=_hrGubK6sFk-1zwAeRo_dkqniakL6qJOI-nhUhOQivw,12833
|
|
235
235
|
dateparser/search/text_detection.py,sha256=SSQUOr5V3Qm946Tiz7hNWcpYJtcyP9Bxayxec0hC7zc,3238
|
|
236
236
|
dateparser/utils/__init__.py,sha256=X5ssdQuo60F_SnVcKpt-a2ehvPClMaX2pQ2vwGWGSac,7234
|
|
237
|
-
dateparser/utils/strptime.py,sha256=
|
|
238
|
-
dateparser
|
|
239
|
-
dateparser-1.
|
|
237
|
+
dateparser/utils/strptime.py,sha256=YgdfLAwedcer3sdwtq0gol2TBpYIqANvFrf4u0K3mAg,3948
|
|
238
|
+
dateparser/utils/time_spans.py,sha256=8DLVRU7iM_v5tJhuYdjhOaTH2MxroguXorxxaJUf3-E,5303
|
|
239
|
+
dateparser-1.3.0.dist-info/licenses/AUTHORS.rst,sha256=xyYeT2AGKVrsHYxoOxgDE8XmSK7VNtJNqFzw25JLE2g,711
|
|
240
|
+
dateparser-1.3.0.dist-info/licenses/LICENSE,sha256=t122Vbt6QqjoVh0NuHMVi7naudRoryRAAPAtfdg-l_Q,1468
|
|
240
241
|
dateparser_cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
241
242
|
dateparser_cli/cli.py,sha256=pZSVwZO_N1AfPJfbbTHgvEnV8kmvyKmd6YQLYT3epis,991
|
|
242
243
|
dateparser_cli/exceptions.py,sha256=ifwai5On6opuhUpBHMoz0iFkybtZg7m3gHHnxtOEoEk,58
|
|
243
244
|
dateparser_cli/fasttext_manager.py,sha256=Q1I42NFR6YghTRySjNA9WFQnJ6l_2KDW4VaxXU1amiA,1495
|
|
244
245
|
dateparser_cli/utils.py,sha256=XERf49Ed3Avx91sjC6Rz1Lii9yvt59LXKhOsZ3SuN9E,645
|
|
245
246
|
dateparser_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
246
|
-
dateparser_data/settings.py,sha256=
|
|
247
|
+
dateparser_data/settings.py,sha256=7rSLmqtbw8DKGvhs23D4aqP-IZbhI-dGaI2n8l_7vuk,977
|
|
247
248
|
dateparser_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
248
249
|
dateparser_scripts/get_cldr_data.py,sha256=A6ziFXtK72UvGDgHYavDqadsnBH5ezM1RgpXydk-DPA,16926
|
|
249
250
|
dateparser_scripts/order_languages.py,sha256=BkYmAqFvoMvj3I7xcjeutz0Bx5VQQky68Y5fNrNzLmg,7201
|
|
250
251
|
dateparser_scripts/update_supported_languages_and_locales.py,sha256=BaHzzCiLKvkhKcal44CyedZNIUrBrI4fm0LxHicRJ8g,1460
|
|
251
252
|
dateparser_scripts/utils.py,sha256=Uw4HbgwbKYuUeztUw9OQqtzPtjmBxBBqGytLXjtSia8,2579
|
|
252
|
-
dateparser_scripts/write_complete_data.py,sha256=
|
|
253
|
-
dateparser-1.
|
|
254
|
-
dateparser-1.
|
|
255
|
-
dateparser-1.
|
|
256
|
-
dateparser-1.
|
|
257
|
-
dateparser-1.
|
|
253
|
+
dateparser_scripts/write_complete_data.py,sha256=RS_0k-TZAdiCG7P7aRiZDgPhfFwlzFIk3df6a0846cw,4543
|
|
254
|
+
dateparser-1.3.0.dist-info/METADATA,sha256=7KawAlvH8rbrbloiIYfJfr8PQ2lEvwUoDjPnaMgrqhk,30933
|
|
255
|
+
dateparser-1.3.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
256
|
+
dateparser-1.3.0.dist-info/entry_points.txt,sha256=QPwc8kOjbaaxJdyQXiix9sGbUjC_G_Ga5LgJ_11KKAs,68
|
|
257
|
+
dateparser-1.3.0.dist-info/top_level.txt,sha256=LujVBIKC69tvws1XkgyOFRDjEEd-E1SjAirYrhEbqn8,61
|
|
258
|
+
dateparser-1.3.0.dist-info/RECORD,,
|
dateparser_data/settings.py
CHANGED
|
@@ -26,6 +26,10 @@ settings = {
|
|
|
26
26
|
"DEFAULT_LANGUAGES": [],
|
|
27
27
|
# Optional language detection
|
|
28
28
|
"LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD": 0.5,
|
|
29
|
+
# Time span settings
|
|
30
|
+
"RETURN_TIME_SPAN": False,
|
|
31
|
+
"DEFAULT_START_OF_WEEK": "monday",
|
|
32
|
+
"DEFAULT_DAYS_IN_MONTH": 30,
|
|
29
33
|
# Other settings
|
|
30
34
|
"RETURN_TIME_AS_PERIOD": False,
|
|
31
35
|
"PARSERS": default_parsers,
|
|
@@ -4,7 +4,7 @@ import shutil
|
|
|
4
4
|
from collections import OrderedDict
|
|
5
5
|
|
|
6
6
|
import regex as re
|
|
7
|
-
from ruamel.yaml import
|
|
7
|
+
from ruamel.yaml import YAML
|
|
8
8
|
|
|
9
9
|
from dateparser_scripts.order_languages import avoid_languages
|
|
10
10
|
from dateparser_scripts.utils import combine_dicts
|
|
@@ -55,7 +55,8 @@ def _get_complete_date_translation_data(language):
|
|
|
55
55
|
cldr_data = json.load(f, object_pairs_hook=OrderedDict)
|
|
56
56
|
if language in supplementary_languages:
|
|
57
57
|
with open(supplementary_date_directory + language + ".yaml") as g:
|
|
58
|
-
|
|
58
|
+
yaml = YAML()
|
|
59
|
+
supplementary_data = OrderedDict(yaml.load(g))
|
|
59
60
|
complete_data = combine_dicts(cldr_data, supplementary_data)
|
|
60
61
|
if "name" not in complete_data:
|
|
61
62
|
complete_data["name"] = language
|
|
@@ -88,7 +89,8 @@ def write_complete_data(in_memory=False):
|
|
|
88
89
|
os.mkdir(date_translation_directory)
|
|
89
90
|
|
|
90
91
|
with open(supplementary_directory + "base_data.yaml") as f:
|
|
91
|
-
|
|
92
|
+
yaml = YAML()
|
|
93
|
+
base_data = yaml.load(f)
|
|
92
94
|
|
|
93
95
|
for language in all_languages:
|
|
94
96
|
date_translation_data = _get_complete_date_translation_data(language)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|