csv-detective 0.8.1.dev1526__py3-none-any.whl → 0.8.1.dev1544__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detect_fields/__init__.py +1 -1
- csv_detective/detect_fields/temp/datetime_aware/__init__.py +3 -1
- csv_detective/detect_fields/temp/datetime_naive/__init__.py +2 -0
- csv_detective/detect_labels/__init__.py +1 -1
- csv_detective/detection/formats.py +0 -1
- csv_detective/output/schema.py +4 -3
- csv_detective/output/utils.py +0 -2
- csv_detective/parsing/columns.py +8 -5
- csv_detective/validate.py +12 -6
- {csv_detective-0.8.1.dev1526.data → csv_detective-0.8.1.dev1544.data}/data/share/csv_detective/CHANGELOG.md +2 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/METADATA +1 -1
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/RECORD +19 -21
- tests/test_fields.py +6 -6
- csv_detective/detect_fields/temp/datetime_iso/__init__.py +0 -16
- csv_detective/detect_labels/temp/datetime_iso/__init__.py +0 -20
- {csv_detective-0.8.1.dev1526.data → csv_detective-0.8.1.dev1544.data}/data/share/csv_detective/LICENSE +0 -0
- {csv_detective-0.8.1.dev1526.data → csv_detective-0.8.1.dev1544.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/WHEEL +0 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/top_level.txt +0 -0
|
@@ -8,7 +8,9 @@ PROPORTION = 1
|
|
|
8
8
|
def _is(val: Optional[Any]) -> bool:
|
|
9
9
|
"""Detects timezone-aware datetimes only"""
|
|
10
10
|
# early stops, to cut processing time
|
|
11
|
-
|
|
11
|
+
# 21 is the minimal length of a datetime format YYMMDDTHH:MM:SS+HH:MM
|
|
12
|
+
# 32 is the maximal length of an ISO datetime format YYYY-MM-DDTHH:MM:SS.dddddd+HH:MM, keeping some slack
|
|
13
|
+
if not isinstance(val, str) or len(val) > 35 or len(val) < 21:
|
|
12
14
|
return False
|
|
13
15
|
threshold = 0.7
|
|
14
16
|
if sum([char.isdigit() or char in {"-", "/", ":", " "} for char in val]) / len(val) < threshold:
|
|
@@ -8,6 +8,8 @@ PROPORTION = 1
|
|
|
8
8
|
def _is(val: Optional[Any]) -> bool:
|
|
9
9
|
"""Detects naive datetimes only"""
|
|
10
10
|
# early stops, to cut processing time
|
|
11
|
+
# 15 is the minimal length of a datetime format YYMMDDTHH:MM:SS
|
|
12
|
+
# 26 is the maximal length of an ISO datetime format YYYY-MM-DDTHH:MM:SS.dddddd, keeping some slack
|
|
11
13
|
if not isinstance(val, str) or len(val) > 30 or len(val) < 15:
|
|
12
14
|
return False
|
|
13
15
|
threshold = 0.7
|
csv_detective/output/schema.py
CHANGED
|
@@ -61,7 +61,8 @@ def get_description(format: str) -> str:
|
|
|
61
61
|
"url": "Adresse URL",
|
|
62
62
|
"uuid": "Identifiant unique au format UUID",
|
|
63
63
|
"date": "Date",
|
|
64
|
-
"
|
|
64
|
+
"datetime_aware": "Date au format datetime avec fuseau horaire",
|
|
65
|
+
"datetime_naive": "Date au format datetime sans fuseau horaire",
|
|
65
66
|
"datetime_rfc822": "Date au format datetime (RFC822)",
|
|
66
67
|
"year": "Année",
|
|
67
68
|
}
|
|
@@ -107,7 +108,6 @@ def get_validata_type(format: str) -> str:
|
|
|
107
108
|
"string": "string",
|
|
108
109
|
"date": "date",
|
|
109
110
|
"datetime_aware": "datetime",
|
|
110
|
-
"datetime_iso": "datetime",
|
|
111
111
|
"datetime_naive": "datetime",
|
|
112
112
|
"datetime_rfc822": "datetime",
|
|
113
113
|
"json_geojson": "geojson",
|
|
@@ -146,7 +146,8 @@ def get_example(format: str) -> str:
|
|
|
146
146
|
"csp_insee": "anciens agriculteurs exploitants",
|
|
147
147
|
"date": "2020-01-01",
|
|
148
148
|
"date_fr": "12 janvier 2020",
|
|
149
|
-
"
|
|
149
|
+
"datetime_aware": "2020-01-01T00:00:00+02:00",
|
|
150
|
+
"datetime_naive": "2020-01-01T00:00:00",
|
|
150
151
|
"datetime_rfc822": "Tue, 1 Jan 2020 00:00:00 +0000",
|
|
151
152
|
"departement": "Ain",
|
|
152
153
|
"email": "example@example.com",
|
csv_detective/output/utils.py
CHANGED
|
@@ -34,8 +34,6 @@ def prepare_output_dict(return_table: pd.DataFrame, limited_output: bool):
|
|
|
34
34
|
formats_to_remove.add("longitude_l93")
|
|
35
35
|
if "code_region" in formats_detected:
|
|
36
36
|
formats_to_remove.add("code_departement")
|
|
37
|
-
if "datetime_iso" in formats_detected:
|
|
38
|
-
formats_to_remove.add("datetime_naive")
|
|
39
37
|
if "datetime_rfc822" in formats_detected:
|
|
40
38
|
formats_to_remove.add("datetime_aware")
|
|
41
39
|
|
csv_detective/parsing/columns.py
CHANGED
|
@@ -16,7 +16,7 @@ def test_col_val(
|
|
|
16
16
|
skipna: bool = True,
|
|
17
17
|
limited_output: bool = False,
|
|
18
18
|
verbose: bool = False,
|
|
19
|
-
):
|
|
19
|
+
) -> float:
|
|
20
20
|
"""Tests values of the serie using test_func.
|
|
21
21
|
- skipna : if True indicates that NaNs are not counted as False
|
|
22
22
|
- proportion : indicates the proportion of values that have to pass the test
|
|
@@ -81,10 +81,13 @@ def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna:
|
|
|
81
81
|
if verbose:
|
|
82
82
|
start = time()
|
|
83
83
|
logging.info("Testing columns to get types")
|
|
84
|
-
test_funcs =
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
84
|
+
test_funcs = {
|
|
85
|
+
test.__name__.split(".")[-1]: {
|
|
86
|
+
"func": test._is,
|
|
87
|
+
"prop": test.PROPORTION,
|
|
88
|
+
}
|
|
89
|
+
for test in all_tests
|
|
90
|
+
}
|
|
88
91
|
return_table = pd.DataFrame(columns=table.columns)
|
|
89
92
|
for idx, (key, value) in enumerate(test_funcs.items()):
|
|
90
93
|
if verbose:
|
csv_detective/validate.py
CHANGED
|
@@ -5,11 +5,15 @@ import pandas as pd
|
|
|
5
5
|
|
|
6
6
|
from csv_detective.load_tests import return_all_tests
|
|
7
7
|
from csv_detective.parsing.load import load_file
|
|
8
|
+
from csv_detective.parsing.columns import test_col_val
|
|
8
9
|
|
|
9
10
|
logging.basicConfig(level=logging.INFO)
|
|
10
11
|
|
|
11
12
|
tests = {
|
|
12
|
-
t.__name__.split(".")[-1]:
|
|
13
|
+
t.__name__.split(".")[-1]: {
|
|
14
|
+
"func": t._is,
|
|
15
|
+
"prop": t.PROPORTION,
|
|
16
|
+
}
|
|
13
17
|
for t in return_all_tests("ALL", "detect_fields")
|
|
14
18
|
}
|
|
15
19
|
|
|
@@ -56,11 +60,13 @@ def validate(
|
|
|
56
60
|
if args["format"] == "string":
|
|
57
61
|
# no test for columns that have not been recognized as a specific format
|
|
58
62
|
continue
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
63
|
+
test_result: float = test_col_val(
|
|
64
|
+
serie=table[col_name],
|
|
65
|
+
test_func=tests[args["format"]]["func"],
|
|
66
|
+
proportion=tests[args["format"]]["prop"],
|
|
67
|
+
skipna=skipna,
|
|
68
|
+
)
|
|
69
|
+
if not bool(test_result):
|
|
64
70
|
if verbose:
|
|
65
71
|
logging.warning("> Test failed, proceeding with full analysis")
|
|
66
72
|
return False, table, analysis
|
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
- For big files, analyse on sample then validate on whole file [#124](https://github.com/datagouv/csv-detective/pull/124) [#129](https://github.com/datagouv/csv-detective/pull/129)
|
|
9
9
|
- Fix imports [#125](https://github.com/datagouv/csv-detective/pull/125) [#126](https://github.com/datagouv/csv-detective/pull/126) [#127](https://github.com/datagouv/csv-detective/pull/127) [#128](https://github.com/datagouv/csv-detective/pull/128)
|
|
10
10
|
- Split aware and naive datetimes for hydra to cast them separately [#130](https://github.com/datagouv/csv-detective/pull/130)
|
|
11
|
+
- Validate using the testing function, to consider PROPORTIONS [#131](https://github.com/datagouv/csv-detective/pull/131)
|
|
12
|
+
- Remove `datetime_iso` due to ambiguous cast in db (can be naive or aware) [#132](https://github.com/datagouv/csv-detective/pull/132)
|
|
11
13
|
|
|
12
14
|
## 0.8.0 (2025-05-20)
|
|
13
15
|
|
|
@@ -4,8 +4,8 @@ csv_detective/explore_csv.py,sha256=VEeAJaz3FPOmGmQ-Yuf3FuSRRPULM03FrTf3qwZX52s,
|
|
|
4
4
|
csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
|
|
5
5
|
csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
|
|
6
6
|
csv_detective/utils.py,sha256=-tIs9yV7RJPGj65lQ7LjRGch6Iws9UeuIPQsd2uUUJM,1025
|
|
7
|
-
csv_detective/validate.py,sha256=
|
|
8
|
-
csv_detective/detect_fields/__init__.py,sha256=
|
|
7
|
+
csv_detective/validate.py,sha256=5Li_vfvU9wdfoZjNjef-MBUoKcKoJ-c7381QoX9aDXY,2818
|
|
8
|
+
csv_detective/detect_fields/__init__.py,sha256=jThGn0_HO8U0mMoSbf38x8l46ABRQcmHcNLvjZqQQdc,984
|
|
9
9
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
|
|
@@ -67,12 +67,11 @@ csv_detective/detect_fields/other/url/__init__.py,sha256=L7h9fZldh1w86XwCx0x3Q1T
|
|
|
67
67
|
csv_detective/detect_fields/other/uuid/__init__.py,sha256=3-z0fDax29SJc57zPjNGR6DPICJu6gfuNGC5L3jh4d0,223
|
|
68
68
|
csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
69
69
|
csv_detective/detect_fields/temp/date/__init__.py,sha256=VC4_C5lQbjqTweC4T2p9GZAIO64zERhAuf53CPfXgw4,983
|
|
70
|
-
csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=
|
|
71
|
-
csv_detective/detect_fields/temp/
|
|
72
|
-
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=q5Ow1yH9nCz8aY4uOHIKv8CCYIEPLUZlHzg8Nr59kBo,662
|
|
70
|
+
csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=bEfWvXx_GNCRUxMGJYqfOK4wRDr3WMaGVAmIa_C2pXE,853
|
|
71
|
+
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=GtQo55SrrXfoT-L7ZXW63jrlAYvNT5m56wMfhuY3pyI,836
|
|
73
72
|
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=JtUzg3BXYd-XJMLGxQ0P1OAJGOQ7DlYMD4fCU9yndg0,511
|
|
74
73
|
csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRhKTDT-FTqGOBpdartuShA,194
|
|
75
|
-
csv_detective/detect_labels/__init__.py,sha256=
|
|
74
|
+
csv_detective/detect_labels/__init__.py,sha256=oVq2fiO6QkaWB0wZImL8YVW7oiwPky8ivmLZAFmK55Q,864
|
|
76
75
|
csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
76
|
csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
77
|
csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=fNWFW-Wo3n6azDBfmi0J0qnzP-p2StLxCc9eNiE9NNE,346
|
|
@@ -125,14 +124,13 @@ csv_detective/detect_labels/other/url/__init__.py,sha256=4Ajpdp8W0jS9aHZAAMyUlge
|
|
|
125
124
|
csv_detective/detect_labels/other/uuid/__init__.py,sha256=kXVb4oMy-Zv-OYmAIEoNFrBA20l9hbUTdvTfjeMmhjk,213
|
|
126
125
|
csv_detective/detect_labels/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
127
126
|
csv_detective/detect_labels/temp/date/__init__.py,sha256=w0eeZIseAmPwL4OvCWzZXbxGOIXYRKiZUhEtgHiBXd0,604
|
|
128
|
-
csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=d0laZNzHx-kSARs9Re8TZ11GNs99aMz6gXc72CJ6ul4,440
|
|
129
127
|
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=53ysj7QgsxXwG1le3zfSJd1oaTTf-Er3jBeYi_A4F9g,458
|
|
130
128
|
csv_detective/detect_labels/temp/year/__init__.py,sha256=7uWaCZY7dOG7nolW46IgBWmcu8K-9jPED-pOlMlErfo,433
|
|
131
129
|
csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
132
130
|
csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
|
|
133
131
|
csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
|
|
134
132
|
csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
|
|
135
|
-
csv_detective/detection/formats.py,sha256=
|
|
133
|
+
csv_detective/detection/formats.py,sha256=3vf7VdjxTmdt5KaTqGBwT5GuZhHuw98R-sIemTcOIJg,6345
|
|
136
134
|
csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41cnQmc,1172
|
|
137
135
|
csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
|
|
138
136
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
@@ -141,28 +139,28 @@ csv_detective/output/__init__.py,sha256=5KTevPfp_4MRxByJyOntQjToNfeG7dPQn-_13wSq
|
|
|
141
139
|
csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
|
|
142
140
|
csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5fA,8649
|
|
143
141
|
csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
|
|
144
|
-
csv_detective/output/schema.py,sha256=
|
|
145
|
-
csv_detective/output/utils.py,sha256=
|
|
142
|
+
csv_detective/output/schema.py,sha256=Hpav3RgIP7gOb93h154s1wNSlEZtHNJVzFDDwp54UcQ,13669
|
|
143
|
+
csv_detective/output/utils.py,sha256=RcOkFQihwfmEIOD-gwrUKi2r5CwBbs17vkuAf8n7-Wo,2405
|
|
146
144
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
|
-
csv_detective/parsing/columns.py,sha256=
|
|
145
|
+
csv_detective/parsing/columns.py,sha256=rLzAU36cHMpVynEPhj8uMdr3IRO3_Yq58Yw7Z6oLPiQ,5693
|
|
148
146
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
149
147
|
csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
|
|
150
148
|
csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
|
|
151
149
|
csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
|
|
152
150
|
csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
|
|
153
|
-
csv_detective-0.8.1.
|
|
154
|
-
csv_detective-0.8.1.
|
|
155
|
-
csv_detective-0.8.1.
|
|
156
|
-
csv_detective-0.8.1.
|
|
151
|
+
csv_detective-0.8.1.dev1544.data/data/share/csv_detective/CHANGELOG.md,sha256=nXqvtX1HbmSA3Ac2f0t2cAf789YftWuPljxh7jKfogI,9418
|
|
152
|
+
csv_detective-0.8.1.dev1544.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
153
|
+
csv_detective-0.8.1.dev1544.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
|
|
154
|
+
csv_detective-0.8.1.dev1544.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
157
155
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
156
|
tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
|
|
159
|
-
tests/test_fields.py,sha256=
|
|
157
|
+
tests/test_fields.py,sha256=tTFOmlb9gewtCwcZV7B6Gc3aH6xXK5kMUFSEBi7iIy4,10638
|
|
160
158
|
tests/test_file.py,sha256=0bHV9wx9mSRoav_DVF19g694yohb1p0bw7rtcBeKG-8,8451
|
|
161
159
|
tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
|
|
162
160
|
tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
|
|
163
161
|
tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
|
|
164
|
-
csv_detective-0.8.1.
|
|
165
|
-
csv_detective-0.8.1.
|
|
166
|
-
csv_detective-0.8.1.
|
|
167
|
-
csv_detective-0.8.1.
|
|
168
|
-
csv_detective-0.8.1.
|
|
162
|
+
csv_detective-0.8.1.dev1544.dist-info/METADATA,sha256=GfgsEWTVLsK-Ud8DHScLMYCbElp3J7Efsy1covTPlvA,10443
|
|
163
|
+
csv_detective-0.8.1.dev1544.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
164
|
+
csv_detective-0.8.1.dev1544.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
165
|
+
csv_detective-0.8.1.dev1544.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
166
|
+
csv_detective-0.8.1.dev1544.dist-info/RECORD,,
|
tests/test_fields.py
CHANGED
|
@@ -61,7 +61,6 @@ from csv_detective.detect_fields.other import (
|
|
|
61
61
|
from csv_detective.detect_fields.temp import (
|
|
62
62
|
date,
|
|
63
63
|
datetime_aware,
|
|
64
|
-
datetime_iso,
|
|
65
64
|
datetime_naive,
|
|
66
65
|
datetime_rfc822,
|
|
67
66
|
year,
|
|
@@ -345,17 +344,18 @@ fields = {
|
|
|
345
344
|
],
|
|
346
345
|
},
|
|
347
346
|
datetime_aware: {
|
|
348
|
-
True: [
|
|
347
|
+
True: [
|
|
348
|
+
"2021-06-22 10:20:10-04:00",
|
|
349
|
+
"2030-06-22 00:00:00.0028+02:00",
|
|
350
|
+
"2024-12-19T10:53:36.428000+00:00",
|
|
351
|
+
"1996/06/22 10:20:10 GMT",
|
|
352
|
+
],
|
|
349
353
|
False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT", "2021-06-44 10:20:10"],
|
|
350
354
|
},
|
|
351
355
|
datetime_naive: {
|
|
352
356
|
True: ["2021-06-22 10:20:10", "2030/06/22 00:00:00.0028"],
|
|
353
357
|
False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT", "2021-06-44 10:20:10+02:00"],
|
|
354
358
|
},
|
|
355
|
-
datetime_iso: {
|
|
356
|
-
True: ["2021-06-22T10:20:10"],
|
|
357
|
-
False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT"],
|
|
358
|
-
},
|
|
359
359
|
datetime_rfc822: {
|
|
360
360
|
True: ["Sun, 06 Nov 1994 08:49:37 GMT"],
|
|
361
361
|
False: ["2021-06-22T10:20:10"],
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
PROPORTION = 1
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(val):
|
|
7
|
-
'''Renvoie True si val peut être une date au format iso, False sinon
|
|
8
|
-
Exemple: 2023-01-15T12:30:45.123456Z'''
|
|
9
|
-
return isinstance(val, str) and bool(
|
|
10
|
-
re.match(
|
|
11
|
-
r'^\d{4}-(0[1-9]|1[012])\-(0[1-9]|[12][0-9]|3[01])[Tt]'
|
|
12
|
-
r'([0-2])([0-9]):([0-5])([0-9]):([0-5])([0-9])'
|
|
13
|
-
r'(\.\d+)?([Zz]|[-+](0[0-9]|1[0-2]):[0-5][0-9])?$',
|
|
14
|
-
val
|
|
15
|
-
)
|
|
16
|
-
)
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"datetime iso",
|
|
9
|
-
"datetime",
|
|
10
|
-
"timestamp",
|
|
11
|
-
"osm_timestamp",
|
|
12
|
-
"date",
|
|
13
|
-
"created at",
|
|
14
|
-
"last update",
|
|
15
|
-
"date maj",
|
|
16
|
-
"createdat",
|
|
17
|
-
"date naissance",
|
|
18
|
-
"date donnees",
|
|
19
|
-
]
|
|
20
|
-
return header_score(header, words_combinations_list)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/top_level.txt
RENAMED
|
File without changes
|