csv-detective 0.8.1.dev1526__py3-none-any.whl → 0.8.1.dev1544__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. csv_detective/detect_fields/__init__.py +1 -1
  2. csv_detective/detect_fields/temp/datetime_aware/__init__.py +3 -1
  3. csv_detective/detect_fields/temp/datetime_naive/__init__.py +2 -0
  4. csv_detective/detect_labels/__init__.py +1 -1
  5. csv_detective/detection/formats.py +0 -1
  6. csv_detective/output/schema.py +4 -3
  7. csv_detective/output/utils.py +0 -2
  8. csv_detective/parsing/columns.py +8 -5
  9. csv_detective/validate.py +12 -6
  10. {csv_detective-0.8.1.dev1526.data → csv_detective-0.8.1.dev1544.data}/data/share/csv_detective/CHANGELOG.md +2 -0
  11. {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/METADATA +1 -1
  12. {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/RECORD +19 -21
  13. tests/test_fields.py +6 -6
  14. csv_detective/detect_fields/temp/datetime_iso/__init__.py +0 -16
  15. csv_detective/detect_labels/temp/datetime_iso/__init__.py +0 -20
  16. {csv_detective-0.8.1.dev1526.data → csv_detective-0.8.1.dev1544.data}/data/share/csv_detective/LICENSE +0 -0
  17. {csv_detective-0.8.1.dev1526.data → csv_detective-0.8.1.dev1544.data}/data/share/csv_detective/README.md +0 -0
  18. {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/WHEEL +0 -0
  19. {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/entry_points.txt +0 -0
  20. {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/licenses/LICENSE +0 -0
  21. {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1544.dist-info}/top_level.txt +0 -0
@@ -57,4 +57,4 @@ from .geo import (
57
57
  )
58
58
 
59
59
  from .FR.temp import jour_de_la_semaine, mois_de_annee
60
- from .temp import year, date, datetime_aware, datetime_iso, datetime_naive, datetime_rfc822
60
+ from .temp import year, date, datetime_aware, datetime_naive, datetime_rfc822
@@ -8,7 +8,9 @@ PROPORTION = 1
8
8
  def _is(val: Optional[Any]) -> bool:
9
9
  """Detects timezone-aware datetimes only"""
10
10
  # early stops, to cut processing time
11
- if not isinstance(val, str) or len(val) > 30 or len(val) < 15:
11
+ # 21 is the minimal length of a datetime format YYMMDDTHH:MM:SS+HH:MM
12
+ # 32 is the maximal length of an ISO datetime format YYYY-MM-DDTHH:MM:SS.dddddd+HH:MM, keeping some slack
13
+ if not isinstance(val, str) or len(val) > 35 or len(val) < 21:
12
14
  return False
13
15
  threshold = 0.7
14
16
  if sum([char.isdigit() or char in {"-", "/", ":", " "} for char in val]) / len(val) < threshold:
@@ -8,6 +8,8 @@ PROPORTION = 1
8
8
  def _is(val: Optional[Any]) -> bool:
9
9
  """Detects naive datetimes only"""
10
10
  # early stops, to cut processing time
11
+ # 15 is the minimal length of a datetime format YYMMDDTHH:MM:SS
12
+ # 26 is the maximal length of an ISO datetime format YYYY-MM-DDTHH:MM:SS.dddddd, keeping some slack
11
13
  if not isinstance(val, str) or len(val) > 30 or len(val) < 15:
12
14
  return False
13
15
  threshold = 0.7
@@ -40,4 +40,4 @@ from .geo import (
40
40
  longitude_wgs
41
41
  )
42
42
  from .other import booleen, email, float, int, money, mongo_object_id, twitter, url, uuid
43
- from .temp import date, datetime_iso, datetime_rfc822, year
43
+ from .temp import date, datetime_rfc822, year
@@ -107,7 +107,6 @@ def detect_formats(
107
107
  "json": "json",
108
108
  "json_geojson": "json",
109
109
  "datetime_aware": "datetime",
110
- "datetime_iso": "datetime",
111
110
  "datetime_naive": "datetime",
112
111
  "datetime_rfc822": "datetime",
113
112
  "date": "date",
@@ -61,7 +61,8 @@ def get_description(format: str) -> str:
61
61
  "url": "Adresse URL",
62
62
  "uuid": "Identifiant unique au format UUID",
63
63
  "date": "Date",
64
- "datetime_iso": "Date au format datetime (ISO)",
64
+ "datetime_aware": "Date au format datetime avec fuseau horaire",
65
+ "datetime_naive": "Date au format datetime sans fuseau horaire",
65
66
  "datetime_rfc822": "Date au format datetime (RFC822)",
66
67
  "year": "Année",
67
68
  }
@@ -107,7 +108,6 @@ def get_validata_type(format: str) -> str:
107
108
  "string": "string",
108
109
  "date": "date",
109
110
  "datetime_aware": "datetime",
110
- "datetime_iso": "datetime",
111
111
  "datetime_naive": "datetime",
112
112
  "datetime_rfc822": "datetime",
113
113
  "json_geojson": "geojson",
@@ -146,7 +146,8 @@ def get_example(format: str) -> str:
146
146
  "csp_insee": "anciens agriculteurs exploitants",
147
147
  "date": "2020-01-01",
148
148
  "date_fr": "12 janvier 2020",
149
- "datetime_iso": "2020-01-01T00:00:00",
149
+ "datetime_aware": "2020-01-01T00:00:00+02:00",
150
+ "datetime_naive": "2020-01-01T00:00:00",
150
151
  "datetime_rfc822": "Tue, 1 Jan 2020 00:00:00 +0000",
151
152
  "departement": "Ain",
152
153
  "email": "example@example.com",
@@ -34,8 +34,6 @@ def prepare_output_dict(return_table: pd.DataFrame, limited_output: bool):
34
34
  formats_to_remove.add("longitude_l93")
35
35
  if "code_region" in formats_detected:
36
36
  formats_to_remove.add("code_departement")
37
- if "datetime_iso" in formats_detected:
38
- formats_to_remove.add("datetime_naive")
39
37
  if "datetime_rfc822" in formats_detected:
40
38
  formats_to_remove.add("datetime_aware")
41
39
 
@@ -16,7 +16,7 @@ def test_col_val(
16
16
  skipna: bool = True,
17
17
  limited_output: bool = False,
18
18
  verbose: bool = False,
19
- ):
19
+ ) -> float:
20
20
  """Tests values of the serie using test_func.
21
21
  - skipna : if True indicates that NaNs are not counted as False
22
22
  - proportion : indicates the proportion of values that have to pass the test
@@ -81,10 +81,13 @@ def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna:
81
81
  if verbose:
82
82
  start = time()
83
83
  logging.info("Testing columns to get types")
84
- test_funcs = dict()
85
- for test in all_tests:
86
- name = test.__name__.split(".")[-1]
87
- test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
84
+ test_funcs = {
85
+ test.__name__.split(".")[-1]: {
86
+ "func": test._is,
87
+ "prop": test.PROPORTION,
88
+ }
89
+ for test in all_tests
90
+ }
88
91
  return_table = pd.DataFrame(columns=table.columns)
89
92
  for idx, (key, value) in enumerate(test_funcs.items()):
90
93
  if verbose:
csv_detective/validate.py CHANGED
@@ -5,11 +5,15 @@ import pandas as pd
5
5
 
6
6
  from csv_detective.load_tests import return_all_tests
7
7
  from csv_detective.parsing.load import load_file
8
+ from csv_detective.parsing.columns import test_col_val
8
9
 
9
10
  logging.basicConfig(level=logging.INFO)
10
11
 
11
12
  tests = {
12
- t.__name__.split(".")[-1]: t._is
13
+ t.__name__.split(".")[-1]: {
14
+ "func": t._is,
15
+ "prop": t.PROPORTION,
16
+ }
13
17
  for t in return_all_tests("ALL", "detect_fields")
14
18
  }
15
19
 
@@ -56,11 +60,13 @@ def validate(
56
60
  if args["format"] == "string":
57
61
  # no test for columns that have not been recognized as a specific format
58
62
  continue
59
- test_func = tests[args["format"]]
60
- col_data = table[col_name]
61
- if skipna:
62
- col_data = col_data.loc[~col_data.isna()]
63
- if not col_data.apply(test_func).all():
63
+ test_result: float = test_col_val(
64
+ serie=table[col_name],
65
+ test_func=tests[args["format"]]["func"],
66
+ proportion=tests[args["format"]]["prop"],
67
+ skipna=skipna,
68
+ )
69
+ if not bool(test_result):
64
70
  if verbose:
65
71
  logging.warning("> Test failed, proceeding with full analysis")
66
72
  return False, table, analysis
@@ -8,6 +8,8 @@
8
8
  - For big files, analyse on sample then validate on whole file [#124](https://github.com/datagouv/csv-detective/pull/124) [#129](https://github.com/datagouv/csv-detective/pull/129)
9
9
  - Fix imports [#125](https://github.com/datagouv/csv-detective/pull/125) [#126](https://github.com/datagouv/csv-detective/pull/126) [#127](https://github.com/datagouv/csv-detective/pull/127) [#128](https://github.com/datagouv/csv-detective/pull/128)
10
10
  - Split aware and naive datetimes for hydra to cast them separately [#130](https://github.com/datagouv/csv-detective/pull/130)
11
+ - Validate using the testing function, to consider PROPORTIONS [#131](https://github.com/datagouv/csv-detective/pull/131)
12
+ - Remove `datetime_iso` due to ambiguous cast in db (can be naive or aware) [#132](https://github.com/datagouv/csv-detective/pull/132)
11
13
 
12
14
  ## 0.8.0 (2025-05-20)
13
15
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv_detective
3
- Version: 0.8.1.dev1526
3
+ Version: 0.8.1.dev1544
4
4
  Summary: Detect tabular files column content
5
5
  Home-page: https://github.com/datagouv/csv_detective
6
6
  Author: Etalab
@@ -4,8 +4,8 @@ csv_detective/explore_csv.py,sha256=VEeAJaz3FPOmGmQ-Yuf3FuSRRPULM03FrTf3qwZX52s,
4
4
  csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
5
5
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
6
6
  csv_detective/utils.py,sha256=-tIs9yV7RJPGj65lQ7LjRGch6Iws9UeuIPQsd2uUUJM,1025
7
- csv_detective/validate.py,sha256=d_4Phmjk6Y0Z0YYVw4vpoZy8E79K370reGgkpzx1mcQ,2644
8
- csv_detective/detect_fields/__init__.py,sha256=HYSy0P_aH6R8Z8Hvd8aMaBAQaZ1QwcsWHT0YPm0iYs0,998
7
+ csv_detective/validate.py,sha256=5Li_vfvU9wdfoZjNjef-MBUoKcKoJ-c7381QoX9aDXY,2818
8
+ csv_detective/detect_fields/__init__.py,sha256=jThGn0_HO8U0mMoSbf38x8l46ABRQcmHcNLvjZqQQdc,984
9
9
  csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
@@ -67,12 +67,11 @@ csv_detective/detect_fields/other/url/__init__.py,sha256=L7h9fZldh1w86XwCx0x3Q1T
67
67
  csv_detective/detect_fields/other/uuid/__init__.py,sha256=3-z0fDax29SJc57zPjNGR6DPICJu6gfuNGC5L3jh4d0,223
68
68
  csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
69
  csv_detective/detect_fields/temp/date/__init__.py,sha256=VC4_C5lQbjqTweC4T2p9GZAIO64zERhAuf53CPfXgw4,983
70
- csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=Xi3fWiqm_S09AaMeHVrgx6bSieX1gEdjjM7GYsKqEx8,667
71
- csv_detective/detect_fields/temp/datetime_iso/__init__.py,sha256=DOfli-A7gPlZmiV2J6Ka5_yDUCaOgxis29LET_tfhA4,444
72
- csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=q5Ow1yH9nCz8aY4uOHIKv8CCYIEPLUZlHzg8Nr59kBo,662
70
+ csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=bEfWvXx_GNCRUxMGJYqfOK4wRDr3WMaGVAmIa_C2pXE,853
71
+ csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=GtQo55SrrXfoT-L7ZXW63jrlAYvNT5m56wMfhuY3pyI,836
73
72
  csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=JtUzg3BXYd-XJMLGxQ0P1OAJGOQ7DlYMD4fCU9yndg0,511
74
73
  csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRhKTDT-FTqGOBpdartuShA,194
75
- csv_detective/detect_labels/__init__.py,sha256=BJjWlwTnnDe9nomABDUreu9EMu6IFG3T47d7YCJZbRc,878
74
+ csv_detective/detect_labels/__init__.py,sha256=oVq2fiO6QkaWB0wZImL8YVW7oiwPky8ivmLZAFmK55Q,864
76
75
  csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
76
  csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
77
  csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=fNWFW-Wo3n6azDBfmi0J0qnzP-p2StLxCc9eNiE9NNE,346
@@ -125,14 +124,13 @@ csv_detective/detect_labels/other/url/__init__.py,sha256=4Ajpdp8W0jS9aHZAAMyUlge
125
124
  csv_detective/detect_labels/other/uuid/__init__.py,sha256=kXVb4oMy-Zv-OYmAIEoNFrBA20l9hbUTdvTfjeMmhjk,213
126
125
  csv_detective/detect_labels/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
127
126
  csv_detective/detect_labels/temp/date/__init__.py,sha256=w0eeZIseAmPwL4OvCWzZXbxGOIXYRKiZUhEtgHiBXd0,604
128
- csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=d0laZNzHx-kSARs9Re8TZ11GNs99aMz6gXc72CJ6ul4,440
129
127
  csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=53ysj7QgsxXwG1le3zfSJd1oaTTf-Er3jBeYi_A4F9g,458
130
128
  csv_detective/detect_labels/temp/year/__init__.py,sha256=7uWaCZY7dOG7nolW46IgBWmcu8K-9jPED-pOlMlErfo,433
131
129
  csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
130
  csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
133
131
  csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
134
132
  csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
135
- csv_detective/detection/formats.py,sha256=LDrstnAJccDeOEvGbWA5Ppx4gdlJrKbqd7qqWRG2tHI,6382
133
+ csv_detective/detection/formats.py,sha256=3vf7VdjxTmdt5KaTqGBwT5GuZhHuw98R-sIemTcOIJg,6345
136
134
  csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41cnQmc,1172
137
135
  csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
138
136
  csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
@@ -141,28 +139,28 @@ csv_detective/output/__init__.py,sha256=5KTevPfp_4MRxByJyOntQjToNfeG7dPQn-_13wSq
141
139
  csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
142
140
  csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5fA,8649
143
141
  csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
144
- csv_detective/output/schema.py,sha256=WxgajFuLfUTQQtmEdlO8ve2ULDzw2BYfz8QFwUsdDh0,13558
145
- csv_detective/output/utils.py,sha256=qFYhxJmkKrTUefdH7Owh-liZijswomCafic4cXYSyCg,2506
142
+ csv_detective/output/schema.py,sha256=Hpav3RgIP7gOb93h154s1wNSlEZtHNJVzFDDwp54UcQ,13669
143
+ csv_detective/output/utils.py,sha256=RcOkFQihwfmEIOD-gwrUKi2r5CwBbs17vkuAf8n7-Wo,2405
146
144
  csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
147
- csv_detective/parsing/columns.py,sha256=VzgG9Nwph5C_fLW_TuQC5BZVlPmOyjrH7Plvm_c8kWc,5675
145
+ csv_detective/parsing/columns.py,sha256=rLzAU36cHMpVynEPhj8uMdr3IRO3_Yq58Yw7Z6oLPiQ,5693
148
146
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
149
147
  csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
150
148
  csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
151
149
  csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
152
150
  csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
153
- csv_detective-0.8.1.dev1526.data/data/share/csv_detective/CHANGELOG.md,sha256=QBkuYfCNZtm-waJYz1YEITwR8kCMDKKZH6-ef7oj8tQ,9161
154
- csv_detective-0.8.1.dev1526.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
155
- csv_detective-0.8.1.dev1526.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
156
- csv_detective-0.8.1.dev1526.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
151
+ csv_detective-0.8.1.dev1544.data/data/share/csv_detective/CHANGELOG.md,sha256=nXqvtX1HbmSA3Ac2f0t2cAf789YftWuPljxh7jKfogI,9418
152
+ csv_detective-0.8.1.dev1544.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
153
+ csv_detective-0.8.1.dev1544.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
154
+ csv_detective-0.8.1.dev1544.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
157
155
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
156
  tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
159
- tests/test_fields.py,sha256=zeEQbHs0ougLzydmZLZs1l2UdrhKBEtdCCK64B4dhSU,10700
157
+ tests/test_fields.py,sha256=tTFOmlb9gewtCwcZV7B6Gc3aH6xXK5kMUFSEBi7iIy4,10638
160
158
  tests/test_file.py,sha256=0bHV9wx9mSRoav_DVF19g694yohb1p0bw7rtcBeKG-8,8451
161
159
  tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
162
160
  tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
163
161
  tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
164
- csv_detective-0.8.1.dev1526.dist-info/METADATA,sha256=6w8386meaPhTcYjmslsOqjkqvpLPZme5ikCsx7zJizo,10443
165
- csv_detective-0.8.1.dev1526.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
166
- csv_detective-0.8.1.dev1526.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
167
- csv_detective-0.8.1.dev1526.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
168
- csv_detective-0.8.1.dev1526.dist-info/RECORD,,
162
+ csv_detective-0.8.1.dev1544.dist-info/METADATA,sha256=GfgsEWTVLsK-Ud8DHScLMYCbElp3J7Efsy1covTPlvA,10443
163
+ csv_detective-0.8.1.dev1544.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
164
+ csv_detective-0.8.1.dev1544.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
165
+ csv_detective-0.8.1.dev1544.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
166
+ csv_detective-0.8.1.dev1544.dist-info/RECORD,,
tests/test_fields.py CHANGED
@@ -61,7 +61,6 @@ from csv_detective.detect_fields.other import (
61
61
  from csv_detective.detect_fields.temp import (
62
62
  date,
63
63
  datetime_aware,
64
- datetime_iso,
65
64
  datetime_naive,
66
65
  datetime_rfc822,
67
66
  year,
@@ -345,17 +344,18 @@ fields = {
345
344
  ],
346
345
  },
347
346
  datetime_aware: {
348
- True: ["2021-06-22 10:20:10-04:00", "2030-06-22 00:00:00.0028+02:00", "1996/06/22 10:20:10 GMT"],
347
+ True: [
348
+ "2021-06-22 10:20:10-04:00",
349
+ "2030-06-22 00:00:00.0028+02:00",
350
+ "2024-12-19T10:53:36.428000+00:00",
351
+ "1996/06/22 10:20:10 GMT",
352
+ ],
349
353
  False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT", "2021-06-44 10:20:10"],
350
354
  },
351
355
  datetime_naive: {
352
356
  True: ["2021-06-22 10:20:10", "2030/06/22 00:00:00.0028"],
353
357
  False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT", "2021-06-44 10:20:10+02:00"],
354
358
  },
355
- datetime_iso: {
356
- True: ["2021-06-22T10:20:10"],
357
- False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT"],
358
- },
359
359
  datetime_rfc822: {
360
360
  True: ["Sun, 06 Nov 1994 08:49:37 GMT"],
361
361
  False: ["2021-06-22T10:20:10"],
@@ -1,16 +0,0 @@
1
- import re
2
-
3
- PROPORTION = 1
4
-
5
-
6
- def _is(val):
7
- '''Renvoie True si val peut être une date au format iso, False sinon
8
- Exemple: 2023-01-15T12:30:45.123456Z'''
9
- return isinstance(val, str) and bool(
10
- re.match(
11
- r'^\d{4}-(0[1-9]|1[012])\-(0[1-9]|[12][0-9]|3[01])[Tt]'
12
- r'([0-2])([0-9]):([0-5])([0-9]):([0-5])([0-9])'
13
- r'(\.\d+)?([Zz]|[-+](0[0-9]|1[0-2]):[0-5][0-9])?$',
14
- val
15
- )
16
- )
@@ -1,20 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "datetime iso",
9
- "datetime",
10
- "timestamp",
11
- "osm_timestamp",
12
- "date",
13
- "created at",
14
- "last update",
15
- "date maj",
16
- "createdat",
17
- "date naissance",
18
- "date donnees",
19
- ]
20
- return header_score(header, words_combinations_list)