csv-detective 0.10.1.dev2559__py3-none-any.whl → 0.10.1.dev2581__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  from time import time
3
+ from typing import Iterator
3
4
 
4
5
  import pandas as pd
5
6
 
@@ -27,7 +28,7 @@ def routine(
27
28
  cast_json: bool = True,
28
29
  verbose: bool = False,
29
30
  sheet_name: str | int | None = None,
30
- ) -> dict | tuple[dict, pd.DataFrame]:
31
+ ) -> dict | tuple[dict, Iterator[pd.DataFrame]]:
31
32
  """
32
33
  Returns a dict with information about the table and possible column contents, and if requested the DataFrame with columns cast according to analysis.
33
34
 
@@ -115,7 +116,7 @@ def validate_then_detect(
115
116
  output_df: bool = False,
116
117
  cast_json: bool = True,
117
118
  verbose: bool = False,
118
- ):
119
+ ) -> dict | tuple[dict, Iterator[pd.DataFrame]]:
119
120
  """
120
121
  Performs a validation of the given file against the given analysis.
121
122
  If the validation fails, performs a full analysis and return it.
@@ -12,7 +12,9 @@ pat = (
12
12
  + r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})"
13
13
  + r"?(([+-](0\d|1[0-9]|2[0-3]):([0-5][0-9]))|Z)$"
14
14
  )
15
- prefix = r"^\d{4}"
15
+ # date_casting is very (too?) good at finding date(time)s where there sometimes is just a number
16
+ # this prefix check asserts we only consider strings that have a somewhat fine structure trying to cast
17
+ prefix = r"^\d{2}[-/:]?\d{2}"
16
18
 
17
19
 
18
20
  def _is(val):
@@ -41,6 +43,8 @@ _test_values = {
41
43
  "2000-12-21 10:20:10.1Z",
42
44
  "2024-12-19T10:53:36.428000+00:00",
43
45
  "1996/06/22 10:20:10 GMT",
46
+ "12/31/2022 12:00:00-04:00",
47
+ "12:00:00-04:00 12/31/2022",
44
48
  ],
45
49
  False: [
46
50
  "2021-06-22T30:20:10",
@@ -36,6 +36,8 @@ _test_values = {
36
36
  "2021-06-22 10:20:10",
37
37
  "2030/06-22 00:00:00",
38
38
  "2030/06/22 00:00:00.0028",
39
+ "12/31/2022 12:00:00",
40
+ "12:00:00 12/31/2022",
39
41
  ],
40
42
  False: [
41
43
  "2021-06-22T30:20:10",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: csv-detective
3
- Version: 0.10.1.dev2559
3
+ Version: 0.10.1.dev2581
4
4
  Summary: Detect tabular files column content
5
5
  Keywords: CSV,data processing,encoding,guess,parser,tabular
6
6
  Author: data.gouv.fr
@@ -9,7 +9,7 @@ csv_detective/detection/headers.py,sha256=95pTL524Sy5PGxyQ03ofFUaamvlmkxTJQe8u6H
9
9
  csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
10
10
  csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
11
11
  csv_detective/detection/variables.py,sha256=-QtZOB96z3pWbqnZ-c1RU3yzoYqcO61A0JzeS6JbkxY,3576
12
- csv_detective/explore_csv.py,sha256=-LCHr7vyT0Q0oLtXeOO8pEevJ6-8Ib9JP3D7nVgZM8o,7090
12
+ csv_detective/explore_csv.py,sha256=qSf6N3tbp43BUMJF5wiXz3aYKaTez6ro-75KL2Arci4,7174
13
13
  csv_detective/format.py,sha256=XX_cSTQc0jlsQq3GUqHi7Cz36AiRrpjrwPmeoOTLMvo,2396
14
14
  csv_detective/formats/__init__.py,sha256=Egiy29kcG3Oz2eE2maYhD3wP29zOSOWyRlOpGD5LGvU,318
15
15
  csv_detective/formats/adresse.py,sha256=jALDpEDAWyAcgqEfNVRg_W1r6XaYuJKD_jAaP2l-bxk,1943
@@ -33,8 +33,8 @@ csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF5
33
33
  csv_detective/formats/data/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
34
34
  csv_detective/formats/date.py,sha256=X4ohXaFO8cXPJktUSumc3bfdlbDIWEYTG8S9ugVRcsE,2730
35
35
  csv_detective/formats/date_fr.py,sha256=3hTw5RommrhcgECFRSt9KgyB9zyi1j4W3UygEHmRgoE,502
36
- csv_detective/formats/datetime_aware.py,sha256=kSEVLAovUJEYYFMFk4RiHY50rnPkDlrjfUFwk7ogJYQ,1587
37
- csv_detective/formats/datetime_naive.py,sha256=CVC-yey5uoPAAr8VnrY_HbLUGOk9dqduM5yLAvAhgfc,1591
36
+ csv_detective/formats/datetime_aware.py,sha256=DhDaJE-ILzSJr9EH2c-t0ZSfWlUOXwzK_MXYbSnj-bE,1877
37
+ csv_detective/formats/datetime_naive.py,sha256=2eN7XFs6rRAovMjB-cThZJkbnek1bK16qYP8crJtCqI,1655
38
38
  csv_detective/formats/datetime_rfc822.py,sha256=l-SLb34hSuHxC2JQ-9SD-nG38JqzoozwUZiGtoybb0A,601
39
39
  csv_detective/formats/departement.py,sha256=UP9UF23BFq_-mIS8N10K5XkoCXwPmDeSoa_7lCAkI4w,768
40
40
  csv_detective/formats/email.py,sha256=Qen2EBDYY5TtWXwxrrTGWRrbIybz0ySlVpl4ZRk8pzA,517
@@ -86,7 +86,7 @@ csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0
86
86
  csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
87
87
  csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
88
88
  csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
89
- csv_detective-0.10.1.dev2559.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
90
- csv_detective-0.10.1.dev2559.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
91
- csv_detective-0.10.1.dev2559.dist-info/METADATA,sha256=wzzRLpqOic3IYb99Uq1oO_D98wdr8EfRkZg6yVS6a-0,11064
92
- csv_detective-0.10.1.dev2559.dist-info/RECORD,,
89
+ csv_detective-0.10.1.dev2581.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
90
+ csv_detective-0.10.1.dev2581.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
91
+ csv_detective-0.10.1.dev2581.dist-info/METADATA,sha256=t7kro2nQ_nqJ4k_ZwY1c4lwwiWaiAfRtbXT-V3c8kkw,11064
92
+ csv_detective-0.10.1.dev2581.dist-info/RECORD,,