csv-detective 0.10.4.dev1__py3-none-any.whl → 0.10.12674__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. csv_detective/detection/__init__.py +0 -0
  2. csv_detective/detection/columns.py +0 -0
  3. csv_detective/detection/encoding.py +0 -0
  4. csv_detective/detection/engine.py +0 -0
  5. csv_detective/detection/formats.py +0 -2
  6. csv_detective/detection/headers.py +14 -12
  7. csv_detective/detection/rows.py +1 -1
  8. csv_detective/detection/separator.py +0 -0
  9. csv_detective/detection/variables.py +0 -0
  10. csv_detective/explore_csv.py +4 -15
  11. csv_detective/format.py +1 -1
  12. csv_detective/formats/__init__.py +0 -0
  13. csv_detective/formats/adresse.py +0 -0
  14. csv_detective/formats/binary.py +0 -0
  15. csv_detective/formats/booleen.py +0 -0
  16. csv_detective/formats/code_commune_insee.py +0 -0
  17. csv_detective/formats/code_csp_insee.py +0 -0
  18. csv_detective/formats/code_departement.py +0 -0
  19. csv_detective/formats/code_fantoir.py +0 -0
  20. csv_detective/formats/code_import.py +0 -0
  21. csv_detective/formats/code_postal.py +0 -0
  22. csv_detective/formats/code_region.py +0 -0
  23. csv_detective/formats/code_rna.py +0 -0
  24. csv_detective/formats/code_waldec.py +0 -0
  25. csv_detective/formats/commune.py +0 -0
  26. csv_detective/formats/csp_insee.py +0 -0
  27. csv_detective/formats/date.py +1 -10
  28. csv_detective/formats/date_fr.py +0 -0
  29. csv_detective/formats/datetime_aware.py +0 -0
  30. csv_detective/formats/datetime_naive.py +0 -0
  31. csv_detective/formats/datetime_rfc822.py +0 -0
  32. csv_detective/formats/departement.py +0 -0
  33. csv_detective/formats/email.py +0 -0
  34. csv_detective/formats/float.py +0 -0
  35. csv_detective/formats/geojson.py +0 -0
  36. csv_detective/formats/insee_ape700.py +0 -0
  37. csv_detective/formats/insee_canton.py +0 -0
  38. csv_detective/formats/int.py +0 -0
  39. csv_detective/formats/iso_country_code_alpha2.py +0 -0
  40. csv_detective/formats/iso_country_code_alpha3.py +0 -0
  41. csv_detective/formats/iso_country_code_numeric.py +0 -0
  42. csv_detective/formats/jour_de_la_semaine.py +0 -0
  43. csv_detective/formats/json.py +0 -0
  44. csv_detective/formats/latitude_l93.py +0 -0
  45. csv_detective/formats/latitude_wgs.py +0 -0
  46. csv_detective/formats/latitude_wgs_fr_metropole.py +0 -0
  47. csv_detective/formats/latlon_wgs.py +0 -0
  48. csv_detective/formats/longitude_l93.py +0 -0
  49. csv_detective/formats/longitude_wgs.py +0 -0
  50. csv_detective/formats/longitude_wgs_fr_metropole.py +0 -0
  51. csv_detective/formats/lonlat_wgs.py +0 -0
  52. csv_detective/formats/mois_de_lannee.py +0 -0
  53. csv_detective/formats/money.py +0 -0
  54. csv_detective/formats/mongo_object_id.py +0 -0
  55. csv_detective/formats/pays.py +0 -0
  56. csv_detective/formats/percent.py +0 -0
  57. csv_detective/formats/region.py +0 -0
  58. csv_detective/formats/sexe.py +0 -0
  59. csv_detective/formats/siren.py +0 -0
  60. csv_detective/formats/siret.py +0 -0
  61. csv_detective/formats/tel_fr.py +0 -0
  62. csv_detective/formats/uai.py +0 -0
  63. csv_detective/formats/url.py +0 -0
  64. csv_detective/formats/username.py +0 -0
  65. csv_detective/formats/uuid.py +0 -0
  66. csv_detective/formats/year.py +0 -0
  67. csv_detective/output/__init__.py +0 -0
  68. csv_detective/output/dataframe.py +2 -2
  69. csv_detective/output/example.py +0 -0
  70. csv_detective/output/profile.py +1 -1
  71. csv_detective/output/schema.py +0 -0
  72. csv_detective/output/utils.py +0 -0
  73. csv_detective/parsing/__init__.py +0 -0
  74. csv_detective/parsing/columns.py +5 -9
  75. csv_detective/parsing/compression.py +0 -0
  76. csv_detective/parsing/csv.py +0 -0
  77. csv_detective/parsing/excel.py +1 -1
  78. csv_detective/parsing/load.py +12 -11
  79. csv_detective/validate.py +36 -71
  80. {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.12674.dist-info}/METADATA +18 -15
  81. {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.12674.dist-info}/RECORD +22 -41
  82. csv_detective-0.10.12674.dist-info/WHEEL +4 -0
  83. {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.12674.dist-info}/entry_points.txt +1 -0
  84. csv_detective-0.10.4.dev1.dist-info/WHEEL +0 -5
  85. csv_detective-0.10.4.dev1.dist-info/licenses/LICENSE +0 -21
  86. csv_detective-0.10.4.dev1.dist-info/top_level.txt +0 -3
  87. tests/__init__.py +0 -0
  88. tests/data/a_test_file.csv +0 -407
  89. tests/data/a_test_file.json +0 -394
  90. tests/data/b_test_file.csv +0 -7
  91. tests/data/c_test_file.csv +0 -2
  92. tests/data/csv_file +0 -7
  93. tests/data/file.csv.gz +0 -0
  94. tests/data/file.ods +0 -0
  95. tests/data/file.xls +0 -0
  96. tests/data/file.xlsx +0 -0
  97. tests/data/xlsx_file +0 -0
  98. tests/test_example.py +0 -67
  99. tests/test_fields.py +0 -175
  100. tests/test_file.py +0 -469
  101. tests/test_labels.py +0 -26
  102. tests/test_structure.py +0 -45
  103. tests/test_validation.py +0 -163
csv_detective/validate.py CHANGED
@@ -1,13 +1,10 @@
1
1
  import logging
2
- from collections import defaultdict
3
2
 
4
3
  import pandas as pd
5
4
 
6
5
  from csv_detective.format import FormatsManager
7
6
  from csv_detective.parsing.columns import MAX_NUMBER_CATEGORICAL_VALUES, test_col_val
8
7
 
9
- # VALIDATION_CHUNK_SIZE is bigger than (analysis) CHUNK_SIZE because
10
- # it's faster to validate so we can afford to load more rows
11
8
  VALIDATION_CHUNK_SIZE = int(1e5)
12
9
  logging.basicConfig(level=logging.INFO)
13
10
 
@@ -19,9 +16,9 @@ def validate(
19
16
  previous_analysis: dict,
20
17
  verbose: bool = False,
21
18
  skipna: bool = True,
22
- ) -> tuple[bool, dict | None, dict[str, pd.Series] | None]:
19
+ ) -> tuple[bool, pd.DataFrame | None, dict | None, dict[str, pd.Series] | None]:
23
20
  """
24
- Verify is the given file has the same fields and formats as in the given analysis.
21
+ Verify is the given file has the same fields and types as in the given analysis.
25
22
 
26
23
  Args:
27
24
  file_path: the path of the file to validate
@@ -29,15 +26,6 @@ def validate(
29
26
  verbose: whether the code displays the steps it's going through
30
27
  skipna: whether to ignore NaN values in the checks
31
28
  """
32
- if verbose:
33
- logging.info(f"Checking given formats exist")
34
- for col_name, detected in previous_analysis["columns"].items():
35
- if detected["format"] == "string":
36
- continue
37
- elif detected["format"] not in formats:
38
- if verbose:
39
- logging.warning(f"> Unknown format `{detected['format']}` in analysis")
40
- return False, None, None
41
29
  try:
42
30
  if previous_analysis.get("separator"):
43
31
  # loading the table in chunks
@@ -70,94 +58,71 @@ def validate(
70
58
  ]
71
59
  )
72
60
  analysis = {k: v for k, v in previous_analysis.items() if k in ["engine", "sheet_name"]}
61
+ first_chunk = next(chunks)
73
62
  analysis.update(
74
63
  {k: v for k, v in previous_analysis.items() if k in ["header_row_idx", "header"]}
75
64
  )
76
65
  except Exception as e:
77
66
  if verbose:
78
67
  logging.warning(f"> Could not load the file with previous analysis values: {e}")
79
- return False, None, None
68
+ return False, None, None, None
80
69
  if verbose:
81
70
  logging.info("Comparing table with the previous analysis")
71
+ logging.info("- Checking if all columns match")
72
+ if len(first_chunk.columns) != len(previous_analysis["header"]) or any(
73
+ list(first_chunk.columns)[k] != previous_analysis["header"][k]
74
+ for k in range(len(previous_analysis["header"]))
75
+ ):
76
+ if verbose:
77
+ logging.warning("> Columns do not match, proceeding with full analysis")
78
+ return False, None, None, None
79
+ if verbose:
82
80
  logging.info(
83
81
  f"Testing previously detected formats on chunks of {VALIDATION_CHUNK_SIZE} rows"
84
82
  )
85
83
 
86
- # will contain hashes of each row of the file as index and the number of times
87
- # each hash was seen as values; used to compute nb_duplicates
88
- row_hashes_count = pd.Series()
89
- # will contain the number of times each value of each column is seen in the whole file
90
- # used for profile to read the file only once
91
- # naming it "count" to be iso with how col_values are made in detect_formats
92
- col_values: defaultdict[str, pd.Series] = defaultdict(lambda: pd.Series(name="count"))
84
+ # hashing rows to get nb_duplicates
85
+ row_hashes_count = pd.util.hash_pandas_object(first_chunk, index=False).value_counts()
86
+ # getting values for profile to read the file only once
87
+ col_values = {col: first_chunk[col].value_counts(dropna=False) for col in first_chunk.columns}
93
88
  analysis["total_lines"] = 0
94
- checked_values: dict[str, int] = {col_name: 0 for col_name in previous_analysis["columns"]}
95
- valid_values: dict[str, int] = {col_name: 0 for col_name in previous_analysis["columns"]}
96
- for idx, chunk in enumerate(chunks):
89
+ for idx, chunk in enumerate([first_chunk, *chunks]):
97
90
  if verbose:
98
- logging.info(f"- Testing chunk number {idx}")
99
- if idx == 0:
100
- if verbose:
101
- logging.info("Checking if all columns match")
102
- if len(chunk.columns) != len(previous_analysis["header"]) or any(
103
- list(chunk.columns)[k] != previous_analysis["header"][k]
104
- for k in range(len(previous_analysis["header"]))
105
- ):
106
- if verbose:
107
- logging.warning("> Columns in the file do not match those of the analysis")
108
- return False, None, None
91
+ logging.info(f"> Testing chunk number {idx}")
109
92
  analysis["total_lines"] += len(chunk)
110
93
  row_hashes_count = row_hashes_count.add(
111
94
  pd.util.hash_pandas_object(chunk, index=False).value_counts(),
112
95
  fill_value=0,
113
96
  )
114
- for col_name, detected in previous_analysis["columns"].items():
97
+ for col in chunk.columns:
98
+ col_values[col] = col_values[col].add(
99
+ chunk[col].value_counts(dropna=False),
100
+ fill_value=0,
101
+ )
102
+ for col_name, args in previous_analysis["columns"].items():
115
103
  if verbose:
116
- logging.info(f"- Testing {col_name} for {detected['format']}")
117
- if detected["format"] == "string":
104
+ logging.info(f"- Testing {col_name} for {args['format']}")
105
+ if args["format"] == "string":
118
106
  # no test for columns that have not been recognized as a specific format
119
107
  continue
120
- to_check = chunk[col_name].dropna() if skipna else chunk[col_name]
121
- chunk_valid_values = sum(to_check.apply(formats[detected["format"]].func))
122
- if formats[detected["format"]].proportion == 1 and chunk_valid_values < len(to_check):
123
- # we can early stop in this case, not all values are valid while we want 100%
108
+ test_result: float = test_col_val(
109
+ serie=chunk[col_name],
110
+ format=formats[args["format"]],
111
+ skipna=skipna,
112
+ )
113
+ if not bool(test_result):
124
114
  if verbose:
125
- logging.warning(
126
- f"> Test failed for column {col_name} with format {detected['format']}"
127
- )
128
- return False, None, None
129
- checked_values[col_name] += len(to_check)
130
- valid_values[col_name] += chunk_valid_values
131
- col_values[col_name] = (
132
- col_values[col_name]
133
- .add(
134
- chunk[col_name].value_counts(dropna=False),
135
- fill_value=0,
136
- )
137
- .rename_axis(col_name)
138
- ) # rename_axis because *sometimes* pandas doesn't pass on the column's name ¯\_(ツ)_/¯
139
- del chunk
140
- # finally we loop through the formats that accept less than 100% valid values to check the proportion
141
- for col_name, detected in previous_analysis["columns"].items():
142
- if (
143
- checked_values[col_name] > 0
144
- and valid_values[col_name] / checked_values[col_name]
145
- < formats[detected["format"]].proportion
146
- ):
147
- if verbose:
148
- logging.warning(
149
- f"> Test failed for column {col_name} with format {detected['format']}"
150
- )
151
- return False, None, None
115
+ logging.warning("> Test failed, proceeding with full analysis")
116
+ return False, first_chunk, analysis, None
152
117
  if verbose:
153
118
  logging.info("> All checks successful")
154
119
  analysis["nb_duplicates"] = sum(row_hashes_count > 1)
155
- del row_hashes_count
156
120
  analysis["categorical"] = [
157
121
  col for col, values in col_values.items() if len(values) <= MAX_NUMBER_CATEGORICAL_VALUES
158
122
  ]
159
123
  return (
160
124
  True,
125
+ first_chunk,
161
126
  analysis
162
127
  | {
163
128
  k: previous_analysis[k]
@@ -1,29 +1,32 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.3
2
2
  Name: csv-detective
3
- Version: 0.10.4.dev1
3
+ Version: 0.10.12674
4
4
  Summary: Detect tabular files column content
5
- Author-email: "data.gouv.fr" <opendatateam@data.gouv.fr>
6
- License: MIT
7
- Project-URL: Source, https://github.com/datagouv/csv-detective
8
5
  Keywords: CSV,data processing,encoding,guess,parser,tabular
9
- Requires-Python: <3.15,>=3.10
10
- Description-Content-Type: text/markdown
11
- License-File: LICENSE
12
- Requires-Dist: dateparser<2,>=1.2.0
6
+ Author: data.gouv.fr
7
+ Author-email: data.gouv.fr <opendatateam@data.gouv.fr>
8
+ License: MIT
9
+ Requires-Dist: dateparser>=1.2.0,<2
13
10
  Requires-Dist: faust-cchardet==2.1.19
14
- Requires-Dist: pandas<3,>=2.2.0
15
- Requires-Dist: python-dateutil<3,>=2.8.2
16
- Requires-Dist: Unidecode<2,>=1.3.6
11
+ Requires-Dist: pandas>=2.2.0,<3
12
+ Requires-Dist: python-dateutil>=2.8.2,<3
13
+ Requires-Dist: unidecode>=1.3.6,<2
17
14
  Requires-Dist: openpyxl>=3.1.5
18
15
  Requires-Dist: xlrd>=2.0.1
19
16
  Requires-Dist: odfpy>=1.4.1
20
- Requires-Dist: requests<3,>=2.32.3
17
+ Requires-Dist: requests>=2.32.3,<3
21
18
  Requires-Dist: python-magic>=0.4.27
22
19
  Requires-Dist: frformat==0.4.0
23
- Requires-Dist: Faker>=33.0.0
20
+ Requires-Dist: faker>=33.0.0
24
21
  Requires-Dist: rstr>=3.2.2
25
22
  Requires-Dist: more-itertools>=10.8.0
26
- Dynamic: license-file
23
+ Requires-Dist: pytest>=8.3.0 ; extra == 'dev'
24
+ Requires-Dist: responses>=0.25.0 ; extra == 'dev'
25
+ Requires-Dist: ruff>=0.9.3 ; extra == 'dev'
26
+ Requires-Python: >=3.10, <3.15
27
+ Project-URL: Source, https://github.com/datagouv/csv-detective
28
+ Provides-Extra: dev
29
+ Description-Content-Type: text/markdown
27
30
 
28
31
  # CSV Detective
29
32
 
@@ -1,18 +1,16 @@
1
1
  csv_detective/__init__.py,sha256=zlYElTOp_I2_VG7ZdOTuAu0wuCXSc0cr3sH6gtk2bcg,152
2
2
  csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
3
- csv_detective/explore_csv.py,sha256=M8jabAP08raPY438v5UeBqJy3bBudTeuo-UNe2unWyE,7639
4
- csv_detective/format.py,sha256=VTdwg4gp9pq6WYhbkCxv9X2hXq0fMrzfooFchmIL0as,2911
5
- csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
6
- csv_detective/validate.py,sha256=7k0GC5AsTn5BbsRChetZZDmnTGiYLe40qPKiP3GruYs,7495
7
3
  csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
4
  csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
9
5
  csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
10
6
  csv_detective/detection/engine.py,sha256=wQeDKpp2DKF-HcS1R8H6GgQyaUgQme4szPtEHgAjBII,1552
11
- csv_detective/detection/formats.py,sha256=cgECpxRaygwnedPhOteG1P_697qCoceeDrKK9G_O-u8,4812
12
- csv_detective/detection/headers.py,sha256=lnbWRxkI6rdyoWGtmxSfsPkqNjS0Nlpgw-pVevtmBP0,899
13
- csv_detective/detection/rows.py,sha256=JQsmKP8-i8wzcZIWI_13LUer5mpYRIqaKg6qW01ZO3A,750
7
+ csv_detective/detection/formats.py,sha256=9aIE4gwTN8c8pa-kofeJ7zalo8NqjGZabYD-G79kV5I,4734
8
+ csv_detective/detection/headers.py,sha256=95pTL524Sy5PGxyQ03ofFUaamvlmkxTJQe8u6HfzOkU,1051
9
+ csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
14
10
  csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
15
11
  csv_detective/detection/variables.py,sha256=-QtZOB96z3pWbqnZ-c1RU3yzoYqcO61A0JzeS6JbkxY,3576
12
+ csv_detective/explore_csv.py,sha256=qSf6N3tbp43BUMJF5wiXz3aYKaTez6ro-75KL2Arci4,7174
13
+ csv_detective/format.py,sha256=VglcxWBmjTvWNMhwSUZDfMdJcK9lAUum64Jxvm70AJ4,2898
16
14
  csv_detective/formats/__init__.py,sha256=Egiy29kcG3Oz2eE2maYhD3wP29zOSOWyRlOpGD5LGvU,318
17
15
  csv_detective/formats/adresse.py,sha256=79tIXeC1AUjUG9m0XGZUcP_BXvmLgd1M8XVfxgLNGDE,1966
18
16
  csv_detective/formats/binary.py,sha256=26qrbqv_Dqu0LhVPpQOz2xzglxse7Nz5EasbQ0xP38c,715
@@ -28,7 +26,12 @@ csv_detective/formats/code_rna.py,sha256=o6Kptrux6T2bSnWHi7MBCqIfVKbMMeN4dHlxxzk
28
26
  csv_detective/formats/code_waldec.py,sha256=j4-xpj_73c7IdgLoZJY_kRVj3HkpB7RFfGPN4NwPmVo,303
29
27
  csv_detective/formats/commune.py,sha256=QVscVy5Ij9kdzKJgIG2aFC_v1IRsov5M9Zkj_SHDWgs,541
30
28
  csv_detective/formats/csp_insee.py,sha256=y1w9zPQvijQi5v1Cuye0aX87ZVDC4FeFx1YC0dLqqp8,688
31
- csv_detective/formats/date.py,sha256=caMMvcqkbON8Cxp9oDYZsfmkSXuu-PiiJi8YUbypBso,3167
29
+ csv_detective/formats/data/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
30
+ csv_detective/formats/data/insee_ape700.txt,sha256=-_N-zAmcT7rK7ACRfsrM01Ton4_XtZGcNk-7lU28VHU,4397
31
+ csv_detective/formats/data/iso_country_code_alpha2.txt,sha256=mLt_qcQ6D8hfy9zdi7fAK_zON1ojReKlKMA8c2VDoRU,752
32
+ csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=XFPdGBsyZCBg4D8IDn6VgwsycCwYVfuqPbyHfNeqGv0,1003
33
+ csv_detective/formats/data/iso_country_code_numeric.txt,sha256=sdGpn0PqDMlc59-7prThkihHrf7mwB6j5uEHpxGvLFE,1003
34
+ csv_detective/formats/date.py,sha256=Q6w1azLKNshJJVLOPBHj-77ZinXYMW_EKp_BGDshLLE,2802
32
35
  csv_detective/formats/date_fr.py,sha256=YnNXSgT6QekfTUJoS5yuRX8LeK-fmVDgLgVP9cP0e4M,505
33
36
  csv_detective/formats/datetime_aware.py,sha256=izKo6CA-MNIzmmM3Br4-FOESyqCS_YYK8N4V9D6CVEI,1909
34
37
  csv_detective/formats/datetime_naive.py,sha256=DZ0apAm3vIy4cdm5DynAeRueI_8rhuHYQtAOZ5yyZ5k,1681
@@ -68,44 +71,22 @@ csv_detective/formats/url.py,sha256=m3i_XhFRFaAxSACS05XfciQ-oyTCsP_0TASShCY2t7A,
68
71
  csv_detective/formats/username.py,sha256=6qviaFOtF2wg-gtvs0N8548JxFNE67Ue3a0JD0Kv7TQ,261
69
72
  csv_detective/formats/uuid.py,sha256=LxkRZFAOlfig5KKrravO9bgyYjmRBegzOtGyzjopVNc,352
70
73
  csv_detective/formats/year.py,sha256=tMc2HHr6Jga3PGWjmeHweK3G17DsjkIpIUUkCecXAm4,362
71
- csv_detective/formats/data/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
72
- csv_detective/formats/data/insee_ape700.txt,sha256=-_N-zAmcT7rK7ACRfsrM01Ton4_XtZGcNk-7lU28VHU,4397
73
- csv_detective/formats/data/iso_country_code_alpha2.txt,sha256=mLt_qcQ6D8hfy9zdi7fAK_zON1ojReKlKMA8c2VDoRU,752
74
- csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=XFPdGBsyZCBg4D8IDn6VgwsycCwYVfuqPbyHfNeqGv0,1003
75
- csv_detective/formats/data/iso_country_code_numeric.txt,sha256=sdGpn0PqDMlc59-7prThkihHrf7mwB6j5uEHpxGvLFE,1003
76
74
  csv_detective/output/__init__.py,sha256=ALSq_tgX7rGyh--7rmbKz8wHkmResN0h7mNujndow3w,2103
77
- csv_detective/output/dataframe.py,sha256=juBMdj0eiL8c3OrJJ3kCf15Qs4-CFQfHqh91FnVbG9E,3656
75
+ csv_detective/output/dataframe.py,sha256=QX5vplx0AOKgnwwJ6dKvDHWRX9IGPStax-svXEyweJ8,3584
78
76
  csv_detective/output/example.py,sha256=8LWheSBYCeDFfarbnmzBrdCbTd8Alh1U4pfXMKfabOw,8630
79
- csv_detective/output/profile.py,sha256=R9YMl-dANde69RXkFlZpvMDBsX7e1SyMAnlW8p1XNNM,4984
77
+ csv_detective/output/profile.py,sha256=ADr5DwuvwcBYxugjN38fHm11l6ivfzGHXPd8a87Ht-s,4985
80
78
  csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
81
79
  csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
82
80
  csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
- csv_detective/parsing/columns.py,sha256=MFtEJFLsFdlKdM5AXtgXbf5p6HRW6DuOC4XnxhFMpIY,9344
81
+ csv_detective/parsing/columns.py,sha256=rb5JywbKnYCT3Jb0ZaG1BnyPVtB3gy5mSD-K7qcOl8I,9257
84
82
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
85
83
  csv_detective/parsing/csv.py,sha256=5rw6gXZFQC1T4NT9CnW0AumidrYOkF8kjrfWGmk949I,1716
86
- csv_detective/parsing/excel.py,sha256=pX6dbhAdAdbRpoGcrGsL1lSaF-fbzEb4WcvwcCGEgFw,6978
87
- csv_detective/parsing/load.py,sha256=1Fk43ikIOJwtWJUY-e8oNeNOk4MMtpmZV7s-VbQBS1k,4345
84
+ csv_detective/parsing/excel.py,sha256=tb65I78tdYlZci_tzvvQt8U6bZSYKjeVdn2CEvsET1o,6972
85
+ csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
88
86
  csv_detective/parsing/text.py,sha256=yDAcop5xJQc25UtbZcV0guHXAZQfm-H8WuJORTy8Rr8,1734
89
- csv_detective-0.10.4.dev1.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
90
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
- tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
92
- tests/test_fields.py,sha256=DSI-ZXDcRt69iZArYZZAr_3OEb-qvwgOVBZxmYAKIkI,5918
93
- tests/test_file.py,sha256=Ov9NGvZQxeoehxTpfcsnwEybebM0tnbmcRsFwe46cjg,15277
94
- tests/test_labels.py,sha256=lgxRbLrGV1C-MkASf3KIQ120BG-UHzFQ4pqDWaeBvaw,539
95
- tests/test_structure.py,sha256=XDbviuuvk-0Mu9Y9PI6He2e5hry2dXVJ6yBVwEqF_2o,1043
96
- tests/test_validation.py,sha256=309k3Axgbp-1Wh6qvCj2BpeMBp3HXzLi5j9UKm1bRQs,5384
97
- tests/data/a_test_file.csv,sha256=SOHjseGYqZer9yu3Bd3oS12Vw8MFsebo0BzrLZ_R4Cc,68871
98
- tests/data/a_test_file.json,sha256=fB9bCpAMFPxFw8KxHRFlgRqjYG819QVGrCQWxQvwkvo,10542
99
- tests/data/b_test_file.csv,sha256=wJGX62KhYjZi62De2XjZWClAzeRFEBsg3ET0IPX1BNU,98
100
- tests/data/c_test_file.csv,sha256=dz6axMyFscHIWR2Brqia_jvlBfQ30l1rFrxvcTqsmJ8,36
101
- tests/data/csv_file,sha256=nMAQx2PrQliu3czifCHXLyruZbvCNTyYqwZ4JYzImqA,70
102
- tests/data/file.csv.gz,sha256=mfGfqG5mGlojCs05A0IF7IUZe5r87bAe2FuQ0Uh5ZMI,108
103
- tests/data/file.ods,sha256=4dR7zWptz5djALIBVeWHQ20GaZNfA63fevIJGFIk1_U,11832
104
- tests/data/file.xls,sha256=QYmNX3FF0QfcQSzYQMtaMJaepJf5EZpDa1miKc4wMdQ,21495
105
- tests/data/file.xlsx,sha256=naWzL02PK4pdIjMzfEyfSW9GQhkYYd_e7bpJvB8Pb2w,8314
106
- tests/data/xlsx_file,sha256=NyOyN_rIe7ryJuHQLqjxVdKCc8V4s5pxyHl6wWFykCM,8305
107
- csv_detective-0.10.4.dev1.dist-info/METADATA,sha256=le1Rn1JIh8MoIf_RTc3Fi9DOOlvug4eR-Mwpw4AK0To,10925
108
- csv_detective-0.10.4.dev1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
109
- csv_detective-0.10.4.dev1.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
110
- csv_detective-0.10.4.dev1.dist-info/top_level.txt,sha256=KDI4gyOpkmormGgUvSWrE3jen2e0unIsxR2b96DRvcw,25
111
- csv_detective-0.10.4.dev1.dist-info/RECORD,,
87
+ csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
88
+ csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
89
+ csv_detective-0.10.12674.dist-info/WHEEL,sha256=XjEbIc5-wIORjWaafhI6vBtlxDBp7S9KiujWF1EM7Ak,79
90
+ csv_detective-0.10.12674.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
91
+ csv_detective-0.10.12674.dist-info/METADATA,sha256=TZIyuSI6QBmDeZoNZdYqarZ2R_GvaGazjB5WSkt8PFI,11060
92
+ csv_detective-0.10.12674.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.9.25
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
2
  csv_detective = csv_detective.cli:run
3
+
@@ -1,5 +0,0 @@
1
- Wheel-Version: 1.0
2
- Generator: setuptools (80.10.2)
3
- Root-Is-Purelib: true
4
- Tag: py3-none-any
5
-
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 data.gouv.fr
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
@@ -1,3 +0,0 @@
1
- csv_detective
2
- dist
3
- tests
tests/__init__.py DELETED
File without changes