csv-detective 0.10.4.dev1__py3-none-any.whl → 0.10.12674__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detection/__init__.py +0 -0
- csv_detective/detection/columns.py +0 -0
- csv_detective/detection/encoding.py +0 -0
- csv_detective/detection/engine.py +0 -0
- csv_detective/detection/formats.py +0 -2
- csv_detective/detection/headers.py +14 -12
- csv_detective/detection/rows.py +1 -1
- csv_detective/detection/separator.py +0 -0
- csv_detective/detection/variables.py +0 -0
- csv_detective/explore_csv.py +4 -15
- csv_detective/format.py +1 -1
- csv_detective/formats/__init__.py +0 -0
- csv_detective/formats/adresse.py +0 -0
- csv_detective/formats/binary.py +0 -0
- csv_detective/formats/booleen.py +0 -0
- csv_detective/formats/code_commune_insee.py +0 -0
- csv_detective/formats/code_csp_insee.py +0 -0
- csv_detective/formats/code_departement.py +0 -0
- csv_detective/formats/code_fantoir.py +0 -0
- csv_detective/formats/code_import.py +0 -0
- csv_detective/formats/code_postal.py +0 -0
- csv_detective/formats/code_region.py +0 -0
- csv_detective/formats/code_rna.py +0 -0
- csv_detective/formats/code_waldec.py +0 -0
- csv_detective/formats/commune.py +0 -0
- csv_detective/formats/csp_insee.py +0 -0
- csv_detective/formats/date.py +1 -10
- csv_detective/formats/date_fr.py +0 -0
- csv_detective/formats/datetime_aware.py +0 -0
- csv_detective/formats/datetime_naive.py +0 -0
- csv_detective/formats/datetime_rfc822.py +0 -0
- csv_detective/formats/departement.py +0 -0
- csv_detective/formats/email.py +0 -0
- csv_detective/formats/float.py +0 -0
- csv_detective/formats/geojson.py +0 -0
- csv_detective/formats/insee_ape700.py +0 -0
- csv_detective/formats/insee_canton.py +0 -0
- csv_detective/formats/int.py +0 -0
- csv_detective/formats/iso_country_code_alpha2.py +0 -0
- csv_detective/formats/iso_country_code_alpha3.py +0 -0
- csv_detective/formats/iso_country_code_numeric.py +0 -0
- csv_detective/formats/jour_de_la_semaine.py +0 -0
- csv_detective/formats/json.py +0 -0
- csv_detective/formats/latitude_l93.py +0 -0
- csv_detective/formats/latitude_wgs.py +0 -0
- csv_detective/formats/latitude_wgs_fr_metropole.py +0 -0
- csv_detective/formats/latlon_wgs.py +0 -0
- csv_detective/formats/longitude_l93.py +0 -0
- csv_detective/formats/longitude_wgs.py +0 -0
- csv_detective/formats/longitude_wgs_fr_metropole.py +0 -0
- csv_detective/formats/lonlat_wgs.py +0 -0
- csv_detective/formats/mois_de_lannee.py +0 -0
- csv_detective/formats/money.py +0 -0
- csv_detective/formats/mongo_object_id.py +0 -0
- csv_detective/formats/pays.py +0 -0
- csv_detective/formats/percent.py +0 -0
- csv_detective/formats/region.py +0 -0
- csv_detective/formats/sexe.py +0 -0
- csv_detective/formats/siren.py +0 -0
- csv_detective/formats/siret.py +0 -0
- csv_detective/formats/tel_fr.py +0 -0
- csv_detective/formats/uai.py +0 -0
- csv_detective/formats/url.py +0 -0
- csv_detective/formats/username.py +0 -0
- csv_detective/formats/uuid.py +0 -0
- csv_detective/formats/year.py +0 -0
- csv_detective/output/__init__.py +0 -0
- csv_detective/output/dataframe.py +2 -2
- csv_detective/output/example.py +0 -0
- csv_detective/output/profile.py +1 -1
- csv_detective/output/schema.py +0 -0
- csv_detective/output/utils.py +0 -0
- csv_detective/parsing/__init__.py +0 -0
- csv_detective/parsing/columns.py +5 -9
- csv_detective/parsing/compression.py +0 -0
- csv_detective/parsing/csv.py +0 -0
- csv_detective/parsing/excel.py +1 -1
- csv_detective/parsing/load.py +12 -11
- csv_detective/validate.py +36 -71
- {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.12674.dist-info}/METADATA +18 -15
- {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.12674.dist-info}/RECORD +22 -41
- csv_detective-0.10.12674.dist-info/WHEEL +4 -0
- {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.12674.dist-info}/entry_points.txt +1 -0
- csv_detective-0.10.4.dev1.dist-info/WHEEL +0 -5
- csv_detective-0.10.4.dev1.dist-info/licenses/LICENSE +0 -21
- csv_detective-0.10.4.dev1.dist-info/top_level.txt +0 -3
- tests/__init__.py +0 -0
- tests/data/a_test_file.csv +0 -407
- tests/data/a_test_file.json +0 -394
- tests/data/b_test_file.csv +0 -7
- tests/data/c_test_file.csv +0 -2
- tests/data/csv_file +0 -7
- tests/data/file.csv.gz +0 -0
- tests/data/file.ods +0 -0
- tests/data/file.xls +0 -0
- tests/data/file.xlsx +0 -0
- tests/data/xlsx_file +0 -0
- tests/test_example.py +0 -67
- tests/test_fields.py +0 -175
- tests/test_file.py +0 -469
- tests/test_labels.py +0 -26
- tests/test_structure.py +0 -45
- tests/test_validation.py +0 -163
csv_detective/validate.py
CHANGED
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from collections import defaultdict
|
|
3
2
|
|
|
4
3
|
import pandas as pd
|
|
5
4
|
|
|
6
5
|
from csv_detective.format import FormatsManager
|
|
7
6
|
from csv_detective.parsing.columns import MAX_NUMBER_CATEGORICAL_VALUES, test_col_val
|
|
8
7
|
|
|
9
|
-
# VALIDATION_CHUNK_SIZE is bigger than (analysis) CHUNK_SIZE because
|
|
10
|
-
# it's faster to validate so we can afford to load more rows
|
|
11
8
|
VALIDATION_CHUNK_SIZE = int(1e5)
|
|
12
9
|
logging.basicConfig(level=logging.INFO)
|
|
13
10
|
|
|
@@ -19,9 +16,9 @@ def validate(
|
|
|
19
16
|
previous_analysis: dict,
|
|
20
17
|
verbose: bool = False,
|
|
21
18
|
skipna: bool = True,
|
|
22
|
-
) -> tuple[bool, dict | None, dict[str, pd.Series] | None]:
|
|
19
|
+
) -> tuple[bool, pd.DataFrame | None, dict | None, dict[str, pd.Series] | None]:
|
|
23
20
|
"""
|
|
24
|
-
Verify is the given file has the same fields and
|
|
21
|
+
Verify is the given file has the same fields and types as in the given analysis.
|
|
25
22
|
|
|
26
23
|
Args:
|
|
27
24
|
file_path: the path of the file to validate
|
|
@@ -29,15 +26,6 @@ def validate(
|
|
|
29
26
|
verbose: whether the code displays the steps it's going through
|
|
30
27
|
skipna: whether to ignore NaN values in the checks
|
|
31
28
|
"""
|
|
32
|
-
if verbose:
|
|
33
|
-
logging.info(f"Checking given formats exist")
|
|
34
|
-
for col_name, detected in previous_analysis["columns"].items():
|
|
35
|
-
if detected["format"] == "string":
|
|
36
|
-
continue
|
|
37
|
-
elif detected["format"] not in formats:
|
|
38
|
-
if verbose:
|
|
39
|
-
logging.warning(f"> Unknown format `{detected['format']}` in analysis")
|
|
40
|
-
return False, None, None
|
|
41
29
|
try:
|
|
42
30
|
if previous_analysis.get("separator"):
|
|
43
31
|
# loading the table in chunks
|
|
@@ -70,94 +58,71 @@ def validate(
|
|
|
70
58
|
]
|
|
71
59
|
)
|
|
72
60
|
analysis = {k: v for k, v in previous_analysis.items() if k in ["engine", "sheet_name"]}
|
|
61
|
+
first_chunk = next(chunks)
|
|
73
62
|
analysis.update(
|
|
74
63
|
{k: v for k, v in previous_analysis.items() if k in ["header_row_idx", "header"]}
|
|
75
64
|
)
|
|
76
65
|
except Exception as e:
|
|
77
66
|
if verbose:
|
|
78
67
|
logging.warning(f"> Could not load the file with previous analysis values: {e}")
|
|
79
|
-
return False, None, None
|
|
68
|
+
return False, None, None, None
|
|
80
69
|
if verbose:
|
|
81
70
|
logging.info("Comparing table with the previous analysis")
|
|
71
|
+
logging.info("- Checking if all columns match")
|
|
72
|
+
if len(first_chunk.columns) != len(previous_analysis["header"]) or any(
|
|
73
|
+
list(first_chunk.columns)[k] != previous_analysis["header"][k]
|
|
74
|
+
for k in range(len(previous_analysis["header"]))
|
|
75
|
+
):
|
|
76
|
+
if verbose:
|
|
77
|
+
logging.warning("> Columns do not match, proceeding with full analysis")
|
|
78
|
+
return False, None, None, None
|
|
79
|
+
if verbose:
|
|
82
80
|
logging.info(
|
|
83
81
|
f"Testing previously detected formats on chunks of {VALIDATION_CHUNK_SIZE} rows"
|
|
84
82
|
)
|
|
85
83
|
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
# used for profile to read the file only once
|
|
91
|
-
# naming it "count" to be iso with how col_values are made in detect_formats
|
|
92
|
-
col_values: defaultdict[str, pd.Series] = defaultdict(lambda: pd.Series(name="count"))
|
|
84
|
+
# hashing rows to get nb_duplicates
|
|
85
|
+
row_hashes_count = pd.util.hash_pandas_object(first_chunk, index=False).value_counts()
|
|
86
|
+
# getting values for profile to read the file only once
|
|
87
|
+
col_values = {col: first_chunk[col].value_counts(dropna=False) for col in first_chunk.columns}
|
|
93
88
|
analysis["total_lines"] = 0
|
|
94
|
-
|
|
95
|
-
valid_values: dict[str, int] = {col_name: 0 for col_name in previous_analysis["columns"]}
|
|
96
|
-
for idx, chunk in enumerate(chunks):
|
|
89
|
+
for idx, chunk in enumerate([first_chunk, *chunks]):
|
|
97
90
|
if verbose:
|
|
98
|
-
logging.info(f"
|
|
99
|
-
if idx == 0:
|
|
100
|
-
if verbose:
|
|
101
|
-
logging.info("Checking if all columns match")
|
|
102
|
-
if len(chunk.columns) != len(previous_analysis["header"]) or any(
|
|
103
|
-
list(chunk.columns)[k] != previous_analysis["header"][k]
|
|
104
|
-
for k in range(len(previous_analysis["header"]))
|
|
105
|
-
):
|
|
106
|
-
if verbose:
|
|
107
|
-
logging.warning("> Columns in the file do not match those of the analysis")
|
|
108
|
-
return False, None, None
|
|
91
|
+
logging.info(f"> Testing chunk number {idx}")
|
|
109
92
|
analysis["total_lines"] += len(chunk)
|
|
110
93
|
row_hashes_count = row_hashes_count.add(
|
|
111
94
|
pd.util.hash_pandas_object(chunk, index=False).value_counts(),
|
|
112
95
|
fill_value=0,
|
|
113
96
|
)
|
|
114
|
-
for
|
|
97
|
+
for col in chunk.columns:
|
|
98
|
+
col_values[col] = col_values[col].add(
|
|
99
|
+
chunk[col].value_counts(dropna=False),
|
|
100
|
+
fill_value=0,
|
|
101
|
+
)
|
|
102
|
+
for col_name, args in previous_analysis["columns"].items():
|
|
115
103
|
if verbose:
|
|
116
|
-
logging.info(f"- Testing {col_name} for {
|
|
117
|
-
if
|
|
104
|
+
logging.info(f"- Testing {col_name} for {args['format']}")
|
|
105
|
+
if args["format"] == "string":
|
|
118
106
|
# no test for columns that have not been recognized as a specific format
|
|
119
107
|
continue
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
108
|
+
test_result: float = test_col_val(
|
|
109
|
+
serie=chunk[col_name],
|
|
110
|
+
format=formats[args["format"]],
|
|
111
|
+
skipna=skipna,
|
|
112
|
+
)
|
|
113
|
+
if not bool(test_result):
|
|
124
114
|
if verbose:
|
|
125
|
-
logging.warning(
|
|
126
|
-
|
|
127
|
-
)
|
|
128
|
-
return False, None, None
|
|
129
|
-
checked_values[col_name] += len(to_check)
|
|
130
|
-
valid_values[col_name] += chunk_valid_values
|
|
131
|
-
col_values[col_name] = (
|
|
132
|
-
col_values[col_name]
|
|
133
|
-
.add(
|
|
134
|
-
chunk[col_name].value_counts(dropna=False),
|
|
135
|
-
fill_value=0,
|
|
136
|
-
)
|
|
137
|
-
.rename_axis(col_name)
|
|
138
|
-
) # rename_axis because *sometimes* pandas doesn't pass on the column's name ¯\_(ツ)_/¯
|
|
139
|
-
del chunk
|
|
140
|
-
# finally we loop through the formats that accept less than 100% valid values to check the proportion
|
|
141
|
-
for col_name, detected in previous_analysis["columns"].items():
|
|
142
|
-
if (
|
|
143
|
-
checked_values[col_name] > 0
|
|
144
|
-
and valid_values[col_name] / checked_values[col_name]
|
|
145
|
-
< formats[detected["format"]].proportion
|
|
146
|
-
):
|
|
147
|
-
if verbose:
|
|
148
|
-
logging.warning(
|
|
149
|
-
f"> Test failed for column {col_name} with format {detected['format']}"
|
|
150
|
-
)
|
|
151
|
-
return False, None, None
|
|
115
|
+
logging.warning("> Test failed, proceeding with full analysis")
|
|
116
|
+
return False, first_chunk, analysis, None
|
|
152
117
|
if verbose:
|
|
153
118
|
logging.info("> All checks successful")
|
|
154
119
|
analysis["nb_duplicates"] = sum(row_hashes_count > 1)
|
|
155
|
-
del row_hashes_count
|
|
156
120
|
analysis["categorical"] = [
|
|
157
121
|
col for col, values in col_values.items() if len(values) <= MAX_NUMBER_CATEGORICAL_VALUES
|
|
158
122
|
]
|
|
159
123
|
return (
|
|
160
124
|
True,
|
|
125
|
+
first_chunk,
|
|
161
126
|
analysis
|
|
162
127
|
| {
|
|
163
128
|
k: previous_analysis[k]
|
|
@@ -1,29 +1,32 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: csv-detective
|
|
3
|
-
Version: 0.10.
|
|
3
|
+
Version: 0.10.12674
|
|
4
4
|
Summary: Detect tabular files column content
|
|
5
|
-
Author-email: "data.gouv.fr" <opendatateam@data.gouv.fr>
|
|
6
|
-
License: MIT
|
|
7
|
-
Project-URL: Source, https://github.com/datagouv/csv-detective
|
|
8
5
|
Keywords: CSV,data processing,encoding,guess,parser,tabular
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
License
|
|
12
|
-
Requires-Dist: dateparser
|
|
6
|
+
Author: data.gouv.fr
|
|
7
|
+
Author-email: data.gouv.fr <opendatateam@data.gouv.fr>
|
|
8
|
+
License: MIT
|
|
9
|
+
Requires-Dist: dateparser>=1.2.0,<2
|
|
13
10
|
Requires-Dist: faust-cchardet==2.1.19
|
|
14
|
-
Requires-Dist: pandas
|
|
15
|
-
Requires-Dist: python-dateutil
|
|
16
|
-
Requires-Dist:
|
|
11
|
+
Requires-Dist: pandas>=2.2.0,<3
|
|
12
|
+
Requires-Dist: python-dateutil>=2.8.2,<3
|
|
13
|
+
Requires-Dist: unidecode>=1.3.6,<2
|
|
17
14
|
Requires-Dist: openpyxl>=3.1.5
|
|
18
15
|
Requires-Dist: xlrd>=2.0.1
|
|
19
16
|
Requires-Dist: odfpy>=1.4.1
|
|
20
|
-
Requires-Dist: requests
|
|
17
|
+
Requires-Dist: requests>=2.32.3,<3
|
|
21
18
|
Requires-Dist: python-magic>=0.4.27
|
|
22
19
|
Requires-Dist: frformat==0.4.0
|
|
23
|
-
Requires-Dist:
|
|
20
|
+
Requires-Dist: faker>=33.0.0
|
|
24
21
|
Requires-Dist: rstr>=3.2.2
|
|
25
22
|
Requires-Dist: more-itertools>=10.8.0
|
|
26
|
-
|
|
23
|
+
Requires-Dist: pytest>=8.3.0 ; extra == 'dev'
|
|
24
|
+
Requires-Dist: responses>=0.25.0 ; extra == 'dev'
|
|
25
|
+
Requires-Dist: ruff>=0.9.3 ; extra == 'dev'
|
|
26
|
+
Requires-Python: >=3.10, <3.15
|
|
27
|
+
Project-URL: Source, https://github.com/datagouv/csv-detective
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
27
30
|
|
|
28
31
|
# CSV Detective
|
|
29
32
|
|
|
@@ -1,18 +1,16 @@
|
|
|
1
1
|
csv_detective/__init__.py,sha256=zlYElTOp_I2_VG7ZdOTuAu0wuCXSc0cr3sH6gtk2bcg,152
|
|
2
2
|
csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
|
|
3
|
-
csv_detective/explore_csv.py,sha256=M8jabAP08raPY438v5UeBqJy3bBudTeuo-UNe2unWyE,7639
|
|
4
|
-
csv_detective/format.py,sha256=VTdwg4gp9pq6WYhbkCxv9X2hXq0fMrzfooFchmIL0as,2911
|
|
5
|
-
csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
|
|
6
|
-
csv_detective/validate.py,sha256=7k0GC5AsTn5BbsRChetZZDmnTGiYLe40qPKiP3GruYs,7495
|
|
7
3
|
csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
4
|
csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
|
|
9
5
|
csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
|
|
10
6
|
csv_detective/detection/engine.py,sha256=wQeDKpp2DKF-HcS1R8H6GgQyaUgQme4szPtEHgAjBII,1552
|
|
11
|
-
csv_detective/detection/formats.py,sha256=
|
|
12
|
-
csv_detective/detection/headers.py,sha256=
|
|
13
|
-
csv_detective/detection/rows.py,sha256=
|
|
7
|
+
csv_detective/detection/formats.py,sha256=9aIE4gwTN8c8pa-kofeJ7zalo8NqjGZabYD-G79kV5I,4734
|
|
8
|
+
csv_detective/detection/headers.py,sha256=95pTL524Sy5PGxyQ03ofFUaamvlmkxTJQe8u6HfzOkU,1051
|
|
9
|
+
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
14
10
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
15
11
|
csv_detective/detection/variables.py,sha256=-QtZOB96z3pWbqnZ-c1RU3yzoYqcO61A0JzeS6JbkxY,3576
|
|
12
|
+
csv_detective/explore_csv.py,sha256=qSf6N3tbp43BUMJF5wiXz3aYKaTez6ro-75KL2Arci4,7174
|
|
13
|
+
csv_detective/format.py,sha256=VglcxWBmjTvWNMhwSUZDfMdJcK9lAUum64Jxvm70AJ4,2898
|
|
16
14
|
csv_detective/formats/__init__.py,sha256=Egiy29kcG3Oz2eE2maYhD3wP29zOSOWyRlOpGD5LGvU,318
|
|
17
15
|
csv_detective/formats/adresse.py,sha256=79tIXeC1AUjUG9m0XGZUcP_BXvmLgd1M8XVfxgLNGDE,1966
|
|
18
16
|
csv_detective/formats/binary.py,sha256=26qrbqv_Dqu0LhVPpQOz2xzglxse7Nz5EasbQ0xP38c,715
|
|
@@ -28,7 +26,12 @@ csv_detective/formats/code_rna.py,sha256=o6Kptrux6T2bSnWHi7MBCqIfVKbMMeN4dHlxxzk
|
|
|
28
26
|
csv_detective/formats/code_waldec.py,sha256=j4-xpj_73c7IdgLoZJY_kRVj3HkpB7RFfGPN4NwPmVo,303
|
|
29
27
|
csv_detective/formats/commune.py,sha256=QVscVy5Ij9kdzKJgIG2aFC_v1IRsov5M9Zkj_SHDWgs,541
|
|
30
28
|
csv_detective/formats/csp_insee.py,sha256=y1w9zPQvijQi5v1Cuye0aX87ZVDC4FeFx1YC0dLqqp8,688
|
|
31
|
-
csv_detective/formats/
|
|
29
|
+
csv_detective/formats/data/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
30
|
+
csv_detective/formats/data/insee_ape700.txt,sha256=-_N-zAmcT7rK7ACRfsrM01Ton4_XtZGcNk-7lU28VHU,4397
|
|
31
|
+
csv_detective/formats/data/iso_country_code_alpha2.txt,sha256=mLt_qcQ6D8hfy9zdi7fAK_zON1ojReKlKMA8c2VDoRU,752
|
|
32
|
+
csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=XFPdGBsyZCBg4D8IDn6VgwsycCwYVfuqPbyHfNeqGv0,1003
|
|
33
|
+
csv_detective/formats/data/iso_country_code_numeric.txt,sha256=sdGpn0PqDMlc59-7prThkihHrf7mwB6j5uEHpxGvLFE,1003
|
|
34
|
+
csv_detective/formats/date.py,sha256=Q6w1azLKNshJJVLOPBHj-77ZinXYMW_EKp_BGDshLLE,2802
|
|
32
35
|
csv_detective/formats/date_fr.py,sha256=YnNXSgT6QekfTUJoS5yuRX8LeK-fmVDgLgVP9cP0e4M,505
|
|
33
36
|
csv_detective/formats/datetime_aware.py,sha256=izKo6CA-MNIzmmM3Br4-FOESyqCS_YYK8N4V9D6CVEI,1909
|
|
34
37
|
csv_detective/formats/datetime_naive.py,sha256=DZ0apAm3vIy4cdm5DynAeRueI_8rhuHYQtAOZ5yyZ5k,1681
|
|
@@ -68,44 +71,22 @@ csv_detective/formats/url.py,sha256=m3i_XhFRFaAxSACS05XfciQ-oyTCsP_0TASShCY2t7A,
|
|
|
68
71
|
csv_detective/formats/username.py,sha256=6qviaFOtF2wg-gtvs0N8548JxFNE67Ue3a0JD0Kv7TQ,261
|
|
69
72
|
csv_detective/formats/uuid.py,sha256=LxkRZFAOlfig5KKrravO9bgyYjmRBegzOtGyzjopVNc,352
|
|
70
73
|
csv_detective/formats/year.py,sha256=tMc2HHr6Jga3PGWjmeHweK3G17DsjkIpIUUkCecXAm4,362
|
|
71
|
-
csv_detective/formats/data/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
72
|
-
csv_detective/formats/data/insee_ape700.txt,sha256=-_N-zAmcT7rK7ACRfsrM01Ton4_XtZGcNk-7lU28VHU,4397
|
|
73
|
-
csv_detective/formats/data/iso_country_code_alpha2.txt,sha256=mLt_qcQ6D8hfy9zdi7fAK_zON1ojReKlKMA8c2VDoRU,752
|
|
74
|
-
csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=XFPdGBsyZCBg4D8IDn6VgwsycCwYVfuqPbyHfNeqGv0,1003
|
|
75
|
-
csv_detective/formats/data/iso_country_code_numeric.txt,sha256=sdGpn0PqDMlc59-7prThkihHrf7mwB6j5uEHpxGvLFE,1003
|
|
76
74
|
csv_detective/output/__init__.py,sha256=ALSq_tgX7rGyh--7rmbKz8wHkmResN0h7mNujndow3w,2103
|
|
77
|
-
csv_detective/output/dataframe.py,sha256=
|
|
75
|
+
csv_detective/output/dataframe.py,sha256=QX5vplx0AOKgnwwJ6dKvDHWRX9IGPStax-svXEyweJ8,3584
|
|
78
76
|
csv_detective/output/example.py,sha256=8LWheSBYCeDFfarbnmzBrdCbTd8Alh1U4pfXMKfabOw,8630
|
|
79
|
-
csv_detective/output/profile.py,sha256=
|
|
77
|
+
csv_detective/output/profile.py,sha256=ADr5DwuvwcBYxugjN38fHm11l6ivfzGHXPd8a87Ht-s,4985
|
|
80
78
|
csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
|
|
81
79
|
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
82
80
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
|
-
csv_detective/parsing/columns.py,sha256=
|
|
81
|
+
csv_detective/parsing/columns.py,sha256=rb5JywbKnYCT3Jb0ZaG1BnyPVtB3gy5mSD-K7qcOl8I,9257
|
|
84
82
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
85
83
|
csv_detective/parsing/csv.py,sha256=5rw6gXZFQC1T4NT9CnW0AumidrYOkF8kjrfWGmk949I,1716
|
|
86
|
-
csv_detective/parsing/excel.py,sha256=
|
|
87
|
-
csv_detective/parsing/load.py,sha256=
|
|
84
|
+
csv_detective/parsing/excel.py,sha256=tb65I78tdYlZci_tzvvQt8U6bZSYKjeVdn2CEvsET1o,6972
|
|
85
|
+
csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
|
|
88
86
|
csv_detective/parsing/text.py,sha256=yDAcop5xJQc25UtbZcV0guHXAZQfm-H8WuJORTy8Rr8,1734
|
|
89
|
-
csv_detective
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
tests/test_structure.py,sha256=XDbviuuvk-0Mu9Y9PI6He2e5hry2dXVJ6yBVwEqF_2o,1043
|
|
96
|
-
tests/test_validation.py,sha256=309k3Axgbp-1Wh6qvCj2BpeMBp3HXzLi5j9UKm1bRQs,5384
|
|
97
|
-
tests/data/a_test_file.csv,sha256=SOHjseGYqZer9yu3Bd3oS12Vw8MFsebo0BzrLZ_R4Cc,68871
|
|
98
|
-
tests/data/a_test_file.json,sha256=fB9bCpAMFPxFw8KxHRFlgRqjYG819QVGrCQWxQvwkvo,10542
|
|
99
|
-
tests/data/b_test_file.csv,sha256=wJGX62KhYjZi62De2XjZWClAzeRFEBsg3ET0IPX1BNU,98
|
|
100
|
-
tests/data/c_test_file.csv,sha256=dz6axMyFscHIWR2Brqia_jvlBfQ30l1rFrxvcTqsmJ8,36
|
|
101
|
-
tests/data/csv_file,sha256=nMAQx2PrQliu3czifCHXLyruZbvCNTyYqwZ4JYzImqA,70
|
|
102
|
-
tests/data/file.csv.gz,sha256=mfGfqG5mGlojCs05A0IF7IUZe5r87bAe2FuQ0Uh5ZMI,108
|
|
103
|
-
tests/data/file.ods,sha256=4dR7zWptz5djALIBVeWHQ20GaZNfA63fevIJGFIk1_U,11832
|
|
104
|
-
tests/data/file.xls,sha256=QYmNX3FF0QfcQSzYQMtaMJaepJf5EZpDa1miKc4wMdQ,21495
|
|
105
|
-
tests/data/file.xlsx,sha256=naWzL02PK4pdIjMzfEyfSW9GQhkYYd_e7bpJvB8Pb2w,8314
|
|
106
|
-
tests/data/xlsx_file,sha256=NyOyN_rIe7ryJuHQLqjxVdKCc8V4s5pxyHl6wWFykCM,8305
|
|
107
|
-
csv_detective-0.10.4.dev1.dist-info/METADATA,sha256=le1Rn1JIh8MoIf_RTc3Fi9DOOlvug4eR-Mwpw4AK0To,10925
|
|
108
|
-
csv_detective-0.10.4.dev1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
109
|
-
csv_detective-0.10.4.dev1.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
110
|
-
csv_detective-0.10.4.dev1.dist-info/top_level.txt,sha256=KDI4gyOpkmormGgUvSWrE3jen2e0unIsxR2b96DRvcw,25
|
|
111
|
-
csv_detective-0.10.4.dev1.dist-info/RECORD,,
|
|
87
|
+
csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
|
|
88
|
+
csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
|
|
89
|
+
csv_detective-0.10.12674.dist-info/WHEEL,sha256=XjEbIc5-wIORjWaafhI6vBtlxDBp7S9KiujWF1EM7Ak,79
|
|
90
|
+
csv_detective-0.10.12674.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
|
|
91
|
+
csv_detective-0.10.12674.dist-info/METADATA,sha256=TZIyuSI6QBmDeZoNZdYqarZ2R_GvaGazjB5WSkt8PFI,11060
|
|
92
|
+
csv_detective-0.10.12674.dist-info/RECORD,,
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 data.gouv.fr
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
tests/__init__.py
DELETED
|
File without changes
|