csv-detective 0.10.4.dev1__py3-none-any.whl → 0.10.2549__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detection/__init__.py +0 -0
- csv_detective/detection/columns.py +0 -0
- csv_detective/detection/encoding.py +0 -0
- csv_detective/detection/engine.py +0 -0
- csv_detective/detection/formats.py +38 -13
- csv_detective/detection/headers.py +14 -12
- csv_detective/detection/rows.py +1 -1
- csv_detective/detection/separator.py +0 -0
- csv_detective/detection/variables.py +0 -0
- csv_detective/explore_csv.py +6 -18
- csv_detective/format.py +5 -12
- csv_detective/formats/__init__.py +0 -0
- csv_detective/formats/adresse.py +9 -9
- csv_detective/formats/binary.py +1 -2
- csv_detective/formats/booleen.py +2 -3
- csv_detective/formats/code_commune_insee.py +10 -12
- csv_detective/formats/code_csp_insee.py +1 -1
- csv_detective/formats/code_departement.py +7 -8
- csv_detective/formats/code_fantoir.py +5 -6
- csv_detective/formats/code_import.py +1 -1
- csv_detective/formats/code_postal.py +9 -10
- csv_detective/formats/code_region.py +6 -7
- csv_detective/formats/code_rna.py +6 -7
- csv_detective/formats/code_waldec.py +1 -1
- csv_detective/formats/commune.py +5 -5
- csv_detective/formats/csp_insee.py +5 -6
- csv_detective/formats/data/insee_ape700.txt +1 -1
- csv_detective/formats/data/iso_country_code_alpha2.txt +397 -153
- csv_detective/formats/data/iso_country_code_alpha3.txt +132 -132
- csv_detective/formats/data/iso_country_code_numeric.txt +94 -94
- csv_detective/formats/date.py +18 -28
- csv_detective/formats/date_fr.py +1 -1
- csv_detective/formats/datetime_aware.py +2 -7
- csv_detective/formats/datetime_naive.py +0 -3
- csv_detective/formats/datetime_rfc822.py +0 -1
- csv_detective/formats/departement.py +15 -15
- csv_detective/formats/email.py +13 -13
- csv_detective/formats/float.py +1 -2
- csv_detective/formats/geojson.py +10 -10
- csv_detective/formats/insee_ape700.py +8 -10
- csv_detective/formats/insee_canton.py +6 -6
- csv_detective/formats/int.py +1 -2
- csv_detective/formats/iso_country_code_alpha2.py +14 -14
- csv_detective/formats/iso_country_code_alpha3.py +13 -6
- csv_detective/formats/iso_country_code_numeric.py +9 -2
- csv_detective/formats/jour_de_la_semaine.py +12 -11
- csv_detective/formats/json.py +0 -6
- csv_detective/formats/latitude_l93.py +22 -8
- csv_detective/formats/latitude_wgs.py +29 -31
- csv_detective/formats/latitude_wgs_fr_metropole.py +30 -7
- csv_detective/formats/latlon_wgs.py +28 -30
- csv_detective/formats/longitude_l93.py +13 -8
- csv_detective/formats/longitude_wgs.py +19 -34
- csv_detective/formats/longitude_wgs_fr_metropole.py +19 -6
- csv_detective/formats/lonlat_wgs.py +11 -12
- csv_detective/formats/mois_de_lannee.py +1 -1
- csv_detective/formats/money.py +1 -1
- csv_detective/formats/mongo_object_id.py +1 -1
- csv_detective/formats/pays.py +13 -11
- csv_detective/formats/percent.py +1 -1
- csv_detective/formats/region.py +13 -13
- csv_detective/formats/sexe.py +1 -1
- csv_detective/formats/siren.py +10 -9
- csv_detective/formats/siret.py +9 -9
- csv_detective/formats/tel_fr.py +13 -7
- csv_detective/formats/uai.py +18 -17
- csv_detective/formats/url.py +16 -16
- csv_detective/formats/username.py +1 -1
- csv_detective/formats/uuid.py +1 -1
- csv_detective/formats/year.py +12 -7
- csv_detective/output/__init__.py +0 -0
- csv_detective/output/dataframe.py +3 -8
- csv_detective/output/example.py +0 -0
- csv_detective/output/profile.py +2 -6
- csv_detective/output/schema.py +0 -0
- csv_detective/output/utils.py +0 -0
- csv_detective/parsing/__init__.py +0 -0
- csv_detective/parsing/columns.py +5 -9
- csv_detective/parsing/compression.py +0 -0
- csv_detective/parsing/csv.py +0 -0
- csv_detective/parsing/excel.py +1 -1
- csv_detective/parsing/load.py +12 -11
- csv_detective/parsing/text.py +12 -13
- csv_detective/validate.py +36 -71
- {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.2549.dist-info}/METADATA +18 -15
- csv_detective-0.10.2549.dist-info/RECORD +92 -0
- csv_detective-0.10.2549.dist-info/WHEEL +4 -0
- {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.2549.dist-info}/entry_points.txt +1 -0
- csv_detective-0.10.4.dev1.dist-info/RECORD +0 -111
- csv_detective-0.10.4.dev1.dist-info/WHEEL +0 -5
- csv_detective-0.10.4.dev1.dist-info/licenses/LICENSE +0 -21
- csv_detective-0.10.4.dev1.dist-info/top_level.txt +0 -3
- tests/__init__.py +0 -0
- tests/data/a_test_file.csv +0 -407
- tests/data/a_test_file.json +0 -394
- tests/data/b_test_file.csv +0 -7
- tests/data/c_test_file.csv +0 -2
- tests/data/csv_file +0 -7
- tests/data/file.csv.gz +0 -0
- tests/data/file.ods +0 -0
- tests/data/file.xls +0 -0
- tests/data/file.xlsx +0 -0
- tests/data/xlsx_file +0 -0
- tests/test_example.py +0 -67
- tests/test_fields.py +0 -175
- tests/test_file.py +0 -469
- tests/test_labels.py +0 -26
- tests/test_structure.py +0 -45
- tests/test_validation.py +0 -163
csv_detective/parsing/columns.py
CHANGED
|
@@ -13,13 +13,6 @@ from csv_detective.utils import display_logs_depending_process_time
|
|
|
13
13
|
MAX_NUMBER_CATEGORICAL_VALUES = 25
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def handle_empty_columns(return_table: pd.DataFrame):
|
|
17
|
-
# handling that empty columns score 1 everywhere
|
|
18
|
-
for col in return_table.columns:
|
|
19
|
-
if sum(return_table[col]) == len(return_table):
|
|
20
|
-
return_table[col] = 0
|
|
21
|
-
|
|
22
|
-
|
|
23
16
|
def test_col_val(
|
|
24
17
|
serie: pd.Series,
|
|
25
18
|
format: Format,
|
|
@@ -40,7 +33,7 @@ def test_col_val(
|
|
|
40
33
|
|
|
41
34
|
try:
|
|
42
35
|
if skipna:
|
|
43
|
-
serie = serie.
|
|
36
|
+
serie = serie.loc[serie.notnull()]
|
|
44
37
|
ser_len = len(serie)
|
|
45
38
|
if ser_len == 0:
|
|
46
39
|
# being here means the whole column is NaN, so if skipna it's a pass
|
|
@@ -229,7 +222,10 @@ def test_col_chunks(
|
|
|
229
222
|
analysis["categorical"] = [
|
|
230
223
|
col for col, values in col_values.items() if len(values) <= MAX_NUMBER_CATEGORICAL_VALUES
|
|
231
224
|
]
|
|
232
|
-
|
|
225
|
+
# handling that empty columns score 1 everywhere
|
|
226
|
+
for col in return_table.columns:
|
|
227
|
+
if sum(return_table[col]) == len(return_table):
|
|
228
|
+
return_table[col] = 0
|
|
233
229
|
if verbose:
|
|
234
230
|
display_logs_depending_process_time(
|
|
235
231
|
f"Done testing chunks in {round(time() - start, 3)}s", time() - start
|
|
File without changes
|
csv_detective/parsing/csv.py
CHANGED
|
File without changes
|
csv_detective/parsing/excel.py
CHANGED
|
@@ -23,7 +23,7 @@ def parse_excel(
|
|
|
23
23
|
file_path: str,
|
|
24
24
|
num_rows: int = -1,
|
|
25
25
|
engine: str | None = None,
|
|
26
|
-
sheet_name: str |
|
|
26
|
+
sheet_name: str | None = None,
|
|
27
27
|
random_state: int = 42,
|
|
28
28
|
verbose: bool = False,
|
|
29
29
|
) -> tuple[pd.DataFrame, int, int, str, str, int]:
|
csv_detective/parsing/load.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import codecs
|
|
2
1
|
from io import BytesIO, StringIO
|
|
3
2
|
|
|
4
3
|
import pandas as pd
|
|
@@ -11,7 +10,7 @@ from csv_detective.detection.engine import (
|
|
|
11
10
|
EXCEL_ENGINES,
|
|
12
11
|
detect_engine,
|
|
13
12
|
)
|
|
14
|
-
from csv_detective.detection.headers import
|
|
13
|
+
from csv_detective.detection.headers import detect_headers
|
|
15
14
|
from csv_detective.detection.separator import detect_separator
|
|
16
15
|
from csv_detective.parsing.compression import unzip
|
|
17
16
|
from csv_detective.parsing.csv import parse_csv
|
|
@@ -28,12 +27,12 @@ def load_file(
|
|
|
28
27
|
encoding: str | None = None,
|
|
29
28
|
sep: str | None = None,
|
|
30
29
|
verbose: bool = False,
|
|
31
|
-
engine: str | None = None,
|
|
32
30
|
sheet_name: str | int | None = None,
|
|
33
31
|
) -> tuple[pd.DataFrame, dict]:
|
|
34
32
|
file_name = file_path.split("/")[-1]
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
engine = None
|
|
34
|
+
if "." not in file_name or not file_name.endswith("csv"):
|
|
35
|
+
# file has no extension, we'll investigate how to read it
|
|
37
36
|
engine = detect_engine(file_path, verbose=verbose)
|
|
38
37
|
|
|
39
38
|
if engine in EXCEL_ENGINES or any([file_path.endswith(k) for k in XLS_LIKE_EXT]):
|
|
@@ -46,6 +45,9 @@ def load_file(
|
|
|
46
45
|
)
|
|
47
46
|
if table.empty:
|
|
48
47
|
raise ValueError("Table seems to be empty")
|
|
48
|
+
header = table.columns.to_list()
|
|
49
|
+
if any(col.startswith("Unnamed") for col in header):
|
|
50
|
+
raise ValueError("Could not retrieve headers")
|
|
49
51
|
analysis = {
|
|
50
52
|
"engine": engine,
|
|
51
53
|
"sheet_name": sheet_name,
|
|
@@ -67,20 +69,21 @@ def load_file(
|
|
|
67
69
|
binary_file.seek(0)
|
|
68
70
|
# decoding and reading file
|
|
69
71
|
if is_url(file_path) or engine in COMPRESSION_ENGINES:
|
|
70
|
-
decoder = codecs.getincrementaldecoder(encoding)()
|
|
71
72
|
str_file = StringIO()
|
|
72
73
|
while True:
|
|
73
74
|
chunk = binary_file.read(1024**2)
|
|
74
75
|
if not chunk:
|
|
75
76
|
break
|
|
76
|
-
str_file.write(
|
|
77
|
+
str_file.write(chunk.decode(encoding=encoding))
|
|
77
78
|
del binary_file
|
|
78
79
|
str_file.seek(0)
|
|
79
80
|
else:
|
|
80
81
|
str_file = open(file_path, "r", encoding=encoding)
|
|
81
82
|
if sep is None:
|
|
82
83
|
sep = detect_separator(str_file, verbose=verbose)
|
|
83
|
-
header_row_idx =
|
|
84
|
+
header_row_idx, header = detect_headers(str_file, sep, verbose=verbose)
|
|
85
|
+
if header is None or (isinstance(header, list) and any([h is None for h in header])):
|
|
86
|
+
raise ValueError("Could not retrieve headers")
|
|
84
87
|
heading_columns = detect_heading_columns(str_file, sep, verbose=verbose)
|
|
85
88
|
trailing_columns = detect_trailing_columns(str_file, sep, heading_columns, verbose=verbose)
|
|
86
89
|
table, total_lines, nb_duplicates = parse_csv(
|
|
@@ -97,11 +100,9 @@ def load_file(
|
|
|
97
100
|
}
|
|
98
101
|
if engine is not None:
|
|
99
102
|
analysis["compression"] = engine
|
|
100
|
-
if any(not isinstance(col, str) or col.startswith("Unnamed:") for col in table.columns):
|
|
101
|
-
raise ValueError("Could not accurately detect the file's columns")
|
|
102
103
|
analysis |= {
|
|
103
104
|
"header_row_idx": header_row_idx,
|
|
104
|
-
"header":
|
|
105
|
+
"header": header,
|
|
105
106
|
}
|
|
106
107
|
if total_lines is not None:
|
|
107
108
|
analysis["total_lines"] = total_lines
|
csv_detective/parsing/text.py
CHANGED
|
@@ -36,22 +36,21 @@ def is_word_in_string(word: str, string: str):
|
|
|
36
36
|
return len(word) > 2 and word in string
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
def header_score(header: str,
|
|
39
|
+
def header_score(header: str, words_combinations_list: list[str]) -> float:
|
|
40
40
|
"""Returns:
|
|
41
|
-
-
|
|
42
|
-
- 0.5
|
|
41
|
+
- 1 if the header is exactly in the specified list
|
|
42
|
+
- 0.5 if any of the words is within the header
|
|
43
43
|
- 0 otherwise"""
|
|
44
44
|
processed_header = _process_text(header)
|
|
45
45
|
|
|
46
|
-
|
|
47
|
-
(
|
|
46
|
+
header_matches_words_combination = float(
|
|
47
|
+
any(words_combination == processed_header for words_combination in words_combinations_list)
|
|
48
48
|
)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
is_word_in_string(valid, processed_header) * credibility
|
|
55
|
-
for valid, credibility in valid_headers.items()
|
|
56
|
-
),
|
|
49
|
+
words_combination_in_header = 0.5 * (
|
|
50
|
+
any(
|
|
51
|
+
is_word_in_string(words_combination, processed_header)
|
|
52
|
+
for words_combination in words_combinations_list
|
|
53
|
+
)
|
|
57
54
|
)
|
|
55
|
+
|
|
56
|
+
return max(header_matches_words_combination, words_combination_in_header)
|
csv_detective/validate.py
CHANGED
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from collections import defaultdict
|
|
3
2
|
|
|
4
3
|
import pandas as pd
|
|
5
4
|
|
|
6
5
|
from csv_detective.format import FormatsManager
|
|
7
6
|
from csv_detective.parsing.columns import MAX_NUMBER_CATEGORICAL_VALUES, test_col_val
|
|
8
7
|
|
|
9
|
-
# VALIDATION_CHUNK_SIZE is bigger than (analysis) CHUNK_SIZE because
|
|
10
|
-
# it's faster to validate so we can afford to load more rows
|
|
11
8
|
VALIDATION_CHUNK_SIZE = int(1e5)
|
|
12
9
|
logging.basicConfig(level=logging.INFO)
|
|
13
10
|
|
|
@@ -19,9 +16,9 @@ def validate(
|
|
|
19
16
|
previous_analysis: dict,
|
|
20
17
|
verbose: bool = False,
|
|
21
18
|
skipna: bool = True,
|
|
22
|
-
) -> tuple[bool, dict | None, dict[str, pd.Series] | None]:
|
|
19
|
+
) -> tuple[bool, pd.DataFrame | None, dict | None, dict[str, pd.Series] | None]:
|
|
23
20
|
"""
|
|
24
|
-
Verify is the given file has the same fields and
|
|
21
|
+
Verify is the given file has the same fields and types as in the given analysis.
|
|
25
22
|
|
|
26
23
|
Args:
|
|
27
24
|
file_path: the path of the file to validate
|
|
@@ -29,15 +26,6 @@ def validate(
|
|
|
29
26
|
verbose: whether the code displays the steps it's going through
|
|
30
27
|
skipna: whether to ignore NaN values in the checks
|
|
31
28
|
"""
|
|
32
|
-
if verbose:
|
|
33
|
-
logging.info(f"Checking given formats exist")
|
|
34
|
-
for col_name, detected in previous_analysis["columns"].items():
|
|
35
|
-
if detected["format"] == "string":
|
|
36
|
-
continue
|
|
37
|
-
elif detected["format"] not in formats:
|
|
38
|
-
if verbose:
|
|
39
|
-
logging.warning(f"> Unknown format `{detected['format']}` in analysis")
|
|
40
|
-
return False, None, None
|
|
41
29
|
try:
|
|
42
30
|
if previous_analysis.get("separator"):
|
|
43
31
|
# loading the table in chunks
|
|
@@ -70,94 +58,71 @@ def validate(
|
|
|
70
58
|
]
|
|
71
59
|
)
|
|
72
60
|
analysis = {k: v for k, v in previous_analysis.items() if k in ["engine", "sheet_name"]}
|
|
61
|
+
first_chunk = next(chunks)
|
|
73
62
|
analysis.update(
|
|
74
63
|
{k: v for k, v in previous_analysis.items() if k in ["header_row_idx", "header"]}
|
|
75
64
|
)
|
|
76
65
|
except Exception as e:
|
|
77
66
|
if verbose:
|
|
78
67
|
logging.warning(f"> Could not load the file with previous analysis values: {e}")
|
|
79
|
-
return False, None, None
|
|
68
|
+
return False, None, None, None
|
|
80
69
|
if verbose:
|
|
81
70
|
logging.info("Comparing table with the previous analysis")
|
|
71
|
+
logging.info("- Checking if all columns match")
|
|
72
|
+
if len(first_chunk.columns) != len(previous_analysis["header"]) or any(
|
|
73
|
+
list(first_chunk.columns)[k] != previous_analysis["header"][k]
|
|
74
|
+
for k in range(len(previous_analysis["header"]))
|
|
75
|
+
):
|
|
76
|
+
if verbose:
|
|
77
|
+
logging.warning("> Columns do not match, proceeding with full analysis")
|
|
78
|
+
return False, None, None, None
|
|
79
|
+
if verbose:
|
|
82
80
|
logging.info(
|
|
83
81
|
f"Testing previously detected formats on chunks of {VALIDATION_CHUNK_SIZE} rows"
|
|
84
82
|
)
|
|
85
83
|
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
# used for profile to read the file only once
|
|
91
|
-
# naming it "count" to be iso with how col_values are made in detect_formats
|
|
92
|
-
col_values: defaultdict[str, pd.Series] = defaultdict(lambda: pd.Series(name="count"))
|
|
84
|
+
# hashing rows to get nb_duplicates
|
|
85
|
+
row_hashes_count = pd.util.hash_pandas_object(first_chunk, index=False).value_counts()
|
|
86
|
+
# getting values for profile to read the file only once
|
|
87
|
+
col_values = {col: first_chunk[col].value_counts(dropna=False) for col in first_chunk.columns}
|
|
93
88
|
analysis["total_lines"] = 0
|
|
94
|
-
|
|
95
|
-
valid_values: dict[str, int] = {col_name: 0 for col_name in previous_analysis["columns"]}
|
|
96
|
-
for idx, chunk in enumerate(chunks):
|
|
89
|
+
for idx, chunk in enumerate([first_chunk, *chunks]):
|
|
97
90
|
if verbose:
|
|
98
|
-
logging.info(f"
|
|
99
|
-
if idx == 0:
|
|
100
|
-
if verbose:
|
|
101
|
-
logging.info("Checking if all columns match")
|
|
102
|
-
if len(chunk.columns) != len(previous_analysis["header"]) or any(
|
|
103
|
-
list(chunk.columns)[k] != previous_analysis["header"][k]
|
|
104
|
-
for k in range(len(previous_analysis["header"]))
|
|
105
|
-
):
|
|
106
|
-
if verbose:
|
|
107
|
-
logging.warning("> Columns in the file do not match those of the analysis")
|
|
108
|
-
return False, None, None
|
|
91
|
+
logging.info(f"> Testing chunk number {idx}")
|
|
109
92
|
analysis["total_lines"] += len(chunk)
|
|
110
93
|
row_hashes_count = row_hashes_count.add(
|
|
111
94
|
pd.util.hash_pandas_object(chunk, index=False).value_counts(),
|
|
112
95
|
fill_value=0,
|
|
113
96
|
)
|
|
114
|
-
for
|
|
97
|
+
for col in chunk.columns:
|
|
98
|
+
col_values[col] = col_values[col].add(
|
|
99
|
+
chunk[col].value_counts(dropna=False),
|
|
100
|
+
fill_value=0,
|
|
101
|
+
)
|
|
102
|
+
for col_name, args in previous_analysis["columns"].items():
|
|
115
103
|
if verbose:
|
|
116
|
-
logging.info(f"- Testing {col_name} for {
|
|
117
|
-
if
|
|
104
|
+
logging.info(f"- Testing {col_name} for {args['format']}")
|
|
105
|
+
if args["format"] == "string":
|
|
118
106
|
# no test for columns that have not been recognized as a specific format
|
|
119
107
|
continue
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
108
|
+
test_result: float = test_col_val(
|
|
109
|
+
serie=chunk[col_name],
|
|
110
|
+
format=formats[args["format"]],
|
|
111
|
+
skipna=skipna,
|
|
112
|
+
)
|
|
113
|
+
if not bool(test_result):
|
|
124
114
|
if verbose:
|
|
125
|
-
logging.warning(
|
|
126
|
-
|
|
127
|
-
)
|
|
128
|
-
return False, None, None
|
|
129
|
-
checked_values[col_name] += len(to_check)
|
|
130
|
-
valid_values[col_name] += chunk_valid_values
|
|
131
|
-
col_values[col_name] = (
|
|
132
|
-
col_values[col_name]
|
|
133
|
-
.add(
|
|
134
|
-
chunk[col_name].value_counts(dropna=False),
|
|
135
|
-
fill_value=0,
|
|
136
|
-
)
|
|
137
|
-
.rename_axis(col_name)
|
|
138
|
-
) # rename_axis because *sometimes* pandas doesn't pass on the column's name ¯\_(ツ)_/¯
|
|
139
|
-
del chunk
|
|
140
|
-
# finally we loop through the formats that accept less than 100% valid values to check the proportion
|
|
141
|
-
for col_name, detected in previous_analysis["columns"].items():
|
|
142
|
-
if (
|
|
143
|
-
checked_values[col_name] > 0
|
|
144
|
-
and valid_values[col_name] / checked_values[col_name]
|
|
145
|
-
< formats[detected["format"]].proportion
|
|
146
|
-
):
|
|
147
|
-
if verbose:
|
|
148
|
-
logging.warning(
|
|
149
|
-
f"> Test failed for column {col_name} with format {detected['format']}"
|
|
150
|
-
)
|
|
151
|
-
return False, None, None
|
|
115
|
+
logging.warning("> Test failed, proceeding with full analysis")
|
|
116
|
+
return False, first_chunk, analysis, None
|
|
152
117
|
if verbose:
|
|
153
118
|
logging.info("> All checks successful")
|
|
154
119
|
analysis["nb_duplicates"] = sum(row_hashes_count > 1)
|
|
155
|
-
del row_hashes_count
|
|
156
120
|
analysis["categorical"] = [
|
|
157
121
|
col for col, values in col_values.items() if len(values) <= MAX_NUMBER_CATEGORICAL_VALUES
|
|
158
122
|
]
|
|
159
123
|
return (
|
|
160
124
|
True,
|
|
125
|
+
first_chunk,
|
|
161
126
|
analysis
|
|
162
127
|
| {
|
|
163
128
|
k: previous_analysis[k]
|
|
@@ -1,29 +1,32 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: csv-detective
|
|
3
|
-
Version: 0.10.
|
|
3
|
+
Version: 0.10.2549
|
|
4
4
|
Summary: Detect tabular files column content
|
|
5
|
-
Author-email: "data.gouv.fr" <opendatateam@data.gouv.fr>
|
|
6
|
-
License: MIT
|
|
7
|
-
Project-URL: Source, https://github.com/datagouv/csv-detective
|
|
8
5
|
Keywords: CSV,data processing,encoding,guess,parser,tabular
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
License
|
|
12
|
-
Requires-Dist: dateparser
|
|
6
|
+
Author: data.gouv.fr
|
|
7
|
+
Author-email: data.gouv.fr <opendatateam@data.gouv.fr>
|
|
8
|
+
License: MIT
|
|
9
|
+
Requires-Dist: dateparser>=1.2.0,<2
|
|
13
10
|
Requires-Dist: faust-cchardet==2.1.19
|
|
14
|
-
Requires-Dist: pandas
|
|
15
|
-
Requires-Dist: python-dateutil
|
|
16
|
-
Requires-Dist:
|
|
11
|
+
Requires-Dist: pandas>=2.2.0,<3
|
|
12
|
+
Requires-Dist: python-dateutil>=2.8.2,<3
|
|
13
|
+
Requires-Dist: unidecode>=1.3.6,<2
|
|
17
14
|
Requires-Dist: openpyxl>=3.1.5
|
|
18
15
|
Requires-Dist: xlrd>=2.0.1
|
|
19
16
|
Requires-Dist: odfpy>=1.4.1
|
|
20
|
-
Requires-Dist: requests
|
|
17
|
+
Requires-Dist: requests>=2.32.3,<3
|
|
21
18
|
Requires-Dist: python-magic>=0.4.27
|
|
22
19
|
Requires-Dist: frformat==0.4.0
|
|
23
|
-
Requires-Dist:
|
|
20
|
+
Requires-Dist: faker>=33.0.0
|
|
24
21
|
Requires-Dist: rstr>=3.2.2
|
|
25
22
|
Requires-Dist: more-itertools>=10.8.0
|
|
26
|
-
|
|
23
|
+
Requires-Dist: pytest>=8.3.0 ; extra == 'dev'
|
|
24
|
+
Requires-Dist: responses>=0.25.0 ; extra == 'dev'
|
|
25
|
+
Requires-Dist: ruff>=0.9.3 ; extra == 'dev'
|
|
26
|
+
Requires-Python: >=3.10, <3.15
|
|
27
|
+
Project-URL: Source, https://github.com/datagouv/csv_detective
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
27
30
|
|
|
28
31
|
# CSV Detective
|
|
29
32
|
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
csv_detective/__init__.py,sha256=zlYElTOp_I2_VG7ZdOTuAu0wuCXSc0cr3sH6gtk2bcg,152
|
|
2
|
+
csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
|
|
3
|
+
csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
|
|
5
|
+
csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
|
|
6
|
+
csv_detective/detection/engine.py,sha256=wQeDKpp2DKF-HcS1R8H6GgQyaUgQme4szPtEHgAjBII,1552
|
|
7
|
+
csv_detective/detection/formats.py,sha256=kQEht5lr9hFhYe0Zn1lfj9jOKaqYrXNrM_tkQX24pEk,5410
|
|
8
|
+
csv_detective/detection/headers.py,sha256=95pTL524Sy5PGxyQ03ofFUaamvlmkxTJQe8u6HfzOkU,1051
|
|
9
|
+
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
10
|
+
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
11
|
+
csv_detective/detection/variables.py,sha256=-QtZOB96z3pWbqnZ-c1RU3yzoYqcO61A0JzeS6JbkxY,3576
|
|
12
|
+
csv_detective/explore_csv.py,sha256=-LCHr7vyT0Q0oLtXeOO8pEevJ6-8Ib9JP3D7nVgZM8o,7090
|
|
13
|
+
csv_detective/format.py,sha256=XX_cSTQc0jlsQq3GUqHi7Cz36AiRrpjrwPmeoOTLMvo,2396
|
|
14
|
+
csv_detective/formats/__init__.py,sha256=Egiy29kcG3Oz2eE2maYhD3wP29zOSOWyRlOpGD5LGvU,318
|
|
15
|
+
csv_detective/formats/adresse.py,sha256=jALDpEDAWyAcgqEfNVRg_W1r6XaYuJKD_jAaP2l-bxk,1943
|
|
16
|
+
csv_detective/formats/binary.py,sha256=OCGRDh5p27sA4yjrpKIp3b2_PfHJYUe5QxIArf-fCxA,676
|
|
17
|
+
csv_detective/formats/booleen.py,sha256=AnDDKShkSYpWO4POhwY2V7_C4yPWbmqBu8CJPgQ9Gwc,648
|
|
18
|
+
csv_detective/formats/code_commune_insee.py,sha256=MhwCPVAhwWH-MyaNAIVRNbqKfeNe3oiCpzEGfpHkpJY,504
|
|
19
|
+
csv_detective/formats/code_csp_insee.py,sha256=_JQ-YbnHMenNnwIg1xBmNVqgCa1tLD2hbPN1soODhDk,656
|
|
20
|
+
csv_detective/formats/code_departement.py,sha256=odwVbmktgjEhL-dSFHXuCRVwhkF8bL8G7VlpVTnMY2A,628
|
|
21
|
+
csv_detective/formats/code_fantoir.py,sha256=nFVFYJEP2HHE2TyhR_dhGdPCMLfCROBO_B8wxwQn7T8,366
|
|
22
|
+
csv_detective/formats/code_import.py,sha256=N5NVvnHkRwC7ARHoM77R-2cYSeyNmPoRIn6JL3Fbnjs,346
|
|
23
|
+
csv_detective/formats/code_postal.py,sha256=C6XMkiVTxhMFvfyvJmGp3iwvh722EzMwD_UdqQU4aR0,427
|
|
24
|
+
csv_detective/formats/code_region.py,sha256=VFKh1rGYVYTNWBJZ2_m0xS4rhJlrI_Gr8q8RXuZCr-w,366
|
|
25
|
+
csv_detective/formats/code_rna.py,sha256=WExlQtlAUfOFT4N3MKsMBhZVxTdNzgexFjmXhZdRM1w,512
|
|
26
|
+
csv_detective/formats/code_waldec.py,sha256=kJEJfikbhMfVwtA8hBpup0tpeSFoY_rWrEdXQxgNwhg,297
|
|
27
|
+
csv_detective/formats/commune.py,sha256=oVpwINGqpwMOT43KkasozipJ9hBeoQ5FrKV_wIeVJGE,532
|
|
28
|
+
csv_detective/formats/csp_insee.py,sha256=HE6NK6Sw91mLFeAAKwWUXZZfXX6fiA0zK4RI4YdkUFY,656
|
|
29
|
+
csv_detective/formats/data/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
30
|
+
csv_detective/formats/data/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
|
|
31
|
+
csv_detective/formats/data/iso_country_code_alpha2.txt,sha256=YyPlDqCdz65ecf4Wes_r0P4rDSJG35niXtjc4MmctXM,1740
|
|
32
|
+
csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
|
|
33
|
+
csv_detective/formats/data/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
|
|
34
|
+
csv_detective/formats/date.py,sha256=X4ohXaFO8cXPJktUSumc3bfdlbDIWEYTG8S9ugVRcsE,2730
|
|
35
|
+
csv_detective/formats/date_fr.py,sha256=3hTw5RommrhcgECFRSt9KgyB9zyi1j4W3UygEHmRgoE,502
|
|
36
|
+
csv_detective/formats/datetime_aware.py,sha256=kSEVLAovUJEYYFMFk4RiHY50rnPkDlrjfUFwk7ogJYQ,1587
|
|
37
|
+
csv_detective/formats/datetime_naive.py,sha256=CVC-yey5uoPAAr8VnrY_HbLUGOk9dqduM5yLAvAhgfc,1591
|
|
38
|
+
csv_detective/formats/datetime_rfc822.py,sha256=l-SLb34hSuHxC2JQ-9SD-nG38JqzoozwUZiGtoybb0A,601
|
|
39
|
+
csv_detective/formats/departement.py,sha256=UP9UF23BFq_-mIS8N10K5XkoCXwPmDeSoa_7lCAkI4w,768
|
|
40
|
+
csv_detective/formats/email.py,sha256=Qen2EBDYY5TtWXwxrrTGWRrbIybz0ySlVpl4ZRk8pzA,517
|
|
41
|
+
csv_detective/formats/float.py,sha256=DF8CwBC4Vk-PFRlIawDr6OUPTtZjAiKYguvilDGUcmY,1033
|
|
42
|
+
csv_detective/formats/geojson.py,sha256=udbBxCBRmb0o6TD8z5ryemfqdinBz6njNJU0XcbfMig,757
|
|
43
|
+
csv_detective/formats/insee_ape700.py,sha256=cLs3Eersqm4wX6oqsqp0Vb3WGPJb2xY5Za_vh0uLgKc,780
|
|
44
|
+
csv_detective/formats/insee_canton.py,sha256=Q5jczsOmh1wPP2KtDkcmqZ7Hlv50Zz9YvPIbxy46qs0,531
|
|
45
|
+
csv_detective/formats/int.py,sha256=ZBUOn50luMtlNKWPyOaMIkY3J4f4hA0MqwcoFtksozU,482
|
|
46
|
+
csv_detective/formats/iso_country_code_alpha2.py,sha256=vIep_j0xuqlXKyuvk8c8GaJC73HuJqKfQ4QzQKHsPc0,613
|
|
47
|
+
csv_detective/formats/iso_country_code_alpha3.py,sha256=yOmm91O8ot6KoUBfss5cqykDfeeMNCwafDAvPNvbufA,668
|
|
48
|
+
csv_detective/formats/iso_country_code_numeric.py,sha256=989ypOmjIrNTV9vFnrBlbpRWQ9whd3Rv9gNasdF_O4g,685
|
|
49
|
+
csv_detective/formats/jour_de_la_semaine.py,sha256=c5QBw9eZfwRs_jL_Ckm95UH-TxlExdFmfZNYW7-_iZI,606
|
|
50
|
+
csv_detective/formats/json.py,sha256=E-s7IHW0q5WgAJVK0I-5Rv7W_RdofROB5wnIXbNegZQ,446
|
|
51
|
+
csv_detective/formats/latitude_l93.py,sha256=GteGpxAht-jeOBLr_deCuEXA_LliVYIAmyr_7jFAWgI,986
|
|
52
|
+
csv_detective/formats/latitude_wgs.py,sha256=HPcFlLzJNqynLugDQ07vO04rOCNBuAabVJEP8FQ89Q0,780
|
|
53
|
+
csv_detective/formats/latitude_wgs_fr_metropole.py,sha256=ruGzQLJPiMV2AlnsBneQIhMzstseddzWA0bDg5gfTG4,791
|
|
54
|
+
csv_detective/formats/latlon_wgs.py,sha256=CbNi4Y-ZgBfNyYi54xwcZGLpEusiLAWVpFP1YgHtI1M,1224
|
|
55
|
+
csv_detective/formats/longitude_l93.py,sha256=vJE4k_DyQOjAruqu_Q0E2sJKZB4mXGGN6bS9WCelsbs,768
|
|
56
|
+
csv_detective/formats/longitude_wgs.py,sha256=DUZCUxJQl53HHVQbXlz_lWXoAZhy3MvJWcPNdiK5cCM,552
|
|
57
|
+
csv_detective/formats/longitude_wgs_fr_metropole.py,sha256=wPlJP06K0BVWfrx1wwEAKK93AKIqvsuw705gKAlWAfQ,550
|
|
58
|
+
csv_detective/formats/lonlat_wgs.py,sha256=BgtTl2ReI0hSQB-7mcR4TDxx-QzvA1B9fiZWxTb5xPI,1005
|
|
59
|
+
csv_detective/formats/mois_de_lannee.py,sha256=4_mmdr9S83utVCgPaK_epkeBm2mhwdUWQEoB_Fhdh2o,759
|
|
60
|
+
csv_detective/formats/money.py,sha256=HpjrmfUmbG8sXF557XbYzQ7TLtpNVRgpC991gGokO8I,414
|
|
61
|
+
csv_detective/formats/mongo_object_id.py,sha256=XsiP4iMxfBBIeuL-4g5bm3jgS6yUMJC2X5CmrEJ40oI,296
|
|
62
|
+
csv_detective/formats/pays.py,sha256=FRvoQwIWiKbm0RC62Sus1X0Y_yJ-cfvdB5RYhkY-4NY,693
|
|
63
|
+
csv_detective/formats/percent.py,sha256=s6eQBMwJr2uyTZMUCK1_ifA0c4Rt2iEe9_E_hKKU_mk,308
|
|
64
|
+
csv_detective/formats/region.py,sha256=CkN7JTsZB1X3bH5xohbtMCxL5BX9MSpith36_1mHMd4,1483
|
|
65
|
+
csv_detective/formats/sexe.py,sha256=yioD4W6EkgUgo74rxn6KLZtN_0XYXtmA4mqVyI7e1mU,387
|
|
66
|
+
csv_detective/formats/siren.py,sha256=ieLe50vdSnkXadcUI8VXnnId9GFGHyIBWVTP6bJtyMo,758
|
|
67
|
+
csv_detective/formats/siret.py,sha256=ehkZgOH-HggN6IgxF4G0DMut_6giZ3gc4g9wMdwZFHQ,997
|
|
68
|
+
csv_detective/formats/tel_fr.py,sha256=yKCqIlqKO2yKucCoCjYfSjqNKfTjqFcmNXxg6THG0WE,624
|
|
69
|
+
csv_detective/formats/uai.py,sha256=uT5gjdTmoFH9QPZdTFkJgiyuKLW0B6KmT6yqHQeaeOU,711
|
|
70
|
+
csv_detective/formats/url.py,sha256=j6tCbcEzQw7U53ixeeFfhzueN8syVgQsjmAmY7RRWdU,1049
|
|
71
|
+
csv_detective/formats/username.py,sha256=y38OggfWpEQsGi0JnD9QRM30musa29lO6nz-qybR24U,249
|
|
72
|
+
csv_detective/formats/uuid.py,sha256=ekMEFfzQtz0cLudzmu3AoCM0Yf5pu23qAcFNFgHWJ1A,346
|
|
73
|
+
csv_detective/formats/year.py,sha256=pkAfYPKZdy0g1ZoHGgJNpgTS5y5weGEKXCVMGaxIX8k,472
|
|
74
|
+
csv_detective/output/__init__.py,sha256=ALSq_tgX7rGyh--7rmbKz8wHkmResN0h7mNujndow3w,2103
|
|
75
|
+
csv_detective/output/dataframe.py,sha256=Hnd-AY51U0JMACcpuaK9wwO4oCX9Nd7ZLUTqavgJWRA,3406
|
|
76
|
+
csv_detective/output/example.py,sha256=8LWheSBYCeDFfarbnmzBrdCbTd8Alh1U4pfXMKfabOw,8630
|
|
77
|
+
csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf44,4876
|
|
78
|
+
csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
|
|
79
|
+
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
80
|
+
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
|
+
csv_detective/parsing/columns.py,sha256=rb5JywbKnYCT3Jb0ZaG1BnyPVtB3gy5mSD-K7qcOl8I,9257
|
|
82
|
+
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
83
|
+
csv_detective/parsing/csv.py,sha256=5rw6gXZFQC1T4NT9CnW0AumidrYOkF8kjrfWGmk949I,1716
|
|
84
|
+
csv_detective/parsing/excel.py,sha256=tb65I78tdYlZci_tzvvQt8U6bZSYKjeVdn2CEvsET1o,6972
|
|
85
|
+
csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
|
|
86
|
+
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
87
|
+
csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
|
|
88
|
+
csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
|
|
89
|
+
csv_detective-0.10.2549.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
|
|
90
|
+
csv_detective-0.10.2549.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
|
|
91
|
+
csv_detective-0.10.2549.dist-info/METADATA,sha256=lgToJl6ykKcfAUxs9FggBaY9QeaKkRXlLRkieMcvMYk,11059
|
|
92
|
+
csv_detective-0.10.2549.dist-info/RECORD,,
|
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
csv_detective/__init__.py,sha256=zlYElTOp_I2_VG7ZdOTuAu0wuCXSc0cr3sH6gtk2bcg,152
|
|
2
|
-
csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
|
|
3
|
-
csv_detective/explore_csv.py,sha256=M8jabAP08raPY438v5UeBqJy3bBudTeuo-UNe2unWyE,7639
|
|
4
|
-
csv_detective/format.py,sha256=VTdwg4gp9pq6WYhbkCxv9X2hXq0fMrzfooFchmIL0as,2911
|
|
5
|
-
csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
|
|
6
|
-
csv_detective/validate.py,sha256=7k0GC5AsTn5BbsRChetZZDmnTGiYLe40qPKiP3GruYs,7495
|
|
7
|
-
csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
|
|
9
|
-
csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
|
|
10
|
-
csv_detective/detection/engine.py,sha256=wQeDKpp2DKF-HcS1R8H6GgQyaUgQme4szPtEHgAjBII,1552
|
|
11
|
-
csv_detective/detection/formats.py,sha256=cgECpxRaygwnedPhOteG1P_697qCoceeDrKK9G_O-u8,4812
|
|
12
|
-
csv_detective/detection/headers.py,sha256=lnbWRxkI6rdyoWGtmxSfsPkqNjS0Nlpgw-pVevtmBP0,899
|
|
13
|
-
csv_detective/detection/rows.py,sha256=JQsmKP8-i8wzcZIWI_13LUer5mpYRIqaKg6qW01ZO3A,750
|
|
14
|
-
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
15
|
-
csv_detective/detection/variables.py,sha256=-QtZOB96z3pWbqnZ-c1RU3yzoYqcO61A0JzeS6JbkxY,3576
|
|
16
|
-
csv_detective/formats/__init__.py,sha256=Egiy29kcG3Oz2eE2maYhD3wP29zOSOWyRlOpGD5LGvU,318
|
|
17
|
-
csv_detective/formats/adresse.py,sha256=79tIXeC1AUjUG9m0XGZUcP_BXvmLgd1M8XVfxgLNGDE,1966
|
|
18
|
-
csv_detective/formats/binary.py,sha256=26qrbqv_Dqu0LhVPpQOz2xzglxse7Nz5EasbQ0xP38c,715
|
|
19
|
-
csv_detective/formats/booleen.py,sha256=o7sm2RB5TM_ENY-2KluQMW3BRwM31YUVv5wzG4CZRRo,686
|
|
20
|
-
csv_detective/formats/code_commune_insee.py,sha256=pEk4JoUKCloZ3vRW664N2KlGm0TYSsVOdcp6EWISG7o,575
|
|
21
|
-
csv_detective/formats/code_csp_insee.py,sha256=AlUbmn5PHYmISkTCdrQJiUaf9hIjqoOCN0cg2I8pVeI,662
|
|
22
|
-
csv_detective/formats/code_departement.py,sha256=TmTnTJ8V4qlKSmnnXbUnlEJc0exxjI4dKr4MjsLHlnU,672
|
|
23
|
-
csv_detective/formats/code_fantoir.py,sha256=K2bmVGSH9igHmr2VEL0ErpbyQ9OoM4QXxsBUoYXdoeQ,399
|
|
24
|
-
csv_detective/formats/code_import.py,sha256=NI7kVUkkN8zmASEMNEqI8GkB_f__rgeV0bDw5hn9HCk,351
|
|
25
|
-
csv_detective/formats/code_postal.py,sha256=xsg8cRjYsaCncF1ruiCew2aXb8czsRle58JEW62bcsk,474
|
|
26
|
-
csv_detective/formats/code_region.py,sha256=UepGCeidR0OzMxP-fONPuRaUYTG-EFSpC0hUm6iiPSM,404
|
|
27
|
-
csv_detective/formats/code_rna.py,sha256=o6Kptrux6T2bSnWHi7MBCqIfVKbMMeN4dHlxxzkGesE,543
|
|
28
|
-
csv_detective/formats/code_waldec.py,sha256=j4-xpj_73c7IdgLoZJY_kRVj3HkpB7RFfGPN4NwPmVo,303
|
|
29
|
-
csv_detective/formats/commune.py,sha256=QVscVy5Ij9kdzKJgIG2aFC_v1IRsov5M9Zkj_SHDWgs,541
|
|
30
|
-
csv_detective/formats/csp_insee.py,sha256=y1w9zPQvijQi5v1Cuye0aX87ZVDC4FeFx1YC0dLqqp8,688
|
|
31
|
-
csv_detective/formats/date.py,sha256=caMMvcqkbON8Cxp9oDYZsfmkSXuu-PiiJi8YUbypBso,3167
|
|
32
|
-
csv_detective/formats/date_fr.py,sha256=YnNXSgT6QekfTUJoS5yuRX8LeK-fmVDgLgVP9cP0e4M,505
|
|
33
|
-
csv_detective/formats/datetime_aware.py,sha256=izKo6CA-MNIzmmM3Br4-FOESyqCS_YYK8N4V9D6CVEI,1909
|
|
34
|
-
csv_detective/formats/datetime_naive.py,sha256=DZ0apAm3vIy4cdm5DynAeRueI_8rhuHYQtAOZ5yyZ5k,1681
|
|
35
|
-
csv_detective/formats/datetime_rfc822.py,sha256=URyS-_5zyImWwY-IX3hSGueyCJfQkfvVDpD2UsDzW3g,627
|
|
36
|
-
csv_detective/formats/departement.py,sha256=ve_Pm6qyw1nDLDDN4YxADVASmD1dnJtgQpvY9lakyxE,810
|
|
37
|
-
csv_detective/formats/email.py,sha256=ED5YCOM1tEMb_ybkjDuuOErLXT1bsRgNq6WbDT_x-Ws,550
|
|
38
|
-
csv_detective/formats/float.py,sha256=8QzbeKklOkZSZjtiCfx-vyYs6lkyo1_w3gl8d__66CQ,1065
|
|
39
|
-
csv_detective/formats/geojson.py,sha256=jXKpTHnEsEH_0JJ393mgvF0IYP5AoarJC_-aTuNY_0k,785
|
|
40
|
-
csv_detective/formats/insee_ape700.py,sha256=WxblAHFCOIxigANQKV6mMjMwKauPGQmxqvk8DIKHD2Q,833
|
|
41
|
-
csv_detective/formats/insee_canton.py,sha256=_jTL47d46PosKO-1Kg5ak88QPCyeKkEP-noj1fCRMTE,545
|
|
42
|
-
csv_detective/formats/int.py,sha256=VSAPL0MCxTsdi2CgtHratd2e_7FSEZHTfdUgGnVuP5U,518
|
|
43
|
-
csv_detective/formats/iso_country_code_alpha2.py,sha256=4WI4vSVQOW5JURjN1lu32M8Mce5t29zshgjCPNB3dk8,644
|
|
44
|
-
csv_detective/formats/iso_country_code_alpha3.py,sha256=zcOb0UFECoMBuDXxvKwc46xQowyuWws_jxf5Aod8jTE,618
|
|
45
|
-
csv_detective/formats/iso_country_code_numeric.py,sha256=bpBUb5IQTU-s_-E81iq9dA6_ld7hrSY9jLi0fyWNNNo,617
|
|
46
|
-
csv_detective/formats/jour_de_la_semaine.py,sha256=5ScA-UU1EGP_WgycP3NpGVxTeKOyAZlNFOA2mtOuCdc,603
|
|
47
|
-
csv_detective/formats/json.py,sha256=5mCr50RvKFsbMQ-Ad1ORZ6UvOS9v3GUCh6z37mNT57I,534
|
|
48
|
-
csv_detective/formats/latitude_l93.py,sha256=PIXHXaOuVdlcYJKPvUJ5hcEF52U9an6Je5vQ-hyN4rs,813
|
|
49
|
-
csv_detective/formats/latitude_wgs.py,sha256=PNuoUJMxPsfpHhn3wN6q4HyTqNyFjfbfhfkQGyx9L4g,975
|
|
50
|
-
csv_detective/formats/latitude_wgs_fr_metropole.py,sha256=IOGF6j6I_eZS35rwRmZDt9XRrnJctlt_eUGsNVXIGlw,386
|
|
51
|
-
csv_detective/formats/latlon_wgs.py,sha256=sGKjKCqTCpitO-sv9qPrBJMaeyg8N57TUdvvS-OqWEo,1363
|
|
52
|
-
csv_detective/formats/longitude_l93.py,sha256=BnY8rx0ITlMI2x68yF_GjwwS72S-X_ZXD5V3W0Mgzg4,803
|
|
53
|
-
csv_detective/formats/longitude_wgs.py,sha256=3I5cflrVcfEH3SHd7BMK284t_-Y5C1AyOS480zDcfTI,1031
|
|
54
|
-
csv_detective/formats/longitude_wgs_fr_metropole.py,sha256=vhL_UBdqvEgrNdZ65A0jyga24OtthU-B4WUGEg9evpc,386
|
|
55
|
-
csv_detective/formats/lonlat_wgs.py,sha256=_LEbZoi-f79ez-CKvzY-HoXL8t648URcXC4DRdtjbEU,1082
|
|
56
|
-
csv_detective/formats/mois_de_lannee.py,sha256=BpzzAClRX-dbLwic42t4LAzceTtZFE236oPZT53EODs,765
|
|
57
|
-
csv_detective/formats/money.py,sha256=kv09gQjpsplDdPy5IZBrdtQDv1FB96Jpk22JGj5JEf4,432
|
|
58
|
-
csv_detective/formats/mongo_object_id.py,sha256=7UmSIHlYsaSpmv3rcyn-0eKdZr2qu2SssaihfWqBCYI,302
|
|
59
|
-
csv_detective/formats/pays.py,sha256=O1db12cOUPYV9tz_72s77xbLgb3eMCdgOR8k7ZkfznQ,662
|
|
60
|
-
csv_detective/formats/percent.py,sha256=RLV77E4B23rhaF5UMcvWQX9iv_ZNnx8Z9Q8Vz6yn-1Y,348
|
|
61
|
-
csv_detective/formats/region.py,sha256=6o-MZAjl2UZZDKdLnEEBHBcdukoIgrUJygpfVRB3PM0,1518
|
|
62
|
-
csv_detective/formats/sexe.py,sha256=_DQZVPhWLf-0O7bn21BQ9D3kNwkr8WqG6psN15kOC74,388
|
|
63
|
-
csv_detective/formats/siren.py,sha256=CLqIOMg8D5CFrJfdxV14JXoZDlS1qaEm3G5JoxiUmDY,734
|
|
64
|
-
csv_detective/formats/siret.py,sha256=Vi61w2-pX0wYwODWBJVHncLrCcY2xzpUpoJAsheJits,1023
|
|
65
|
-
csv_detective/formats/tel_fr.py,sha256=os5VYWKjrryUIWRRcKZV70Qy0Fvj14LpLVjzM57YaIo,520
|
|
66
|
-
csv_detective/formats/uai.py,sha256=_Mi86Iu9QIadDtFVfbR6nFa5nyy29vnLagkZ_9ct564,732
|
|
67
|
-
csv_detective/formats/url.py,sha256=m3i_XhFRFaAxSACS05XfciQ-oyTCsP_0TASShCY2t7A,1091
|
|
68
|
-
csv_detective/formats/username.py,sha256=6qviaFOtF2wg-gtvs0N8548JxFNE67Ue3a0JD0Kv7TQ,261
|
|
69
|
-
csv_detective/formats/uuid.py,sha256=LxkRZFAOlfig5KKrravO9bgyYjmRBegzOtGyzjopVNc,352
|
|
70
|
-
csv_detective/formats/year.py,sha256=tMc2HHr6Jga3PGWjmeHweK3G17DsjkIpIUUkCecXAm4,362
|
|
71
|
-
csv_detective/formats/data/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
72
|
-
csv_detective/formats/data/insee_ape700.txt,sha256=-_N-zAmcT7rK7ACRfsrM01Ton4_XtZGcNk-7lU28VHU,4397
|
|
73
|
-
csv_detective/formats/data/iso_country_code_alpha2.txt,sha256=mLt_qcQ6D8hfy9zdi7fAK_zON1ojReKlKMA8c2VDoRU,752
|
|
74
|
-
csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=XFPdGBsyZCBg4D8IDn6VgwsycCwYVfuqPbyHfNeqGv0,1003
|
|
75
|
-
csv_detective/formats/data/iso_country_code_numeric.txt,sha256=sdGpn0PqDMlc59-7prThkihHrf7mwB6j5uEHpxGvLFE,1003
|
|
76
|
-
csv_detective/output/__init__.py,sha256=ALSq_tgX7rGyh--7rmbKz8wHkmResN0h7mNujndow3w,2103
|
|
77
|
-
csv_detective/output/dataframe.py,sha256=juBMdj0eiL8c3OrJJ3kCf15Qs4-CFQfHqh91FnVbG9E,3656
|
|
78
|
-
csv_detective/output/example.py,sha256=8LWheSBYCeDFfarbnmzBrdCbTd8Alh1U4pfXMKfabOw,8630
|
|
79
|
-
csv_detective/output/profile.py,sha256=R9YMl-dANde69RXkFlZpvMDBsX7e1SyMAnlW8p1XNNM,4984
|
|
80
|
-
csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
|
|
81
|
-
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
82
|
-
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
|
-
csv_detective/parsing/columns.py,sha256=MFtEJFLsFdlKdM5AXtgXbf5p6HRW6DuOC4XnxhFMpIY,9344
|
|
84
|
-
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
85
|
-
csv_detective/parsing/csv.py,sha256=5rw6gXZFQC1T4NT9CnW0AumidrYOkF8kjrfWGmk949I,1716
|
|
86
|
-
csv_detective/parsing/excel.py,sha256=pX6dbhAdAdbRpoGcrGsL1lSaF-fbzEb4WcvwcCGEgFw,6978
|
|
87
|
-
csv_detective/parsing/load.py,sha256=1Fk43ikIOJwtWJUY-e8oNeNOk4MMtpmZV7s-VbQBS1k,4345
|
|
88
|
-
csv_detective/parsing/text.py,sha256=yDAcop5xJQc25UtbZcV0guHXAZQfm-H8WuJORTy8Rr8,1734
|
|
89
|
-
csv_detective-0.10.4.dev1.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
90
|
-
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
91
|
-
tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
|
|
92
|
-
tests/test_fields.py,sha256=DSI-ZXDcRt69iZArYZZAr_3OEb-qvwgOVBZxmYAKIkI,5918
|
|
93
|
-
tests/test_file.py,sha256=Ov9NGvZQxeoehxTpfcsnwEybebM0tnbmcRsFwe46cjg,15277
|
|
94
|
-
tests/test_labels.py,sha256=lgxRbLrGV1C-MkASf3KIQ120BG-UHzFQ4pqDWaeBvaw,539
|
|
95
|
-
tests/test_structure.py,sha256=XDbviuuvk-0Mu9Y9PI6He2e5hry2dXVJ6yBVwEqF_2o,1043
|
|
96
|
-
tests/test_validation.py,sha256=309k3Axgbp-1Wh6qvCj2BpeMBp3HXzLi5j9UKm1bRQs,5384
|
|
97
|
-
tests/data/a_test_file.csv,sha256=SOHjseGYqZer9yu3Bd3oS12Vw8MFsebo0BzrLZ_R4Cc,68871
|
|
98
|
-
tests/data/a_test_file.json,sha256=fB9bCpAMFPxFw8KxHRFlgRqjYG819QVGrCQWxQvwkvo,10542
|
|
99
|
-
tests/data/b_test_file.csv,sha256=wJGX62KhYjZi62De2XjZWClAzeRFEBsg3ET0IPX1BNU,98
|
|
100
|
-
tests/data/c_test_file.csv,sha256=dz6axMyFscHIWR2Brqia_jvlBfQ30l1rFrxvcTqsmJ8,36
|
|
101
|
-
tests/data/csv_file,sha256=nMAQx2PrQliu3czifCHXLyruZbvCNTyYqwZ4JYzImqA,70
|
|
102
|
-
tests/data/file.csv.gz,sha256=mfGfqG5mGlojCs05A0IF7IUZe5r87bAe2FuQ0Uh5ZMI,108
|
|
103
|
-
tests/data/file.ods,sha256=4dR7zWptz5djALIBVeWHQ20GaZNfA63fevIJGFIk1_U,11832
|
|
104
|
-
tests/data/file.xls,sha256=QYmNX3FF0QfcQSzYQMtaMJaepJf5EZpDa1miKc4wMdQ,21495
|
|
105
|
-
tests/data/file.xlsx,sha256=naWzL02PK4pdIjMzfEyfSW9GQhkYYd_e7bpJvB8Pb2w,8314
|
|
106
|
-
tests/data/xlsx_file,sha256=NyOyN_rIe7ryJuHQLqjxVdKCc8V4s5pxyHl6wWFykCM,8305
|
|
107
|
-
csv_detective-0.10.4.dev1.dist-info/METADATA,sha256=le1Rn1JIh8MoIf_RTc3Fi9DOOlvug4eR-Mwpw4AK0To,10925
|
|
108
|
-
csv_detective-0.10.4.dev1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
109
|
-
csv_detective-0.10.4.dev1.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
110
|
-
csv_detective-0.10.4.dev1.dist-info/top_level.txt,sha256=KDI4gyOpkmormGgUvSWrE3jen2e0unIsxR2b96DRvcw,25
|
|
111
|
-
csv_detective-0.10.4.dev1.dist-info/RECORD,,
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 data.gouv.fr
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
tests/__init__.py
DELETED
|
File without changes
|