csv-detective 0.8.1.dev1703__py3-none-any.whl → 0.8.1.dev1729__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/cli.py +6 -9
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +78 -78
- csv_detective/detect_fields/FR/geo/code_departement/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -1
- csv_detective/detect_fields/FR/geo/code_region/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/commune/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/departement/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +1 -2
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +1 -2
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/pays/__init__.py +6 -6
- csv_detective/detect_fields/FR/geo/region/__init__.py +6 -4
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +15 -14
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +4 -3
- csv_detective/detect_fields/FR/other/date_fr/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +4 -3
- csv_detective/detect_fields/FR/other/sexe/__init__.py +2 -2
- csv_detective/detect_fields/FR/other/siren/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/siret/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/tel_fr/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/uai/__init__.py +2 -2
- csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +15 -15
- csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +27 -27
- csv_detective/detect_fields/__init__.py +94 -43
- csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +5 -5
- csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +5 -5
- csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +5 -5
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/other/booleen/__init__.py +1 -1
- csv_detective/detect_fields/other/email/__init__.py +4 -2
- csv_detective/detect_fields/other/int/__init__.py +3 -3
- csv_detective/detect_fields/other/mongo_object_id/__init__.py +2 -2
- csv_detective/detect_fields/other/twitter/__init__.py +2 -2
- csv_detective/detect_fields/other/uuid/__init__.py +4 -5
- csv_detective/detect_fields/temp/date/__init__.py +3 -2
- csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +6 -6
- csv_detective/detect_fields/temp/year/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -1
- csv_detective/detect_labels/__init__.py +51 -1
- csv_detective/detect_labels/geo/lonlat_wgs/__init__.py +1 -0
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +1 -1
- csv_detective/detection/columns.py +9 -9
- csv_detective/detection/encoding.py +6 -4
- csv_detective/detection/engine.py +6 -5
- csv_detective/detection/formats.py +19 -19
- csv_detective/detection/headers.py +3 -5
- csv_detective/detection/rows.py +1 -1
- csv_detective/detection/variables.py +6 -7
- csv_detective/explore_csv.py +7 -8
- csv_detective/load_tests.py +7 -16
- csv_detective/output/__init__.py +3 -7
- csv_detective/output/dataframe.py +9 -5
- csv_detective/output/example.py +13 -13
- csv_detective/output/profile.py +30 -23
- csv_detective/output/schema.py +20 -23
- csv_detective/output/utils.py +15 -15
- csv_detective/parsing/columns.py +23 -12
- csv_detective/parsing/csv.py +1 -1
- csv_detective/parsing/excel.py +10 -11
- csv_detective/parsing/load.py +11 -8
- csv_detective/parsing/text.py +4 -9
- csv_detective/s3_utils.py +3 -7
- csv_detective/utils.py +4 -2
- csv_detective/validate.py +18 -13
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/METADATA +12 -2
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/RECORD +79 -79
- tests/test_example.py +2 -6
- tests/test_fields.py +16 -10
- tests/test_file.py +10 -9
- tests/test_labels.py +3 -2
- tests/test_structure.py +4 -3
- tests/test_validation.py +9 -6
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/WHEEL +0 -0
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/top_level.txt +0 -0
csv_detective/parsing/columns.py
CHANGED
|
@@ -28,6 +28,7 @@ def test_col_val(
|
|
|
28
28
|
# TODO : change for a cleaner method and only test columns in modules labels
|
|
29
29
|
def apply_test_func(serie: pd.Series, test_func: Callable, _range: int):
|
|
30
30
|
return serie.sample(n=_range).apply(test_func)
|
|
31
|
+
|
|
31
32
|
try:
|
|
32
33
|
if skipna:
|
|
33
34
|
serie = serie[serie.notnull()]
|
|
@@ -60,11 +61,13 @@ def test_col_val(
|
|
|
60
61
|
if verbose and time() - start > 3:
|
|
61
62
|
display_logs_depending_process_time(
|
|
62
63
|
f"\t/!\\ Column '{serie.name}' took too long ({round(time() - start, 3)}s)",
|
|
63
|
-
time() - start
|
|
64
|
+
time() - start,
|
|
64
65
|
)
|
|
65
66
|
|
|
66
67
|
|
|
67
|
-
def test_col_label(
|
|
68
|
+
def test_col_label(
|
|
69
|
+
label: str, test_func: Callable, proportion: float = 1, limited_output: bool = False
|
|
70
|
+
):
|
|
68
71
|
"""Tests label (from header) using test_func.
|
|
69
72
|
- proportion : indicates the minimum score to pass the test for the serie
|
|
70
73
|
to be detected as a certain format
|
|
@@ -76,7 +79,13 @@ def test_col_label(label: str, test_func: Callable, proportion: float = 1, limit
|
|
|
76
79
|
return result if result >= proportion else 0
|
|
77
80
|
|
|
78
81
|
|
|
79
|
-
def test_col(
|
|
82
|
+
def test_col(
|
|
83
|
+
table: pd.DataFrame,
|
|
84
|
+
all_tests: list,
|
|
85
|
+
limited_output: bool,
|
|
86
|
+
skipna: bool = True,
|
|
87
|
+
verbose: bool = False,
|
|
88
|
+
):
|
|
80
89
|
if verbose:
|
|
81
90
|
start = time()
|
|
82
91
|
logging.info("Testing columns to get types")
|
|
@@ -106,11 +115,13 @@ def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna:
|
|
|
106
115
|
)
|
|
107
116
|
if verbose:
|
|
108
117
|
display_logs_depending_process_time(
|
|
109
|
-
f'\t> Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
|
|
110
|
-
time() - start_type
|
|
118
|
+
f'\t> Done with type "{key}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(test_funcs)})',
|
|
119
|
+
time() - start_type,
|
|
111
120
|
)
|
|
112
121
|
if verbose:
|
|
113
|
-
display_logs_depending_process_time(
|
|
122
|
+
display_logs_depending_process_time(
|
|
123
|
+
f"Done testing columns in {round(time() - start, 3)}s", time() - start
|
|
124
|
+
)
|
|
114
125
|
return return_table
|
|
115
126
|
|
|
116
127
|
|
|
@@ -128,16 +139,16 @@ def test_label(table: pd.DataFrame, all_tests: list, limited_output: bool, verbo
|
|
|
128
139
|
if verbose:
|
|
129
140
|
start_type = time()
|
|
130
141
|
return_table.loc[key] = [
|
|
131
|
-
test_col_label(
|
|
132
|
-
col_name, value["func"], value["prop"], limited_output=limited_output
|
|
133
|
-
)
|
|
142
|
+
test_col_label(col_name, value["func"], value["prop"], limited_output=limited_output)
|
|
134
143
|
for col_name in table.columns
|
|
135
144
|
]
|
|
136
145
|
if verbose:
|
|
137
146
|
display_logs_depending_process_time(
|
|
138
|
-
f'\t- Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
|
|
139
|
-
time() - start_type
|
|
147
|
+
f'\t- Done with type "{key}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(test_funcs)})',
|
|
148
|
+
time() - start_type,
|
|
140
149
|
)
|
|
141
150
|
if verbose:
|
|
142
|
-
display_logs_depending_process_time(
|
|
151
|
+
display_logs_depending_process_time(
|
|
152
|
+
f"Done testing labels in {round(time() - start, 3)}s", time() - start
|
|
153
|
+
)
|
|
143
154
|
return return_table
|
csv_detective/parsing/csv.py
CHANGED
|
@@ -49,7 +49,7 @@ def parse_csv(
|
|
|
49
49
|
raise ValueError("Could not load file")
|
|
50
50
|
if verbose:
|
|
51
51
|
display_logs_depending_process_time(
|
|
52
|
-
f
|
|
52
|
+
f"Table parsed successfully in {round(time() - start, 3)}s",
|
|
53
53
|
time() - start,
|
|
54
54
|
)
|
|
55
55
|
return table, total_lines, nb_duplicates
|
csv_detective/parsing/excel.py
CHANGED
|
@@ -28,14 +28,13 @@ def parse_excel(
|
|
|
28
28
|
random_state: int = 42,
|
|
29
29
|
verbose: bool = False,
|
|
30
30
|
) -> tuple[pd.DataFrame, int, int, str, str, int]:
|
|
31
|
-
""""Excel-like parsing is really slow, could be a good improvement for future development"""
|
|
31
|
+
""" "Excel-like parsing is really slow, could be a good improvement for future development"""
|
|
32
32
|
if verbose:
|
|
33
33
|
start = time()
|
|
34
34
|
no_sheet_specified = sheet_name is None
|
|
35
35
|
|
|
36
|
-
if (
|
|
37
|
-
|
|
38
|
-
any([file_path.endswith(k) for k in NEW_EXCEL_EXT + OLD_EXCEL_EXT])
|
|
36
|
+
if engine in ["openpyxl", "xlrd"] or any(
|
|
37
|
+
[file_path.endswith(k) for k in NEW_EXCEL_EXT + OLD_EXCEL_EXT]
|
|
39
38
|
):
|
|
40
39
|
remote_content = None
|
|
41
40
|
if is_url(file_path):
|
|
@@ -50,7 +49,7 @@ def parse_excel(
|
|
|
50
49
|
if sheet_name is None:
|
|
51
50
|
if verbose:
|
|
52
51
|
display_logs_depending_process_time(
|
|
53
|
-
f
|
|
52
|
+
f"Detected {engine_to_file[engine]} file, no sheet specified, reading the largest one",
|
|
54
53
|
time() - start,
|
|
55
54
|
)
|
|
56
55
|
try:
|
|
@@ -58,8 +57,8 @@ def parse_excel(
|
|
|
58
57
|
# openpyxl doesn't want to open files that don't have a valid extension
|
|
59
58
|
# see: https://foss.heptapod.net/openpyxl/openpyxl/-/issues/2157
|
|
60
59
|
# if the file is remote, we have a remote content anyway so it's fine
|
|
61
|
-
if not remote_content and
|
|
62
|
-
with open(file_path,
|
|
60
|
+
if not remote_content and "." not in file_path.split("/")[-1]:
|
|
61
|
+
with open(file_path, "rb") as f:
|
|
63
62
|
remote_content = BytesIO(f.read())
|
|
64
63
|
# faster than loading all sheets
|
|
65
64
|
wb = openpyxl.load_workbook(remote_content or file_path, read_only=True)
|
|
@@ -82,7 +81,7 @@ def parse_excel(
|
|
|
82
81
|
# sometimes a xls file is recognized as ods
|
|
83
82
|
if verbose:
|
|
84
83
|
display_logs_depending_process_time(
|
|
85
|
-
|
|
84
|
+
"Could not read file with classic xls reader, trying with ODS",
|
|
86
85
|
time() - start,
|
|
87
86
|
)
|
|
88
87
|
engine = "odf"
|
|
@@ -95,7 +94,7 @@ def parse_excel(
|
|
|
95
94
|
if sheet_name is None:
|
|
96
95
|
if verbose:
|
|
97
96
|
display_logs_depending_process_time(
|
|
98
|
-
f
|
|
97
|
+
f"Detected {engine_to_file[engine]} file, no sheet specified, reading the largest one",
|
|
99
98
|
time() - start,
|
|
100
99
|
)
|
|
101
100
|
tables = pd.read_excel(
|
|
@@ -132,7 +131,7 @@ def parse_excel(
|
|
|
132
131
|
table = table.sample(num_rows, random_state=random_state)
|
|
133
132
|
if verbose:
|
|
134
133
|
display_logs_depending_process_time(
|
|
135
|
-
f
|
|
134
|
+
f"Table parsed successfully in {round(time() - start, 3)}s",
|
|
136
135
|
time() - start,
|
|
137
136
|
)
|
|
138
137
|
return table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx
|
|
@@ -163,7 +162,7 @@ def parse_excel(
|
|
|
163
162
|
table = table.sample(num_rows, random_state=random_state)
|
|
164
163
|
if verbose:
|
|
165
164
|
display_logs_depending_process_time(
|
|
166
|
-
f
|
|
165
|
+
f"Table parsed successfully in {round(time() - start, 3)}s",
|
|
167
166
|
time() - start,
|
|
168
167
|
)
|
|
169
168
|
return table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx
|
csv_detective/parsing/load.py
CHANGED
|
@@ -14,6 +14,7 @@ from csv_detective.detection.engine import (
|
|
|
14
14
|
from csv_detective.detection.headers import detect_headers
|
|
15
15
|
from csv_detective.detection.separator import detect_separator
|
|
16
16
|
from csv_detective.utils import is_url
|
|
17
|
+
|
|
17
18
|
from .compression import unzip
|
|
18
19
|
from .csv import parse_csv
|
|
19
20
|
from .excel import (
|
|
@@ -30,9 +31,9 @@ def load_file(
|
|
|
30
31
|
verbose: bool = False,
|
|
31
32
|
sheet_name: Optional[Union[str, int]] = None,
|
|
32
33
|
) -> tuple[pd.DataFrame, dict]:
|
|
33
|
-
file_name = file_path.split(
|
|
34
|
+
file_name = file_path.split("/")[-1]
|
|
34
35
|
engine = None
|
|
35
|
-
if
|
|
36
|
+
if "." not in file_name or not file_name.endswith("csv"):
|
|
36
37
|
# file has no extension, we'll investigate how to read it
|
|
37
38
|
engine = detect_engine(file_path, verbose=verbose)
|
|
38
39
|
|
|
@@ -88,10 +89,12 @@ def load_file(
|
|
|
88
89
|
"heading_columns": heading_columns,
|
|
89
90
|
"trailing_columns": trailing_columns,
|
|
90
91
|
}
|
|
91
|
-
analysis.update(
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
92
|
+
analysis.update(
|
|
93
|
+
{
|
|
94
|
+
"header_row_idx": header_row_idx,
|
|
95
|
+
"header": header,
|
|
96
|
+
"total_lines": total_lines,
|
|
97
|
+
"nb_duplicates": nb_duplicates,
|
|
98
|
+
}
|
|
99
|
+
)
|
|
97
100
|
return table, analysis
|
csv_detective/parsing/text.py
CHANGED
|
@@ -2,9 +2,7 @@ from re import finditer
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
def camel_case_split(identifier: str):
|
|
5
|
-
matches = finditer(
|
|
6
|
-
".+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)", identifier
|
|
7
|
-
)
|
|
5
|
+
matches = finditer(".+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)", identifier)
|
|
8
6
|
return " ".join([m.group(0) for m in matches])
|
|
9
7
|
|
|
10
8
|
|
|
@@ -46,15 +44,12 @@ def header_score(header: str, words_combinations_list: list[str]) -> float:
|
|
|
46
44
|
processed_header = _process_text(header)
|
|
47
45
|
|
|
48
46
|
header_matches_words_combination = float(
|
|
49
|
-
any(
|
|
50
|
-
words_combination == processed_header for words_combination in words_combinations_list
|
|
51
|
-
)
|
|
47
|
+
any(words_combination == processed_header for words_combination in words_combinations_list)
|
|
52
48
|
)
|
|
53
49
|
words_combination_in_header = 0.5 * (
|
|
54
50
|
any(
|
|
55
|
-
is_word_in_string(
|
|
56
|
-
|
|
57
|
-
) for words_combination in words_combinations_list
|
|
51
|
+
is_word_in_string(words_combination, processed_header)
|
|
52
|
+
for words_combination in words_combinations_list
|
|
58
53
|
)
|
|
59
54
|
)
|
|
60
55
|
|
csv_detective/s3_utils.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import boto3
|
|
2
1
|
import logging
|
|
3
2
|
|
|
3
|
+
import boto3
|
|
4
4
|
from botocore.client import Config
|
|
5
5
|
from botocore.exceptions import ClientError
|
|
6
6
|
|
|
@@ -27,9 +27,7 @@ def download_from_minio(
|
|
|
27
27
|
s3 = get_s3_client(netloc, minio_user, minio_pwd)
|
|
28
28
|
try:
|
|
29
29
|
s3.download_file(bucket, key, filepath)
|
|
30
|
-
logging.info(
|
|
31
|
-
f"Resource downloaded from minio at {get_minio_url(netloc, bucket, key)}"
|
|
32
|
-
)
|
|
30
|
+
logging.info(f"Resource downloaded from minio at {get_minio_url(netloc, bucket, key)}")
|
|
33
31
|
except ClientError as e:
|
|
34
32
|
logging.error(e)
|
|
35
33
|
|
|
@@ -41,8 +39,6 @@ def upload_to_minio(
|
|
|
41
39
|
s3 = get_s3_client(netloc, minio_user, minio_pwd)
|
|
42
40
|
try:
|
|
43
41
|
s3.upload_file(filepath, bucket, key)
|
|
44
|
-
logging.info(
|
|
45
|
-
f"Resource saved into minio at {get_minio_url(netloc, bucket, key)}"
|
|
46
|
-
)
|
|
42
|
+
logging.info(f"Resource saved into minio at {get_minio_url(netloc, bucket, key)}")
|
|
47
43
|
except ClientError as e:
|
|
48
44
|
logging.error(e)
|
csv_detective/utils.py
CHANGED
|
@@ -4,7 +4,9 @@ from typing import Optional
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
6
|
logging.basicConfig(level=logging.INFO)
|
|
7
|
-
logging.addLevelName(
|
|
7
|
+
logging.addLevelName(
|
|
8
|
+
logging.CRITICAL, "\033[1;41m%s\033[1;0m" % logging.getLevelName(logging.CRITICAL)
|
|
9
|
+
)
|
|
8
10
|
logging.addLevelName(logging.WARN, "\033[1;31m%s\033[1;0m" % logging.getLevelName(logging.WARN))
|
|
9
11
|
|
|
10
12
|
THRESHOLD_WARN = 1
|
|
@@ -26,7 +28,7 @@ def display_logs_depending_process_time(prompt: str, duration: float) -> None:
|
|
|
26
28
|
def is_url(file_path: str) -> bool:
|
|
27
29
|
# could be more sophisticated if needed
|
|
28
30
|
# using the URL detection test was considered but too broad (schema required to use requests)
|
|
29
|
-
return file_path.startswith(
|
|
31
|
+
return file_path.startswith("http")
|
|
30
32
|
|
|
31
33
|
|
|
32
34
|
def prevent_nan(value: float) -> Optional[float]:
|
csv_detective/validate.py
CHANGED
|
@@ -4,8 +4,8 @@ from typing import Optional, Union
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
6
|
from csv_detective.load_tests import return_all_tests
|
|
7
|
-
from csv_detective.parsing.load import load_file
|
|
8
7
|
from csv_detective.parsing.columns import test_col_val
|
|
8
|
+
from csv_detective.parsing.load import load_file
|
|
9
9
|
|
|
10
10
|
logging.basicConfig(level=logging.INFO)
|
|
11
11
|
|
|
@@ -47,9 +47,8 @@ def validate(
|
|
|
47
47
|
if verbose:
|
|
48
48
|
logging.info("Comparing table with the previous analysis")
|
|
49
49
|
logging.info("- Checking if all columns match")
|
|
50
|
-
if (
|
|
51
|
-
|
|
52
|
-
or any(col_name not in previous_analysis["header"] for col_name in analysis["header"])
|
|
50
|
+
if any(col_name not in analysis["header"] for col_name in previous_analysis["header"]) or any(
|
|
51
|
+
col_name not in previous_analysis["header"] for col_name in analysis["header"]
|
|
53
52
|
):
|
|
54
53
|
if verbose:
|
|
55
54
|
logging.warning("> Columns do not match, proceeding with full analysis")
|
|
@@ -72,12 +71,18 @@ def validate(
|
|
|
72
71
|
return False, table, analysis
|
|
73
72
|
if verbose:
|
|
74
73
|
logging.info("> All checks successful")
|
|
75
|
-
return
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
74
|
+
return (
|
|
75
|
+
True,
|
|
76
|
+
table,
|
|
77
|
+
analysis
|
|
78
|
+
| {
|
|
79
|
+
k: previous_analysis[k]
|
|
80
|
+
for k in [
|
|
81
|
+
"categorical",
|
|
82
|
+
"columns",
|
|
83
|
+
"columns_fields",
|
|
84
|
+
"columns_labels",
|
|
85
|
+
"formats",
|
|
86
|
+
]
|
|
87
|
+
},
|
|
88
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: csv-detective
|
|
3
|
-
Version: 0.8.1.
|
|
3
|
+
Version: 0.8.1.dev1729
|
|
4
4
|
Summary: Detect tabular files column content
|
|
5
5
|
Author-email: Etalab <opendatateam@data.gouv.fr>
|
|
6
6
|
License: MIT
|
|
@@ -27,6 +27,7 @@ Provides-Extra: dev
|
|
|
27
27
|
Requires-Dist: pytest>=8.3.0; extra == "dev"
|
|
28
28
|
Requires-Dist: responses>=0.25.0; extra == "dev"
|
|
29
29
|
Requires-Dist: bumpx>=0.3.10; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.9.3; extra == "dev"
|
|
30
31
|
Dynamic: license-file
|
|
31
32
|
|
|
32
33
|
# CSV Detective
|
|
@@ -211,12 +212,21 @@ Organisations such as [data.gouv.fr](http://data.gouv.fr) aggregate huge amounts
|
|
|
211
212
|
|
|
212
213
|
An early version of this analysis of all resources on data.gouv.fr can be found [here](https://github.com/Leobouloc/data.gouv-exploration).
|
|
213
214
|
|
|
215
|
+
## Linting
|
|
216
|
+
|
|
217
|
+
Remember to format, lint, and sort imports with [Ruff](https://docs.astral.sh/ruff/) before committing (checks will remind you anyway):
|
|
218
|
+
```bash
|
|
219
|
+
pip install .[dev]
|
|
220
|
+
ruff check --fix .
|
|
221
|
+
ruff format .
|
|
222
|
+
```
|
|
223
|
+
|
|
214
224
|
## Release
|
|
215
225
|
|
|
216
226
|
The release process uses `bumpx`.
|
|
217
227
|
|
|
218
228
|
```shell
|
|
219
|
-
pip install -
|
|
229
|
+
pip install -e .[dev]
|
|
220
230
|
```
|
|
221
231
|
|
|
222
232
|
### Process
|
|
@@ -1,78 +1,78 @@
|
|
|
1
1
|
csv_detective/__init__.py,sha256=XY7pnoNHlocvyUiK8EQpJYPSQt5BRWWJD8KiPlvI9pU,164
|
|
2
|
-
csv_detective/cli.py,sha256=
|
|
3
|
-
csv_detective/explore_csv.py,sha256=
|
|
4
|
-
csv_detective/load_tests.py,sha256=
|
|
5
|
-
csv_detective/s3_utils.py,sha256=
|
|
6
|
-
csv_detective/utils.py,sha256=
|
|
7
|
-
csv_detective/validate.py,sha256=
|
|
8
|
-
csv_detective/detect_fields/__init__.py,sha256=
|
|
2
|
+
csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
|
|
3
|
+
csv_detective/explore_csv.py,sha256=sEMza4Z27ac88fGq7tUiK1zlfvuftztHhHVoa0c2EVU,9191
|
|
4
|
+
csv_detective/load_tests.py,sha256=uVKweLq3cf-yB5ZZI-m9tBVs_SWNcOw8sDJa97TOJGo,2266
|
|
5
|
+
csv_detective/s3_utils.py,sha256=z1KTVVkdurMv21o-rZu7_aluMJnSi-d5uxnQbqT2NoI,1407
|
|
6
|
+
csv_detective/utils.py,sha256=u9I1tsyMfVr2eIYiGCD7Iu30d55H3za44-N3cV2nj8M,1013
|
|
7
|
+
csv_detective/validate.py,sha256=RLHXLrRuynkdcvHUlSEbyglPvdbNYlT1Z4nQI-BdYdA,2898
|
|
8
|
+
csv_detective/detect_fields/__init__.py,sha256=ZZ7u9zsMtCqPC2xxeLp57UTCbqpKFJi6D_LO1ew15BU,1980
|
|
9
9
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=
|
|
11
|
+
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=Q5tVRMW5QdFLfiNm42JmIwNRuBR5ZI3dQhzHPzXVnzo,1676
|
|
12
12
|
csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=tfHdqUnCQ0cv-fBo3Cy--8UNXzgjld4kseI5eQ_sR4E,187
|
|
13
|
-
csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=
|
|
13
|
+
csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=DwgDopvfoUmOdDLsFKHGtufM3PG5ahwiLFRrDimaDNM,379
|
|
14
14
|
csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=27bCkZP5w7tpsKUdOIXuiAG90DTdw066CWg3G5HtsKE,160
|
|
15
|
-
csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=
|
|
16
|
-
csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=
|
|
17
|
-
csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=
|
|
18
|
-
csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=
|
|
19
|
-
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=
|
|
20
|
-
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=
|
|
21
|
-
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=
|
|
22
|
-
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=
|
|
23
|
-
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=
|
|
24
|
-
csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=
|
|
25
|
-
csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=
|
|
15
|
+
csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=yjR6ob_h9fd5sa1YH6P0UbCsrHjdBGjsPIx02SHwlfE,133
|
|
16
|
+
csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=9pR2tVS4J2KrytCVuh-R86HGRMWutIK9FVQ30wKfCPg,253
|
|
17
|
+
csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=5vw4zjlmWaR2djxuQOUrmwsNIc9HgAE-zdxwerVR3S0,380
|
|
18
|
+
csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=UsMEW1EVVgnw-daOc1jBkEaGKvqTONSAGnj1s3QgM8w,400
|
|
19
|
+
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=YsAGiblFexBxvu_E3XaXhy_bordc6c1oKPgDzTsDeXw,374
|
|
20
|
+
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=RjkDSZzIbp4nnvDpa5GomDpyIJGvwErX7TgC4dlBJ14,437
|
|
21
|
+
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=7xmYpTYoHvFfcuocAhm6dP_j4sMII_hG1PMSrWId4FY,344
|
|
22
|
+
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=JbKuGK5UmUGAQKPFpN4RSLf3axJ5D1aCjzRXYHW-iXU,441
|
|
23
|
+
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=5VWDaHZvGhJAJu5XQrj6gLx5CVA9dNOE30eTXQ3pSf0,344
|
|
24
|
+
csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=85y-5qNRAWJrKqL0wh9iPMUBQjvPwc9lv1cYB2m0daQ,364
|
|
25
|
+
csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=6mJRaGsCPBY5JHHe8EWxEjDpAOIfvBPTaZKJb3_n3gU,1077
|
|
26
26
|
csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=
|
|
27
|
+
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=8f9n4F7T8Q44z4-sQL7d1OgvLObUPwC7D0iDLhHu8KQ,568
|
|
28
28
|
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
|
|
29
29
|
csv_detective/detect_fields/FR/other/code_import/__init__.py,sha256=zJ9YfPa5p--uHNQFeO1gTjxDy2Um_r-MxQd29VBNjFw,243
|
|
30
30
|
csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z0RjMBt1--ZL7Jd1RsHAQCCbTAQk_BnlnTq8VF1o_VA,146
|
|
31
31
|
csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=41SYNzCzUFh4trQlwG-9UC0-1Wi4fTcv8Byi_dd9Lq4,168
|
|
32
|
-
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=
|
|
32
|
+
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=cKIldBWb37pqBeKuV5jgAlRHeF9SyqlRL4n-qfGMZGI,497
|
|
33
33
|
csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
34
|
-
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=
|
|
35
|
-
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=
|
|
34
|
+
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=Ewi9u7jcYsxhqu2al8aEVYQ8dO9H7GmRjo_l8BYt0j0,284
|
|
35
|
+
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=u6Ri4ntWrDPYezsVlwpRTbzU8xsDfkJYGdOE2spkQpQ,520
|
|
36
36
|
csv_detective/detect_fields/FR/other/insee_ape700/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
|
|
37
|
-
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=
|
|
38
|
-
csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=
|
|
39
|
-
csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=
|
|
40
|
-
csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=
|
|
41
|
-
csv_detective/detect_fields/FR/other/uai/__init__.py,sha256=
|
|
37
|
+
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=dPVjgD3QBe4PUA4Bl_YDxZqFObF8KcoDz6zDYH2qfnk,269
|
|
38
|
+
csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=7wpSq4eRfYC2p711Me1XCY64PIWyK_TJNw3lidxuzJE,442
|
|
39
|
+
csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=YJPXYnzKJ4Y8XuBf1lRrLkImrZ6D7zitKl0KPry4CcU,707
|
|
40
|
+
csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=zXVRu80ehUulhhxu1FTWoOK81CaSr7MfTh4HJEYdEKA,343
|
|
41
|
+
csv_detective/detect_fields/FR/other/uai/__init__.py,sha256=mglrlTSBKYnGUOfGVM-xyk5KqUvQIFIjaoj31CO36zo,327
|
|
42
42
|
csv_detective/detect_fields/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
|
-
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py,sha256=
|
|
44
|
-
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py,sha256=
|
|
43
|
+
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py,sha256=ax34EqC712WT5JqiAKBWz6L7vmVpLNWmBF2wmjUUFiM,396
|
|
44
|
+
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py,sha256=Z59nO-UpIrUT9ZaQ6MuPQLFbu8AE0gYdkSleAj4WX_k,582
|
|
45
45
|
csv_detective/detect_fields/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py,sha256=
|
|
46
|
+
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py,sha256=X5kUggATKRJItJLaSDpv4MQPwo49iGBwlwQQjLTe77E,433
|
|
47
47
|
csv_detective/detect_fields/geo/iso_country_code_alpha2/iso_country_code_alpha2.txt,sha256=YyPlDqCdz65ecf4Wes_r0P4rDSJG35niXtjc4MmctXM,1740
|
|
48
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=
|
|
48
|
+
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=JvFLoQeJdbw5VYXUZqD9vsp0LQDoFE2Sd5gPA6K-0Lo,409
|
|
49
49
|
csv_detective/detect_fields/geo/iso_country_code_alpha3/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
|
|
50
|
-
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=
|
|
50
|
+
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=AnAridM4C8hcm4PeNdr8969czgrzM4KemGVZWAJSM1U,436
|
|
51
51
|
csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
|
|
52
52
|
csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=6wlwlxQmsVIZ21g-THvH3nBj-I8FuoF2sBlZAoEMGiQ,393
|
|
53
|
-
csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=
|
|
53
|
+
csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=sdor-L1WDHv5opg1Le13mru4ImSA-yEbxchlWENuUFE,327
|
|
54
54
|
csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=IXDTqD4YFUJYI1FYZ5ZfkqXY6KvNY7sgBVFRAvgTHtI,454
|
|
55
|
-
csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=
|
|
55
|
+
csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=gPnNTe-L9xjBVE-30VCJiK6IVZttj6Cy6zu1IL5907Y,330
|
|
56
56
|
csv_detective/detect_fields/geo/lonlat_wgs/__init__.py,sha256=CnBMYevfGdhBvureF3oc_zqT-RZjG419iAuUlugQFLc,454
|
|
57
57
|
csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
|
-
csv_detective/detect_fields/other/booleen/__init__.py,sha256=
|
|
59
|
-
csv_detective/detect_fields/other/email/__init__.py,sha256=
|
|
58
|
+
csv_detective/detect_fields/other/booleen/__init__.py,sha256=37ZUJACrZA9FQBYLDeVJGze7_I9x-ZWv5yWuBcqHcwI,497
|
|
59
|
+
csv_detective/detect_fields/other/email/__init__.py,sha256=p235wILf0fR9TeSEuyuPgoysAv9zg23a4vzdy3YJlxE,192
|
|
60
60
|
csv_detective/detect_fields/other/float/__init__.py,sha256=AT4Kpgwoz5PuAoLx00u0SL8DjjXZxsE8zSRbN18uAv4,578
|
|
61
|
-
csv_detective/detect_fields/other/int/__init__.py,sha256=
|
|
61
|
+
csv_detective/detect_fields/other/int/__init__.py,sha256=4SQAgaYTafeBL6hdT7Wp_xwcRNQsOWlYjaXKl78EuDw,320
|
|
62
62
|
csv_detective/detect_fields/other/json/__init__.py,sha256=AkRWZAidEM1dWkVRFThEBI5M7kMUu5Yu12iCViGM8lU,310
|
|
63
63
|
csv_detective/detect_fields/other/money/__init__.py,sha256=g_ZwBZXl9LhldwFYQotC5WqLiE8qQCZHtoI9eJvl_9M,232
|
|
64
|
-
csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=
|
|
64
|
+
csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=aZqxdbWzrL-syADA3_uYcOWcIuelvsnLzPLBEnkKJ8w,156
|
|
65
65
|
csv_detective/detect_fields/other/percent/__init__.py,sha256=vgpekNOPBRuunoVBXMi81rwHv4uSOhe78pbVtQ5SBO8,177
|
|
66
|
-
csv_detective/detect_fields/other/twitter/__init__.py,sha256=
|
|
66
|
+
csv_detective/detect_fields/other/twitter/__init__.py,sha256=Npu6ZbyNfHq1y7xn0Gd62GbOcyz4WNq82FrFSKb547w,154
|
|
67
67
|
csv_detective/detect_fields/other/url/__init__.py,sha256=L7h9fZldh1w86XwCx0x3Q1TXSJ_nIId1C-l1yFzZYrA,299
|
|
68
|
-
csv_detective/detect_fields/other/uuid/__init__.py,sha256=
|
|
68
|
+
csv_detective/detect_fields/other/uuid/__init__.py,sha256=XFxbIsdIhRw0dtFxBXQBhicE4yy7P4jmwYXeJhq6FVY,215
|
|
69
69
|
csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
|
-
csv_detective/detect_fields/temp/date/__init__.py,sha256=
|
|
70
|
+
csv_detective/detect_fields/temp/date/__init__.py,sha256=uVOszufihKqiQmS0wz7nUuQ2Dz-Tq9fSk1nf3S00mg4,1010
|
|
71
71
|
csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=bEfWvXx_GNCRUxMGJYqfOK4wRDr3WMaGVAmIa_C2pXE,853
|
|
72
72
|
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=GtQo55SrrXfoT-L7ZXW63jrlAYvNT5m56wMfhuY3pyI,836
|
|
73
|
-
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256
|
|
74
|
-
csv_detective/detect_fields/temp/year/__init__.py,sha256=
|
|
75
|
-
csv_detective/detect_labels/__init__.py,sha256=
|
|
73
|
+
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=-pFdIIPgaLq2_QbFJ9zwy4YIwZuC73F0A_cNDntTuvQ,512
|
|
74
|
+
csv_detective/detect_fields/temp/year/__init__.py,sha256=gHchVciZExbGZLMBcbBaDXB0IgGptkQc4RhfSOMY0Ww,194
|
|
75
|
+
csv_detective/detect_labels/__init__.py,sha256=93s93DRNeFw9fJiGp0rW3iRWZX3WOeVau2PAaF4QlPE,1777
|
|
76
76
|
csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
77
|
csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
78
|
csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=fNWFW-Wo3n6azDBfmi0J0qnzP-p2StLxCc9eNiE9NNE,346
|
|
@@ -100,7 +100,7 @@ csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=N7LzmtNwZER
|
|
|
100
100
|
csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=ZWhc8S9L1X2fFh2g5Ja-LuhsfHg_lALKrur6yDnGDPk,238
|
|
101
101
|
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=g7Y7IvW9VKO528z1MSPxfFtRB7kQXSiG7QQ-VZRfFEk,386
|
|
102
102
|
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=-gvdxUnv3LRfje60ljC4F3B2c1LBcWfV3zZbV3VJZ08,323
|
|
103
|
-
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=
|
|
103
|
+
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=4jIZ9cmN73XhP4ayGcEMcB_y0X45oRk1Lq2p_pNfgok,426
|
|
104
104
|
csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=5L6JowK9y6y9uZNg6hWzknMSzh0SurkwQeTINNKTdYY,599
|
|
105
105
|
csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
106
|
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=Vmv7Hp6LxR-bh3aXOBCHYzJVyCHtGoiWzJ40xnfTvdA,357
|
|
@@ -113,14 +113,14 @@ csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=On8VOCDD0EspZra6
|
|
|
113
113
|
csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=ME_KjniqDSdAwXP7XnKXyr5IA75KrGSLIhvPNfsux6E,664
|
|
114
114
|
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=tDndlFyEM7qKS3ATxp0Xs0FsPsOPpRWhDe1ockbWw8s,923
|
|
115
115
|
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=_8IV2FLtrOjzhQNsk-fsgc9-jbAgzKDVMr4tXu2P-s4,429
|
|
116
|
-
csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=
|
|
116
|
+
csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=7gbumJFp5xhz4GZ4uTAJQoxw5D53WJZddptyANmdEws,346
|
|
117
117
|
csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
118
|
csv_detective/detect_labels/other/booleen/__init__.py,sha256=zEkarex7L4T3vmYjR5hdhtnhugTVDsvkgG_it6nN0aA,214
|
|
119
119
|
csv_detective/detect_labels/other/email/__init__.py,sha256=Poagn45-eC2a_Wdk5Qs6d2BgYdncCQKZp2yEB50IuNw,431
|
|
120
120
|
csv_detective/detect_labels/other/float/__init__.py,sha256=X0axZN2GAfC_y01zRfIyvOfRsOy2KNQcQ-mlQAKxqT4,216
|
|
121
121
|
csv_detective/detect_labels/other/int/__init__.py,sha256=_1AY7thEBCcgSBQQ2YbY4YaPaxGRQ71BtmaFaX088ig,215
|
|
122
122
|
csv_detective/detect_labels/other/money/__init__.py,sha256=1JRArDZ5r6gtyuKijH_fuuVFVc0f3MN5gPyAf4GPqzs,249
|
|
123
|
-
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256
|
|
123
|
+
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=-NsB_Glm6KRGmIusAY9YoGPrdws6RwkYRPUiJUUPv3Y,209
|
|
124
124
|
csv_detective/detect_labels/other/twitter/__init__.py,sha256=96WhOB6nOutzSFOC5ZJYFSlhHDJRn2SkT4nYNj8E6ww,241
|
|
125
125
|
csv_detective/detect_labels/other/url/__init__.py,sha256=4Ajpdp8W0jS9aHZAAMyUlgefjSgpB7Y6ci29KNkwAoI,485
|
|
126
126
|
csv_detective/detect_labels/other/uuid/__init__.py,sha256=kXVb4oMy-Zv-OYmAIEoNFrBA20l9hbUTdvTfjeMmhjk,213
|
|
@@ -129,40 +129,40 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=w0eeZIseAmPwL4OvCWzZXbx
|
|
|
129
129
|
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=53ysj7QgsxXwG1le3zfSJd1oaTTf-Er3jBeYi_A4F9g,458
|
|
130
130
|
csv_detective/detect_labels/temp/year/__init__.py,sha256=7uWaCZY7dOG7nolW46IgBWmcu8K-9jPED-pOlMlErfo,433
|
|
131
131
|
csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
132
|
-
csv_detective/detection/columns.py,sha256=
|
|
133
|
-
csv_detective/detection/encoding.py,sha256=
|
|
134
|
-
csv_detective/detection/engine.py,sha256=
|
|
135
|
-
csv_detective/detection/formats.py,sha256=
|
|
136
|
-
csv_detective/detection/headers.py,sha256=
|
|
137
|
-
csv_detective/detection/rows.py,sha256=
|
|
132
|
+
csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
|
|
133
|
+
csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
|
|
134
|
+
csv_detective/detection/engine.py,sha256=1Z4vzjxwPRZ9-vv8nw-zU2sgBZtOsEz0UoKjGaSwVJU,1543
|
|
135
|
+
csv_detective/detection/formats.py,sha256=dzJPdi2rP2jTHZBk9UHpJL3c5N-PSohCymHs-OZt45c,6211
|
|
136
|
+
csv_detective/detection/headers.py,sha256=y5iR4jWH5fUtAH_Zg0zxWSVG_INCHlXJFMbhPpI2YMo,1148
|
|
137
|
+
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
138
138
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
139
|
-
csv_detective/detection/variables.py,sha256=
|
|
140
|
-
csv_detective/output/__init__.py,sha256=
|
|
141
|
-
csv_detective/output/dataframe.py,sha256=
|
|
142
|
-
csv_detective/output/example.py,sha256=
|
|
143
|
-
csv_detective/output/profile.py,sha256=
|
|
144
|
-
csv_detective/output/schema.py,sha256=
|
|
145
|
-
csv_detective/output/utils.py,sha256=
|
|
139
|
+
csv_detective/detection/variables.py,sha256=wfsA_MOk14TPMOY7gkvpTGpo9-USzMnFaAou3MPHqxc,3536
|
|
140
|
+
csv_detective/output/__init__.py,sha256=f-UFv_iULpVF_Fy39H4sfACEnrthjK4N3mCAVPkjnKw,1860
|
|
141
|
+
csv_detective/output/dataframe.py,sha256=UpLuSxx_SFbKpem1n-xY7jF16MXGpKQYEWjaSMIiB4s,2215
|
|
142
|
+
csv_detective/output/example.py,sha256=XrnPS_uC0cICn7tgnLWNctpUbnPzl7fIMzNTzJEWGJc,8655
|
|
143
|
+
csv_detective/output/profile.py,sha256=Jeh0mrfH_hAVxV2E5I4XzdCm7ZAGAV_Xj3AXOi77lcA,3130
|
|
144
|
+
csv_detective/output/schema.py,sha256=5Duw5qnsJ-LaVC6JgF7p1zZAkehDzsbXA4iTSJUgLNM,13760
|
|
145
|
+
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
146
146
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
|
-
csv_detective/parsing/columns.py,sha256=
|
|
147
|
+
csv_detective/parsing/columns.py,sha256=fbvQMu12gAmz4TnNCL7pLnMFB-mWN_O-zEoj8jEGj0A,5696
|
|
148
148
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
149
|
-
csv_detective/parsing/csv.py,sha256=
|
|
150
|
-
csv_detective/parsing/excel.py,sha256=
|
|
151
|
-
csv_detective/parsing/load.py,sha256=
|
|
152
|
-
csv_detective/parsing/text.py,sha256=
|
|
153
|
-
csv_detective-0.8.1.
|
|
149
|
+
csv_detective/parsing/csv.py,sha256=qZFLOT3YCPoHF0svfVfQBnS8eHtucjDZ7dFITAPgLhc,1626
|
|
150
|
+
csv_detective/parsing/excel.py,sha256=ULUDw76z6hs1Xm2yL9KBM0EOvIsfBLkxwqTZfDEx6aE,7045
|
|
151
|
+
csv_detective/parsing/load.py,sha256=C3M8nvgWenOb8aDFi5dpDGCoAw9EBqr4EB63zbz2M14,3699
|
|
152
|
+
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
153
|
+
csv_detective-0.8.1.dev1729.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
154
154
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
|
-
tests/test_example.py,sha256=
|
|
156
|
-
tests/test_fields.py,sha256=
|
|
157
|
-
tests/test_file.py,sha256=
|
|
158
|
-
tests/test_labels.py,sha256=
|
|
159
|
-
tests/test_structure.py,sha256=
|
|
160
|
-
tests/test_validation.py,sha256=
|
|
155
|
+
tests/test_example.py,sha256=iO4RxMHZxnBAiKm6fsFar5OVg8hYKnqNZCw0SUnEuQQ,1972
|
|
156
|
+
tests/test_fields.py,sha256=Y2mBfV9ZdxTHYwHnkzGbpo1k_qJRLC8nU-zzAUxFmAE,11964
|
|
157
|
+
tests/test_file.py,sha256=YuVbSfeo_ASPiLT8CyxXqJENcDpj4wAFXzLwu_GzsOA,8437
|
|
158
|
+
tests/test_labels.py,sha256=Y0XlOpztCyV65pk7iAS_nMMfdysoBujlBmz10vHul9A,469
|
|
159
|
+
tests/test_structure.py,sha256=GRDYKy0UcdqlN4qglzsRC0puFj5cb-SVvONjvcPvtAA,1400
|
|
160
|
+
tests/test_validation.py,sha256=ie-Xf0vk6-M6GQq-x7kY5yse1EmXfxQkbaV7fR3fvYo,3308
|
|
161
161
|
venv/bin/activate_this.py,sha256=NRy3waFmwW1pOaNUp33wNN0vD1Kzkd-zXX-Sgl4EiVI,1286
|
|
162
162
|
venv/bin/jp.py,sha256=7z7dvRg0M7HzpZG4ssQID7nScjvQx7bcYTxJWDOrS6E,1717
|
|
163
163
|
venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
|
|
164
|
-
csv_detective-0.8.1.
|
|
165
|
-
csv_detective-0.8.1.
|
|
166
|
-
csv_detective-0.8.1.
|
|
167
|
-
csv_detective-0.8.1.
|
|
168
|
-
csv_detective-0.8.1.
|
|
164
|
+
csv_detective-0.8.1.dev1729.dist-info/METADATA,sha256=d8206Q0vrz70oOi2MG0ECreuwWkNUcCtkU_bi9HBFMI,9767
|
|
165
|
+
csv_detective-0.8.1.dev1729.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
166
|
+
csv_detective-0.8.1.dev1729.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
167
|
+
csv_detective-0.8.1.dev1729.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
|
|
168
|
+
csv_detective-0.8.1.dev1729.dist-info/RECORD,,
|
tests/test_example.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
|
|
2
1
|
import re
|
|
3
2
|
from uuid import UUID
|
|
3
|
+
|
|
4
4
|
from csv_detective import create_example_csv_file
|
|
5
5
|
|
|
6
6
|
|
|
@@ -41,11 +41,7 @@ def test_example_creation():
|
|
|
41
41
|
"name": "nb_produits",
|
|
42
42
|
"type": "int",
|
|
43
43
|
},
|
|
44
|
-
{
|
|
45
|
-
"name": "note",
|
|
46
|
-
"type": "float",
|
|
47
|
-
"args": {"num_range": [1, 20]}
|
|
48
|
-
},
|
|
44
|
+
{"name": "note", "type": "float", "args": {"num_range": [1, 20]}},
|
|
49
45
|
]
|
|
50
46
|
df = create_example_csv_file(
|
|
51
47
|
fields=fields,
|