csv-detective 0.7.5.dev1197__py3-none-any.whl → 0.7.5.dev1209__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/sexe/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/adresse/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_region/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/commune/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/departement/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/pays/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/region/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/code_rna/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/date_fr/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/sexe/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/siren/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/siret/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/uai/__init__.py +1 -1
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +1 -1
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +1 -1
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +1 -1
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +1 -1
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +1 -1
- csv_detective/detect_labels/geo/json_geojson/__init__.py +1 -1
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py +1 -1
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py +1 -1
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py +1 -1
- csv_detective/detect_labels/other/booleen/__init__.py +1 -1
- csv_detective/detect_labels/other/email/__init__.py +1 -1
- csv_detective/detect_labels/other/float/__init__.py +1 -1
- csv_detective/detect_labels/other/int/__init__.py +1 -1
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +1 -1
- csv_detective/detect_labels/other/twitter/__init__.py +1 -1
- csv_detective/detect_labels/other/url/__init__.py +1 -1
- csv_detective/detect_labels/other/uuid/__init__.py +1 -1
- csv_detective/detect_labels/temp/date/__init__.py +1 -1
- csv_detective/detect_labels/temp/datetime_iso/__init__.py +1 -1
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +1 -1
- csv_detective/detect_labels/temp/year/__init__.py +1 -1
- csv_detective/detection/columns.py +89 -0
- csv_detective/detection/encoding.py +27 -0
- csv_detective/detection/engine.py +46 -0
- csv_detective/detection/headers.py +32 -0
- csv_detective/detection/rows.py +18 -0
- csv_detective/detection/separator.py +44 -0
- csv_detective/detection/variables.py +98 -0
- csv_detective/explore_csv.py +40 -124
- csv_detective/output/dataframe.py +55 -0
- csv_detective/{create_example.py → output/example.py} +10 -9
- csv_detective/output/profile.py +87 -0
- csv_detective/{schema_generation.py → output/schema.py} +344 -343
- csv_detective/output/utils.py +51 -0
- csv_detective/parsing/columns.py +141 -0
- csv_detective/parsing/compression.py +11 -0
- csv_detective/parsing/csv.py +55 -0
- csv_detective/parsing/excel.py +169 -0
- csv_detective/parsing/load.py +97 -0
- csv_detective/utils.py +10 -236
- {csv_detective-0.7.5.dev1197.data → csv_detective-0.7.5.dev1209.data}/data/share/csv_detective/CHANGELOG.md +1 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1209.dist-info}/METADATA +1 -1
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1209.dist-info}/RECORD +84 -70
- tests/test_fields.py +7 -6
- tests/test_file.py +15 -14
- csv_detective/detection.py +0 -633
- /csv_detective/{process_text.py → parsing/text.py} +0 -0
- {csv_detective-0.7.5.dev1197.data → csv_detective-0.7.5.dev1209.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1197.data → csv_detective-0.7.5.dev1209.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1209.dist-info}/WHEEL +0 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1209.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1209.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1209.dist-info}/top_level.txt +0 -0
csv_detective/utils.py
CHANGED
|
@@ -1,15 +1,6 @@
|
|
|
1
|
-
from typing import Callable, Optional, Union
|
|
2
|
-
import json
|
|
3
|
-
import pandas as pd
|
|
4
1
|
import logging
|
|
5
|
-
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
from csv_detective.detect_fields.other.booleen import bool_casting
|
|
9
|
-
from csv_detective.detect_fields.other.float import float_casting
|
|
10
|
-
from csv_detective.detect_fields.temp.date import date_casting
|
|
11
|
-
|
|
12
|
-
logging.basicConfig(level=logging.INFO)
|
|
2
|
+
import math
|
|
3
|
+
from typing import Optional
|
|
13
4
|
|
|
14
5
|
|
|
15
6
|
def display_logs_depending_process_time(prompt: str, duration: float):
|
|
@@ -25,193 +16,20 @@ def display_logs_depending_process_time(prompt: str, duration: float):
|
|
|
25
16
|
if duration < threshold_warn:
|
|
26
17
|
logging.info(prompt)
|
|
27
18
|
elif duration < threshold_critical:
|
|
28
|
-
logging.
|
|
19
|
+
logging.warning(prompt)
|
|
29
20
|
else:
|
|
30
21
|
logging.critical(prompt)
|
|
31
22
|
|
|
32
23
|
|
|
33
|
-
def
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
proportion: float = 0.9,
|
|
37
|
-
skipna: bool = True,
|
|
38
|
-
limited_output: bool = False,
|
|
39
|
-
verbose: bool = False,
|
|
40
|
-
):
|
|
41
|
-
"""Tests values of the serie using test_func.
|
|
42
|
-
- skipna : if True indicates that NaNs are not counted as False
|
|
43
|
-
- proportion : indicates the proportion of values that have to pass the test
|
|
44
|
-
for the serie to be detected as a certain format
|
|
45
|
-
"""
|
|
46
|
-
if verbose:
|
|
47
|
-
start = time()
|
|
48
|
-
|
|
49
|
-
# TODO : change for a cleaner method and only test columns in modules labels
|
|
50
|
-
def apply_test_func(serie: pd.Series, test_func: Callable, _range: int):
|
|
51
|
-
return serie.sample(n=_range).apply(test_func)
|
|
52
|
-
try:
|
|
53
|
-
if skipna:
|
|
54
|
-
serie = serie[serie.notnull()]
|
|
55
|
-
ser_len = len(serie)
|
|
56
|
-
if ser_len == 0:
|
|
57
|
-
return 0.0
|
|
58
|
-
if not limited_output:
|
|
59
|
-
result = apply_test_func(serie, test_func, ser_len).sum() / ser_len
|
|
60
|
-
return result if result >= proportion else 0.0
|
|
61
|
-
else:
|
|
62
|
-
if proportion == 1: # Then try first 1 value, then 5, then all
|
|
63
|
-
for _range in [
|
|
64
|
-
min(1, ser_len),
|
|
65
|
-
min(5, ser_len),
|
|
66
|
-
ser_len,
|
|
67
|
-
]: # Pour ne pas faire d'opérations inutiles, on commence par 1,
|
|
68
|
-
# puis 5 valeurs puis la serie complète
|
|
69
|
-
if all(apply_test_func(serie, test_func, _range)):
|
|
70
|
-
# print(serie.name, ': check OK')
|
|
71
|
-
pass
|
|
72
|
-
else:
|
|
73
|
-
return 0.0
|
|
74
|
-
return 1.0
|
|
75
|
-
else:
|
|
76
|
-
# if we have a proportion, statistically it's OK to analyse up to 10k rows
|
|
77
|
-
# (arbitrary number) and get a significant result
|
|
78
|
-
to_analyse = min(ser_len, 10000)
|
|
79
|
-
result = apply_test_func(serie, test_func, to_analyse).sum() / to_analyse
|
|
80
|
-
return result if result >= proportion else 0.0
|
|
81
|
-
finally:
|
|
82
|
-
if verbose and time() - start > 3:
|
|
83
|
-
display_logs_depending_process_time(
|
|
84
|
-
f"\t/!\\ Column '{serie.name}' took too long ({round(time() - start, 3)}s)",
|
|
85
|
-
time() - start
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def test_col_label(label: str, test_func: Callable, proportion: float = 1, limited_output: bool = False):
|
|
90
|
-
"""Tests label (from header) using test_func.
|
|
91
|
-
- proportion : indicates the minimum score to pass the test for the serie
|
|
92
|
-
to be detected as a certain format
|
|
93
|
-
"""
|
|
94
|
-
if not limited_output:
|
|
95
|
-
return test_func(label)
|
|
96
|
-
else:
|
|
97
|
-
result = test_func(label)
|
|
98
|
-
return result if result >= proportion else 0
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna: bool = True, verbose: bool = False):
|
|
102
|
-
# Initialising dict for tests
|
|
103
|
-
if verbose:
|
|
104
|
-
start = time()
|
|
105
|
-
logging.info("Testing columns to get types")
|
|
106
|
-
test_funcs = dict()
|
|
107
|
-
for test in all_tests:
|
|
108
|
-
name = test.__name__.split(".")[-1]
|
|
109
|
-
test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
|
|
110
|
-
return_table = pd.DataFrame(columns=table.columns)
|
|
111
|
-
for idx, (key, value) in enumerate(test_funcs.items()):
|
|
112
|
-
if verbose:
|
|
113
|
-
start_type = time()
|
|
114
|
-
logging.info(f"\t- Starting with type '{key}'")
|
|
115
|
-
# improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
|
|
116
|
-
# => the following needs to change, "apply" means all columns are tested for one type at once
|
|
117
|
-
return_table.loc[key] = table.apply(
|
|
118
|
-
lambda serie: test_col_val(
|
|
119
|
-
serie,
|
|
120
|
-
value["func"],
|
|
121
|
-
value["prop"],
|
|
122
|
-
skipna=skipna,
|
|
123
|
-
limited_output=limited_output,
|
|
124
|
-
verbose=verbose,
|
|
125
|
-
)
|
|
126
|
-
)
|
|
127
|
-
if verbose:
|
|
128
|
-
display_logs_depending_process_time(
|
|
129
|
-
f'\t> Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
|
|
130
|
-
time() - start_type
|
|
131
|
-
)
|
|
132
|
-
if verbose:
|
|
133
|
-
display_logs_depending_process_time(f"Done testing columns in {round(time() - start, 3)}s", time() - start)
|
|
134
|
-
return return_table
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def test_label(table: pd.DataFrame, all_tests: list, limited_output: bool, verbose: bool = False):
|
|
138
|
-
# Initialising dict for tests
|
|
139
|
-
if verbose:
|
|
140
|
-
start = time()
|
|
141
|
-
logging.info("Testing labels to get types")
|
|
142
|
-
test_funcs = dict()
|
|
143
|
-
for test in all_tests:
|
|
144
|
-
name = test.__name__.split(".")[-1]
|
|
145
|
-
test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
|
|
146
|
-
|
|
147
|
-
return_table = pd.DataFrame(columns=table.columns)
|
|
148
|
-
for idx, (key, value) in enumerate(test_funcs.items()):
|
|
149
|
-
if verbose:
|
|
150
|
-
start_type = time()
|
|
151
|
-
return_table.loc[key] = [
|
|
152
|
-
test_col_label(
|
|
153
|
-
col_name, value["func"], value["prop"], limited_output=limited_output
|
|
154
|
-
)
|
|
155
|
-
for col_name in table.columns
|
|
156
|
-
]
|
|
157
|
-
if verbose:
|
|
158
|
-
display_logs_depending_process_time(
|
|
159
|
-
f'\t- Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
|
|
160
|
-
time() - start_type
|
|
161
|
-
)
|
|
162
|
-
if verbose:
|
|
163
|
-
display_logs_depending_process_time(f"Done testing labels in {round(time() - start, 3)}s", time() - start)
|
|
164
|
-
return return_table
|
|
24
|
+
def is_url(file_path: str) -> bool:
|
|
25
|
+
# could be more sophisticated if needed
|
|
26
|
+
return file_path.startswith('http')
|
|
165
27
|
|
|
166
28
|
|
|
167
|
-
def
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
return_dict_cols_intermediary[column_name] = []
|
|
172
|
-
for detected_value_type in return_dict_cols[column_name]:
|
|
173
|
-
if return_dict_cols[column_name][detected_value_type] == 0:
|
|
174
|
-
continue
|
|
175
|
-
dict_tmp = {}
|
|
176
|
-
dict_tmp["format"] = detected_value_type
|
|
177
|
-
dict_tmp["score"] = return_dict_cols[column_name][detected_value_type]
|
|
178
|
-
return_dict_cols_intermediary[column_name].append(dict_tmp)
|
|
179
|
-
|
|
180
|
-
# Clean dict using priorities
|
|
181
|
-
formats_detected = {
|
|
182
|
-
x["format"] for x in return_dict_cols_intermediary[column_name]
|
|
183
|
-
}
|
|
184
|
-
formats_to_remove = set()
|
|
185
|
-
# Deprioritise float and int detection vs others
|
|
186
|
-
if len(formats_detected - {"float", "int"}) > 0:
|
|
187
|
-
formats_to_remove = formats_to_remove.union({"float", "int"})
|
|
188
|
-
if "int" in formats_detected:
|
|
189
|
-
formats_to_remove.add("float")
|
|
190
|
-
if "latitude_wgs_fr_metropole" in formats_detected:
|
|
191
|
-
formats_to_remove.add("latitude_l93")
|
|
192
|
-
formats_to_remove.add("latitude_wgs")
|
|
193
|
-
if "longitude_wgs_fr_metropole" in formats_detected:
|
|
194
|
-
formats_to_remove.add("longitude_l93")
|
|
195
|
-
formats_to_remove.add("longitude_wgs")
|
|
196
|
-
if "longitude_wgs" in formats_detected:
|
|
197
|
-
formats_to_remove.add("longitude_l93")
|
|
198
|
-
if "code_region" in formats_detected:
|
|
199
|
-
formats_to_remove.add("code_departement")
|
|
200
|
-
|
|
201
|
-
formats_to_keep = formats_detected - formats_to_remove
|
|
202
|
-
|
|
203
|
-
detections = return_dict_cols_intermediary[column_name]
|
|
204
|
-
detections = [x for x in detections if x["format"] in formats_to_keep]
|
|
205
|
-
if not limited_output:
|
|
206
|
-
return_dict_cols_intermediary[column_name] = detections
|
|
207
|
-
else:
|
|
208
|
-
return_dict_cols_intermediary[column_name] = (
|
|
209
|
-
max(detections, key=lambda x: x["score"])
|
|
210
|
-
if len(detections) > 0
|
|
211
|
-
else {"format": "string", "score": 1.0}
|
|
212
|
-
)
|
|
213
|
-
|
|
214
|
-
return return_dict_cols_intermediary
|
|
29
|
+
def prevent_nan(value: float) -> Optional[float]:
|
|
30
|
+
if math.isnan(value):
|
|
31
|
+
return None
|
|
32
|
+
return value
|
|
215
33
|
|
|
216
34
|
|
|
217
35
|
def full_word_strictly_inside_string(word: str, string: str):
|
|
@@ -221,47 +39,3 @@ def full_word_strictly_inside_string(word: str, string: str):
|
|
|
221
39
|
or (string.startswith(word + " "))
|
|
222
40
|
or (string.endswith(" " + word))
|
|
223
41
|
)
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
def cast(value: str, _type: str) -> Optional[Union[str, float, bool, date, datetime]]:
|
|
227
|
-
if not isinstance(value, str) or not value:
|
|
228
|
-
# None is the current default value in hydra, should we keep this?
|
|
229
|
-
return None
|
|
230
|
-
if _type == "float":
|
|
231
|
-
return float_casting(value)
|
|
232
|
-
if _type == "bool":
|
|
233
|
-
return bool_casting(value)
|
|
234
|
-
if _type == "json":
|
|
235
|
-
# in hydra json are given to postgres as strings, conversion is done by postgres
|
|
236
|
-
return json.loads(value)
|
|
237
|
-
if _type == "date":
|
|
238
|
-
_date = date_casting(value)
|
|
239
|
-
return _date.date() if _date else None
|
|
240
|
-
if _type == "datetime":
|
|
241
|
-
return date_casting(value)
|
|
242
|
-
raise ValueError(f"Unknown type `{_type}`")
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
def cast_df(df: pd.DataFrame, columns: dict, cast_json: bool = True, verbose: bool = False) -> pd.DataFrame:
|
|
246
|
-
if verbose:
|
|
247
|
-
start = time()
|
|
248
|
-
output_df = pd.DataFrame()
|
|
249
|
-
for col_name, detection in columns.items():
|
|
250
|
-
if detection["python_type"] == "string" or (detection["python_type"] == "json" and not cast_json):
|
|
251
|
-
# no change if detected type is string
|
|
252
|
-
output_df[col_name] = df[col_name].copy()
|
|
253
|
-
elif detection["python_type"] == "int":
|
|
254
|
-
# to allow having ints and NaN in the same column
|
|
255
|
-
output_df[col_name] = df[col_name].copy().astype(pd.Int64Dtype())
|
|
256
|
-
else:
|
|
257
|
-
output_df[col_name] = df[col_name].apply(
|
|
258
|
-
lambda col: cast(col, _type=detection["python_type"])
|
|
259
|
-
)
|
|
260
|
-
# to save RAM
|
|
261
|
-
del df[col_name]
|
|
262
|
-
if verbose:
|
|
263
|
-
display_logs_depending_process_time(
|
|
264
|
-
f'Casting columns completed in {round(time() - start, 3)}s',
|
|
265
|
-
time() - start,
|
|
266
|
-
)
|
|
267
|
-
return output_df
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
- Fix bool casting [#109](https://github.com/datagouv/csv-detective/pull/109)
|
|
12
12
|
- Handle csv.gz files [#110](https://github.com/datagouv/csv-detective/pull/110)
|
|
13
13
|
- Refactor file tests [#110](https://github.com/datagouv/csv-detective/pull/110)
|
|
14
|
+
- Restructure repo (breaking changes) [#111](https://github.com/datagouv/csv-detective/pull/111)
|
|
14
15
|
|
|
15
16
|
## 0.7.4 (2024-11-15)
|
|
16
17
|
|
|
@@ -1,16 +1,12 @@
|
|
|
1
|
-
csv_detective/__init__.py,sha256=
|
|
1
|
+
csv_detective/__init__.py,sha256=GCHgu0BhH5ACV7cf-1gDr9nRyvSoeQ1vRw9SjEHeMT4,143
|
|
2
2
|
csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
|
|
3
|
-
csv_detective/
|
|
4
|
-
csv_detective/detection.py,sha256=dqjAKR-h7QC2pbl7FEUleS15bvGHBiTleu9CtVKp_Vo,22806
|
|
5
|
-
csv_detective/explore_csv.py,sha256=HM4RlNV2eWfP9wTDvhrow-_yDMbGuE3JDvFCfmMNWyY,18087
|
|
6
|
-
csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
|
|
3
|
+
csv_detective/explore_csv.py,sha256=aJ2pG7lK4sgY9Pv31zEzFVGByxkfw4wwgrQqfgUtBOo,14903
|
|
7
4
|
csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
|
|
8
|
-
csv_detective/
|
|
9
|
-
csv_detective/utils.py,sha256=yO9INaLh-QX-FFL2A153AlMqftE04wb0hpN6HJvsKGg,10581
|
|
5
|
+
csv_detective/utils.py,sha256=KAYfSJXnPuAXnSc38Jm57oQ_JP_0kUkmI1OV6gN5_ys,1116
|
|
10
6
|
csv_detective/detect_fields/__init__.py,sha256=NVfE3BQVExgXb-BPbhDvlkM5-0naEVLpZ4aM_OGHYfE,931
|
|
11
7
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
8
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=
|
|
9
|
+
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
|
|
14
10
|
csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=tfHdqUnCQ0cv-fBo3Cy--8UNXzgjld4kseI5eQ_sR4E,187
|
|
15
11
|
csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=unr-Y4zquKSM5PVUiQGnOm-zQvaN8qd3v_XHf0W2VH8,378
|
|
16
12
|
csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=27bCkZP5w7tpsKUdOIXuiAG90DTdw066CWg3G5HtsKE,160
|
|
@@ -26,16 +22,16 @@ csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256
|
|
|
26
22
|
csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=2q5T4SmCK6ZFF1mrv7d-q9tOIQKBcROI24y_UYIuvz0,383
|
|
27
23
|
csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=JbFKDd4jAnd9yb7YqP36MoLdO1JFPm1cg60fGXt6ZvI,1074
|
|
28
24
|
csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
-
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=
|
|
25
|
+
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=SRWJvg3Ikyjmop9iL14igTjxNGpO-QB3fpADI_bLYEY,566
|
|
30
26
|
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
|
|
31
27
|
csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z0RjMBt1--ZL7Jd1RsHAQCCbTAQk_BnlnTq8VF1o_VA,146
|
|
32
28
|
csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=g9n5sOjRlk4I9YFZjdaTYrXf8ftXRDunGZOUpYhN4fA,295
|
|
33
|
-
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=
|
|
29
|
+
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=lvcaVKgOPrCaZb-Y1-wYCbLYB_CQjCJFNAzfWDwtTVE,496
|
|
34
30
|
csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
35
31
|
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=kMV52djlG0y4o0ELEZuvTv_FvooYOgTnV1aWhycFJDc,284
|
|
36
|
-
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=
|
|
32
|
+
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=g8pOqJPKVpQiMd78zgrjXJWYeWkYhu8r3D4IQX519HQ,519
|
|
37
33
|
csv_detective/detect_fields/FR/other/insee_ape700/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
|
|
38
|
-
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=
|
|
34
|
+
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=iYkLe3MM51GWyBX_4BTq5PWDX_EeYRbEHWKMr8oE1MQ,269
|
|
39
35
|
csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=ohSwUL2rXqTXPG5WDAh2SP-lp1SzFCYgo4IhJ-PXmdk,442
|
|
40
36
|
csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=ThEeT6rXmS0EvHW8y4A_74bILyErDGxLe9v3elHOFs8,707
|
|
41
37
|
csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=BF47aMTe0rUIx66iurIo7fM9Nrk0YorQ7WmFLnkWonI,343
|
|
@@ -73,72 +69,90 @@ csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRh
|
|
|
73
69
|
csv_detective/detect_labels/__init__.py,sha256=BJjWlwTnnDe9nomABDUreu9EMu6IFG3T47d7YCJZbRc,878
|
|
74
70
|
csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
71
|
csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
76
|
-
csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=
|
|
77
|
-
csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=
|
|
78
|
-
csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=
|
|
79
|
-
csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=
|
|
80
|
-
csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=
|
|
81
|
-
csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=
|
|
82
|
-
csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=
|
|
83
|
-
csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=
|
|
84
|
-
csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=
|
|
85
|
-
csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=
|
|
86
|
-
csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=
|
|
87
|
-
csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=
|
|
88
|
-
csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=
|
|
89
|
-
csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=
|
|
90
|
-
csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=
|
|
72
|
+
csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=e5ROxhrXNCefLwL5lXTWHO0PEWwLHfqmowm7XoeqZ2I,1063
|
|
73
|
+
csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=D_9QFvAeX5Nwp4qtQ0NEpKR0jpRlDx-rNBSrlYrw4nw,1096
|
|
74
|
+
csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=rpzxUVsZyazVVguOorLadiJv_Vz1n04ijm0RbVmRDts,1025
|
|
75
|
+
csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=VUqv3G-JO-9CJU4-EX5DXs4O22Lqm75vuOy9MngoojA,949
|
|
76
|
+
csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=USIYj7PiULI_WCfDxpzRCW9tv8-FNYKWopsVZ3H79mE,1070
|
|
77
|
+
csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=f9WroGVfB5jUzd_Rjs4XocZT2Ma-xZd2On9StUHy3F4,1012
|
|
78
|
+
csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=iYD0UPhRVKYFv8DAEfe_RoQlE47igZ_MacsHxVLyYcM,948
|
|
79
|
+
csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=fqNziX5ID6mVE5nVNviOsncVqkYyVvj7J_8hxN7_D1w,1229
|
|
80
|
+
csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=EAcQ2FqTKQdxhSYr5VCuEpjc7BdGwTdMkLL_VL6ay7Y,957
|
|
81
|
+
csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=X3vGdh_DHzWZXuV2-L9QhuWTLjHyaPZyS__s9Y5yiNg,1386
|
|
82
|
+
csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
|
|
83
|
+
csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=Pf00tBADr7HvJLeW_YqY3QU1EBVJDi365woheAzsNKY,1139
|
|
84
|
+
csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=LfvgcrjVsXmxT6xC3X8eQIiQ_STvPRwjUbUQ4TyfJE0,1144
|
|
85
|
+
csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=RsI_QXMJOZ5PpKcoKWy7AmUHFjehHXcUezquZyt1eq4,1169
|
|
86
|
+
csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=h9pE3xu2-PFw1jmDenkoKWmFkYmpK9-UgCboPlL7Aeg,1164
|
|
91
87
|
csv_detective/detect_labels/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
92
|
-
csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=
|
|
93
|
-
csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=
|
|
94
|
-
csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=
|
|
95
|
-
csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256
|
|
96
|
-
csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=
|
|
97
|
-
csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=
|
|
98
|
-
csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=
|
|
99
|
-
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=
|
|
100
|
-
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=
|
|
101
|
-
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=
|
|
102
|
-
csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=
|
|
88
|
+
csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=lcLdEdNo4rhLvqzP3C0rmU_1PaQvTdpviXt9xGSaGFc,939
|
|
89
|
+
csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=DJykTRguggOlsIuyjYezJ99c8MGCSwwwCLcoQjfN40o,1024
|
|
90
|
+
csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=idLo99rELzs1uc4mOcby9RLZLhhpsOp5AoTudT2jPwM,934
|
|
91
|
+
csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=J5G8pldzBdXRaopYNzGDztRFIsI_7rdaAPQ_kSuz5PU,1043
|
|
92
|
+
csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=9EXCmzKSa5PSWrPbVeLscbJCaiwQEXX-1rCr79U8XLA,975
|
|
93
|
+
csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=9bq2171SrmDIHx4A0cAeSHfWyQl40e-dIR9_ur4cEHQ,1124
|
|
94
|
+
csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=AEKBGWEKxDoT8k9BF-v9vl1SHc4DffiiFyhip-6tC78,956
|
|
95
|
+
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=9w2VCs8kq-XVRmxxwqZYIynfCPwbFbl-pBPqXtnXx8Y,1103
|
|
96
|
+
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Yqrp7NDEN0WRA_oktMb0wWoLQ99rzIvNvJ8jVhBCRD8,1040
|
|
97
|
+
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=gdzclIAjhr_k-a04l_FDz9kQywBfSA6vqa0UQxdaqNw,1143
|
|
98
|
+
csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=mB0hC2JUKGnhGl6MUDFzSM_-t-Tvt3Vm21Gr_JXkL3k,1316
|
|
103
99
|
csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
104
|
-
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=
|
|
105
|
-
csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=
|
|
100
|
+
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=FHXmOIjH4e5n_mahtScgOVYUAi_M4PeHAnsuIm5LxCA,1074
|
|
101
|
+
csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=hX0FPAia4x28GD398WvpeaBQ4_3F5G3xAhySmZBdi5w,934
|
|
106
102
|
csv_detective/detect_labels/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
107
|
-
csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=
|
|
108
|
-
csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=
|
|
109
|
-
csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=
|
|
110
|
-
csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=
|
|
111
|
-
csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=
|
|
112
|
-
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=
|
|
113
|
-
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=
|
|
103
|
+
csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
|
|
104
|
+
csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
|
|
105
|
+
csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
|
|
106
|
+
csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=0sYS6bF_xmmhqsJ0Wrx7GC3qBAYjK7uhVud_ZbIQHHQ,1072
|
|
107
|
+
csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
|
|
108
|
+
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=SwR1NU0vpk8YdHTIk1wk9zQpNoUsoABq-K8GfRMY0fw,1705
|
|
109
|
+
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=z4rOrkCypI5JodgX9alTrV03IpetgAW4BGJuNvFlU4s,1145
|
|
114
110
|
csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
|
-
csv_detective/detect_labels/other/booleen/__init__.py,sha256=
|
|
116
|
-
csv_detective/detect_labels/other/email/__init__.py,sha256=
|
|
117
|
-
csv_detective/detect_labels/other/float/__init__.py,sha256=
|
|
118
|
-
csv_detective/detect_labels/other/int/__init__.py,sha256=
|
|
111
|
+
csv_detective/detect_labels/other/booleen/__init__.py,sha256=uvQ7yDVAlEO8AY44OMblh_ZrxPTOmdvFtbcQEanpWSo,987
|
|
112
|
+
csv_detective/detect_labels/other/email/__init__.py,sha256=VRUYZXGn-hRqE2sY0JY-Oh_wtT568orDTBxBGYsgqxE,1148
|
|
113
|
+
csv_detective/detect_labels/other/float/__init__.py,sha256=jIr1r9FFy8NWvi5fOuIhj52bc7cZmM3OeTo-c6TUWII,926
|
|
114
|
+
csv_detective/detect_labels/other/int/__init__.py,sha256=G1GAlKNaOZH_l39Zpw85xkl7JcdnY5PlEEroyU78hlY,933
|
|
119
115
|
csv_detective/detect_labels/other/money/__init__.py,sha256=kBEGuUy6kYkOI3vC_a7waBciG2ipyV9bhC330U8WaoI,279
|
|
120
116
|
csv_detective/detect_labels/other/money/check_col_name.py,sha256=zgp5eUnf3XRQuxgdEGfxPfUnniO8Pzw19uK0ICr2pf8,414
|
|
121
|
-
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=
|
|
122
|
-
csv_detective/detect_labels/other/twitter/__init__.py,sha256=
|
|
123
|
-
csv_detective/detect_labels/other/url/__init__.py,sha256=
|
|
124
|
-
csv_detective/detect_labels/other/uuid/__init__.py,sha256=
|
|
117
|
+
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=3TW59y4vo4Pkx_fQrmEs1-gZbdJeNiK7ip25cpR829U,927
|
|
118
|
+
csv_detective/detect_labels/other/twitter/__init__.py,sha256=x3b522ov_g-kmcq4k4eoZ8FQqrXdnlRJJit5UbnzIrQ,959
|
|
119
|
+
csv_detective/detect_labels/other/url/__init__.py,sha256=wVQsWQzOuBY-cD7wn_PXcWLVEkknBA2lBCu8SRWsQG4,1202
|
|
120
|
+
csv_detective/detect_labels/other/uuid/__init__.py,sha256=ySxqFvtGHguoiOyD5A1YRFY3SuubkgBAEY_Ud5kZVPM,931
|
|
125
121
|
csv_detective/detect_labels/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
126
|
-
csv_detective/detect_labels/temp/date/__init__.py,sha256
|
|
127
|
-
csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=
|
|
128
|
-
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=
|
|
129
|
-
csv_detective/detect_labels/temp/year/__init__.py,sha256=
|
|
130
|
-
csv_detective
|
|
131
|
-
csv_detective
|
|
132
|
-
csv_detective
|
|
133
|
-
csv_detective
|
|
122
|
+
csv_detective/detect_labels/temp/date/__init__.py,sha256=CRv-S0figO6MOPdE0Lv5hWdjtIr6EmWzwlcjn5ofIxo,1322
|
|
123
|
+
csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=0lFdN5Z43m6Qm-wBqcyM_mceUmI4s3vqgLCM-Jlgoxw,1157
|
|
124
|
+
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=4N0EGJA_2vXC1iFptvzpU6IN7AIJH5MFUrRY2p7Cjfs,1175
|
|
125
|
+
csv_detective/detect_labels/temp/year/__init__.py,sha256=3U9j8Hux432KdGtIyArq_-vScn-5eYFwpn976WM9N4M,1150
|
|
126
|
+
csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
|
|
127
|
+
csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
|
|
128
|
+
csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
|
|
129
|
+
csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41cnQmc,1172
|
|
130
|
+
csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
|
|
131
|
+
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
132
|
+
csv_detective/detection/variables.py,sha256=3qEMtjZ_zyIFXvTnFgK7ZMDx8C12uQXKfFjEj2moyJc,3558
|
|
133
|
+
csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
|
|
134
|
+
csv_detective/output/example.py,sha256=i8PkdXxidF7qR_9aK8vh12JpZdJQryhBgyrMS8iy5rk,8642
|
|
135
|
+
csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
|
|
136
|
+
csv_detective/output/schema.py,sha256=ZDBWDOD8IYp7rcB0_n8l9JXGIhOQ6bTZHFWfTmnNNEQ,13480
|
|
137
|
+
csv_detective/output/utils.py,sha256=HbmvCCCmFo7NJxhD_UsJIveuw-rrfhrvYckv1CJn_10,2301
|
|
138
|
+
csv_detective/parsing/columns.py,sha256=Oj0Ddp2fPZeL70GDWdF7GY2RmhiVdz0IEvoBJFt-wao,5701
|
|
139
|
+
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
140
|
+
csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
|
|
141
|
+
csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
|
|
142
|
+
csv_detective/parsing/load.py,sha256=SpP0pfxswOAPPpwbZfoP1blh0EKV5VMs0TpTgQJKzjs,3621
|
|
143
|
+
csv_detective/parsing/text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
|
|
144
|
+
csv_detective-0.7.5.dev1209.data/data/share/csv_detective/CHANGELOG.md,sha256=povo1ufNJvsxJLkzdjYLgkTy9E-MNFWTg6elXe2nyqU,7625
|
|
145
|
+
csv_detective-0.7.5.dev1209.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
146
|
+
csv_detective-0.7.5.dev1209.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
|
|
147
|
+
csv_detective-0.7.5.dev1209.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
134
148
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
135
149
|
tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
|
|
136
|
-
tests/test_fields.py,sha256=
|
|
137
|
-
tests/test_file.py,sha256=
|
|
150
|
+
tests/test_fields.py,sha256=fcgycaFxacOcN0WdwuUvxef_ejd6tRHNpkD5pxMjMXE,11141
|
|
151
|
+
tests/test_file.py,sha256=EleTssys5fCP4N0W1eTZN35uijzoF15e3dIcuIlrMsk,7865
|
|
138
152
|
tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
|
|
139
153
|
tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
|
|
140
|
-
csv_detective-0.7.5.
|
|
141
|
-
csv_detective-0.7.5.
|
|
142
|
-
csv_detective-0.7.5.
|
|
143
|
-
csv_detective-0.7.5.
|
|
144
|
-
csv_detective-0.7.5.
|
|
154
|
+
csv_detective-0.7.5.dev1209.dist-info/METADATA,sha256=LwKAMVqoJjZfnrWAJV_nv_V3oprmbmmaNmX9e4Zvruc,1386
|
|
155
|
+
csv_detective-0.7.5.dev1209.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
156
|
+
csv_detective-0.7.5.dev1209.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
157
|
+
csv_detective-0.7.5.dev1209.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
158
|
+
csv_detective-0.7.5.dev1209.dist-info/RECORD,,
|
tests/test_fields.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import
|
|
1
|
+
from datetime import date as _date, datetime as _datetime
|
|
2
|
+
|
|
2
3
|
from numpy import random
|
|
4
|
+
import pandas as pd
|
|
3
5
|
import pytest
|
|
4
|
-
from datetime import date as _date, datetime as _datetime
|
|
5
6
|
|
|
6
7
|
from csv_detective.detect_fields.FR.geo import (
|
|
7
8
|
adresse,
|
|
@@ -43,12 +44,12 @@ from csv_detective.detect_fields.other import (
|
|
|
43
44
|
float as test_float,
|
|
44
45
|
)
|
|
45
46
|
from csv_detective.detect_fields.temp import date, datetime_iso, datetime_rfc822, year
|
|
46
|
-
from csv_detective.detection import (
|
|
47
|
+
from csv_detective.detection.variables import (
|
|
47
48
|
detect_continuous_variable,
|
|
48
|
-
|
|
49
|
+
detect_categorical_variable,
|
|
49
50
|
)
|
|
50
51
|
from csv_detective.explore_csv import return_all_tests
|
|
51
|
-
from csv_detective.
|
|
52
|
+
from csv_detective.output.dataframe import cast
|
|
52
53
|
|
|
53
54
|
|
|
54
55
|
def test_all_tests_return_bool():
|
|
@@ -71,7 +72,7 @@ def test_detetect_categorical_variable():
|
|
|
71
72
|
}
|
|
72
73
|
df = pd.DataFrame(df_dict, dtype="unicode")
|
|
73
74
|
|
|
74
|
-
res, _ =
|
|
75
|
+
res, _ = detect_categorical_variable(df)
|
|
75
76
|
assert len(res.values) and all(k in res.values for k in ["cat", "cat2"])
|
|
76
77
|
|
|
77
78
|
|
tests/test_file.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
|
|
1
|
+
import pandas as pd
|
|
2
2
|
import pytest
|
|
3
3
|
import responses
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
from csv_detective import routine
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
def test_columns_output_on_file():
|
|
8
9
|
output = routine(
|
|
9
|
-
|
|
10
|
+
file_path="tests/data/a_test_file.csv",
|
|
10
11
|
num_rows=-1,
|
|
11
12
|
output_profile=False,
|
|
12
13
|
save_results=False,
|
|
@@ -40,7 +41,7 @@ def test_columns_output_on_file():
|
|
|
40
41
|
|
|
41
42
|
def test_profile_output_on_file():
|
|
42
43
|
output = routine(
|
|
43
|
-
|
|
44
|
+
file_path="tests/data/a_test_file.csv",
|
|
44
45
|
num_rows=-1,
|
|
45
46
|
output_profile=True,
|
|
46
47
|
save_results=False,
|
|
@@ -72,7 +73,7 @@ def test_profile_output_on_file():
|
|
|
72
73
|
def test_profile_with_num_rows():
|
|
73
74
|
with pytest.raises(ValueError):
|
|
74
75
|
routine(
|
|
75
|
-
|
|
76
|
+
file_path="tests/data/a_test_file.csv",
|
|
76
77
|
num_rows=50,
|
|
77
78
|
output_profile=True,
|
|
78
79
|
save_results=False,
|
|
@@ -85,7 +86,7 @@ def test_exception_different_number_of_columns():
|
|
|
85
86
|
"""
|
|
86
87
|
with pytest.raises(ValueError):
|
|
87
88
|
routine(
|
|
88
|
-
|
|
89
|
+
file_path="tests/data/c_test_file.csv",
|
|
89
90
|
num_rows=-1,
|
|
90
91
|
output_profile=True,
|
|
91
92
|
save_results=False,
|
|
@@ -94,7 +95,7 @@ def test_exception_different_number_of_columns():
|
|
|
94
95
|
|
|
95
96
|
def test_code_dep_reg_on_file():
|
|
96
97
|
output = routine(
|
|
97
|
-
|
|
98
|
+
file_path="tests/data/b_test_file.csv",
|
|
98
99
|
num_rows=-1,
|
|
99
100
|
output_profile=False,
|
|
100
101
|
save_results=False,
|
|
@@ -106,7 +107,7 @@ def test_code_dep_reg_on_file():
|
|
|
106
107
|
|
|
107
108
|
def test_schema_on_file():
|
|
108
109
|
output = routine(
|
|
109
|
-
|
|
110
|
+
file_path="tests/data/b_test_file.csv",
|
|
110
111
|
num_rows=-1,
|
|
111
112
|
output_schema=True,
|
|
112
113
|
save_results=False,
|
|
@@ -149,7 +150,7 @@ params_others = [
|
|
|
149
150
|
def test_non_csv_files(params):
|
|
150
151
|
file_name, checks = params
|
|
151
152
|
_ = routine(
|
|
152
|
-
|
|
153
|
+
file_path=f"tests/data/{file_name}",
|
|
153
154
|
num_rows=-1,
|
|
154
155
|
output_profile=False,
|
|
155
156
|
save_results=False,
|
|
@@ -181,11 +182,11 @@ def test_urls(mocked_responses, params):
|
|
|
181
182
|
url = f"http://example.com/{file_name}"
|
|
182
183
|
mocked_responses.get(
|
|
183
184
|
url,
|
|
184
|
-
body=open(f"tests/{file_name}", "rb").read(),
|
|
185
|
+
body=open(f"tests/data/{file_name}", "rb").read(),
|
|
185
186
|
status=200,
|
|
186
187
|
)
|
|
187
188
|
_ = routine(
|
|
188
|
-
|
|
189
|
+
file_path=url,
|
|
189
190
|
num_rows=-1,
|
|
190
191
|
output_profile=False,
|
|
191
192
|
save_results=False,
|
|
@@ -211,7 +212,7 @@ def test_nan_values(expected_type):
|
|
|
211
212
|
# if skipping NaN, the column contains only ints
|
|
212
213
|
skipna, expected_type = expected_type
|
|
213
214
|
output = routine(
|
|
214
|
-
|
|
215
|
+
file_path="tests/data/b_test_file.csv",
|
|
215
216
|
num_rows=-1,
|
|
216
217
|
save_results=False,
|
|
217
218
|
skipna=skipna,
|
|
@@ -221,7 +222,7 @@ def test_nan_values(expected_type):
|
|
|
221
222
|
|
|
222
223
|
def test_output_df():
|
|
223
224
|
output, df = routine(
|
|
224
|
-
|
|
225
|
+
file_path="tests/data/b_test_file.csv",
|
|
225
226
|
num_rows=-1,
|
|
226
227
|
output_profile=False,
|
|
227
228
|
save_results=False,
|
|
@@ -249,7 +250,7 @@ def test_cast_json(mocked_responses, cast_json):
|
|
|
249
250
|
status=200,
|
|
250
251
|
)
|
|
251
252
|
analysis, df = routine(
|
|
252
|
-
|
|
253
|
+
file_path='http://example.com/test.csv',
|
|
253
254
|
num_rows=-1,
|
|
254
255
|
output_profile=False,
|
|
255
256
|
save_results=False,
|