csv-detective 0.7.5.dev1180__py3-none-any.whl → 0.7.5.dev1209__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/sexe/__init__.py +1 -1
- csv_detective/detect_fields/temp/date/__init__.py +5 -1
- csv_detective/detect_labels/FR/geo/adresse/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_region/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/commune/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/departement/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/pays/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/region/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/code_rna/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/date_fr/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/sexe/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/siren/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/siret/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/uai/__init__.py +1 -1
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +1 -1
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +1 -1
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +1 -1
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +1 -1
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +1 -1
- csv_detective/detect_labels/geo/json_geojson/__init__.py +1 -1
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py +1 -1
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py +1 -1
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py +1 -1
- csv_detective/detect_labels/other/booleen/__init__.py +1 -1
- csv_detective/detect_labels/other/email/__init__.py +1 -1
- csv_detective/detect_labels/other/float/__init__.py +1 -1
- csv_detective/detect_labels/other/int/__init__.py +1 -1
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +1 -1
- csv_detective/detect_labels/other/twitter/__init__.py +1 -1
- csv_detective/detect_labels/other/url/__init__.py +1 -1
- csv_detective/detect_labels/other/uuid/__init__.py +1 -1
- csv_detective/detect_labels/temp/date/__init__.py +1 -1
- csv_detective/detect_labels/temp/datetime_iso/__init__.py +1 -1
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +1 -1
- csv_detective/detect_labels/temp/year/__init__.py +1 -1
- csv_detective/detection/columns.py +89 -0
- csv_detective/detection/encoding.py +27 -0
- csv_detective/detection/engine.py +46 -0
- csv_detective/detection/headers.py +32 -0
- csv_detective/detection/rows.py +18 -0
- csv_detective/detection/separator.py +44 -0
- csv_detective/detection/variables.py +98 -0
- csv_detective/explore_csv.py +40 -110
- csv_detective/output/dataframe.py +55 -0
- csv_detective/{create_example.py → output/example.py} +10 -9
- csv_detective/output/profile.py +87 -0
- csv_detective/{schema_generation.py → output/schema.py} +344 -343
- csv_detective/output/utils.py +51 -0
- csv_detective/parsing/columns.py +141 -0
- csv_detective/parsing/compression.py +11 -0
- csv_detective/parsing/csv.py +55 -0
- csv_detective/parsing/excel.py +169 -0
- csv_detective/parsing/load.py +97 -0
- csv_detective/utils.py +10 -236
- {csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1209.data}/data/share/csv_detective/CHANGELOG.md +3 -0
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1209.dist-info}/METADATA +3 -2
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1209.dist-info}/RECORD +85 -71
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1209.dist-info}/WHEEL +1 -1
- tests/test_fields.py +7 -6
- tests/test_file.py +56 -57
- csv_detective/detection.py +0 -618
- /csv_detective/{process_text.py → parsing/text.py} +0 -0
- {csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1209.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1209.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1209.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1209.dist-info/licenses}/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1209.dist-info}/top_level.txt +0 -0
csv_detective/utils.py
CHANGED
|
@@ -1,15 +1,6 @@
|
|
|
1
|
-
from typing import Callable, Optional, Union
|
|
2
|
-
import json
|
|
3
|
-
import pandas as pd
|
|
4
1
|
import logging
|
|
5
|
-
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
from csv_detective.detect_fields.other.booleen import bool_casting
|
|
9
|
-
from csv_detective.detect_fields.other.float import float_casting
|
|
10
|
-
from csv_detective.detect_fields.temp.date import date_casting
|
|
11
|
-
|
|
12
|
-
logging.basicConfig(level=logging.INFO)
|
|
2
|
+
import math
|
|
3
|
+
from typing import Optional
|
|
13
4
|
|
|
14
5
|
|
|
15
6
|
def display_logs_depending_process_time(prompt: str, duration: float):
|
|
@@ -25,193 +16,20 @@ def display_logs_depending_process_time(prompt: str, duration: float):
|
|
|
25
16
|
if duration < threshold_warn:
|
|
26
17
|
logging.info(prompt)
|
|
27
18
|
elif duration < threshold_critical:
|
|
28
|
-
logging.
|
|
19
|
+
logging.warning(prompt)
|
|
29
20
|
else:
|
|
30
21
|
logging.critical(prompt)
|
|
31
22
|
|
|
32
23
|
|
|
33
|
-
def
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
proportion: float = 0.9,
|
|
37
|
-
skipna: bool = True,
|
|
38
|
-
limited_output: bool = False,
|
|
39
|
-
verbose: bool = False,
|
|
40
|
-
):
|
|
41
|
-
"""Tests values of the serie using test_func.
|
|
42
|
-
- skipna : if True indicates that NaNs are not counted as False
|
|
43
|
-
- proportion : indicates the proportion of values that have to pass the test
|
|
44
|
-
for the serie to be detected as a certain format
|
|
45
|
-
"""
|
|
46
|
-
if verbose:
|
|
47
|
-
start = time()
|
|
48
|
-
|
|
49
|
-
# TODO : change for a cleaner method and only test columns in modules labels
|
|
50
|
-
def apply_test_func(serie: pd.Series, test_func: Callable, _range: int):
|
|
51
|
-
return serie.sample(n=_range).apply(test_func)
|
|
52
|
-
try:
|
|
53
|
-
if skipna:
|
|
54
|
-
serie = serie[serie.notnull()]
|
|
55
|
-
ser_len = len(serie)
|
|
56
|
-
if ser_len == 0:
|
|
57
|
-
return 0.0
|
|
58
|
-
if not limited_output:
|
|
59
|
-
result = apply_test_func(serie, test_func, ser_len).sum() / ser_len
|
|
60
|
-
return result if result >= proportion else 0.0
|
|
61
|
-
else:
|
|
62
|
-
if proportion == 1: # Then try first 1 value, then 5, then all
|
|
63
|
-
for _range in [
|
|
64
|
-
min(1, ser_len),
|
|
65
|
-
min(5, ser_len),
|
|
66
|
-
ser_len,
|
|
67
|
-
]: # Pour ne pas faire d'opérations inutiles, on commence par 1,
|
|
68
|
-
# puis 5 valeurs puis la serie complète
|
|
69
|
-
if all(apply_test_func(serie, test_func, _range)):
|
|
70
|
-
# print(serie.name, ': check OK')
|
|
71
|
-
pass
|
|
72
|
-
else:
|
|
73
|
-
return 0.0
|
|
74
|
-
return 1.0
|
|
75
|
-
else:
|
|
76
|
-
# if we have a proportion, statistically it's OK to analyse up to 10k rows
|
|
77
|
-
# (arbitrary number) and get a significant result
|
|
78
|
-
to_analyse = min(ser_len, 10000)
|
|
79
|
-
result = apply_test_func(serie, test_func, to_analyse).sum() / to_analyse
|
|
80
|
-
return result if result >= proportion else 0.0
|
|
81
|
-
finally:
|
|
82
|
-
if verbose and time() - start > 3:
|
|
83
|
-
display_logs_depending_process_time(
|
|
84
|
-
f"\t/!\\ Column '{serie.name}' took too long ({round(time() - start, 3)}s)",
|
|
85
|
-
time() - start
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def test_col_label(label: str, test_func: Callable, proportion: float = 1, limited_output: bool = False):
|
|
90
|
-
"""Tests label (from header) using test_func.
|
|
91
|
-
- proportion : indicates the minimum score to pass the test for the serie
|
|
92
|
-
to be detected as a certain format
|
|
93
|
-
"""
|
|
94
|
-
if not limited_output:
|
|
95
|
-
return test_func(label)
|
|
96
|
-
else:
|
|
97
|
-
result = test_func(label)
|
|
98
|
-
return result if result >= proportion else 0
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna: bool = True, verbose: bool = False):
|
|
102
|
-
# Initialising dict for tests
|
|
103
|
-
if verbose:
|
|
104
|
-
start = time()
|
|
105
|
-
logging.info("Testing columns to get types")
|
|
106
|
-
test_funcs = dict()
|
|
107
|
-
for test in all_tests:
|
|
108
|
-
name = test.__name__.split(".")[-1]
|
|
109
|
-
test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
|
|
110
|
-
return_table = pd.DataFrame(columns=table.columns)
|
|
111
|
-
for idx, (key, value) in enumerate(test_funcs.items()):
|
|
112
|
-
if verbose:
|
|
113
|
-
start_type = time()
|
|
114
|
-
logging.info(f"\t- Starting with type '{key}'")
|
|
115
|
-
# improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
|
|
116
|
-
# => the following needs to change, "apply" means all columns are tested for one type at once
|
|
117
|
-
return_table.loc[key] = table.apply(
|
|
118
|
-
lambda serie: test_col_val(
|
|
119
|
-
serie,
|
|
120
|
-
value["func"],
|
|
121
|
-
value["prop"],
|
|
122
|
-
skipna=skipna,
|
|
123
|
-
limited_output=limited_output,
|
|
124
|
-
verbose=verbose,
|
|
125
|
-
)
|
|
126
|
-
)
|
|
127
|
-
if verbose:
|
|
128
|
-
display_logs_depending_process_time(
|
|
129
|
-
f'\t> Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
|
|
130
|
-
time() - start_type
|
|
131
|
-
)
|
|
132
|
-
if verbose:
|
|
133
|
-
display_logs_depending_process_time(f"Done testing columns in {round(time() - start, 3)}s", time() - start)
|
|
134
|
-
return return_table
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def test_label(table: pd.DataFrame, all_tests: list, limited_output: bool, verbose: bool = False):
|
|
138
|
-
# Initialising dict for tests
|
|
139
|
-
if verbose:
|
|
140
|
-
start = time()
|
|
141
|
-
logging.info("Testing labels to get types")
|
|
142
|
-
test_funcs = dict()
|
|
143
|
-
for test in all_tests:
|
|
144
|
-
name = test.__name__.split(".")[-1]
|
|
145
|
-
test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
|
|
146
|
-
|
|
147
|
-
return_table = pd.DataFrame(columns=table.columns)
|
|
148
|
-
for idx, (key, value) in enumerate(test_funcs.items()):
|
|
149
|
-
if verbose:
|
|
150
|
-
start_type = time()
|
|
151
|
-
return_table.loc[key] = [
|
|
152
|
-
test_col_label(
|
|
153
|
-
col_name, value["func"], value["prop"], limited_output=limited_output
|
|
154
|
-
)
|
|
155
|
-
for col_name in table.columns
|
|
156
|
-
]
|
|
157
|
-
if verbose:
|
|
158
|
-
display_logs_depending_process_time(
|
|
159
|
-
f'\t- Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
|
|
160
|
-
time() - start_type
|
|
161
|
-
)
|
|
162
|
-
if verbose:
|
|
163
|
-
display_logs_depending_process_time(f"Done testing labels in {round(time() - start, 3)}s", time() - start)
|
|
164
|
-
return return_table
|
|
24
|
+
def is_url(file_path: str) -> bool:
|
|
25
|
+
# could be more sophisticated if needed
|
|
26
|
+
return file_path.startswith('http')
|
|
165
27
|
|
|
166
28
|
|
|
167
|
-
def
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
return_dict_cols_intermediary[column_name] = []
|
|
172
|
-
for detected_value_type in return_dict_cols[column_name]:
|
|
173
|
-
if return_dict_cols[column_name][detected_value_type] == 0:
|
|
174
|
-
continue
|
|
175
|
-
dict_tmp = {}
|
|
176
|
-
dict_tmp["format"] = detected_value_type
|
|
177
|
-
dict_tmp["score"] = return_dict_cols[column_name][detected_value_type]
|
|
178
|
-
return_dict_cols_intermediary[column_name].append(dict_tmp)
|
|
179
|
-
|
|
180
|
-
# Clean dict using priorities
|
|
181
|
-
formats_detected = {
|
|
182
|
-
x["format"] for x in return_dict_cols_intermediary[column_name]
|
|
183
|
-
}
|
|
184
|
-
formats_to_remove = set()
|
|
185
|
-
# Deprioritise float and int detection vs others
|
|
186
|
-
if len(formats_detected - {"float", "int"}) > 0:
|
|
187
|
-
formats_to_remove = formats_to_remove.union({"float", "int"})
|
|
188
|
-
if "int" in formats_detected:
|
|
189
|
-
formats_to_remove.add("float")
|
|
190
|
-
if "latitude_wgs_fr_metropole" in formats_detected:
|
|
191
|
-
formats_to_remove.add("latitude_l93")
|
|
192
|
-
formats_to_remove.add("latitude_wgs")
|
|
193
|
-
if "longitude_wgs_fr_metropole" in formats_detected:
|
|
194
|
-
formats_to_remove.add("longitude_l93")
|
|
195
|
-
formats_to_remove.add("longitude_wgs")
|
|
196
|
-
if "longitude_wgs" in formats_detected:
|
|
197
|
-
formats_to_remove.add("longitude_l93")
|
|
198
|
-
if "code_region" in formats_detected:
|
|
199
|
-
formats_to_remove.add("code_departement")
|
|
200
|
-
|
|
201
|
-
formats_to_keep = formats_detected - formats_to_remove
|
|
202
|
-
|
|
203
|
-
detections = return_dict_cols_intermediary[column_name]
|
|
204
|
-
detections = [x for x in detections if x["format"] in formats_to_keep]
|
|
205
|
-
if not limited_output:
|
|
206
|
-
return_dict_cols_intermediary[column_name] = detections
|
|
207
|
-
else:
|
|
208
|
-
return_dict_cols_intermediary[column_name] = (
|
|
209
|
-
max(detections, key=lambda x: x["score"])
|
|
210
|
-
if len(detections) > 0
|
|
211
|
-
else {"format": "string", "score": 1.0}
|
|
212
|
-
)
|
|
213
|
-
|
|
214
|
-
return return_dict_cols_intermediary
|
|
29
|
+
def prevent_nan(value: float) -> Optional[float]:
|
|
30
|
+
if math.isnan(value):
|
|
31
|
+
return None
|
|
32
|
+
return value
|
|
215
33
|
|
|
216
34
|
|
|
217
35
|
def full_word_strictly_inside_string(word: str, string: str):
|
|
@@ -221,47 +39,3 @@ def full_word_strictly_inside_string(word: str, string: str):
|
|
|
221
39
|
or (string.startswith(word + " "))
|
|
222
40
|
or (string.endswith(" " + word))
|
|
223
41
|
)
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
def cast(value: str, _type: str) -> Optional[Union[str, float, bool, date, datetime]]:
|
|
227
|
-
if not isinstance(value, str) or not value:
|
|
228
|
-
# None is the current default value in hydra, should we keep this?
|
|
229
|
-
return None
|
|
230
|
-
if _type == "float":
|
|
231
|
-
return float_casting(value)
|
|
232
|
-
if _type == "bool":
|
|
233
|
-
return bool_casting(value)
|
|
234
|
-
if _type == "json":
|
|
235
|
-
# in hydra json are given to postgres as strings, conversion is done by postgres
|
|
236
|
-
return json.loads(value)
|
|
237
|
-
if _type == "date":
|
|
238
|
-
_date = date_casting(value)
|
|
239
|
-
return _date.date() if _date else None
|
|
240
|
-
if _type == "datetime":
|
|
241
|
-
return date_casting(value)
|
|
242
|
-
raise ValueError(f"Unknown type `{_type}`")
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
def cast_df(df: pd.DataFrame, columns: dict, cast_json: bool = True, verbose: bool = False) -> pd.DataFrame:
|
|
246
|
-
if verbose:
|
|
247
|
-
start = time()
|
|
248
|
-
output_df = pd.DataFrame()
|
|
249
|
-
for col_name, detection in columns.items():
|
|
250
|
-
if detection["python_type"] == "string" or (detection["python_type"] == "json" and not cast_json):
|
|
251
|
-
# no change if detected type is string
|
|
252
|
-
output_df[col_name] = df[col_name].copy()
|
|
253
|
-
elif detection["python_type"] == "int":
|
|
254
|
-
# to allow having ints and NaN in the same column
|
|
255
|
-
output_df[col_name] = df[col_name].copy().astype(pd.Int64Dtype())
|
|
256
|
-
else:
|
|
257
|
-
output_df[col_name] = df[col_name].apply(
|
|
258
|
-
lambda col: cast(col, _type=detection["python_type"])
|
|
259
|
-
)
|
|
260
|
-
# to save RAM
|
|
261
|
-
del df[col_name]
|
|
262
|
-
if verbose:
|
|
263
|
-
display_logs_depending_process_time(
|
|
264
|
-
f'Casting columns completed in {round(time() - start, 3)}s',
|
|
265
|
-
time() - start,
|
|
266
|
-
)
|
|
267
|
-
return output_df
|
|
@@ -9,6 +9,9 @@
|
|
|
9
9
|
- Raise an error if the encoding could not be guessed [#106](https://github.com/datagouv/csv-detective/pull/106)
|
|
10
10
|
- Allow to only specify tests to skip ("all but...") [#108](https://github.com/datagouv/csv-detective/pull/108)
|
|
11
11
|
- Fix bool casting [#109](https://github.com/datagouv/csv-detective/pull/109)
|
|
12
|
+
- Handle csv.gz files [#110](https://github.com/datagouv/csv-detective/pull/110)
|
|
13
|
+
- Refactor file tests [#110](https://github.com/datagouv/csv-detective/pull/110)
|
|
14
|
+
- Restructure repo (breaking changes) [#111](https://github.com/datagouv/csv-detective/pull/111)
|
|
12
15
|
|
|
13
16
|
## 0.7.4 (2024-11-15)
|
|
14
17
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: csv_detective
|
|
3
|
-
Version: 0.7.5.
|
|
3
|
+
Version: 0.7.5.dev1209
|
|
4
4
|
Summary: Detect CSV column content
|
|
5
5
|
Home-page: https://github.com/etalab/csv_detective
|
|
6
6
|
Author: Etalab
|
|
@@ -37,5 +37,6 @@ Dynamic: description-content-type
|
|
|
37
37
|
Dynamic: home-page
|
|
38
38
|
Dynamic: keywords
|
|
39
39
|
Dynamic: license
|
|
40
|
+
Dynamic: license-file
|
|
40
41
|
Dynamic: requires-dist
|
|
41
42
|
Dynamic: summary
|
|
@@ -1,16 +1,12 @@
|
|
|
1
|
-
csv_detective/__init__.py,sha256=
|
|
1
|
+
csv_detective/__init__.py,sha256=GCHgu0BhH5ACV7cf-1gDr9nRyvSoeQ1vRw9SjEHeMT4,143
|
|
2
2
|
csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
|
|
3
|
-
csv_detective/
|
|
4
|
-
csv_detective/detection.py,sha256=zrP8qvLDvhVXTHi7Ty8G_ga4zfZPjBhuyApqFQkPq2Y,22373
|
|
5
|
-
csv_detective/explore_csv.py,sha256=BY1_X7OH2Lod08DTBwWaGvguc2OhpwOko4nlI8rf0HM,17470
|
|
6
|
-
csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
|
|
3
|
+
csv_detective/explore_csv.py,sha256=aJ2pG7lK4sgY9Pv31zEzFVGByxkfw4wwgrQqfgUtBOo,14903
|
|
7
4
|
csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
|
|
8
|
-
csv_detective/
|
|
9
|
-
csv_detective/utils.py,sha256=yO9INaLh-QX-FFL2A153AlMqftE04wb0hpN6HJvsKGg,10581
|
|
5
|
+
csv_detective/utils.py,sha256=KAYfSJXnPuAXnSc38Jm57oQ_JP_0kUkmI1OV6gN5_ys,1116
|
|
10
6
|
csv_detective/detect_fields/__init__.py,sha256=NVfE3BQVExgXb-BPbhDvlkM5-0naEVLpZ4aM_OGHYfE,931
|
|
11
7
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
8
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=
|
|
9
|
+
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
|
|
14
10
|
csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=tfHdqUnCQ0cv-fBo3Cy--8UNXzgjld4kseI5eQ_sR4E,187
|
|
15
11
|
csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=unr-Y4zquKSM5PVUiQGnOm-zQvaN8qd3v_XHf0W2VH8,378
|
|
16
12
|
csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=27bCkZP5w7tpsKUdOIXuiAG90DTdw066CWg3G5HtsKE,160
|
|
@@ -26,16 +22,16 @@ csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256
|
|
|
26
22
|
csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=2q5T4SmCK6ZFF1mrv7d-q9tOIQKBcROI24y_UYIuvz0,383
|
|
27
23
|
csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=JbFKDd4jAnd9yb7YqP36MoLdO1JFPm1cg60fGXt6ZvI,1074
|
|
28
24
|
csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
-
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=
|
|
25
|
+
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=SRWJvg3Ikyjmop9iL14igTjxNGpO-QB3fpADI_bLYEY,566
|
|
30
26
|
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
|
|
31
27
|
csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z0RjMBt1--ZL7Jd1RsHAQCCbTAQk_BnlnTq8VF1o_VA,146
|
|
32
28
|
csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=g9n5sOjRlk4I9YFZjdaTYrXf8ftXRDunGZOUpYhN4fA,295
|
|
33
|
-
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=
|
|
29
|
+
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=lvcaVKgOPrCaZb-Y1-wYCbLYB_CQjCJFNAzfWDwtTVE,496
|
|
34
30
|
csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
35
31
|
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=kMV52djlG0y4o0ELEZuvTv_FvooYOgTnV1aWhycFJDc,284
|
|
36
|
-
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=
|
|
32
|
+
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=g8pOqJPKVpQiMd78zgrjXJWYeWkYhu8r3D4IQX519HQ,519
|
|
37
33
|
csv_detective/detect_fields/FR/other/insee_ape700/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
|
|
38
|
-
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=
|
|
34
|
+
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=iYkLe3MM51GWyBX_4BTq5PWDX_EeYRbEHWKMr8oE1MQ,269
|
|
39
35
|
csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=ohSwUL2rXqTXPG5WDAh2SP-lp1SzFCYgo4IhJ-PXmdk,442
|
|
40
36
|
csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=ThEeT6rXmS0EvHW8y4A_74bILyErDGxLe9v3elHOFs8,707
|
|
41
37
|
csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=BF47aMTe0rUIx66iurIo7fM9Nrk0YorQ7WmFLnkWonI,343
|
|
@@ -65,7 +61,7 @@ csv_detective/detect_fields/other/twitter/__init__.py,sha256=qbwLKsTBRFQ4PyTNVeE
|
|
|
65
61
|
csv_detective/detect_fields/other/url/__init__.py,sha256=9WaTqCglEsw_lJG_xZsBMdxJXg2yuQ92_fkX6CXWNV0,286
|
|
66
62
|
csv_detective/detect_fields/other/uuid/__init__.py,sha256=3-z0fDax29SJc57zPjNGR6DPICJu6gfuNGC5L3jh4d0,223
|
|
67
63
|
csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
|
-
csv_detective/detect_fields/temp/date/__init__.py,sha256=
|
|
64
|
+
csv_detective/detect_fields/temp/date/__init__.py,sha256=1a_Ra9fmT4wgGMrcknXP7eN7A2QiaMF0Yjy0-BMihtA,987
|
|
69
65
|
csv_detective/detect_fields/temp/datetime/__init__.py,sha256=Ykwhk2ospjY9P0KOG0AitgqN0sld6UmhOlbMz_XGQzQ,597
|
|
70
66
|
csv_detective/detect_fields/temp/datetime_iso/__init__.py,sha256=DOfli-A7gPlZmiV2J6Ka5_yDUCaOgxis29LET_tfhA4,444
|
|
71
67
|
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=JtUzg3BXYd-XJMLGxQ0P1OAJGOQ7DlYMD4fCU9yndg0,511
|
|
@@ -73,72 +69,90 @@ csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRh
|
|
|
73
69
|
csv_detective/detect_labels/__init__.py,sha256=BJjWlwTnnDe9nomABDUreu9EMu6IFG3T47d7YCJZbRc,878
|
|
74
70
|
csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
71
|
csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
76
|
-
csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=
|
|
77
|
-
csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=
|
|
78
|
-
csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=
|
|
79
|
-
csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=
|
|
80
|
-
csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=
|
|
81
|
-
csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=
|
|
82
|
-
csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=
|
|
83
|
-
csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=
|
|
84
|
-
csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=
|
|
85
|
-
csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=
|
|
86
|
-
csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=
|
|
87
|
-
csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=
|
|
88
|
-
csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=
|
|
89
|
-
csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=
|
|
90
|
-
csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=
|
|
72
|
+
csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=e5ROxhrXNCefLwL5lXTWHO0PEWwLHfqmowm7XoeqZ2I,1063
|
|
73
|
+
csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=D_9QFvAeX5Nwp4qtQ0NEpKR0jpRlDx-rNBSrlYrw4nw,1096
|
|
74
|
+
csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=rpzxUVsZyazVVguOorLadiJv_Vz1n04ijm0RbVmRDts,1025
|
|
75
|
+
csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=VUqv3G-JO-9CJU4-EX5DXs4O22Lqm75vuOy9MngoojA,949
|
|
76
|
+
csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=USIYj7PiULI_WCfDxpzRCW9tv8-FNYKWopsVZ3H79mE,1070
|
|
77
|
+
csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=f9WroGVfB5jUzd_Rjs4XocZT2Ma-xZd2On9StUHy3F4,1012
|
|
78
|
+
csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=iYD0UPhRVKYFv8DAEfe_RoQlE47igZ_MacsHxVLyYcM,948
|
|
79
|
+
csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=fqNziX5ID6mVE5nVNviOsncVqkYyVvj7J_8hxN7_D1w,1229
|
|
80
|
+
csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=EAcQ2FqTKQdxhSYr5VCuEpjc7BdGwTdMkLL_VL6ay7Y,957
|
|
81
|
+
csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=X3vGdh_DHzWZXuV2-L9QhuWTLjHyaPZyS__s9Y5yiNg,1386
|
|
82
|
+
csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
|
|
83
|
+
csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=Pf00tBADr7HvJLeW_YqY3QU1EBVJDi365woheAzsNKY,1139
|
|
84
|
+
csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=LfvgcrjVsXmxT6xC3X8eQIiQ_STvPRwjUbUQ4TyfJE0,1144
|
|
85
|
+
csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=RsI_QXMJOZ5PpKcoKWy7AmUHFjehHXcUezquZyt1eq4,1169
|
|
86
|
+
csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=h9pE3xu2-PFw1jmDenkoKWmFkYmpK9-UgCboPlL7Aeg,1164
|
|
91
87
|
csv_detective/detect_labels/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
92
|
-
csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=
|
|
93
|
-
csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=
|
|
94
|
-
csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=
|
|
95
|
-
csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256
|
|
96
|
-
csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=
|
|
97
|
-
csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=
|
|
98
|
-
csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=
|
|
99
|
-
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=
|
|
100
|
-
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=
|
|
101
|
-
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=
|
|
102
|
-
csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=
|
|
88
|
+
csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=lcLdEdNo4rhLvqzP3C0rmU_1PaQvTdpviXt9xGSaGFc,939
|
|
89
|
+
csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=DJykTRguggOlsIuyjYezJ99c8MGCSwwwCLcoQjfN40o,1024
|
|
90
|
+
csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=idLo99rELzs1uc4mOcby9RLZLhhpsOp5AoTudT2jPwM,934
|
|
91
|
+
csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=J5G8pldzBdXRaopYNzGDztRFIsI_7rdaAPQ_kSuz5PU,1043
|
|
92
|
+
csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=9EXCmzKSa5PSWrPbVeLscbJCaiwQEXX-1rCr79U8XLA,975
|
|
93
|
+
csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=9bq2171SrmDIHx4A0cAeSHfWyQl40e-dIR9_ur4cEHQ,1124
|
|
94
|
+
csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=AEKBGWEKxDoT8k9BF-v9vl1SHc4DffiiFyhip-6tC78,956
|
|
95
|
+
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=9w2VCs8kq-XVRmxxwqZYIynfCPwbFbl-pBPqXtnXx8Y,1103
|
|
96
|
+
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Yqrp7NDEN0WRA_oktMb0wWoLQ99rzIvNvJ8jVhBCRD8,1040
|
|
97
|
+
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=gdzclIAjhr_k-a04l_FDz9kQywBfSA6vqa0UQxdaqNw,1143
|
|
98
|
+
csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=mB0hC2JUKGnhGl6MUDFzSM_-t-Tvt3Vm21Gr_JXkL3k,1316
|
|
103
99
|
csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
104
|
-
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=
|
|
105
|
-
csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=
|
|
100
|
+
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=FHXmOIjH4e5n_mahtScgOVYUAi_M4PeHAnsuIm5LxCA,1074
|
|
101
|
+
csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=hX0FPAia4x28GD398WvpeaBQ4_3F5G3xAhySmZBdi5w,934
|
|
106
102
|
csv_detective/detect_labels/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
107
|
-
csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=
|
|
108
|
-
csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=
|
|
109
|
-
csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=
|
|
110
|
-
csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=
|
|
111
|
-
csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=
|
|
112
|
-
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=
|
|
113
|
-
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=
|
|
103
|
+
csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
|
|
104
|
+
csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
|
|
105
|
+
csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
|
|
106
|
+
csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=0sYS6bF_xmmhqsJ0Wrx7GC3qBAYjK7uhVud_ZbIQHHQ,1072
|
|
107
|
+
csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
|
|
108
|
+
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=SwR1NU0vpk8YdHTIk1wk9zQpNoUsoABq-K8GfRMY0fw,1705
|
|
109
|
+
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=z4rOrkCypI5JodgX9alTrV03IpetgAW4BGJuNvFlU4s,1145
|
|
114
110
|
csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
|
-
csv_detective/detect_labels/other/booleen/__init__.py,sha256=
|
|
116
|
-
csv_detective/detect_labels/other/email/__init__.py,sha256=
|
|
117
|
-
csv_detective/detect_labels/other/float/__init__.py,sha256=
|
|
118
|
-
csv_detective/detect_labels/other/int/__init__.py,sha256=
|
|
111
|
+
csv_detective/detect_labels/other/booleen/__init__.py,sha256=uvQ7yDVAlEO8AY44OMblh_ZrxPTOmdvFtbcQEanpWSo,987
|
|
112
|
+
csv_detective/detect_labels/other/email/__init__.py,sha256=VRUYZXGn-hRqE2sY0JY-Oh_wtT568orDTBxBGYsgqxE,1148
|
|
113
|
+
csv_detective/detect_labels/other/float/__init__.py,sha256=jIr1r9FFy8NWvi5fOuIhj52bc7cZmM3OeTo-c6TUWII,926
|
|
114
|
+
csv_detective/detect_labels/other/int/__init__.py,sha256=G1GAlKNaOZH_l39Zpw85xkl7JcdnY5PlEEroyU78hlY,933
|
|
119
115
|
csv_detective/detect_labels/other/money/__init__.py,sha256=kBEGuUy6kYkOI3vC_a7waBciG2ipyV9bhC330U8WaoI,279
|
|
120
116
|
csv_detective/detect_labels/other/money/check_col_name.py,sha256=zgp5eUnf3XRQuxgdEGfxPfUnniO8Pzw19uK0ICr2pf8,414
|
|
121
|
-
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=
|
|
122
|
-
csv_detective/detect_labels/other/twitter/__init__.py,sha256=
|
|
123
|
-
csv_detective/detect_labels/other/url/__init__.py,sha256=
|
|
124
|
-
csv_detective/detect_labels/other/uuid/__init__.py,sha256=
|
|
117
|
+
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=3TW59y4vo4Pkx_fQrmEs1-gZbdJeNiK7ip25cpR829U,927
|
|
118
|
+
csv_detective/detect_labels/other/twitter/__init__.py,sha256=x3b522ov_g-kmcq4k4eoZ8FQqrXdnlRJJit5UbnzIrQ,959
|
|
119
|
+
csv_detective/detect_labels/other/url/__init__.py,sha256=wVQsWQzOuBY-cD7wn_PXcWLVEkknBA2lBCu8SRWsQG4,1202
|
|
120
|
+
csv_detective/detect_labels/other/uuid/__init__.py,sha256=ySxqFvtGHguoiOyD5A1YRFY3SuubkgBAEY_Ud5kZVPM,931
|
|
125
121
|
csv_detective/detect_labels/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
126
|
-
csv_detective/detect_labels/temp/date/__init__.py,sha256
|
|
127
|
-
csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=
|
|
128
|
-
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=
|
|
129
|
-
csv_detective/detect_labels/temp/year/__init__.py,sha256=
|
|
130
|
-
csv_detective
|
|
131
|
-
csv_detective
|
|
132
|
-
csv_detective
|
|
122
|
+
csv_detective/detect_labels/temp/date/__init__.py,sha256=CRv-S0figO6MOPdE0Lv5hWdjtIr6EmWzwlcjn5ofIxo,1322
|
|
123
|
+
csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=0lFdN5Z43m6Qm-wBqcyM_mceUmI4s3vqgLCM-Jlgoxw,1157
|
|
124
|
+
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=4N0EGJA_2vXC1iFptvzpU6IN7AIJH5MFUrRY2p7Cjfs,1175
|
|
125
|
+
csv_detective/detect_labels/temp/year/__init__.py,sha256=3U9j8Hux432KdGtIyArq_-vScn-5eYFwpn976WM9N4M,1150
|
|
126
|
+
csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
|
|
127
|
+
csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
|
|
128
|
+
csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
|
|
129
|
+
csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41cnQmc,1172
|
|
130
|
+
csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
|
|
131
|
+
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
132
|
+
csv_detective/detection/variables.py,sha256=3qEMtjZ_zyIFXvTnFgK7ZMDx8C12uQXKfFjEj2moyJc,3558
|
|
133
|
+
csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
|
|
134
|
+
csv_detective/output/example.py,sha256=i8PkdXxidF7qR_9aK8vh12JpZdJQryhBgyrMS8iy5rk,8642
|
|
135
|
+
csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
|
|
136
|
+
csv_detective/output/schema.py,sha256=ZDBWDOD8IYp7rcB0_n8l9JXGIhOQ6bTZHFWfTmnNNEQ,13480
|
|
137
|
+
csv_detective/output/utils.py,sha256=HbmvCCCmFo7NJxhD_UsJIveuw-rrfhrvYckv1CJn_10,2301
|
|
138
|
+
csv_detective/parsing/columns.py,sha256=Oj0Ddp2fPZeL70GDWdF7GY2RmhiVdz0IEvoBJFt-wao,5701
|
|
139
|
+
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
140
|
+
csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
|
|
141
|
+
csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
|
|
142
|
+
csv_detective/parsing/load.py,sha256=SpP0pfxswOAPPpwbZfoP1blh0EKV5VMs0TpTgQJKzjs,3621
|
|
143
|
+
csv_detective/parsing/text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
|
|
144
|
+
csv_detective-0.7.5.dev1209.data/data/share/csv_detective/CHANGELOG.md,sha256=povo1ufNJvsxJLkzdjYLgkTy9E-MNFWTg6elXe2nyqU,7625
|
|
145
|
+
csv_detective-0.7.5.dev1209.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
146
|
+
csv_detective-0.7.5.dev1209.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
|
|
147
|
+
csv_detective-0.7.5.dev1209.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
133
148
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
134
149
|
tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
|
|
135
|
-
tests/test_fields.py,sha256=
|
|
136
|
-
tests/test_file.py,sha256=
|
|
150
|
+
tests/test_fields.py,sha256=fcgycaFxacOcN0WdwuUvxef_ejd6tRHNpkD5pxMjMXE,11141
|
|
151
|
+
tests/test_file.py,sha256=EleTssys5fCP4N0W1eTZN35uijzoF15e3dIcuIlrMsk,7865
|
|
137
152
|
tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
|
|
138
153
|
tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
|
|
139
|
-
csv_detective-0.7.5.
|
|
140
|
-
csv_detective-0.7.5.
|
|
141
|
-
csv_detective-0.7.5.
|
|
142
|
-
csv_detective-0.7.5.
|
|
143
|
-
csv_detective-0.7.5.
|
|
144
|
-
csv_detective-0.7.5.dev1180.dist-info/RECORD,,
|
|
154
|
+
csv_detective-0.7.5.dev1209.dist-info/METADATA,sha256=LwKAMVqoJjZfnrWAJV_nv_V3oprmbmmaNmX9e4Zvruc,1386
|
|
155
|
+
csv_detective-0.7.5.dev1209.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
156
|
+
csv_detective-0.7.5.dev1209.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
157
|
+
csv_detective-0.7.5.dev1209.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
158
|
+
csv_detective-0.7.5.dev1209.dist-info/RECORD,,
|
tests/test_fields.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import
|
|
1
|
+
from datetime import date as _date, datetime as _datetime
|
|
2
|
+
|
|
2
3
|
from numpy import random
|
|
4
|
+
import pandas as pd
|
|
3
5
|
import pytest
|
|
4
|
-
from datetime import date as _date, datetime as _datetime
|
|
5
6
|
|
|
6
7
|
from csv_detective.detect_fields.FR.geo import (
|
|
7
8
|
adresse,
|
|
@@ -43,12 +44,12 @@ from csv_detective.detect_fields.other import (
|
|
|
43
44
|
float as test_float,
|
|
44
45
|
)
|
|
45
46
|
from csv_detective.detect_fields.temp import date, datetime_iso, datetime_rfc822, year
|
|
46
|
-
from csv_detective.detection import (
|
|
47
|
+
from csv_detective.detection.variables import (
|
|
47
48
|
detect_continuous_variable,
|
|
48
|
-
|
|
49
|
+
detect_categorical_variable,
|
|
49
50
|
)
|
|
50
51
|
from csv_detective.explore_csv import return_all_tests
|
|
51
|
-
from csv_detective.
|
|
52
|
+
from csv_detective.output.dataframe import cast
|
|
52
53
|
|
|
53
54
|
|
|
54
55
|
def test_all_tests_return_bool():
|
|
@@ -71,7 +72,7 @@ def test_detetect_categorical_variable():
|
|
|
71
72
|
}
|
|
72
73
|
df = pd.DataFrame(df_dict, dtype="unicode")
|
|
73
74
|
|
|
74
|
-
res, _ =
|
|
75
|
+
res, _ = detect_categorical_variable(df)
|
|
75
76
|
assert len(res.values) and all(k in res.values for k in ["cat", "cat2"])
|
|
76
77
|
|
|
77
78
|
|