csv-detective 0.8.1.dev1703__py3-none-any.whl → 0.8.1.dev1720__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/cli.py +6 -9
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +78 -78
- csv_detective/detect_fields/FR/geo/code_departement/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -1
- csv_detective/detect_fields/FR/geo/code_region/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/commune/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/departement/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +1 -2
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +1 -2
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/pays/__init__.py +6 -6
- csv_detective/detect_fields/FR/geo/region/__init__.py +6 -4
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +15 -14
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +4 -3
- csv_detective/detect_fields/FR/other/date_fr/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +4 -3
- csv_detective/detect_fields/FR/other/sexe/__init__.py +2 -2
- csv_detective/detect_fields/FR/other/siren/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/siret/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/tel_fr/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/uai/__init__.py +2 -2
- csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +15 -15
- csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +27 -27
- csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +5 -5
- csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +5 -5
- csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +5 -5
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/other/booleen/__init__.py +1 -1
- csv_detective/detect_fields/other/email/__init__.py +4 -2
- csv_detective/detect_fields/other/int/__init__.py +3 -3
- csv_detective/detect_fields/other/mongo_object_id/__init__.py +2 -2
- csv_detective/detect_fields/other/twitter/__init__.py +2 -2
- csv_detective/detect_fields/other/uuid/__init__.py +4 -5
- csv_detective/detect_fields/temp/date/__init__.py +3 -2
- csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +6 -6
- csv_detective/detect_fields/temp/year/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -1
- csv_detective/detect_labels/geo/lonlat_wgs/__init__.py +1 -0
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +1 -1
- csv_detective/detection/columns.py +9 -9
- csv_detective/detection/encoding.py +6 -4
- csv_detective/detection/engine.py +6 -5
- csv_detective/detection/formats.py +19 -19
- csv_detective/detection/headers.py +3 -5
- csv_detective/detection/rows.py +1 -1
- csv_detective/detection/variables.py +4 -4
- csv_detective/explore_csv.py +7 -8
- csv_detective/load_tests.py +6 -14
- csv_detective/output/__init__.py +3 -7
- csv_detective/output/dataframe.py +9 -5
- csv_detective/output/example.py +13 -13
- csv_detective/output/profile.py +30 -23
- csv_detective/output/schema.py +20 -23
- csv_detective/output/utils.py +15 -15
- csv_detective/parsing/columns.py +23 -12
- csv_detective/parsing/csv.py +1 -1
- csv_detective/parsing/excel.py +10 -11
- csv_detective/parsing/load.py +11 -8
- csv_detective/parsing/text.py +4 -9
- csv_detective/s3_utils.py +3 -7
- csv_detective/utils.py +4 -2
- csv_detective/validate.py +18 -13
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1720.dist-info}/METADATA +2 -1
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1720.dist-info}/RECORD +77 -77
- tests/test_example.py +2 -6
- tests/test_fields.py +16 -10
- tests/test_file.py +10 -9
- tests/test_labels.py +3 -2
- tests/test_structure.py +3 -1
- tests/test_validation.py +9 -6
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1720.dist-info}/WHEEL +0 -0
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1720.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1720.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1720.dist-info}/top_level.txt +0 -0
csv_detective/validate.py
CHANGED
|
@@ -4,8 +4,8 @@ from typing import Optional, Union
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
6
|
from csv_detective.load_tests import return_all_tests
|
|
7
|
-
from csv_detective.parsing.load import load_file
|
|
8
7
|
from csv_detective.parsing.columns import test_col_val
|
|
8
|
+
from csv_detective.parsing.load import load_file
|
|
9
9
|
|
|
10
10
|
logging.basicConfig(level=logging.INFO)
|
|
11
11
|
|
|
@@ -47,9 +47,8 @@ def validate(
|
|
|
47
47
|
if verbose:
|
|
48
48
|
logging.info("Comparing table with the previous analysis")
|
|
49
49
|
logging.info("- Checking if all columns match")
|
|
50
|
-
if (
|
|
51
|
-
|
|
52
|
-
or any(col_name not in previous_analysis["header"] for col_name in analysis["header"])
|
|
50
|
+
if any(col_name not in analysis["header"] for col_name in previous_analysis["header"]) or any(
|
|
51
|
+
col_name not in previous_analysis["header"] for col_name in analysis["header"]
|
|
53
52
|
):
|
|
54
53
|
if verbose:
|
|
55
54
|
logging.warning("> Columns do not match, proceeding with full analysis")
|
|
@@ -72,12 +71,18 @@ def validate(
|
|
|
72
71
|
return False, table, analysis
|
|
73
72
|
if verbose:
|
|
74
73
|
logging.info("> All checks successful")
|
|
75
|
-
return
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
74
|
+
return (
|
|
75
|
+
True,
|
|
76
|
+
table,
|
|
77
|
+
analysis
|
|
78
|
+
| {
|
|
79
|
+
k: previous_analysis[k]
|
|
80
|
+
for k in [
|
|
81
|
+
"categorical",
|
|
82
|
+
"columns",
|
|
83
|
+
"columns_fields",
|
|
84
|
+
"columns_labels",
|
|
85
|
+
"formats",
|
|
86
|
+
]
|
|
87
|
+
},
|
|
88
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: csv-detective
|
|
3
|
-
Version: 0.8.1.
|
|
3
|
+
Version: 0.8.1.dev1720
|
|
4
4
|
Summary: Detect tabular files column content
|
|
5
5
|
Author-email: Etalab <opendatateam@data.gouv.fr>
|
|
6
6
|
License: MIT
|
|
@@ -27,6 +27,7 @@ Provides-Extra: dev
|
|
|
27
27
|
Requires-Dist: pytest>=8.3.0; extra == "dev"
|
|
28
28
|
Requires-Dist: responses>=0.25.0; extra == "dev"
|
|
29
29
|
Requires-Dist: bumpx>=0.3.10; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.9.3; extra == "dev"
|
|
30
31
|
Dynamic: license-file
|
|
31
32
|
|
|
32
33
|
# CSV Detective
|
|
@@ -1,77 +1,77 @@
|
|
|
1
1
|
csv_detective/__init__.py,sha256=XY7pnoNHlocvyUiK8EQpJYPSQt5BRWWJD8KiPlvI9pU,164
|
|
2
|
-
csv_detective/cli.py,sha256=
|
|
3
|
-
csv_detective/explore_csv.py,sha256=
|
|
4
|
-
csv_detective/load_tests.py,sha256=
|
|
5
|
-
csv_detective/s3_utils.py,sha256=
|
|
6
|
-
csv_detective/utils.py,sha256=
|
|
7
|
-
csv_detective/validate.py,sha256=
|
|
2
|
+
csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
|
|
3
|
+
csv_detective/explore_csv.py,sha256=sEMza4Z27ac88fGq7tUiK1zlfvuftztHhHVoa0c2EVU,9191
|
|
4
|
+
csv_detective/load_tests.py,sha256=q-ukYcNWGIpgxvlcO6OcUTtd9Dq7wczoW1YvFt4OA54,2274
|
|
5
|
+
csv_detective/s3_utils.py,sha256=z1KTVVkdurMv21o-rZu7_aluMJnSi-d5uxnQbqT2NoI,1407
|
|
6
|
+
csv_detective/utils.py,sha256=u9I1tsyMfVr2eIYiGCD7Iu30d55H3za44-N3cV2nj8M,1013
|
|
7
|
+
csv_detective/validate.py,sha256=RLHXLrRuynkdcvHUlSEbyglPvdbNYlT1Z4nQI-BdYdA,2898
|
|
8
8
|
csv_detective/detect_fields/__init__.py,sha256=0A5SZTp_IhhJ9z7lWeH4K5_0uwMK_VdMudjPm7oggVg,1000
|
|
9
9
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=
|
|
11
|
+
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=Q5tVRMW5QdFLfiNm42JmIwNRuBR5ZI3dQhzHPzXVnzo,1676
|
|
12
12
|
csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=tfHdqUnCQ0cv-fBo3Cy--8UNXzgjld4kseI5eQ_sR4E,187
|
|
13
|
-
csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=
|
|
13
|
+
csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=DwgDopvfoUmOdDLsFKHGtufM3PG5ahwiLFRrDimaDNM,379
|
|
14
14
|
csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=27bCkZP5w7tpsKUdOIXuiAG90DTdw066CWg3G5HtsKE,160
|
|
15
|
-
csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=
|
|
16
|
-
csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=
|
|
17
|
-
csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=
|
|
18
|
-
csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=
|
|
19
|
-
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=
|
|
20
|
-
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=
|
|
21
|
-
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=
|
|
22
|
-
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=
|
|
23
|
-
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=
|
|
24
|
-
csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=
|
|
25
|
-
csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=
|
|
15
|
+
csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=yjR6ob_h9fd5sa1YH6P0UbCsrHjdBGjsPIx02SHwlfE,133
|
|
16
|
+
csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=9pR2tVS4J2KrytCVuh-R86HGRMWutIK9FVQ30wKfCPg,253
|
|
17
|
+
csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=5vw4zjlmWaR2djxuQOUrmwsNIc9HgAE-zdxwerVR3S0,380
|
|
18
|
+
csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=UsMEW1EVVgnw-daOc1jBkEaGKvqTONSAGnj1s3QgM8w,400
|
|
19
|
+
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=YsAGiblFexBxvu_E3XaXhy_bordc6c1oKPgDzTsDeXw,374
|
|
20
|
+
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=RjkDSZzIbp4nnvDpa5GomDpyIJGvwErX7TgC4dlBJ14,437
|
|
21
|
+
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=7xmYpTYoHvFfcuocAhm6dP_j4sMII_hG1PMSrWId4FY,344
|
|
22
|
+
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=JbKuGK5UmUGAQKPFpN4RSLf3axJ5D1aCjzRXYHW-iXU,441
|
|
23
|
+
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=5VWDaHZvGhJAJu5XQrj6gLx5CVA9dNOE30eTXQ3pSf0,344
|
|
24
|
+
csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=85y-5qNRAWJrKqL0wh9iPMUBQjvPwc9lv1cYB2m0daQ,364
|
|
25
|
+
csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=6mJRaGsCPBY5JHHe8EWxEjDpAOIfvBPTaZKJb3_n3gU,1077
|
|
26
26
|
csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=
|
|
27
|
+
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=8f9n4F7T8Q44z4-sQL7d1OgvLObUPwC7D0iDLhHu8KQ,568
|
|
28
28
|
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
|
|
29
29
|
csv_detective/detect_fields/FR/other/code_import/__init__.py,sha256=zJ9YfPa5p--uHNQFeO1gTjxDy2Um_r-MxQd29VBNjFw,243
|
|
30
30
|
csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z0RjMBt1--ZL7Jd1RsHAQCCbTAQk_BnlnTq8VF1o_VA,146
|
|
31
31
|
csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=41SYNzCzUFh4trQlwG-9UC0-1Wi4fTcv8Byi_dd9Lq4,168
|
|
32
|
-
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=
|
|
32
|
+
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=cKIldBWb37pqBeKuV5jgAlRHeF9SyqlRL4n-qfGMZGI,497
|
|
33
33
|
csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
34
|
-
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=
|
|
35
|
-
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=
|
|
34
|
+
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=Ewi9u7jcYsxhqu2al8aEVYQ8dO9H7GmRjo_l8BYt0j0,284
|
|
35
|
+
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=u6Ri4ntWrDPYezsVlwpRTbzU8xsDfkJYGdOE2spkQpQ,520
|
|
36
36
|
csv_detective/detect_fields/FR/other/insee_ape700/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
|
|
37
|
-
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=
|
|
38
|
-
csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=
|
|
39
|
-
csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=
|
|
40
|
-
csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=
|
|
41
|
-
csv_detective/detect_fields/FR/other/uai/__init__.py,sha256=
|
|
37
|
+
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=dPVjgD3QBe4PUA4Bl_YDxZqFObF8KcoDz6zDYH2qfnk,269
|
|
38
|
+
csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=7wpSq4eRfYC2p711Me1XCY64PIWyK_TJNw3lidxuzJE,442
|
|
39
|
+
csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=YJPXYnzKJ4Y8XuBf1lRrLkImrZ6D7zitKl0KPry4CcU,707
|
|
40
|
+
csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=zXVRu80ehUulhhxu1FTWoOK81CaSr7MfTh4HJEYdEKA,343
|
|
41
|
+
csv_detective/detect_fields/FR/other/uai/__init__.py,sha256=mglrlTSBKYnGUOfGVM-xyk5KqUvQIFIjaoj31CO36zo,327
|
|
42
42
|
csv_detective/detect_fields/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
|
-
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py,sha256=
|
|
44
|
-
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py,sha256=
|
|
43
|
+
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py,sha256=ax34EqC712WT5JqiAKBWz6L7vmVpLNWmBF2wmjUUFiM,396
|
|
44
|
+
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py,sha256=Z59nO-UpIrUT9ZaQ6MuPQLFbu8AE0gYdkSleAj4WX_k,582
|
|
45
45
|
csv_detective/detect_fields/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py,sha256=
|
|
46
|
+
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py,sha256=X5kUggATKRJItJLaSDpv4MQPwo49iGBwlwQQjLTe77E,433
|
|
47
47
|
csv_detective/detect_fields/geo/iso_country_code_alpha2/iso_country_code_alpha2.txt,sha256=YyPlDqCdz65ecf4Wes_r0P4rDSJG35niXtjc4MmctXM,1740
|
|
48
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=
|
|
48
|
+
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=JvFLoQeJdbw5VYXUZqD9vsp0LQDoFE2Sd5gPA6K-0Lo,409
|
|
49
49
|
csv_detective/detect_fields/geo/iso_country_code_alpha3/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
|
|
50
|
-
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=
|
|
50
|
+
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=AnAridM4C8hcm4PeNdr8969czgrzM4KemGVZWAJSM1U,436
|
|
51
51
|
csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
|
|
52
52
|
csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=6wlwlxQmsVIZ21g-THvH3nBj-I8FuoF2sBlZAoEMGiQ,393
|
|
53
|
-
csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=
|
|
53
|
+
csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=sdor-L1WDHv5opg1Le13mru4ImSA-yEbxchlWENuUFE,327
|
|
54
54
|
csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=IXDTqD4YFUJYI1FYZ5ZfkqXY6KvNY7sgBVFRAvgTHtI,454
|
|
55
|
-
csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=
|
|
55
|
+
csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=gPnNTe-L9xjBVE-30VCJiK6IVZttj6Cy6zu1IL5907Y,330
|
|
56
56
|
csv_detective/detect_fields/geo/lonlat_wgs/__init__.py,sha256=CnBMYevfGdhBvureF3oc_zqT-RZjG419iAuUlugQFLc,454
|
|
57
57
|
csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
|
-
csv_detective/detect_fields/other/booleen/__init__.py,sha256=
|
|
59
|
-
csv_detective/detect_fields/other/email/__init__.py,sha256=
|
|
58
|
+
csv_detective/detect_fields/other/booleen/__init__.py,sha256=37ZUJACrZA9FQBYLDeVJGze7_I9x-ZWv5yWuBcqHcwI,497
|
|
59
|
+
csv_detective/detect_fields/other/email/__init__.py,sha256=p235wILf0fR9TeSEuyuPgoysAv9zg23a4vzdy3YJlxE,192
|
|
60
60
|
csv_detective/detect_fields/other/float/__init__.py,sha256=AT4Kpgwoz5PuAoLx00u0SL8DjjXZxsE8zSRbN18uAv4,578
|
|
61
|
-
csv_detective/detect_fields/other/int/__init__.py,sha256=
|
|
61
|
+
csv_detective/detect_fields/other/int/__init__.py,sha256=4SQAgaYTafeBL6hdT7Wp_xwcRNQsOWlYjaXKl78EuDw,320
|
|
62
62
|
csv_detective/detect_fields/other/json/__init__.py,sha256=AkRWZAidEM1dWkVRFThEBI5M7kMUu5Yu12iCViGM8lU,310
|
|
63
63
|
csv_detective/detect_fields/other/money/__init__.py,sha256=g_ZwBZXl9LhldwFYQotC5WqLiE8qQCZHtoI9eJvl_9M,232
|
|
64
|
-
csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=
|
|
64
|
+
csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=aZqxdbWzrL-syADA3_uYcOWcIuelvsnLzPLBEnkKJ8w,156
|
|
65
65
|
csv_detective/detect_fields/other/percent/__init__.py,sha256=vgpekNOPBRuunoVBXMi81rwHv4uSOhe78pbVtQ5SBO8,177
|
|
66
|
-
csv_detective/detect_fields/other/twitter/__init__.py,sha256=
|
|
66
|
+
csv_detective/detect_fields/other/twitter/__init__.py,sha256=Npu6ZbyNfHq1y7xn0Gd62GbOcyz4WNq82FrFSKb547w,154
|
|
67
67
|
csv_detective/detect_fields/other/url/__init__.py,sha256=L7h9fZldh1w86XwCx0x3Q1TXSJ_nIId1C-l1yFzZYrA,299
|
|
68
|
-
csv_detective/detect_fields/other/uuid/__init__.py,sha256=
|
|
68
|
+
csv_detective/detect_fields/other/uuid/__init__.py,sha256=XFxbIsdIhRw0dtFxBXQBhicE4yy7P4jmwYXeJhq6FVY,215
|
|
69
69
|
csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
|
-
csv_detective/detect_fields/temp/date/__init__.py,sha256=
|
|
70
|
+
csv_detective/detect_fields/temp/date/__init__.py,sha256=uVOszufihKqiQmS0wz7nUuQ2Dz-Tq9fSk1nf3S00mg4,1010
|
|
71
71
|
csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=bEfWvXx_GNCRUxMGJYqfOK4wRDr3WMaGVAmIa_C2pXE,853
|
|
72
72
|
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=GtQo55SrrXfoT-L7ZXW63jrlAYvNT5m56wMfhuY3pyI,836
|
|
73
|
-
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256
|
|
74
|
-
csv_detective/detect_fields/temp/year/__init__.py,sha256=
|
|
73
|
+
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=-pFdIIPgaLq2_QbFJ9zwy4YIwZuC73F0A_cNDntTuvQ,512
|
|
74
|
+
csv_detective/detect_fields/temp/year/__init__.py,sha256=gHchVciZExbGZLMBcbBaDXB0IgGptkQc4RhfSOMY0Ww,194
|
|
75
75
|
csv_detective/detect_labels/__init__.py,sha256=8vrFUrMc8a_VOC5gvYNMKL-Do_q9eMTrghJRI9Xotvk,883
|
|
76
76
|
csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
77
|
csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -100,7 +100,7 @@ csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=N7LzmtNwZER
|
|
|
100
100
|
csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=ZWhc8S9L1X2fFh2g5Ja-LuhsfHg_lALKrur6yDnGDPk,238
|
|
101
101
|
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=g7Y7IvW9VKO528z1MSPxfFtRB7kQXSiG7QQ-VZRfFEk,386
|
|
102
102
|
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=-gvdxUnv3LRfje60ljC4F3B2c1LBcWfV3zZbV3VJZ08,323
|
|
103
|
-
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=
|
|
103
|
+
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=4jIZ9cmN73XhP4ayGcEMcB_y0X45oRk1Lq2p_pNfgok,426
|
|
104
104
|
csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=5L6JowK9y6y9uZNg6hWzknMSzh0SurkwQeTINNKTdYY,599
|
|
105
105
|
csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
106
|
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=Vmv7Hp6LxR-bh3aXOBCHYzJVyCHtGoiWzJ40xnfTvdA,357
|
|
@@ -113,14 +113,14 @@ csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=On8VOCDD0EspZra6
|
|
|
113
113
|
csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=ME_KjniqDSdAwXP7XnKXyr5IA75KrGSLIhvPNfsux6E,664
|
|
114
114
|
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=tDndlFyEM7qKS3ATxp0Xs0FsPsOPpRWhDe1ockbWw8s,923
|
|
115
115
|
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=_8IV2FLtrOjzhQNsk-fsgc9-jbAgzKDVMr4tXu2P-s4,429
|
|
116
|
-
csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=
|
|
116
|
+
csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=7gbumJFp5xhz4GZ4uTAJQoxw5D53WJZddptyANmdEws,346
|
|
117
117
|
csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
118
|
csv_detective/detect_labels/other/booleen/__init__.py,sha256=zEkarex7L4T3vmYjR5hdhtnhugTVDsvkgG_it6nN0aA,214
|
|
119
119
|
csv_detective/detect_labels/other/email/__init__.py,sha256=Poagn45-eC2a_Wdk5Qs6d2BgYdncCQKZp2yEB50IuNw,431
|
|
120
120
|
csv_detective/detect_labels/other/float/__init__.py,sha256=X0axZN2GAfC_y01zRfIyvOfRsOy2KNQcQ-mlQAKxqT4,216
|
|
121
121
|
csv_detective/detect_labels/other/int/__init__.py,sha256=_1AY7thEBCcgSBQQ2YbY4YaPaxGRQ71BtmaFaX088ig,215
|
|
122
122
|
csv_detective/detect_labels/other/money/__init__.py,sha256=1JRArDZ5r6gtyuKijH_fuuVFVc0f3MN5gPyAf4GPqzs,249
|
|
123
|
-
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256
|
|
123
|
+
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=-NsB_Glm6KRGmIusAY9YoGPrdws6RwkYRPUiJUUPv3Y,209
|
|
124
124
|
csv_detective/detect_labels/other/twitter/__init__.py,sha256=96WhOB6nOutzSFOC5ZJYFSlhHDJRn2SkT4nYNj8E6ww,241
|
|
125
125
|
csv_detective/detect_labels/other/url/__init__.py,sha256=4Ajpdp8W0jS9aHZAAMyUlgefjSgpB7Y6ci29KNkwAoI,485
|
|
126
126
|
csv_detective/detect_labels/other/uuid/__init__.py,sha256=kXVb4oMy-Zv-OYmAIEoNFrBA20l9hbUTdvTfjeMmhjk,213
|
|
@@ -129,40 +129,40 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=w0eeZIseAmPwL4OvCWzZXbx
|
|
|
129
129
|
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=53ysj7QgsxXwG1le3zfSJd1oaTTf-Er3jBeYi_A4F9g,458
|
|
130
130
|
csv_detective/detect_labels/temp/year/__init__.py,sha256=7uWaCZY7dOG7nolW46IgBWmcu8K-9jPED-pOlMlErfo,433
|
|
131
131
|
csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
132
|
-
csv_detective/detection/columns.py,sha256=
|
|
133
|
-
csv_detective/detection/encoding.py,sha256=
|
|
134
|
-
csv_detective/detection/engine.py,sha256=
|
|
135
|
-
csv_detective/detection/formats.py,sha256=
|
|
136
|
-
csv_detective/detection/headers.py,sha256=
|
|
137
|
-
csv_detective/detection/rows.py,sha256=
|
|
132
|
+
csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
|
|
133
|
+
csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
|
|
134
|
+
csv_detective/detection/engine.py,sha256=1Z4vzjxwPRZ9-vv8nw-zU2sgBZtOsEz0UoKjGaSwVJU,1543
|
|
135
|
+
csv_detective/detection/formats.py,sha256=dzJPdi2rP2jTHZBk9UHpJL3c5N-PSohCymHs-OZt45c,6211
|
|
136
|
+
csv_detective/detection/headers.py,sha256=y5iR4jWH5fUtAH_Zg0zxWSVG_INCHlXJFMbhPpI2YMo,1148
|
|
137
|
+
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
138
138
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
139
|
-
csv_detective/detection/variables.py,sha256=
|
|
140
|
-
csv_detective/output/__init__.py,sha256=
|
|
141
|
-
csv_detective/output/dataframe.py,sha256=
|
|
142
|
-
csv_detective/output/example.py,sha256=
|
|
143
|
-
csv_detective/output/profile.py,sha256=
|
|
144
|
-
csv_detective/output/schema.py,sha256=
|
|
145
|
-
csv_detective/output/utils.py,sha256=
|
|
139
|
+
csv_detective/detection/variables.py,sha256=njfPj1hhWowe8qgrdCr4gtZyr1l2DGA08n06LnmnziY,3550
|
|
140
|
+
csv_detective/output/__init__.py,sha256=f-UFv_iULpVF_Fy39H4sfACEnrthjK4N3mCAVPkjnKw,1860
|
|
141
|
+
csv_detective/output/dataframe.py,sha256=UpLuSxx_SFbKpem1n-xY7jF16MXGpKQYEWjaSMIiB4s,2215
|
|
142
|
+
csv_detective/output/example.py,sha256=XrnPS_uC0cICn7tgnLWNctpUbnPzl7fIMzNTzJEWGJc,8655
|
|
143
|
+
csv_detective/output/profile.py,sha256=Jeh0mrfH_hAVxV2E5I4XzdCm7ZAGAV_Xj3AXOi77lcA,3130
|
|
144
|
+
csv_detective/output/schema.py,sha256=5Duw5qnsJ-LaVC6JgF7p1zZAkehDzsbXA4iTSJUgLNM,13760
|
|
145
|
+
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
146
146
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
|
-
csv_detective/parsing/columns.py,sha256=
|
|
147
|
+
csv_detective/parsing/columns.py,sha256=fbvQMu12gAmz4TnNCL7pLnMFB-mWN_O-zEoj8jEGj0A,5696
|
|
148
148
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
149
|
-
csv_detective/parsing/csv.py,sha256=
|
|
150
|
-
csv_detective/parsing/excel.py,sha256=
|
|
151
|
-
csv_detective/parsing/load.py,sha256=
|
|
152
|
-
csv_detective/parsing/text.py,sha256=
|
|
153
|
-
csv_detective-0.8.1.
|
|
149
|
+
csv_detective/parsing/csv.py,sha256=qZFLOT3YCPoHF0svfVfQBnS8eHtucjDZ7dFITAPgLhc,1626
|
|
150
|
+
csv_detective/parsing/excel.py,sha256=ULUDw76z6hs1Xm2yL9KBM0EOvIsfBLkxwqTZfDEx6aE,7045
|
|
151
|
+
csv_detective/parsing/load.py,sha256=C3M8nvgWenOb8aDFi5dpDGCoAw9EBqr4EB63zbz2M14,3699
|
|
152
|
+
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
153
|
+
csv_detective-0.8.1.dev1720.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
154
154
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
|
-
tests/test_example.py,sha256=
|
|
156
|
-
tests/test_fields.py,sha256=
|
|
157
|
-
tests/test_file.py,sha256=
|
|
158
|
-
tests/test_labels.py,sha256=
|
|
159
|
-
tests/test_structure.py,sha256=
|
|
160
|
-
tests/test_validation.py,sha256=
|
|
155
|
+
tests/test_example.py,sha256=iO4RxMHZxnBAiKm6fsFar5OVg8hYKnqNZCw0SUnEuQQ,1972
|
|
156
|
+
tests/test_fields.py,sha256=Y2mBfV9ZdxTHYwHnkzGbpo1k_qJRLC8nU-zzAUxFmAE,11964
|
|
157
|
+
tests/test_file.py,sha256=YuVbSfeo_ASPiLT8CyxXqJENcDpj4wAFXzLwu_GzsOA,8437
|
|
158
|
+
tests/test_labels.py,sha256=Y0XlOpztCyV65pk7iAS_nMMfdysoBujlBmz10vHul9A,469
|
|
159
|
+
tests/test_structure.py,sha256=lxgNeyoDPnd2PqesffCJOdPuf-g6fP7UnGQiO3umHMc,1408
|
|
160
|
+
tests/test_validation.py,sha256=ie-Xf0vk6-M6GQq-x7kY5yse1EmXfxQkbaV7fR3fvYo,3308
|
|
161
161
|
venv/bin/activate_this.py,sha256=NRy3waFmwW1pOaNUp33wNN0vD1Kzkd-zXX-Sgl4EiVI,1286
|
|
162
162
|
venv/bin/jp.py,sha256=7z7dvRg0M7HzpZG4ssQID7nScjvQx7bcYTxJWDOrS6E,1717
|
|
163
163
|
venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
|
|
164
|
-
csv_detective-0.8.1.
|
|
165
|
-
csv_detective-0.8.1.
|
|
166
|
-
csv_detective-0.8.1.
|
|
167
|
-
csv_detective-0.8.1.
|
|
168
|
-
csv_detective-0.8.1.
|
|
164
|
+
csv_detective-0.8.1.dev1720.dist-info/METADATA,sha256=XvQxdxFvH2FWSWn3JpVR7IMzfUCANxs5kSfh1_JeHIQ,9570
|
|
165
|
+
csv_detective-0.8.1.dev1720.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
166
|
+
csv_detective-0.8.1.dev1720.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
167
|
+
csv_detective-0.8.1.dev1720.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
|
|
168
|
+
csv_detective-0.8.1.dev1720.dist-info/RECORD,,
|
tests/test_example.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
|
|
2
1
|
import re
|
|
3
2
|
from uuid import UUID
|
|
3
|
+
|
|
4
4
|
from csv_detective import create_example_csv_file
|
|
5
5
|
|
|
6
6
|
|
|
@@ -41,11 +41,7 @@ def test_example_creation():
|
|
|
41
41
|
"name": "nb_produits",
|
|
42
42
|
"type": "int",
|
|
43
43
|
},
|
|
44
|
-
{
|
|
45
|
-
"name": "note",
|
|
46
|
-
"type": "float",
|
|
47
|
-
"args": {"num_range": [1, 20]}
|
|
48
|
-
},
|
|
44
|
+
{"name": "note", "type": "float", "args": {"num_range": [1, 20]}},
|
|
49
45
|
]
|
|
50
46
|
df = create_example_csv_file(
|
|
51
47
|
fields=fields,
|
tests/test_fields.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from datetime import date as _date
|
|
1
|
+
from datetime import date as _date
|
|
2
|
+
from datetime import datetime as _datetime
|
|
2
3
|
|
|
3
|
-
from numpy import random
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import pytest
|
|
6
|
+
from numpy import random
|
|
6
7
|
|
|
7
8
|
from csv_detective.detect_fields.FR.geo import (
|
|
8
9
|
adresse,
|
|
@@ -23,8 +24,8 @@ from csv_detective.detect_fields.FR.geo import (
|
|
|
23
24
|
)
|
|
24
25
|
from csv_detective.detect_fields.FR.other import (
|
|
25
26
|
code_csp_insee,
|
|
26
|
-
code_rna,
|
|
27
27
|
code_import,
|
|
28
|
+
code_rna,
|
|
28
29
|
code_waldec,
|
|
29
30
|
csp_insee,
|
|
30
31
|
date_fr,
|
|
@@ -56,9 +57,13 @@ from csv_detective.detect_fields.other import (
|
|
|
56
57
|
twitter,
|
|
57
58
|
url,
|
|
58
59
|
uuid,
|
|
59
|
-
|
|
60
|
+
)
|
|
61
|
+
from csv_detective.detect_fields.other import (
|
|
60
62
|
float as test_float,
|
|
61
63
|
)
|
|
64
|
+
from csv_detective.detect_fields.other import (
|
|
65
|
+
int as test_int,
|
|
66
|
+
)
|
|
62
67
|
from csv_detective.detect_fields.temp import (
|
|
63
68
|
date,
|
|
64
69
|
datetime_aware,
|
|
@@ -67,8 +72,8 @@ from csv_detective.detect_fields.temp import (
|
|
|
67
72
|
year,
|
|
68
73
|
)
|
|
69
74
|
from csv_detective.detection.variables import (
|
|
70
|
-
detect_continuous_variable,
|
|
71
75
|
detect_categorical_variable,
|
|
76
|
+
detect_continuous_variable,
|
|
72
77
|
)
|
|
73
78
|
from csv_detective.load_tests import return_all_tests
|
|
74
79
|
from csv_detective.output.dataframe import cast
|
|
@@ -225,10 +230,7 @@ fields = {
|
|
|
225
230
|
True: ["13 fevrier 1996"],
|
|
226
231
|
False: ["44 march 2025"],
|
|
227
232
|
},
|
|
228
|
-
insee_ape700: {
|
|
229
|
-
True: ["0116Z"],
|
|
230
|
-
False: ["0116A"]
|
|
231
|
-
},
|
|
233
|
+
insee_ape700: {True: ["0116Z"], False: ["0116A"]},
|
|
232
234
|
tel_fr: {
|
|
233
235
|
True: ["0134643467"],
|
|
234
236
|
False: ["6625388263", "01288398"],
|
|
@@ -360,7 +362,11 @@ fields = {
|
|
|
360
362
|
},
|
|
361
363
|
datetime_naive: {
|
|
362
364
|
True: ["2021-06-22 10:20:10", "2030/06/22 00:00:00.0028"],
|
|
363
|
-
False: [
|
|
365
|
+
False: [
|
|
366
|
+
"2021-06-22T30:20:10",
|
|
367
|
+
"Sun, 06 Nov 1994 08:49:37 GMT",
|
|
368
|
+
"2021-06-44 10:20:10+02:00",
|
|
369
|
+
],
|
|
364
370
|
},
|
|
365
371
|
datetime_rfc822: {
|
|
366
372
|
True: ["Sun, 06 Nov 1994 08:49:37 GMT"],
|
tests/test_file.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from unittest.mock import patch
|
|
2
|
+
|
|
1
3
|
import pandas as pd
|
|
2
4
|
import pytest
|
|
3
5
|
import responses
|
|
4
|
-
from unittest.mock import patch
|
|
5
6
|
|
|
6
7
|
from csv_detective import routine
|
|
7
8
|
|
|
@@ -70,10 +71,10 @@ def test_profile_output_on_file():
|
|
|
70
71
|
[
|
|
71
72
|
c in list(output["profile"]["NUMCOM"].keys())
|
|
72
73
|
for c in [
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
"min",
|
|
75
|
+
"max",
|
|
76
|
+
"mean",
|
|
77
|
+
"std",
|
|
77
78
|
]
|
|
78
79
|
]
|
|
79
80
|
)
|
|
@@ -191,7 +192,7 @@ def mocked_responses():
|
|
|
191
192
|
"params",
|
|
192
193
|
# ideally we'd like to do the same with params_others but pandas.read_excel uses urllib
|
|
193
194
|
# which doesn't support the way we mock the response, TBC
|
|
194
|
-
params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines": 404})]
|
|
195
|
+
params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines": 404})],
|
|
195
196
|
)
|
|
196
197
|
def test_urls(mocked_responses, params):
|
|
197
198
|
file_name, checks = params
|
|
@@ -261,17 +262,17 @@ def test_cast_json(mocked_responses, cast_json):
|
|
|
261
262
|
cast_json, expected_type = cast_json
|
|
262
263
|
expected_content = 'id,a_simple_dict\n1,{"a": 1}\n2,{"b": 2}\n3,{"c": 3}\n'
|
|
263
264
|
mocked_responses.get(
|
|
264
|
-
|
|
265
|
+
"http://example.com/test.csv",
|
|
265
266
|
body=expected_content,
|
|
266
267
|
status=200,
|
|
267
268
|
)
|
|
268
269
|
analysis, df = routine(
|
|
269
|
-
file_path=
|
|
270
|
+
file_path="http://example.com/test.csv",
|
|
270
271
|
num_rows=-1,
|
|
271
272
|
output_profile=False,
|
|
272
273
|
save_results=False,
|
|
273
274
|
output_df=True,
|
|
274
275
|
cast_json=cast_json,
|
|
275
276
|
)
|
|
276
|
-
assert analysis[
|
|
277
|
+
assert analysis["columns"]["a_simple_dict"]["python_type"] == "json"
|
|
277
278
|
assert isinstance(df["a_simple_dict"][0], expected_type)
|
tests/test_labels.py
CHANGED
|
@@ -10,13 +10,14 @@ def test_money_labels():
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@pytest.mark.parametrize(
|
|
13
|
-
"params",
|
|
13
|
+
"params",
|
|
14
|
+
[
|
|
14
15
|
("latitude", 1.0),
|
|
15
16
|
("lat", 1.0),
|
|
16
17
|
("coord_lat", 0.5),
|
|
17
18
|
("y", 1.0),
|
|
18
19
|
("nb_cycles", 0.0),
|
|
19
|
-
]
|
|
20
|
+
],
|
|
20
21
|
)
|
|
21
22
|
def test_latitude(params):
|
|
22
23
|
header, expected = params
|
tests/test_structure.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
|
|
2
3
|
# flake8: noqa
|
|
3
4
|
from csv_detective import detect_fields, detect_labels
|
|
4
5
|
from csv_detective.load_tests import return_all_tests
|
|
@@ -18,7 +19,8 @@ def tests_conformity():
|
|
|
18
19
|
if "__pycache__" not in dirname:
|
|
19
20
|
subfolders.append(os.path.join(dirpath, dirname))
|
|
20
21
|
final_subfolders = [
|
|
21
|
-
sf
|
|
22
|
+
sf
|
|
23
|
+
for sf in subfolders
|
|
22
24
|
if not any(other_sf.startswith(sf) for other_sf in subfolders if sf != other_sf)
|
|
23
25
|
]
|
|
24
26
|
for f_sf in final_subfolders:
|
tests/test_validation.py
CHANGED
|
@@ -30,13 +30,16 @@ def get_nested_value(source_dict: dict, key_chain: list[str]):
|
|
|
30
30
|
((False, None, None), {"separator": "|"}),
|
|
31
31
|
((False, None, None), {"encoding": "unknown"}),
|
|
32
32
|
((False, None, None), {"header": ["a", "b"]}),
|
|
33
|
-
(
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
"
|
|
37
|
-
|
|
33
|
+
(
|
|
34
|
+
(False, pd.DataFrame, dict),
|
|
35
|
+
{
|
|
36
|
+
"columns.NUMCOM": {
|
|
37
|
+
"python_type": "int",
|
|
38
|
+
"format": "int",
|
|
39
|
+
"score": 1.0,
|
|
40
|
+
},
|
|
38
41
|
},
|
|
39
|
-
|
|
42
|
+
),
|
|
40
43
|
),
|
|
41
44
|
)
|
|
42
45
|
def test_validation(_params):
|
|
File without changes
|
{csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1720.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1720.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1720.dist-info}/top_level.txt
RENAMED
|
File without changes
|