csv-detective 0.8.1.dev1674__py3-none-any.whl → 0.8.1.dev1720__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +0 -2
- csv_detective/cli.py +6 -9
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +78 -78
- csv_detective/detect_fields/FR/geo/code_departement/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -1
- csv_detective/detect_fields/FR/geo/code_region/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/commune/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/departement/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +1 -2
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +1 -2
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/pays/__init__.py +6 -6
- csv_detective/detect_fields/FR/geo/region/__init__.py +6 -4
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +15 -14
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +4 -3
- csv_detective/detect_fields/FR/other/date_fr/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +4 -3
- csv_detective/detect_fields/FR/other/sexe/__init__.py +2 -2
- csv_detective/detect_fields/FR/other/siren/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/siret/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/tel_fr/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/uai/__init__.py +2 -2
- csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +15 -15
- csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +27 -27
- csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +5 -5
- csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +5 -5
- csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +5 -5
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/other/booleen/__init__.py +1 -1
- csv_detective/detect_fields/other/email/__init__.py +4 -2
- csv_detective/detect_fields/other/int/__init__.py +3 -3
- csv_detective/detect_fields/other/mongo_object_id/__init__.py +2 -2
- csv_detective/detect_fields/other/twitter/__init__.py +2 -2
- csv_detective/detect_fields/other/uuid/__init__.py +4 -5
- csv_detective/detect_fields/temp/date/__init__.py +3 -2
- csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +6 -6
- csv_detective/detect_fields/temp/year/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -1
- csv_detective/detect_labels/geo/lonlat_wgs/__init__.py +1 -0
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +1 -1
- csv_detective/detection/columns.py +9 -9
- csv_detective/detection/encoding.py +6 -4
- csv_detective/detection/engine.py +6 -5
- csv_detective/detection/formats.py +19 -19
- csv_detective/detection/headers.py +3 -5
- csv_detective/detection/rows.py +1 -1
- csv_detective/detection/variables.py +4 -4
- csv_detective/explore_csv.py +7 -8
- csv_detective/load_tests.py +6 -14
- csv_detective/output/__init__.py +3 -7
- csv_detective/output/dataframe.py +9 -5
- csv_detective/output/example.py +13 -13
- csv_detective/output/profile.py +30 -23
- csv_detective/output/schema.py +20 -23
- csv_detective/output/utils.py +15 -15
- csv_detective/parsing/columns.py +23 -12
- csv_detective/parsing/csv.py +1 -1
- csv_detective/parsing/excel.py +10 -11
- csv_detective/parsing/load.py +11 -8
- csv_detective/parsing/text.py +4 -9
- csv_detective/s3_utils.py +3 -7
- csv_detective/utils.py +4 -2
- csv_detective/validate.py +18 -13
- csv_detective-0.8.1.dev1674.data/data/share/csv_detective/README.md → csv_detective-0.8.1.dev1720.dist-info/METADATA +32 -0
- {csv_detective-0.8.1.dev1674.dist-info → csv_detective-0.8.1.dev1720.dist-info}/RECORD +81 -81
- {csv_detective-0.8.1.dev1674.dist-info → csv_detective-0.8.1.dev1720.dist-info}/top_level.txt +2 -0
- tests/test_example.py +2 -6
- tests/test_fields.py +16 -10
- tests/test_file.py +10 -9
- tests/test_labels.py +3 -2
- tests/test_structure.py +3 -1
- tests/test_validation.py +9 -6
- venv/bin/activate_this.py +38 -0
- venv/bin/jp.py +54 -0
- venv/bin/runxlrd.py +410 -0
- csv_detective-0.8.1.dev1674.data/data/share/csv_detective/CHANGELOG.md +0 -186
- csv_detective-0.8.1.dev1674.dist-info/METADATA +0 -268
- csv_detective-0.8.1.dev1674.dist-info/licenses/LICENSE +0 -21
- {csv_detective-0.8.1.dev1674.dist-info → csv_detective-0.8.1.dev1720.dist-info}/WHEEL +0 -0
- {csv_detective-0.8.1.dev1674.dist-info → csv_detective-0.8.1.dev1720.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1674.data/data/share/csv_detective → csv_detective-0.8.1.dev1720.dist-info/licenses}/LICENSE +0 -0
csv_detective/validate.py
CHANGED
|
@@ -4,8 +4,8 @@ from typing import Optional, Union
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
6
|
from csv_detective.load_tests import return_all_tests
|
|
7
|
-
from csv_detective.parsing.load import load_file
|
|
8
7
|
from csv_detective.parsing.columns import test_col_val
|
|
8
|
+
from csv_detective.parsing.load import load_file
|
|
9
9
|
|
|
10
10
|
logging.basicConfig(level=logging.INFO)
|
|
11
11
|
|
|
@@ -47,9 +47,8 @@ def validate(
|
|
|
47
47
|
if verbose:
|
|
48
48
|
logging.info("Comparing table with the previous analysis")
|
|
49
49
|
logging.info("- Checking if all columns match")
|
|
50
|
-
if (
|
|
51
|
-
|
|
52
|
-
or any(col_name not in previous_analysis["header"] for col_name in analysis["header"])
|
|
50
|
+
if any(col_name not in analysis["header"] for col_name in previous_analysis["header"]) or any(
|
|
51
|
+
col_name not in previous_analysis["header"] for col_name in analysis["header"]
|
|
53
52
|
):
|
|
54
53
|
if verbose:
|
|
55
54
|
logging.warning("> Columns do not match, proceeding with full analysis")
|
|
@@ -72,12 +71,18 @@ def validate(
|
|
|
72
71
|
return False, table, analysis
|
|
73
72
|
if verbose:
|
|
74
73
|
logging.info("> All checks successful")
|
|
75
|
-
return
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
74
|
+
return (
|
|
75
|
+
True,
|
|
76
|
+
table,
|
|
77
|
+
analysis
|
|
78
|
+
| {
|
|
79
|
+
k: previous_analysis[k]
|
|
80
|
+
for k in [
|
|
81
|
+
"categorical",
|
|
82
|
+
"columns",
|
|
83
|
+
"columns_fields",
|
|
84
|
+
"columns_labels",
|
|
85
|
+
"formats",
|
|
86
|
+
]
|
|
87
|
+
},
|
|
88
|
+
)
|
|
@@ -1,3 +1,35 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: csv-detective
|
|
3
|
+
Version: 0.8.1.dev1720
|
|
4
|
+
Summary: Detect tabular files column content
|
|
5
|
+
Author-email: Etalab <opendatateam@data.gouv.fr>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Source, https://github.com/datagouv/csv_detective
|
|
8
|
+
Keywords: CSV,data processing,encoding,guess,parser,tabular
|
|
9
|
+
Requires-Python: <3.14,>=3.9
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: boto3<2,>=1.34.0
|
|
13
|
+
Requires-Dist: dateparser<2,>=1.2.0
|
|
14
|
+
Requires-Dist: faust-cchardet==2.1.19
|
|
15
|
+
Requires-Dist: pandas<3,>=2.2.0
|
|
16
|
+
Requires-Dist: python-dateutil<3,>=2.8.2
|
|
17
|
+
Requires-Dist: Unidecode<2,>=1.3.6
|
|
18
|
+
Requires-Dist: openpyxl==3.1.5
|
|
19
|
+
Requires-Dist: xlrd==2.0.1
|
|
20
|
+
Requires-Dist: odfpy==1.4.1
|
|
21
|
+
Requires-Dist: requests<3,>=2.32.3
|
|
22
|
+
Requires-Dist: python-magic==0.4.27
|
|
23
|
+
Requires-Dist: frformat==0.4.0
|
|
24
|
+
Requires-Dist: Faker>=33.0.0
|
|
25
|
+
Requires-Dist: rstr==3.2.2
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=8.3.0; extra == "dev"
|
|
28
|
+
Requires-Dist: responses>=0.25.0; extra == "dev"
|
|
29
|
+
Requires-Dist: bumpx>=0.3.10; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.9.3; extra == "dev"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
1
33
|
# CSV Detective
|
|
2
34
|
|
|
3
35
|
This is a package to **automatically detect column content in tabular files**. The script reads either the whole file or the first few rows and performs various checks to see for each column if it matches with various content types. This is currently done through regex and string comparison.
|
|
@@ -1,77 +1,77 @@
|
|
|
1
|
-
csv_detective/__init__.py,sha256=
|
|
2
|
-
csv_detective/cli.py,sha256=
|
|
3
|
-
csv_detective/explore_csv.py,sha256=
|
|
4
|
-
csv_detective/load_tests.py,sha256=
|
|
5
|
-
csv_detective/s3_utils.py,sha256=
|
|
6
|
-
csv_detective/utils.py,sha256=
|
|
7
|
-
csv_detective/validate.py,sha256=
|
|
1
|
+
csv_detective/__init__.py,sha256=XY7pnoNHlocvyUiK8EQpJYPSQt5BRWWJD8KiPlvI9pU,164
|
|
2
|
+
csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
|
|
3
|
+
csv_detective/explore_csv.py,sha256=sEMza4Z27ac88fGq7tUiK1zlfvuftztHhHVoa0c2EVU,9191
|
|
4
|
+
csv_detective/load_tests.py,sha256=q-ukYcNWGIpgxvlcO6OcUTtd9Dq7wczoW1YvFt4OA54,2274
|
|
5
|
+
csv_detective/s3_utils.py,sha256=z1KTVVkdurMv21o-rZu7_aluMJnSi-d5uxnQbqT2NoI,1407
|
|
6
|
+
csv_detective/utils.py,sha256=u9I1tsyMfVr2eIYiGCD7Iu30d55H3za44-N3cV2nj8M,1013
|
|
7
|
+
csv_detective/validate.py,sha256=RLHXLrRuynkdcvHUlSEbyglPvdbNYlT1Z4nQI-BdYdA,2898
|
|
8
8
|
csv_detective/detect_fields/__init__.py,sha256=0A5SZTp_IhhJ9z7lWeH4K5_0uwMK_VdMudjPm7oggVg,1000
|
|
9
9
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=
|
|
11
|
+
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=Q5tVRMW5QdFLfiNm42JmIwNRuBR5ZI3dQhzHPzXVnzo,1676
|
|
12
12
|
csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=tfHdqUnCQ0cv-fBo3Cy--8UNXzgjld4kseI5eQ_sR4E,187
|
|
13
|
-
csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=
|
|
13
|
+
csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=DwgDopvfoUmOdDLsFKHGtufM3PG5ahwiLFRrDimaDNM,379
|
|
14
14
|
csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=27bCkZP5w7tpsKUdOIXuiAG90DTdw066CWg3G5HtsKE,160
|
|
15
|
-
csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=
|
|
16
|
-
csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=
|
|
17
|
-
csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=
|
|
18
|
-
csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=
|
|
19
|
-
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=
|
|
20
|
-
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=
|
|
21
|
-
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=
|
|
22
|
-
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=
|
|
23
|
-
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=
|
|
24
|
-
csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=
|
|
25
|
-
csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=
|
|
15
|
+
csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=yjR6ob_h9fd5sa1YH6P0UbCsrHjdBGjsPIx02SHwlfE,133
|
|
16
|
+
csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=9pR2tVS4J2KrytCVuh-R86HGRMWutIK9FVQ30wKfCPg,253
|
|
17
|
+
csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=5vw4zjlmWaR2djxuQOUrmwsNIc9HgAE-zdxwerVR3S0,380
|
|
18
|
+
csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=UsMEW1EVVgnw-daOc1jBkEaGKvqTONSAGnj1s3QgM8w,400
|
|
19
|
+
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=YsAGiblFexBxvu_E3XaXhy_bordc6c1oKPgDzTsDeXw,374
|
|
20
|
+
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=RjkDSZzIbp4nnvDpa5GomDpyIJGvwErX7TgC4dlBJ14,437
|
|
21
|
+
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=7xmYpTYoHvFfcuocAhm6dP_j4sMII_hG1PMSrWId4FY,344
|
|
22
|
+
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=JbKuGK5UmUGAQKPFpN4RSLf3axJ5D1aCjzRXYHW-iXU,441
|
|
23
|
+
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=5VWDaHZvGhJAJu5XQrj6gLx5CVA9dNOE30eTXQ3pSf0,344
|
|
24
|
+
csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=85y-5qNRAWJrKqL0wh9iPMUBQjvPwc9lv1cYB2m0daQ,364
|
|
25
|
+
csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=6mJRaGsCPBY5JHHe8EWxEjDpAOIfvBPTaZKJb3_n3gU,1077
|
|
26
26
|
csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=
|
|
27
|
+
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=8f9n4F7T8Q44z4-sQL7d1OgvLObUPwC7D0iDLhHu8KQ,568
|
|
28
28
|
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
|
|
29
29
|
csv_detective/detect_fields/FR/other/code_import/__init__.py,sha256=zJ9YfPa5p--uHNQFeO1gTjxDy2Um_r-MxQd29VBNjFw,243
|
|
30
30
|
csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z0RjMBt1--ZL7Jd1RsHAQCCbTAQk_BnlnTq8VF1o_VA,146
|
|
31
31
|
csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=41SYNzCzUFh4trQlwG-9UC0-1Wi4fTcv8Byi_dd9Lq4,168
|
|
32
|
-
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=
|
|
32
|
+
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=cKIldBWb37pqBeKuV5jgAlRHeF9SyqlRL4n-qfGMZGI,497
|
|
33
33
|
csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
34
|
-
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=
|
|
35
|
-
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=
|
|
34
|
+
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=Ewi9u7jcYsxhqu2al8aEVYQ8dO9H7GmRjo_l8BYt0j0,284
|
|
35
|
+
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=u6Ri4ntWrDPYezsVlwpRTbzU8xsDfkJYGdOE2spkQpQ,520
|
|
36
36
|
csv_detective/detect_fields/FR/other/insee_ape700/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
|
|
37
|
-
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=
|
|
38
|
-
csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=
|
|
39
|
-
csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=
|
|
40
|
-
csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=
|
|
41
|
-
csv_detective/detect_fields/FR/other/uai/__init__.py,sha256=
|
|
37
|
+
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=dPVjgD3QBe4PUA4Bl_YDxZqFObF8KcoDz6zDYH2qfnk,269
|
|
38
|
+
csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=7wpSq4eRfYC2p711Me1XCY64PIWyK_TJNw3lidxuzJE,442
|
|
39
|
+
csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=YJPXYnzKJ4Y8XuBf1lRrLkImrZ6D7zitKl0KPry4CcU,707
|
|
40
|
+
csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=zXVRu80ehUulhhxu1FTWoOK81CaSr7MfTh4HJEYdEKA,343
|
|
41
|
+
csv_detective/detect_fields/FR/other/uai/__init__.py,sha256=mglrlTSBKYnGUOfGVM-xyk5KqUvQIFIjaoj31CO36zo,327
|
|
42
42
|
csv_detective/detect_fields/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
|
-
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py,sha256=
|
|
44
|
-
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py,sha256=
|
|
43
|
+
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py,sha256=ax34EqC712WT5JqiAKBWz6L7vmVpLNWmBF2wmjUUFiM,396
|
|
44
|
+
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py,sha256=Z59nO-UpIrUT9ZaQ6MuPQLFbu8AE0gYdkSleAj4WX_k,582
|
|
45
45
|
csv_detective/detect_fields/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py,sha256=
|
|
46
|
+
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py,sha256=X5kUggATKRJItJLaSDpv4MQPwo49iGBwlwQQjLTe77E,433
|
|
47
47
|
csv_detective/detect_fields/geo/iso_country_code_alpha2/iso_country_code_alpha2.txt,sha256=YyPlDqCdz65ecf4Wes_r0P4rDSJG35niXtjc4MmctXM,1740
|
|
48
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=
|
|
48
|
+
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=JvFLoQeJdbw5VYXUZqD9vsp0LQDoFE2Sd5gPA6K-0Lo,409
|
|
49
49
|
csv_detective/detect_fields/geo/iso_country_code_alpha3/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
|
|
50
|
-
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=
|
|
50
|
+
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=AnAridM4C8hcm4PeNdr8969czgrzM4KemGVZWAJSM1U,436
|
|
51
51
|
csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
|
|
52
52
|
csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=6wlwlxQmsVIZ21g-THvH3nBj-I8FuoF2sBlZAoEMGiQ,393
|
|
53
|
-
csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=
|
|
53
|
+
csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=sdor-L1WDHv5opg1Le13mru4ImSA-yEbxchlWENuUFE,327
|
|
54
54
|
csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=IXDTqD4YFUJYI1FYZ5ZfkqXY6KvNY7sgBVFRAvgTHtI,454
|
|
55
|
-
csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=
|
|
55
|
+
csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=gPnNTe-L9xjBVE-30VCJiK6IVZttj6Cy6zu1IL5907Y,330
|
|
56
56
|
csv_detective/detect_fields/geo/lonlat_wgs/__init__.py,sha256=CnBMYevfGdhBvureF3oc_zqT-RZjG419iAuUlugQFLc,454
|
|
57
57
|
csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
|
-
csv_detective/detect_fields/other/booleen/__init__.py,sha256=
|
|
59
|
-
csv_detective/detect_fields/other/email/__init__.py,sha256=
|
|
58
|
+
csv_detective/detect_fields/other/booleen/__init__.py,sha256=37ZUJACrZA9FQBYLDeVJGze7_I9x-ZWv5yWuBcqHcwI,497
|
|
59
|
+
csv_detective/detect_fields/other/email/__init__.py,sha256=p235wILf0fR9TeSEuyuPgoysAv9zg23a4vzdy3YJlxE,192
|
|
60
60
|
csv_detective/detect_fields/other/float/__init__.py,sha256=AT4Kpgwoz5PuAoLx00u0SL8DjjXZxsE8zSRbN18uAv4,578
|
|
61
|
-
csv_detective/detect_fields/other/int/__init__.py,sha256=
|
|
61
|
+
csv_detective/detect_fields/other/int/__init__.py,sha256=4SQAgaYTafeBL6hdT7Wp_xwcRNQsOWlYjaXKl78EuDw,320
|
|
62
62
|
csv_detective/detect_fields/other/json/__init__.py,sha256=AkRWZAidEM1dWkVRFThEBI5M7kMUu5Yu12iCViGM8lU,310
|
|
63
63
|
csv_detective/detect_fields/other/money/__init__.py,sha256=g_ZwBZXl9LhldwFYQotC5WqLiE8qQCZHtoI9eJvl_9M,232
|
|
64
|
-
csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=
|
|
64
|
+
csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=aZqxdbWzrL-syADA3_uYcOWcIuelvsnLzPLBEnkKJ8w,156
|
|
65
65
|
csv_detective/detect_fields/other/percent/__init__.py,sha256=vgpekNOPBRuunoVBXMi81rwHv4uSOhe78pbVtQ5SBO8,177
|
|
66
|
-
csv_detective/detect_fields/other/twitter/__init__.py,sha256=
|
|
66
|
+
csv_detective/detect_fields/other/twitter/__init__.py,sha256=Npu6ZbyNfHq1y7xn0Gd62GbOcyz4WNq82FrFSKb547w,154
|
|
67
67
|
csv_detective/detect_fields/other/url/__init__.py,sha256=L7h9fZldh1w86XwCx0x3Q1TXSJ_nIId1C-l1yFzZYrA,299
|
|
68
|
-
csv_detective/detect_fields/other/uuid/__init__.py,sha256=
|
|
68
|
+
csv_detective/detect_fields/other/uuid/__init__.py,sha256=XFxbIsdIhRw0dtFxBXQBhicE4yy7P4jmwYXeJhq6FVY,215
|
|
69
69
|
csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
|
-
csv_detective/detect_fields/temp/date/__init__.py,sha256=
|
|
70
|
+
csv_detective/detect_fields/temp/date/__init__.py,sha256=uVOszufihKqiQmS0wz7nUuQ2Dz-Tq9fSk1nf3S00mg4,1010
|
|
71
71
|
csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=bEfWvXx_GNCRUxMGJYqfOK4wRDr3WMaGVAmIa_C2pXE,853
|
|
72
72
|
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=GtQo55SrrXfoT-L7ZXW63jrlAYvNT5m56wMfhuY3pyI,836
|
|
73
|
-
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256
|
|
74
|
-
csv_detective/detect_fields/temp/year/__init__.py,sha256=
|
|
73
|
+
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=-pFdIIPgaLq2_QbFJ9zwy4YIwZuC73F0A_cNDntTuvQ,512
|
|
74
|
+
csv_detective/detect_fields/temp/year/__init__.py,sha256=gHchVciZExbGZLMBcbBaDXB0IgGptkQc4RhfSOMY0Ww,194
|
|
75
75
|
csv_detective/detect_labels/__init__.py,sha256=8vrFUrMc8a_VOC5gvYNMKL-Do_q9eMTrghJRI9Xotvk,883
|
|
76
76
|
csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
77
|
csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -100,7 +100,7 @@ csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=N7LzmtNwZER
|
|
|
100
100
|
csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=ZWhc8S9L1X2fFh2g5Ja-LuhsfHg_lALKrur6yDnGDPk,238
|
|
101
101
|
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=g7Y7IvW9VKO528z1MSPxfFtRB7kQXSiG7QQ-VZRfFEk,386
|
|
102
102
|
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=-gvdxUnv3LRfje60ljC4F3B2c1LBcWfV3zZbV3VJZ08,323
|
|
103
|
-
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=
|
|
103
|
+
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=4jIZ9cmN73XhP4ayGcEMcB_y0X45oRk1Lq2p_pNfgok,426
|
|
104
104
|
csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=5L6JowK9y6y9uZNg6hWzknMSzh0SurkwQeTINNKTdYY,599
|
|
105
105
|
csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
106
|
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=Vmv7Hp6LxR-bh3aXOBCHYzJVyCHtGoiWzJ40xnfTvdA,357
|
|
@@ -113,14 +113,14 @@ csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=On8VOCDD0EspZra6
|
|
|
113
113
|
csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=ME_KjniqDSdAwXP7XnKXyr5IA75KrGSLIhvPNfsux6E,664
|
|
114
114
|
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=tDndlFyEM7qKS3ATxp0Xs0FsPsOPpRWhDe1ockbWw8s,923
|
|
115
115
|
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=_8IV2FLtrOjzhQNsk-fsgc9-jbAgzKDVMr4tXu2P-s4,429
|
|
116
|
-
csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=
|
|
116
|
+
csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=7gbumJFp5xhz4GZ4uTAJQoxw5D53WJZddptyANmdEws,346
|
|
117
117
|
csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
118
|
csv_detective/detect_labels/other/booleen/__init__.py,sha256=zEkarex7L4T3vmYjR5hdhtnhugTVDsvkgG_it6nN0aA,214
|
|
119
119
|
csv_detective/detect_labels/other/email/__init__.py,sha256=Poagn45-eC2a_Wdk5Qs6d2BgYdncCQKZp2yEB50IuNw,431
|
|
120
120
|
csv_detective/detect_labels/other/float/__init__.py,sha256=X0axZN2GAfC_y01zRfIyvOfRsOy2KNQcQ-mlQAKxqT4,216
|
|
121
121
|
csv_detective/detect_labels/other/int/__init__.py,sha256=_1AY7thEBCcgSBQQ2YbY4YaPaxGRQ71BtmaFaX088ig,215
|
|
122
122
|
csv_detective/detect_labels/other/money/__init__.py,sha256=1JRArDZ5r6gtyuKijH_fuuVFVc0f3MN5gPyAf4GPqzs,249
|
|
123
|
-
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256
|
|
123
|
+
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=-NsB_Glm6KRGmIusAY9YoGPrdws6RwkYRPUiJUUPv3Y,209
|
|
124
124
|
csv_detective/detect_labels/other/twitter/__init__.py,sha256=96WhOB6nOutzSFOC5ZJYFSlhHDJRn2SkT4nYNj8E6ww,241
|
|
125
125
|
csv_detective/detect_labels/other/url/__init__.py,sha256=4Ajpdp8W0jS9aHZAAMyUlgefjSgpB7Y6ci29KNkwAoI,485
|
|
126
126
|
csv_detective/detect_labels/other/uuid/__init__.py,sha256=kXVb4oMy-Zv-OYmAIEoNFrBA20l9hbUTdvTfjeMmhjk,213
|
|
@@ -129,40 +129,40 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=w0eeZIseAmPwL4OvCWzZXbx
|
|
|
129
129
|
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=53ysj7QgsxXwG1le3zfSJd1oaTTf-Er3jBeYi_A4F9g,458
|
|
130
130
|
csv_detective/detect_labels/temp/year/__init__.py,sha256=7uWaCZY7dOG7nolW46IgBWmcu8K-9jPED-pOlMlErfo,433
|
|
131
131
|
csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
132
|
-
csv_detective/detection/columns.py,sha256=
|
|
133
|
-
csv_detective/detection/encoding.py,sha256=
|
|
134
|
-
csv_detective/detection/engine.py,sha256=
|
|
135
|
-
csv_detective/detection/formats.py,sha256=
|
|
136
|
-
csv_detective/detection/headers.py,sha256=
|
|
137
|
-
csv_detective/detection/rows.py,sha256=
|
|
132
|
+
csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
|
|
133
|
+
csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
|
|
134
|
+
csv_detective/detection/engine.py,sha256=1Z4vzjxwPRZ9-vv8nw-zU2sgBZtOsEz0UoKjGaSwVJU,1543
|
|
135
|
+
csv_detective/detection/formats.py,sha256=dzJPdi2rP2jTHZBk9UHpJL3c5N-PSohCymHs-OZt45c,6211
|
|
136
|
+
csv_detective/detection/headers.py,sha256=y5iR4jWH5fUtAH_Zg0zxWSVG_INCHlXJFMbhPpI2YMo,1148
|
|
137
|
+
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
138
138
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
139
|
-
csv_detective/detection/variables.py,sha256=
|
|
140
|
-
csv_detective/output/__init__.py,sha256=
|
|
141
|
-
csv_detective/output/dataframe.py,sha256=
|
|
142
|
-
csv_detective/output/example.py,sha256=
|
|
143
|
-
csv_detective/output/profile.py,sha256=
|
|
144
|
-
csv_detective/output/schema.py,sha256=
|
|
145
|
-
csv_detective/output/utils.py,sha256=
|
|
139
|
+
csv_detective/detection/variables.py,sha256=njfPj1hhWowe8qgrdCr4gtZyr1l2DGA08n06LnmnziY,3550
|
|
140
|
+
csv_detective/output/__init__.py,sha256=f-UFv_iULpVF_Fy39H4sfACEnrthjK4N3mCAVPkjnKw,1860
|
|
141
|
+
csv_detective/output/dataframe.py,sha256=UpLuSxx_SFbKpem1n-xY7jF16MXGpKQYEWjaSMIiB4s,2215
|
|
142
|
+
csv_detective/output/example.py,sha256=XrnPS_uC0cICn7tgnLWNctpUbnPzl7fIMzNTzJEWGJc,8655
|
|
143
|
+
csv_detective/output/profile.py,sha256=Jeh0mrfH_hAVxV2E5I4XzdCm7ZAGAV_Xj3AXOi77lcA,3130
|
|
144
|
+
csv_detective/output/schema.py,sha256=5Duw5qnsJ-LaVC6JgF7p1zZAkehDzsbXA4iTSJUgLNM,13760
|
|
145
|
+
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
146
146
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
|
-
csv_detective/parsing/columns.py,sha256=
|
|
147
|
+
csv_detective/parsing/columns.py,sha256=fbvQMu12gAmz4TnNCL7pLnMFB-mWN_O-zEoj8jEGj0A,5696
|
|
148
148
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
149
|
-
csv_detective/parsing/csv.py,sha256=
|
|
150
|
-
csv_detective/parsing/excel.py,sha256=
|
|
151
|
-
csv_detective/parsing/load.py,sha256=
|
|
152
|
-
csv_detective/parsing/text.py,sha256=
|
|
153
|
-
csv_detective-0.8.1.
|
|
154
|
-
csv_detective-0.8.1.dev1674.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
155
|
-
csv_detective-0.8.1.dev1674.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
|
|
156
|
-
csv_detective-0.8.1.dev1674.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
149
|
+
csv_detective/parsing/csv.py,sha256=qZFLOT3YCPoHF0svfVfQBnS8eHtucjDZ7dFITAPgLhc,1626
|
|
150
|
+
csv_detective/parsing/excel.py,sha256=ULUDw76z6hs1Xm2yL9KBM0EOvIsfBLkxwqTZfDEx6aE,7045
|
|
151
|
+
csv_detective/parsing/load.py,sha256=C3M8nvgWenOb8aDFi5dpDGCoAw9EBqr4EB63zbz2M14,3699
|
|
152
|
+
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
153
|
+
csv_detective-0.8.1.dev1720.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
157
154
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
|
-
tests/test_example.py,sha256=
|
|
159
|
-
tests/test_fields.py,sha256=
|
|
160
|
-
tests/test_file.py,sha256=
|
|
161
|
-
tests/test_labels.py,sha256=
|
|
162
|
-
tests/test_structure.py,sha256=
|
|
163
|
-
tests/test_validation.py,sha256=
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
csv_detective-0.8.1.
|
|
168
|
-
csv_detective-0.8.1.
|
|
155
|
+
tests/test_example.py,sha256=iO4RxMHZxnBAiKm6fsFar5OVg8hYKnqNZCw0SUnEuQQ,1972
|
|
156
|
+
tests/test_fields.py,sha256=Y2mBfV9ZdxTHYwHnkzGbpo1k_qJRLC8nU-zzAUxFmAE,11964
|
|
157
|
+
tests/test_file.py,sha256=YuVbSfeo_ASPiLT8CyxXqJENcDpj4wAFXzLwu_GzsOA,8437
|
|
158
|
+
tests/test_labels.py,sha256=Y0XlOpztCyV65pk7iAS_nMMfdysoBujlBmz10vHul9A,469
|
|
159
|
+
tests/test_structure.py,sha256=lxgNeyoDPnd2PqesffCJOdPuf-g6fP7UnGQiO3umHMc,1408
|
|
160
|
+
tests/test_validation.py,sha256=ie-Xf0vk6-M6GQq-x7kY5yse1EmXfxQkbaV7fR3fvYo,3308
|
|
161
|
+
venv/bin/activate_this.py,sha256=NRy3waFmwW1pOaNUp33wNN0vD1Kzkd-zXX-Sgl4EiVI,1286
|
|
162
|
+
venv/bin/jp.py,sha256=7z7dvRg0M7HzpZG4ssQID7nScjvQx7bcYTxJWDOrS6E,1717
|
|
163
|
+
venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
|
|
164
|
+
csv_detective-0.8.1.dev1720.dist-info/METADATA,sha256=XvQxdxFvH2FWSWn3JpVR7IMzfUCANxs5kSfh1_JeHIQ,9570
|
|
165
|
+
csv_detective-0.8.1.dev1720.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
166
|
+
csv_detective-0.8.1.dev1720.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
167
|
+
csv_detective-0.8.1.dev1720.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
|
|
168
|
+
csv_detective-0.8.1.dev1720.dist-info/RECORD,,
|
tests/test_example.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
|
|
2
1
|
import re
|
|
3
2
|
from uuid import UUID
|
|
3
|
+
|
|
4
4
|
from csv_detective import create_example_csv_file
|
|
5
5
|
|
|
6
6
|
|
|
@@ -41,11 +41,7 @@ def test_example_creation():
|
|
|
41
41
|
"name": "nb_produits",
|
|
42
42
|
"type": "int",
|
|
43
43
|
},
|
|
44
|
-
{
|
|
45
|
-
"name": "note",
|
|
46
|
-
"type": "float",
|
|
47
|
-
"args": {"num_range": [1, 20]}
|
|
48
|
-
},
|
|
44
|
+
{"name": "note", "type": "float", "args": {"num_range": [1, 20]}},
|
|
49
45
|
]
|
|
50
46
|
df = create_example_csv_file(
|
|
51
47
|
fields=fields,
|
tests/test_fields.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from datetime import date as _date
|
|
1
|
+
from datetime import date as _date
|
|
2
|
+
from datetime import datetime as _datetime
|
|
2
3
|
|
|
3
|
-
from numpy import random
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import pytest
|
|
6
|
+
from numpy import random
|
|
6
7
|
|
|
7
8
|
from csv_detective.detect_fields.FR.geo import (
|
|
8
9
|
adresse,
|
|
@@ -23,8 +24,8 @@ from csv_detective.detect_fields.FR.geo import (
|
|
|
23
24
|
)
|
|
24
25
|
from csv_detective.detect_fields.FR.other import (
|
|
25
26
|
code_csp_insee,
|
|
26
|
-
code_rna,
|
|
27
27
|
code_import,
|
|
28
|
+
code_rna,
|
|
28
29
|
code_waldec,
|
|
29
30
|
csp_insee,
|
|
30
31
|
date_fr,
|
|
@@ -56,9 +57,13 @@ from csv_detective.detect_fields.other import (
|
|
|
56
57
|
twitter,
|
|
57
58
|
url,
|
|
58
59
|
uuid,
|
|
59
|
-
|
|
60
|
+
)
|
|
61
|
+
from csv_detective.detect_fields.other import (
|
|
60
62
|
float as test_float,
|
|
61
63
|
)
|
|
64
|
+
from csv_detective.detect_fields.other import (
|
|
65
|
+
int as test_int,
|
|
66
|
+
)
|
|
62
67
|
from csv_detective.detect_fields.temp import (
|
|
63
68
|
date,
|
|
64
69
|
datetime_aware,
|
|
@@ -67,8 +72,8 @@ from csv_detective.detect_fields.temp import (
|
|
|
67
72
|
year,
|
|
68
73
|
)
|
|
69
74
|
from csv_detective.detection.variables import (
|
|
70
|
-
detect_continuous_variable,
|
|
71
75
|
detect_categorical_variable,
|
|
76
|
+
detect_continuous_variable,
|
|
72
77
|
)
|
|
73
78
|
from csv_detective.load_tests import return_all_tests
|
|
74
79
|
from csv_detective.output.dataframe import cast
|
|
@@ -225,10 +230,7 @@ fields = {
|
|
|
225
230
|
True: ["13 fevrier 1996"],
|
|
226
231
|
False: ["44 march 2025"],
|
|
227
232
|
},
|
|
228
|
-
insee_ape700: {
|
|
229
|
-
True: ["0116Z"],
|
|
230
|
-
False: ["0116A"]
|
|
231
|
-
},
|
|
233
|
+
insee_ape700: {True: ["0116Z"], False: ["0116A"]},
|
|
232
234
|
tel_fr: {
|
|
233
235
|
True: ["0134643467"],
|
|
234
236
|
False: ["6625388263", "01288398"],
|
|
@@ -360,7 +362,11 @@ fields = {
|
|
|
360
362
|
},
|
|
361
363
|
datetime_naive: {
|
|
362
364
|
True: ["2021-06-22 10:20:10", "2030/06/22 00:00:00.0028"],
|
|
363
|
-
False: [
|
|
365
|
+
False: [
|
|
366
|
+
"2021-06-22T30:20:10",
|
|
367
|
+
"Sun, 06 Nov 1994 08:49:37 GMT",
|
|
368
|
+
"2021-06-44 10:20:10+02:00",
|
|
369
|
+
],
|
|
364
370
|
},
|
|
365
371
|
datetime_rfc822: {
|
|
366
372
|
True: ["Sun, 06 Nov 1994 08:49:37 GMT"],
|
tests/test_file.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from unittest.mock import patch
|
|
2
|
+
|
|
1
3
|
import pandas as pd
|
|
2
4
|
import pytest
|
|
3
5
|
import responses
|
|
4
|
-
from unittest.mock import patch
|
|
5
6
|
|
|
6
7
|
from csv_detective import routine
|
|
7
8
|
|
|
@@ -70,10 +71,10 @@ def test_profile_output_on_file():
|
|
|
70
71
|
[
|
|
71
72
|
c in list(output["profile"]["NUMCOM"].keys())
|
|
72
73
|
for c in [
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
"min",
|
|
75
|
+
"max",
|
|
76
|
+
"mean",
|
|
77
|
+
"std",
|
|
77
78
|
]
|
|
78
79
|
]
|
|
79
80
|
)
|
|
@@ -191,7 +192,7 @@ def mocked_responses():
|
|
|
191
192
|
"params",
|
|
192
193
|
# ideally we'd like to do the same with params_others but pandas.read_excel uses urllib
|
|
193
194
|
# which doesn't support the way we mock the response, TBC
|
|
194
|
-
params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines": 404})]
|
|
195
|
+
params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines": 404})],
|
|
195
196
|
)
|
|
196
197
|
def test_urls(mocked_responses, params):
|
|
197
198
|
file_name, checks = params
|
|
@@ -261,17 +262,17 @@ def test_cast_json(mocked_responses, cast_json):
|
|
|
261
262
|
cast_json, expected_type = cast_json
|
|
262
263
|
expected_content = 'id,a_simple_dict\n1,{"a": 1}\n2,{"b": 2}\n3,{"c": 3}\n'
|
|
263
264
|
mocked_responses.get(
|
|
264
|
-
|
|
265
|
+
"http://example.com/test.csv",
|
|
265
266
|
body=expected_content,
|
|
266
267
|
status=200,
|
|
267
268
|
)
|
|
268
269
|
analysis, df = routine(
|
|
269
|
-
file_path=
|
|
270
|
+
file_path="http://example.com/test.csv",
|
|
270
271
|
num_rows=-1,
|
|
271
272
|
output_profile=False,
|
|
272
273
|
save_results=False,
|
|
273
274
|
output_df=True,
|
|
274
275
|
cast_json=cast_json,
|
|
275
276
|
)
|
|
276
|
-
assert analysis[
|
|
277
|
+
assert analysis["columns"]["a_simple_dict"]["python_type"] == "json"
|
|
277
278
|
assert isinstance(df["a_simple_dict"][0], expected_type)
|
tests/test_labels.py
CHANGED
|
@@ -10,13 +10,14 @@ def test_money_labels():
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@pytest.mark.parametrize(
|
|
13
|
-
"params",
|
|
13
|
+
"params",
|
|
14
|
+
[
|
|
14
15
|
("latitude", 1.0),
|
|
15
16
|
("lat", 1.0),
|
|
16
17
|
("coord_lat", 0.5),
|
|
17
18
|
("y", 1.0),
|
|
18
19
|
("nb_cycles", 0.0),
|
|
19
|
-
]
|
|
20
|
+
],
|
|
20
21
|
)
|
|
21
22
|
def test_latitude(params):
|
|
22
23
|
header, expected = params
|
tests/test_structure.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
|
|
2
3
|
# flake8: noqa
|
|
3
4
|
from csv_detective import detect_fields, detect_labels
|
|
4
5
|
from csv_detective.load_tests import return_all_tests
|
|
@@ -18,7 +19,8 @@ def tests_conformity():
|
|
|
18
19
|
if "__pycache__" not in dirname:
|
|
19
20
|
subfolders.append(os.path.join(dirpath, dirname))
|
|
20
21
|
final_subfolders = [
|
|
21
|
-
sf
|
|
22
|
+
sf
|
|
23
|
+
for sf in subfolders
|
|
22
24
|
if not any(other_sf.startswith(sf) for other_sf in subfolders if sf != other_sf)
|
|
23
25
|
]
|
|
24
26
|
for f_sf in final_subfolders:
|
tests/test_validation.py
CHANGED
|
@@ -30,13 +30,16 @@ def get_nested_value(source_dict: dict, key_chain: list[str]):
|
|
|
30
30
|
((False, None, None), {"separator": "|"}),
|
|
31
31
|
((False, None, None), {"encoding": "unknown"}),
|
|
32
32
|
((False, None, None), {"header": ["a", "b"]}),
|
|
33
|
-
(
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
"
|
|
37
|
-
|
|
33
|
+
(
|
|
34
|
+
(False, pd.DataFrame, dict),
|
|
35
|
+
{
|
|
36
|
+
"columns.NUMCOM": {
|
|
37
|
+
"python_type": "int",
|
|
38
|
+
"format": "int",
|
|
39
|
+
"score": 1.0,
|
|
40
|
+
},
|
|
38
41
|
},
|
|
39
|
-
|
|
42
|
+
),
|
|
40
43
|
),
|
|
41
44
|
)
|
|
42
45
|
def test_validation(_params):
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Activate virtualenv for current interpreter:
|
|
3
|
+
|
|
4
|
+
import runpy
|
|
5
|
+
runpy.run_path(this_file)
|
|
6
|
+
|
|
7
|
+
This can be used when you must use an existing Python interpreter, not the virtualenv bin/python.
|
|
8
|
+
""" # noqa: D415
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import site
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
abs_file = os.path.abspath(__file__)
|
|
18
|
+
except NameError as exc:
|
|
19
|
+
msg = "You must use import runpy; runpy.run_path(this_file)"
|
|
20
|
+
raise AssertionError(msg) from exc
|
|
21
|
+
|
|
22
|
+
bin_dir = os.path.dirname(abs_file)
|
|
23
|
+
base = bin_dir[: -len('bin') - 1] # strip away the bin part from the __file__, plus the path separator
|
|
24
|
+
|
|
25
|
+
# prepend bin to PATH (this file is inside the bin directory)
|
|
26
|
+
os.environ["PATH"] = os.pathsep.join([bin_dir, *os.environ.get("PATH", "").split(os.pathsep)])
|
|
27
|
+
os.environ["VIRTUAL_ENV"] = base # virtual env is right above bin directory
|
|
28
|
+
os.environ["VIRTUAL_ENV_PROMPT"] = '' or os.path.basename(base)
|
|
29
|
+
|
|
30
|
+
# add the virtual environments libraries to the host python import mechanism
|
|
31
|
+
prev_length = len(sys.path)
|
|
32
|
+
for lib in '../lib/python3.9/site-packages'.split(os.pathsep):
|
|
33
|
+
path = os.path.realpath(os.path.join(bin_dir, lib))
|
|
34
|
+
site.addsitedir(path.decode("utf-8") if '' else path)
|
|
35
|
+
sys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]
|
|
36
|
+
|
|
37
|
+
sys.real_prefix = sys.prefix
|
|
38
|
+
sys.prefix = base
|
venv/bin/jp.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#!/home/circleci/project/venv/bin/python
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import json
|
|
5
|
+
import argparse
|
|
6
|
+
from pprint import pformat
|
|
7
|
+
|
|
8
|
+
import jmespath
|
|
9
|
+
from jmespath import exceptions
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main():
|
|
13
|
+
parser = argparse.ArgumentParser()
|
|
14
|
+
parser.add_argument('expression')
|
|
15
|
+
parser.add_argument('-f', '--filename',
|
|
16
|
+
help=('The filename containing the input data. '
|
|
17
|
+
'If a filename is not given then data is '
|
|
18
|
+
'read from stdin.'))
|
|
19
|
+
parser.add_argument('--ast', action='store_true',
|
|
20
|
+
help=('Pretty print the AST, do not search the data.'))
|
|
21
|
+
args = parser.parse_args()
|
|
22
|
+
expression = args.expression
|
|
23
|
+
if args.ast:
|
|
24
|
+
# Only print the AST
|
|
25
|
+
expression = jmespath.compile(args.expression)
|
|
26
|
+
sys.stdout.write(pformat(expression.parsed))
|
|
27
|
+
sys.stdout.write('\n')
|
|
28
|
+
return 0
|
|
29
|
+
if args.filename:
|
|
30
|
+
with open(args.filename, 'r') as f:
|
|
31
|
+
data = json.load(f)
|
|
32
|
+
else:
|
|
33
|
+
data = sys.stdin.read()
|
|
34
|
+
data = json.loads(data)
|
|
35
|
+
try:
|
|
36
|
+
sys.stdout.write(json.dumps(
|
|
37
|
+
jmespath.search(expression, data), indent=4, ensure_ascii=False))
|
|
38
|
+
sys.stdout.write('\n')
|
|
39
|
+
except exceptions.ArityError as e:
|
|
40
|
+
sys.stderr.write("invalid-arity: %s\n" % e)
|
|
41
|
+
return 1
|
|
42
|
+
except exceptions.JMESPathTypeError as e:
|
|
43
|
+
sys.stderr.write("invalid-type: %s\n" % e)
|
|
44
|
+
return 1
|
|
45
|
+
except exceptions.UnknownFunctionError as e:
|
|
46
|
+
sys.stderr.write("unknown-function: %s\n" % e)
|
|
47
|
+
return 1
|
|
48
|
+
except exceptions.ParseError as e:
|
|
49
|
+
sys.stderr.write("syntax-error: %s\n" % e)
|
|
50
|
+
return 1
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
if __name__ == '__main__':
|
|
54
|
+
sys.exit(main())
|