csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +7 -1
- csv_detective/cli.py +33 -21
- csv_detective/{detect_fields/FR → detection}/__init__.py +0 -0
- csv_detective/detection/columns.py +89 -0
- csv_detective/detection/encoding.py +29 -0
- csv_detective/detection/engine.py +46 -0
- csv_detective/detection/formats.py +156 -0
- csv_detective/detection/headers.py +28 -0
- csv_detective/detection/rows.py +18 -0
- csv_detective/detection/separator.py +44 -0
- csv_detective/detection/variables.py +97 -0
- csv_detective/explore_csv.py +151 -377
- csv_detective/format.py +67 -0
- csv_detective/formats/__init__.py +9 -0
- csv_detective/formats/adresse.py +116 -0
- csv_detective/formats/binary.py +26 -0
- csv_detective/formats/booleen.py +35 -0
- csv_detective/formats/code_commune_insee.py +26 -0
- csv_detective/formats/code_csp_insee.py +36 -0
- csv_detective/formats/code_departement.py +29 -0
- csv_detective/formats/code_fantoir.py +21 -0
- csv_detective/formats/code_import.py +17 -0
- csv_detective/formats/code_postal.py +25 -0
- csv_detective/formats/code_region.py +22 -0
- csv_detective/formats/code_rna.py +29 -0
- csv_detective/formats/code_waldec.py +17 -0
- csv_detective/formats/commune.py +27 -0
- csv_detective/formats/csp_insee.py +31 -0
- csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
- csv_detective/formats/date.py +99 -0
- csv_detective/formats/date_fr.py +22 -0
- csv_detective/formats/datetime_aware.py +45 -0
- csv_detective/formats/datetime_naive.py +48 -0
- csv_detective/formats/datetime_rfc822.py +24 -0
- csv_detective/formats/departement.py +37 -0
- csv_detective/formats/email.py +28 -0
- csv_detective/formats/float.py +29 -0
- csv_detective/formats/geojson.py +36 -0
- csv_detective/formats/insee_ape700.py +31 -0
- csv_detective/formats/insee_canton.py +28 -0
- csv_detective/formats/int.py +23 -0
- csv_detective/formats/iso_country_code_alpha2.py +30 -0
- csv_detective/formats/iso_country_code_alpha3.py +30 -0
- csv_detective/formats/iso_country_code_numeric.py +31 -0
- csv_detective/formats/jour_de_la_semaine.py +41 -0
- csv_detective/formats/json.py +20 -0
- csv_detective/formats/latitude_l93.py +48 -0
- csv_detective/formats/latitude_wgs.py +42 -0
- csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
- csv_detective/formats/latlon_wgs.py +53 -0
- csv_detective/formats/longitude_l93.py +39 -0
- csv_detective/formats/longitude_wgs.py +32 -0
- csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
- csv_detective/formats/lonlat_wgs.py +36 -0
- csv_detective/formats/mois_de_lannee.py +48 -0
- csv_detective/formats/money.py +18 -0
- csv_detective/formats/mongo_object_id.py +14 -0
- csv_detective/formats/pays.py +35 -0
- csv_detective/formats/percent.py +16 -0
- csv_detective/formats/region.py +70 -0
- csv_detective/formats/sexe.py +17 -0
- csv_detective/formats/siren.py +37 -0
- csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -29
- csv_detective/formats/tel_fr.py +36 -0
- csv_detective/formats/uai.py +36 -0
- csv_detective/formats/url.py +46 -0
- csv_detective/formats/username.py +14 -0
- csv_detective/formats/uuid.py +16 -0
- csv_detective/formats/year.py +28 -0
- csv_detective/output/__init__.py +65 -0
- csv_detective/output/dataframe.py +96 -0
- csv_detective/output/example.py +250 -0
- csv_detective/output/profile.py +119 -0
- csv_detective/{schema_generation.py → output/schema.py} +268 -343
- csv_detective/output/utils.py +74 -0
- csv_detective/{detect_fields/FR/geo → parsing}/__init__.py +0 -0
- csv_detective/parsing/columns.py +235 -0
- csv_detective/parsing/compression.py +11 -0
- csv_detective/parsing/csv.py +56 -0
- csv_detective/parsing/excel.py +167 -0
- csv_detective/parsing/load.py +111 -0
- csv_detective/parsing/text.py +56 -0
- csv_detective/utils.py +23 -196
- csv_detective/validate.py +138 -0
- csv_detective-0.9.3.dev2438.dist-info/METADATA +267 -0
- csv_detective-0.9.3.dev2438.dist-info/RECORD +92 -0
- csv_detective-0.9.3.dev2438.dist-info/WHEEL +4 -0
- {csv_detective-0.6.7.dist-info → csv_detective-0.9.3.dev2438.dist-info}/entry_points.txt +1 -0
- csv_detective/all_packages.txt +0 -104
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +0 -100
- csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -24
- csv_detective/detect_fields/FR/geo/code_commune_insee/code_commune_insee.txt +0 -37600
- csv_detective/detect_fields/FR/geo/code_departement/__init__.py +0 -11
- csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -15
- csv_detective/detect_fields/FR/geo/code_fantoir/code_fantoir.txt +0 -26122
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -19
- csv_detective/detect_fields/FR/geo/code_postal/code_postal.txt +0 -36822
- csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -27
- csv_detective/detect_fields/FR/geo/commune/__init__.py +0 -21
- csv_detective/detect_fields/FR/geo/commune/commune.txt +0 -36745
- csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -19
- csv_detective/detect_fields/FR/geo/departement/departement.txt +0 -101
- csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +0 -20
- csv_detective/detect_fields/FR/geo/insee_canton/canton2017.txt +0 -2055
- csv_detective/detect_fields/FR/geo/insee_canton/cantons.txt +0 -2055
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -17
- csv_detective/detect_fields/FR/geo/pays/pays.txt +0 -248
- csv_detective/detect_fields/FR/geo/region/__init__.py +0 -16
- csv_detective/detect_fields/FR/geo/region/region.txt +0 -44
- csv_detective/detect_fields/FR/other/__init__.py +0 -0
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +0 -26
- csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
- csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -8
- csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -12
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +0 -16
- csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +0 -16
- csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -9
- csv_detective/detect_fields/FR/other/siren/__init__.py +0 -18
- csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -15
- csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
- csv_detective/detect_fields/FR/temp/__init__.py +0 -0
- csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +0 -23
- csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +0 -37
- csv_detective/detect_fields/__init__.py +0 -57
- csv_detective/detect_fields/geo/__init__.py +0 -0
- csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
- csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
- csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
- csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -22
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -15
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
- csv_detective/detect_fields/other/__init__.py +0 -0
- csv_detective/detect_fields/other/booleen/__init__.py +0 -21
- csv_detective/detect_fields/other/email/__init__.py +0 -8
- csv_detective/detect_fields/other/float/__init__.py +0 -17
- csv_detective/detect_fields/other/int/__init__.py +0 -12
- csv_detective/detect_fields/other/json/__init__.py +0 -24
- csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
- csv_detective/detect_fields/other/twitter/__init__.py +0 -8
- csv_detective/detect_fields/other/url/__init__.py +0 -11
- csv_detective/detect_fields/other/uuid/__init__.py +0 -11
- csv_detective/detect_fields/temp/__init__.py +0 -0
- csv_detective/detect_fields/temp/date/__init__.py +0 -62
- csv_detective/detect_fields/temp/datetime_iso/__init__.py +0 -18
- csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +0 -21
- csv_detective/detect_fields/temp/year/__init__.py +0 -10
- csv_detective/detect_labels/FR/__init__.py +0 -0
- csv_detective/detect_labels/FR/geo/__init__.py +0 -0
- csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -40
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -42
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -41
- csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -47
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -54
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -55
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -44
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -45
- csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -45
- csv_detective/detect_labels/FR/geo/region/__init__.py +0 -45
- csv_detective/detect_labels/FR/other/__init__.py +0 -0
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -38
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -37
- csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -40
- csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/siren/__init__.py +0 -41
- csv_detective/detect_labels/FR/other/siret/__init__.py +0 -40
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -45
- csv_detective/detect_labels/FR/other/uai/__init__.py +0 -50
- csv_detective/detect_labels/FR/temp/__init__.py +0 -0
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -41
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -33
- csv_detective/detect_labels/__init__.py +0 -43
- csv_detective/detect_labels/geo/__init__.py +0 -0
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -41
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -41
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -41
- csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -42
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -55
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -67
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -45
- csv_detective/detect_labels/other/__init__.py +0 -0
- csv_detective/detect_labels/other/booleen/__init__.py +0 -34
- csv_detective/detect_labels/other/email/__init__.py +0 -45
- csv_detective/detect_labels/other/float/__init__.py +0 -33
- csv_detective/detect_labels/other/int/__init__.py +0 -33
- csv_detective/detect_labels/other/money/__init__.py +0 -11
- csv_detective/detect_labels/other/money/check_col_name.py +0 -8
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -33
- csv_detective/detect_labels/other/twitter/__init__.py +0 -33
- csv_detective/detect_labels/other/url/__init__.py +0 -48
- csv_detective/detect_labels/other/uuid/__init__.py +0 -33
- csv_detective/detect_labels/temp/__init__.py +0 -0
- csv_detective/detect_labels/temp/date/__init__.py +0 -51
- csv_detective/detect_labels/temp/datetime_iso/__init__.py +0 -45
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -44
- csv_detective/detect_labels/temp/year/__init__.py +0 -44
- csv_detective/detection.py +0 -361
- csv_detective/process_text.py +0 -39
- csv_detective/s3_utils.py +0 -48
- csv_detective-0.6.7.data/data/share/csv_detective/CHANGELOG.md +0 -118
- csv_detective-0.6.7.data/data/share/csv_detective/LICENSE.AGPL.txt +0 -661
- csv_detective-0.6.7.data/data/share/csv_detective/README.md +0 -247
- csv_detective-0.6.7.dist-info/LICENSE.AGPL.txt +0 -661
- csv_detective-0.6.7.dist-info/METADATA +0 -23
- csv_detective-0.6.7.dist-info/RECORD +0 -150
- csv_detective-0.6.7.dist-info/WHEEL +0 -5
- csv_detective-0.6.7.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/test_fields.py +0 -360
- tests/test_file.py +0 -116
- tests/test_labels.py +0 -7
- /csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: csv_detective
|
|
3
|
-
Version: 0.6.7
|
|
4
|
-
Summary: Detect CSV column content
|
|
5
|
-
Home-page: https://github.com/etalab/csv_detective
|
|
6
|
-
Author: Etalab
|
|
7
|
-
Author-email: opendatateam@data.gouv.fr
|
|
8
|
-
License: http://www.fsf.org/licensing/licenses/agpl-3.0.html
|
|
9
|
-
Keywords: CSV data processing encoding guess parser tabular
|
|
10
|
-
Classifier: Development Status :: 2 - Pre-Alpha
|
|
11
|
-
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
12
|
-
Classifier: Operating System :: POSIX
|
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
15
|
-
Description-Content-Type: text/markdown
|
|
16
|
-
License-File: LICENSE.AGPL.txt
|
|
17
|
-
Requires-Dist: boto3 ==1.26.65
|
|
18
|
-
Requires-Dist: faust-cchardet ==2.1.19
|
|
19
|
-
Requires-Dist: pandas ==1.5.3
|
|
20
|
-
Requires-Dist: pytest ==7.2.1
|
|
21
|
-
Requires-Dist: python-dateutil ==2.8.2
|
|
22
|
-
Requires-Dist: Unidecode ==1.3.6
|
|
23
|
-
|
|
@@ -1,150 +0,0 @@
|
|
|
1
|
-
csv_detective/__init__.py,sha256=GXnXV3cFmzDnIeFijtPpxojjjKDkoNrbwKLQwhYI7RY,22
|
|
2
|
-
csv_detective/all_packages.txt,sha256=MzN0kVnX99LU0PCqrO7JBjrYMngXn7GByhGYwTvbo74,4681
|
|
3
|
-
csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
|
|
4
|
-
csv_detective/detection.py,sha256=FDC7gLe4CNWrMf9py78JXv1vpvjTrS7UwdCA7TQ7u_Y,12249
|
|
5
|
-
csv_detective/explore_csv.py,sha256=KBLMKlyc_5bRJLQH9MYAx9VDTeuD5TzTdjflHR4WnPE,14605
|
|
6
|
-
csv_detective/process_text.py,sha256=1KMgoTnfUKghNcmDQp_84ryLwcMP2PR3sTyCnuOeeAI,1236
|
|
7
|
-
csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
|
|
8
|
-
csv_detective/schema_generation.py,sha256=D1Cq4QRajsKtY8EJSwbRTIB-T_Cb2ZpcmYtCrJ6DvJQ,13135
|
|
9
|
-
csv_detective/utils.py,sha256=0cpPlcgB6oOpxPtVv6cSqwJGYOYt8GN8uxviABxLwd0,8263
|
|
10
|
-
csv_detective/detect_fields/__init__.py,sha256=CchNbi1vrgIGh_uBexXZTzfjBETDY0kQLjI-PAquU8M,921
|
|
11
|
-
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=s0Tvyjf09DHksFRlbbtHwepIxRJZk328PvZuac7h0Ok,1647
|
|
14
|
-
csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=34ugFPPjduSjH466aw3XcMcCi97kAaJX2RiguEHOW5M,705
|
|
15
|
-
csv_detective/detect_fields/FR/geo/code_commune_insee/code_commune_insee.txt,sha256=DgMtDlS5tX464k0RgSnSmgbIN9Z3BfK7gOt-k1uLfyk,225600
|
|
16
|
-
csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=9jadtQHMDpcdYKEis5BJpQrpWvIubKarUECPcPRyFJY,382
|
|
17
|
-
csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=L9poDp6kuZBdpVjVoHqfslDGEXWl8t2U8Hl6FXCz4N4,397
|
|
18
|
-
csv_detective/detect_fields/FR/geo/code_fantoir/code_fantoir.txt,sha256=nMQcnFaoTyLnIOSRnv0e30F1plvHeMEzr_ZfMwoh6bM,130610
|
|
19
|
-
csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=2DDZl-AuDRKJKg2AEsICzupEMf1_w6wEbgNUFGMHyAE,464
|
|
20
|
-
csv_detective/detect_fields/FR/geo/code_postal/code_postal.txt,sha256=J4XyH2TAYqTtBTyk0ySrmIMXpTlu4H7aFnd861gcf2U,220932
|
|
21
|
-
csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=tGFxKHl1sVtFBftSWGi1SXXoDPJcuDcZA_c6YXqjStY,333
|
|
22
|
-
csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=uhWvl6NqI-AfE2mPCFrPfuBUKR_MAZM1kn81UabdjmM,540
|
|
23
|
-
csv_detective/detect_fields/FR/geo/commune/commune.txt,sha256=IiKhO6-1XmcbAjPXLRvPyR3trPbCAHwCrLDJjRhutT8,468480
|
|
24
|
-
csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=E7BdP-5PEW9N6AVuzr4vfccebuvC9M286RB1YvDdZN0,525
|
|
25
|
-
csv_detective/detect_fields/FR/geo/departement/departement.txt,sha256=rgNc9QO3ahH5Z-FzDnrp8YaMP140KNi2EYFkoreIAhk,1012
|
|
26
|
-
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=yBjm694LpLphaLoEaLq1D3J52oHiSjc_FgxW-viMaV0,543
|
|
27
|
-
csv_detective/detect_fields/FR/geo/insee_canton/canton2017.txt,sha256=foCRrjuobtpKqkjaEKLwUzzk467Lo9Z8sOwFFXrmtj8,98052
|
|
28
|
-
csv_detective/detect_fields/FR/geo/insee_canton/cantons.txt,sha256=8eiau0QfU58AWqhl9N8N1kYHWmh1U8D-Nyfb5R7z4kg,25539
|
|
29
|
-
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=gOGelADhufPOuhuAl_m9wsYyJoWBUjqaN1gZttf-qA8,343
|
|
30
|
-
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=WjPHg8s0ND6bOwS-yo6FP1dnwD-6SWg9oH1K0avHsbI,344
|
|
31
|
-
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=Gf8IPsjzBYumGuV5-7_eW8s_Gm2tOMgDBKgwDZY3kcw,344
|
|
32
|
-
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=d4fLK4IndwllDhsddyTbyRiPfc8O9wT0pLIRI_C3QvQ,344
|
|
33
|
-
csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=lDTRA4EkbjcFKoOcWaZ0c8OBcwig4Bovj7hIzWipZvI,402
|
|
34
|
-
csv_detective/detect_fields/FR/geo/pays/pays.txt,sha256=prUV8eSVIuxNspIYHDPg9x_MC0DgFnbr29uxFLcGceU,2549
|
|
35
|
-
csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=dDjK_TrsdOKgsiS-5Ofs98ND6v1Of_iw6XrfhSnntn4,400
|
|
36
|
-
csv_detective/detect_fields/FR/geo/region/region.txt,sha256=wv77qAdQ0FSc33kvnhmg4LBBVKE4tvdgpZV_NDz3o-Y,560
|
|
37
|
-
csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=XCegXF9rmf0yYv2_xFt_HemYsEP_GAzk-eNLa1PUVJw,512
|
|
39
|
-
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
|
|
40
|
-
csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z4AFN4BeBFO9OAZ4lkE4mybSDyMd7uu1PwhDjq2It_k,120
|
|
41
|
-
csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=vfCl1ENCTiJUDF4Dlse5tT2LD9k7Ed674a81VrAe1g0,270
|
|
42
|
-
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=47r_-CJneGX0C4OB_YHOvMn3TCvg4zKULh1ervIEDdQ,442
|
|
43
|
-
csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
44
|
-
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=xJdMpBJoqe_XDkh-WkH8ZtT2AN43vfAEzueK7faL3GM,259
|
|
45
|
-
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=G6EliaNJw_RCpHaAGfTTCkdKe_OxPqDUjpcLikmr4eQ,465
|
|
46
|
-
csv_detective/detect_fields/FR/other/insee_ape700/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
|
|
47
|
-
csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=YkX4vC85oul30H1Qejsnid_WFv2i7CKK8LH83x6SfRk,215
|
|
48
|
-
csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=jjCXN5xJPD_pOGNSOc3XDIm5jMX4YS2Nk0nfwt89GWs,388
|
|
49
|
-
csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=Yo0YwttbJUZCSIamWBmZmVrAiOLABHywcY5b7CJ_14c,653
|
|
50
|
-
csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=8rrNHCs9mjQ7RVpOhjBMMOsgCI3ormvw4LnrUxno4YI,289
|
|
51
|
-
csv_detective/detect_fields/FR/other/uai/__init__.py,sha256=6pbVoU5g3Jok4OrWmVITHazXriTpKRPkrv-PHppklI4,299
|
|
52
|
-
csv_detective/detect_fields/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
|
-
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py,sha256=jklVG-8Hcv1bjsKLqC6uN0zwmGkPPTgSa0p4iThz6DE,341
|
|
54
|
-
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py,sha256=dzk4BAszoKO_FAAPYrocWpKg7zpl8JukQTPkfSmt3H4,527
|
|
55
|
-
csv_detective/detect_fields/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py,sha256=rwIqUgW0DUOXevg1I-ah6uhm00QcKde2bc2YOmwZ1Jc,405
|
|
57
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha2/iso_country_code_alpha2.txt,sha256=YyPlDqCdz65ecf4Wes_r0P4rDSJG35niXtjc4MmctXM,1740
|
|
58
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=_xnwOcW2G3tBxHBFGPguK0fo-Kc8j9s_KC6mqTyjrbY,381
|
|
59
|
-
csv_detective/detect_fields/geo/iso_country_code_alpha3/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
|
|
60
|
-
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=GAHzTvbAgG6kCNFzLh6K-m01CbFLgL_mnCP3jWmzWPY,408
|
|
61
|
-
csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
|
|
62
|
-
csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=FPHOfTrfXJs62-NgeOcNGOvwPd7I1fEVp8lTdMNfj3w,433
|
|
63
|
-
csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=ArS6PuYEd0atZwSqNDZhXZz1TwzdiwdV8ovRYTOacpg,327
|
|
64
|
-
csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=yvjNFyiF-xbhsL0LzC_mS3-_m74t47tItNxbd_nrQsM,254
|
|
65
|
-
csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=myGcoTQUhVPyHDDXeWu4yj7vuqVkobbu1MbDaw3wS5Q,323
|
|
66
|
-
csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
|
-
csv_detective/detect_fields/other/booleen/__init__.py,sha256=wfAa0aPOO63SIjQoPXfB50xl7YfEQ8xwYYhv8K-VOkE,256
|
|
68
|
-
csv_detective/detect_fields/other/email/__init__.py,sha256=TDC1XK9a3A-KTAE9hxVy-V3xmMcVoQHjfYSM0NzRt_A,153
|
|
69
|
-
csv_detective/detect_fields/other/float/__init__.py,sha256=P9VdTaRHYOxCcr4r5Om0-BtcdjlKiEdQtUJgPKvuWik,427
|
|
70
|
-
csv_detective/detect_fields/other/int/__init__.py,sha256=pkYmP_DG5wIB6-tVAbbj4jRVSgzGTRdpOftnkVsX0fw,218
|
|
71
|
-
csv_detective/detect_fields/other/json/__init__.py,sha256=CMhfnA0_O3B6FBwsYgjaTPOv_wQB2nBDNWO9jSYyP4Y,527
|
|
72
|
-
csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=znlOyispn4k_oFxiVXX7LwhFG5bz63Nhv5_zny7Zbj0,131
|
|
73
|
-
csv_detective/detect_fields/other/twitter/__init__.py,sha256=EoRUXII5aoSrPCSLaKcTjH2g0P5ojF2fOlxeqm5NfQs,129
|
|
74
|
-
csv_detective/detect_fields/other/url/__init__.py,sha256=ihhkhectA73ovpZ42n0VbgXvtUa6TjhHlEKvrsT4pY0,232
|
|
75
|
-
csv_detective/detect_fields/other/uuid/__init__.py,sha256=uqX0DE-KQUHrkhVssU1AeLEFUw-yIisoabQo7uF5a4s,198
|
|
76
|
-
csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
|
-
csv_detective/detect_fields/temp/date/__init__.py,sha256=Nc-ZYYM-E7Z-MH8flOkG4iBl238l2XcQF95Nh1lFiu8,1891
|
|
78
|
-
csv_detective/detect_fields/temp/datetime_iso/__init__.py,sha256=8LrsbnKea64zSLOV6ENHDLlmCWtEUhGqLsV75ftZHHk,430
|
|
79
|
-
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=t7WxeA0ZavES_pvqHoO4JAAErpC-ifGFFncZ-ehjTTM,511
|
|
80
|
-
csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRhKTDT-FTqGOBpdartuShA,194
|
|
81
|
-
csv_detective/detect_labels/__init__.py,sha256=BJjWlwTnnDe9nomABDUreu9EMu6IFG3T47d7YCJZbRc,878
|
|
82
|
-
csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
|
-
csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
|
-
csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=r14SVoVJiaabyr6lTahI_Qsk0EH3F8UVSi6TRnDQS7o,1063
|
|
85
|
-
csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=qJKvP6g98ceSxaFtd37-bI-9uFhQvdwFSh1n3MrzrOo,1096
|
|
86
|
-
csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=FtPwHudArmsgkjCT_IM-I4_wALOsKjiK0-TEsYe9tw4,1025
|
|
87
|
-
csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=irrBwY_TXAGRhOAcH1Xqi9D7P5Ajk2854ee1qXwuTnA,949
|
|
88
|
-
csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=L74MwxadiT_MVrEEWUlMbhUsE_kk7xz_E2BHrZMUxMs,1070
|
|
89
|
-
csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=Di9j-AKCogKxavnPgGjA_P8hy8g6JyJ0GBOO0k4l-qY,1012
|
|
90
|
-
csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=8Jhx4neUt5iyyK_b1D4WWsdxi3mpz7cNZQ28fFF4xaE,948
|
|
91
|
-
csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=N8MYMhqhspoLAUgD25pIrsqDKRuwTGnXXm8Chr8wih8,1229
|
|
92
|
-
csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=8Tcqzjn-dGGjpxzo-2TqmEYpyfEhcqa1XNcQgMnqq88,957
|
|
93
|
-
csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=nUT7SL4sKP_q9LTbiMBaCzJ029yBMP_phAD_CiOVHfc,1386
|
|
94
|
-
csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=DCyN6-k1FH1kfTy4tFZWIH6lyaKeT-vgWnDh8TB7JhU,1381
|
|
95
|
-
csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=Lcqc8Agjxy2dPulu65NRel4uxRLPcQrAGrLsBTYT8EQ,1139
|
|
96
|
-
csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=xd_W-L9pkKpsMT1IZ0fVMdty1dmT75uS8gmekb_InAw,1144
|
|
97
|
-
csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=HJ3hNV3xeAN46YP6c-tqQgHMNvltm-tgApfofR5FraE,1169
|
|
98
|
-
csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=ZPw8LXIuV8OvFVY_DA3MkvpAFzB6Rs749Ppr0Wc4lao,1164
|
|
99
|
-
csv_detective/detect_labels/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
100
|
-
csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=rk5S6UGL0vND2X2ty1HJJH3qswUbaV6ZJHHKAywRa6o,939
|
|
101
|
-
csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=MBF9qZIGbX1dP3DJBI71pbqrGcgOR1xXAbGdiHX0pc4,1024
|
|
102
|
-
csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=RDU8jDZgutfxnJl5lQkbqymJmGeeGXpR2i4CuGfqU10,934
|
|
103
|
-
csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=-GoB9i83O3_rg81Ry3ZtLOdMhlrMPZ34he4hn9U6qDc,1043
|
|
104
|
-
csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=7eV737iM1X9MTHureWiCpnxAUJ2_YjI14Vs41MGEX2Q,975
|
|
105
|
-
csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=6UDx_6JRG__aSXTKKPq-2oBJR7ZiWg0HhSLDl4pETm4,1124
|
|
106
|
-
csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=87BcCHmofOMqfHfHzmwZzIplcBMAm1AUMxVNvigigTQ,956
|
|
107
|
-
csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=oWkOquzLLbDwBlAs_hoic_UQu7LFOmFZ76570vwRgdc,1103
|
|
108
|
-
csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Xx-oajnzxEe6pEAYafsnZo7S-mLfnB0pP3z5gv3kJy0,1040
|
|
109
|
-
csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=WWglp7xmb_Wz8bxrYYmB46cRyCJKNNqguubziMktZZI,1143
|
|
110
|
-
csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=sVcw6fwQi9ocIEmLEJRi9m4WvTLg_ORwaW0KaJqeMB8,1316
|
|
111
|
-
csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
112
|
-
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=a3fKjduxRIMNu7TF124pG--Mb21PIqZYnQwYU4APLBw,1074
|
|
113
|
-
csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=taNWDz1_0KE4cOS4SeJcC8igMSA2LBbv8TvbCg50-TY,934
|
|
114
|
-
csv_detective/detect_labels/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
|
-
csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=ev8w1hySEoNiMcU1IhJy72IB5OliCvoUy-ytKWPG3oI,1065
|
|
116
|
-
csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=ev8w1hySEoNiMcU1IhJy72IB5OliCvoUy-ytKWPG3oI,1065
|
|
117
|
-
csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=ev8w1hySEoNiMcU1IhJy72IB5OliCvoUy-ytKWPG3oI,1065
|
|
118
|
-
csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=p6mXMb1GMkrs27WmoqRqPE3wCbs3iPL4FWfcc280bGA,1072
|
|
119
|
-
csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=DCyN6-k1FH1kfTy4tFZWIH6lyaKeT-vgWnDh8TB7JhU,1381
|
|
120
|
-
csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=xfzeQ1KXmEZAkpVQT_qAYsC4RnXKl11dTB9PoFExGgQ,1705
|
|
121
|
-
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=knv3HnIerZ6oUPrzGkW2GJjsiTnCklqZ9_koNJCG91I,1145
|
|
122
|
-
csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
123
|
-
csv_detective/detect_labels/other/booleen/__init__.py,sha256=ahxzBUcJYc5f1J8aAeYDhtSc-URJHS6ruGTAbZXaoG8,987
|
|
124
|
-
csv_detective/detect_labels/other/email/__init__.py,sha256=2pf0e8o3L57damyi4BLrqA9Opw0trZl2wWDHY88s41E,1148
|
|
125
|
-
csv_detective/detect_labels/other/float/__init__.py,sha256=9JC0-B-aVqlLe3FeN8uH5HZjIc2V6hZ7JFStkSLsHW0,926
|
|
126
|
-
csv_detective/detect_labels/other/int/__init__.py,sha256=i9xN8TYBy4C5b1vYO1l3Rkvn4uq_tft8Rip_ErSUIt8,933
|
|
127
|
-
csv_detective/detect_labels/other/money/__init__.py,sha256=kBEGuUy6kYkOI3vC_a7waBciG2ipyV9bhC330U8WaoI,279
|
|
128
|
-
csv_detective/detect_labels/other/money/check_col_name.py,sha256=zgp5eUnf3XRQuxgdEGfxPfUnniO8Pzw19uK0ICr2pf8,414
|
|
129
|
-
csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=Y-inIGmeH3lZcN9kR6icE3QypS54qJWv8aE4GQUudpc,927
|
|
130
|
-
csv_detective/detect_labels/other/twitter/__init__.py,sha256=D8G4vGsFL9a99OJz-03wp4HbZSvT-y1IxyRJiSsqxFc,959
|
|
131
|
-
csv_detective/detect_labels/other/url/__init__.py,sha256=vqUQvn5o6JZU8iRsSG3AYqggjlhzagozVYWwpuSReV8,1202
|
|
132
|
-
csv_detective/detect_labels/other/uuid/__init__.py,sha256=OdMUxqvqMdGaY5nph7CbIF_Q0LSxljxE72kCMT4m-Zk,931
|
|
133
|
-
csv_detective/detect_labels/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
134
|
-
csv_detective/detect_labels/temp/date/__init__.py,sha256=GrIbo64WVM3hi7ShBRKKyKUZxkZlVKhpgk41FxkM1VI,1281
|
|
135
|
-
csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
|
|
136
|
-
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
|
|
137
|
-
csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
|
|
138
|
-
csv_detective-0.6.7.data/data/share/csv_detective/CHANGELOG.md,sha256=urZrWA8jhrqctpQke5NPhzYZINE8UXc7AczcqaxbK3U,4465
|
|
139
|
-
csv_detective-0.6.7.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
140
|
-
csv_detective-0.6.7.data/data/share/csv_detective/README.md,sha256=5pxTU1Ljer8Gw0rOZZvi6vyC3QhjFrGaFJloy2N9GMs,9339
|
|
141
|
-
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
142
|
-
tests/test_fields.py,sha256=xu80qwzZVQIH6dVRf1dE1lru4EzM2XtGc7YoMDPRAX4,7439
|
|
143
|
-
tests/test_file.py,sha256=tWULaaHIokkwKXW_9hfbONYDZncbOXCFL0I0sXtQ4YA,3913
|
|
144
|
-
tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
|
|
145
|
-
csv_detective-0.6.7.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
146
|
-
csv_detective-0.6.7.dist-info/METADATA,sha256=acU2wBZzO6LDCEuDjR49xqX4N0xmqhDR4pM8Uzm-KKg,871
|
|
147
|
-
csv_detective-0.6.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
148
|
-
csv_detective-0.6.7.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
149
|
-
csv_detective-0.6.7.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
150
|
-
csv_detective-0.6.7.dist-info/RECORD,,
|
tests/__init__.py
DELETED
|
File without changes
|
tests/test_fields.py
DELETED
|
@@ -1,360 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
from numpy import random
|
|
3
|
-
|
|
4
|
-
from csv_detective.detect_fields.FR.other import (
|
|
5
|
-
code_csp_insee,
|
|
6
|
-
csp_insee,
|
|
7
|
-
sexe,
|
|
8
|
-
siren,
|
|
9
|
-
tel_fr,
|
|
10
|
-
code_rna,
|
|
11
|
-
code_waldec,
|
|
12
|
-
)
|
|
13
|
-
from csv_detective.detect_fields.other import email, url, uuid, mongo_object_id, json
|
|
14
|
-
|
|
15
|
-
from csv_detective.detect_fields.FR.geo import (
|
|
16
|
-
adresse,
|
|
17
|
-
code_commune_insee,
|
|
18
|
-
commune,
|
|
19
|
-
departement,
|
|
20
|
-
pays,
|
|
21
|
-
region,
|
|
22
|
-
)
|
|
23
|
-
from csv_detective.detect_fields.geo import (
|
|
24
|
-
iso_country_code_alpha2,
|
|
25
|
-
iso_country_code_alpha3,
|
|
26
|
-
iso_country_code_numeric,
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
from csv_detective.detect_fields.FR.temp import jour_de_la_semaine
|
|
30
|
-
from csv_detective.detect_fields.temp import year, date, datetime_iso, datetime_rfc822
|
|
31
|
-
|
|
32
|
-
from csv_detective.detection import (
|
|
33
|
-
detetect_categorical_variable,
|
|
34
|
-
detect_continuous_variable,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# categorical
|
|
39
|
-
def test_detetect_categorical_variable():
|
|
40
|
-
categorical_col = ["type_a"] * 33 + ["type_b"] * 33 + ["type_c"] * 34
|
|
41
|
-
not_categorical_col = [i for i in range(100)]
|
|
42
|
-
|
|
43
|
-
df_dict = {"cat": categorical_col, "not_cat": not_categorical_col}
|
|
44
|
-
df = pd.DataFrame(df_dict, dtype="unicode")
|
|
45
|
-
|
|
46
|
-
res, _ = detetect_categorical_variable(df)
|
|
47
|
-
assert res.values and res.values[0] == "cat"
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
# continuous
|
|
51
|
-
def test_detect_continuous_variable():
|
|
52
|
-
continuous_col = random.random(100)
|
|
53
|
-
continuous_col_2 = [1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7, 21, 3] * 10
|
|
54
|
-
not_continuous_col = ["type_a"] * 33 + ["type_b"] * 33 + ["type_c"] * 34
|
|
55
|
-
|
|
56
|
-
df_dict = {"cont": continuous_col, "not_cont": not_continuous_col}
|
|
57
|
-
df_dict_2 = {"cont": continuous_col_2, "not_cont": not_continuous_col}
|
|
58
|
-
|
|
59
|
-
df = pd.DataFrame(df_dict, dtype="unicode")
|
|
60
|
-
df2 = pd.DataFrame(df_dict_2, dtype="unicode")
|
|
61
|
-
|
|
62
|
-
res = detect_continuous_variable(df)
|
|
63
|
-
res2 = detect_continuous_variable(df2, continuous_th=0.65)
|
|
64
|
-
assert res.values and res.values[0] == "cont"
|
|
65
|
-
assert res2.values and res2.values[0] == "cont"
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# csp_insee
|
|
69
|
-
def test_match_csp_insee():
|
|
70
|
-
val = "employes de la poste"
|
|
71
|
-
assert csp_insee._is(val)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def test_do_not_match_csp_insee():
|
|
75
|
-
val = "super-heros"
|
|
76
|
-
assert not csp_insee._is(val)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
# code_csp_insee
|
|
80
|
-
def test_match_code_csp_insee():
|
|
81
|
-
val = "121f"
|
|
82
|
-
assert code_csp_insee._is(val)
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def test_do_not_match_code_csp_insee():
|
|
86
|
-
val = "121x"
|
|
87
|
-
assert not code_csp_insee._is(val)
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
# sexe
|
|
91
|
-
def test_match_sexe():
|
|
92
|
-
val = "homme"
|
|
93
|
-
assert sexe._is(val)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def test_do_not_match_sexe():
|
|
97
|
-
val = "hermaphrodite"
|
|
98
|
-
assert not sexe._is(val)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
# tel_fr
|
|
102
|
-
def test_match_tel_fr():
|
|
103
|
-
val = "0134643467"
|
|
104
|
-
assert tel_fr._is(val)
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
def test_do_not_match_tel_fr():
|
|
108
|
-
val = "3345689715"
|
|
109
|
-
assert not tel_fr._is(val)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
# email
|
|
113
|
-
def test_match_email():
|
|
114
|
-
val = "cdo_intern@data.gouv.fr"
|
|
115
|
-
assert email._is(val)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def test_do_not_match_email():
|
|
119
|
-
val = "cdo@@gouv.sfd"
|
|
120
|
-
assert not email._is(val)
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
# uuid
|
|
124
|
-
def test_match_uuid():
|
|
125
|
-
val = "884762be-51f3-44c3-b811-1e14c5d89262"
|
|
126
|
-
assert uuid._is(val)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
def test_do_not_match_uuid():
|
|
130
|
-
val = "0610928327"
|
|
131
|
-
assert not uuid._is(val)
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
# Mongo ObjectId
|
|
135
|
-
def test_match_mongo_object_id():
|
|
136
|
-
val = "62320e50f981bc2b57bcc044"
|
|
137
|
-
assert mongo_object_id._is(val)
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def test_do_not_match_mongo_object_id():
|
|
141
|
-
val = "884762be-51f3-44c3-b811-1e14c5d89262"
|
|
142
|
-
assert not mongo_object_id._is(val)
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
# url
|
|
146
|
-
def test_match_url():
|
|
147
|
-
val = "www.etalab.data.gouv.fr"
|
|
148
|
-
assert url._is(val)
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
def test_do_not_match_url():
|
|
152
|
-
val = "c est une phrase"
|
|
153
|
-
assert not url._is(val)
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
# adresse
|
|
157
|
-
def test_match_adresse():
|
|
158
|
-
val = "rue du martyr"
|
|
159
|
-
assert adresse._is(val)
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
def test_do_not_match_adresse():
|
|
163
|
-
val = "bonjour les amis"
|
|
164
|
-
assert not adresse._is(val)
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
# code_commune_insee
|
|
168
|
-
def test_match_code_commune_insee():
|
|
169
|
-
val = "91471"
|
|
170
|
-
assert code_commune_insee._is(val)
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
def test_do_not_match_code_commune_insee():
|
|
174
|
-
val = "914712"
|
|
175
|
-
assert not code_commune_insee._is(val)
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
# commune
|
|
179
|
-
def test_match_commune():
|
|
180
|
-
val = "saint denis"
|
|
181
|
-
assert commune._is(val)
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
def test_do_not_match_commune():
|
|
185
|
-
val = "new york"
|
|
186
|
-
assert not commune._is(val)
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
# departement
|
|
190
|
-
def test_match_departement():
|
|
191
|
-
val = "essonne"
|
|
192
|
-
assert departement._is(val)
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
def test_do_not_match_departement():
|
|
196
|
-
val = "new york"
|
|
197
|
-
assert not departement._is(val)
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
# pays
|
|
201
|
-
def test_match_pays():
|
|
202
|
-
val = "france"
|
|
203
|
-
assert pays._is(val)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def test_do_not_match_pays():
|
|
207
|
-
val = "new york"
|
|
208
|
-
assert not pays._is(val)
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
# region
|
|
212
|
-
def test_match_region():
|
|
213
|
-
val = "bretagne"
|
|
214
|
-
assert region._is(val)
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
def test_do_not_match_region():
|
|
218
|
-
val = "jambon beurre"
|
|
219
|
-
assert not region._is(val)
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
# iso_country_code
|
|
223
|
-
def test_match_iso_country_code():
|
|
224
|
-
val = "FR"
|
|
225
|
-
assert iso_country_code_alpha2._is(val)
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
def test_do_not_match_iso_country_code():
|
|
229
|
-
val = "XX"
|
|
230
|
-
assert not iso_country_code_alpha2._is(val)
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
# iso_country_code alpha-3
|
|
234
|
-
def test_match_iso_country_code_alpha3():
|
|
235
|
-
val = "FRA"
|
|
236
|
-
assert iso_country_code_alpha3._is(val)
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
def test_do_not_match_iso_country_code_alpha3():
|
|
240
|
-
val = "ABC"
|
|
241
|
-
assert not iso_country_code_alpha3._is(val)
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
# iso_country_code numerique
|
|
245
|
-
def test_match_iso_country_code_numeric():
|
|
246
|
-
val = "250"
|
|
247
|
-
print(iso_country_code_numeric._is(val))
|
|
248
|
-
assert iso_country_code_numeric._is(val)
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
def test_do_not_match_iso_country_code_numeric():
|
|
252
|
-
val = "003"
|
|
253
|
-
assert not iso_country_code_numeric._is(val)
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
# jour de la semaine
|
|
257
|
-
def test_match_jour_de_la_semaine():
|
|
258
|
-
val = "lundi"
|
|
259
|
-
assert jour_de_la_semaine._is(val)
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
def test_do_not_match_jour_de_la_semaine():
|
|
263
|
-
val = "jour de la biere"
|
|
264
|
-
assert not jour_de_la_semaine._is(val)
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
# year
|
|
268
|
-
def test_match_year():
|
|
269
|
-
val = "2015"
|
|
270
|
-
assert year._is(val)
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
def test_do_not_match_year():
|
|
274
|
-
val = "20166"
|
|
275
|
-
assert not year._is(val)
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
# date
|
|
279
|
-
def test_match_date():
|
|
280
|
-
val = "1960-08-07"
|
|
281
|
-
assert date._is(val)
|
|
282
|
-
val = '12/02/2007'
|
|
283
|
-
assert date._is(val)
|
|
284
|
-
val = '15 jan 1985'
|
|
285
|
-
assert date._is(val)
|
|
286
|
-
val = '15 décembre 1985'
|
|
287
|
-
assert date._is(val)
|
|
288
|
-
val = '02052003'
|
|
289
|
-
assert date._is(val)
|
|
290
|
-
val = '1993-12/02'
|
|
291
|
-
assert date._is(val)
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
def test_do_not_match_date():
|
|
295
|
-
val = "1993-1993-1993"
|
|
296
|
-
assert not date._is(val)
|
|
297
|
-
val = '39-10-1993'
|
|
298
|
-
assert not date._is(val)
|
|
299
|
-
val = '19-15-1993'
|
|
300
|
-
assert not date._is(val)
|
|
301
|
-
val = '15 tambour 1985'
|
|
302
|
-
assert not date._is(val)
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
# datetime
|
|
306
|
-
def test_match_datetime():
|
|
307
|
-
val = "2021-06-22T10:20:10"
|
|
308
|
-
assert datetime_iso._is(val)
|
|
309
|
-
val = "2021-06-22T30:20:10"
|
|
310
|
-
assert not datetime_iso._is(val)
|
|
311
|
-
|
|
312
|
-
val = "Sun, 06 Nov 1994 08:49:37 GMT"
|
|
313
|
-
assert datetime_rfc822._is(val)
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
# siren
|
|
317
|
-
def test_match_siren():
|
|
318
|
-
val = "552 100 554"
|
|
319
|
-
assert siren._is(val)
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
def test_do_not_match_siren():
|
|
323
|
-
val = "42"
|
|
324
|
-
assert not siren._is(val)
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
# rna
|
|
328
|
-
def test_match_rna():
|
|
329
|
-
val = "W751515517"
|
|
330
|
-
assert code_rna._is(val)
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
def test_do_not_match_rna():
|
|
334
|
-
val = "W111111111111111111111111111111111111"
|
|
335
|
-
assert not code_rna._is(val)
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
def test_match_waldec():
|
|
339
|
-
val = "751P00188854"
|
|
340
|
-
assert code_waldec._is(val)
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
def test_do_not_match_waldec():
|
|
344
|
-
val = "AA751PEE00188854"
|
|
345
|
-
assert not code_waldec._is(val)
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
# json
|
|
349
|
-
def test_match_json():
|
|
350
|
-
val = '{"pomme": "fruit", "reponse": 42}'
|
|
351
|
-
assert json._is(val)
|
|
352
|
-
val = "[1,2,3,4]"
|
|
353
|
-
assert json._is(val)
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
def test_do_not_match_json():
|
|
357
|
-
val = '{"coordinates": [45.783753, 3.049342], "citycode": "63870"}'
|
|
358
|
-
assert not json._is(val)
|
|
359
|
-
val = "666"
|
|
360
|
-
assert not json._is(val)
|
tests/test_file.py
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
from csv_detective import explore_csv
|
|
2
|
-
import pytest
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def test_columns_output_on_file():
|
|
6
|
-
output = explore_csv.routine(
|
|
7
|
-
csv_file_path="tests/a_test_file.csv",
|
|
8
|
-
num_rows=-1,
|
|
9
|
-
output_profile=False,
|
|
10
|
-
save_results=False,
|
|
11
|
-
)
|
|
12
|
-
assert isinstance(output, dict)
|
|
13
|
-
assert output["separator"] == ";"
|
|
14
|
-
assert output["header_row_idx"] == 2
|
|
15
|
-
assert output["header"] == [
|
|
16
|
-
"NUMCOM",
|
|
17
|
-
"NOMCOM",
|
|
18
|
-
"NUMDEP",
|
|
19
|
-
"NOMDEP",
|
|
20
|
-
"NUMEPCI",
|
|
21
|
-
"NOMEPCI",
|
|
22
|
-
"TXCOUVGLO_COM_2014",
|
|
23
|
-
"TXCOUVGLO_DEP_2014",
|
|
24
|
-
"TXCOUVGLO_EPCI_2014",
|
|
25
|
-
"STRUCTURED_INFO",
|
|
26
|
-
"GEO_INFO",
|
|
27
|
-
]
|
|
28
|
-
assert output["total_lines"] == 414
|
|
29
|
-
assert output["nb_duplicates"] == 7
|
|
30
|
-
assert output["columns"]["NOMCOM"]["format"] == "commune"
|
|
31
|
-
assert output["columns"]["NOMDEP"]["format"] == "departement"
|
|
32
|
-
assert output["columns"]["NUMEPCI"]["format"] == "siren"
|
|
33
|
-
assert output["columns"]["STRUCTURED_INFO"]["python_type"] == "json"
|
|
34
|
-
assert output["columns"]["STRUCTURED_INFO"]["format"] == "json"
|
|
35
|
-
assert output["columns"]["GEO_INFO"]["python_type"] == "json"
|
|
36
|
-
assert output["columns"]["GEO_INFO"]["format"] == "json_geojson"
|
|
37
|
-
assert output["columns"]["NUMEPCI"]["format"] == "siren"
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def test_profile_output_on_file():
|
|
41
|
-
output = explore_csv.routine(
|
|
42
|
-
csv_file_path="tests/a_test_file.csv",
|
|
43
|
-
num_rows=-1,
|
|
44
|
-
output_profile=True,
|
|
45
|
-
save_results=False,
|
|
46
|
-
)
|
|
47
|
-
assert all(
|
|
48
|
-
[
|
|
49
|
-
c in list(output["profile"]["NUMCOM"].keys())
|
|
50
|
-
for c in [
|
|
51
|
-
"min",
|
|
52
|
-
"max",
|
|
53
|
-
"mean",
|
|
54
|
-
"std",
|
|
55
|
-
"tops",
|
|
56
|
-
"nb_distinct",
|
|
57
|
-
"nb_missing_values",
|
|
58
|
-
]
|
|
59
|
-
]
|
|
60
|
-
)
|
|
61
|
-
assert len(output["profile"]["NOMCOM"].keys()) == 3
|
|
62
|
-
assert output["profile"]["NUMCOM"]["min"] == 1001
|
|
63
|
-
assert output["profile"]["NUMCOM"]["max"] == 6125
|
|
64
|
-
assert round(output["profile"]["NUMCOM"]["mean"]) == 1245
|
|
65
|
-
assert round(output["profile"]["NUMCOM"]["std"]) == 363
|
|
66
|
-
assert output["profile"]["TXCOUVGLO_COM_2014"]["nb_distinct"] == 296
|
|
67
|
-
assert output["profile"]["TXCOUVGLO_COM_2014"]["nb_missing_values"] == 3
|
|
68
|
-
assert output["profile"]["GEO_INFO"]["nb_distinct"] == 1
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def test_exception():
|
|
72
|
-
with pytest.raises(Exception):
|
|
73
|
-
explore_csv.routine(
|
|
74
|
-
csv_file_path="tests/a_test_file.csv",
|
|
75
|
-
num_rows=50,
|
|
76
|
-
output_profile=True,
|
|
77
|
-
save_results=False,
|
|
78
|
-
)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def test_code_dep_reg_on_file():
|
|
82
|
-
output = explore_csv.routine(
|
|
83
|
-
csv_file_path="tests/b_test_file.csv",
|
|
84
|
-
num_rows=-1,
|
|
85
|
-
output_profile=False,
|
|
86
|
-
save_results=False,
|
|
87
|
-
)
|
|
88
|
-
assert isinstance(output, dict)
|
|
89
|
-
assert output["columns"]["code_departement"]["format"] == "code_departement"
|
|
90
|
-
assert output["columns"]["code_region"]["format"] == "code_region"
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def test_schema_on_file():
|
|
94
|
-
output = explore_csv.routine(
|
|
95
|
-
csv_file_path="tests/b_test_file.csv",
|
|
96
|
-
num_rows=-1,
|
|
97
|
-
output_schema=True,
|
|
98
|
-
)
|
|
99
|
-
assert isinstance(output, dict)
|
|
100
|
-
is_column_dep = False
|
|
101
|
-
is_column_reg = False
|
|
102
|
-
for item in output["schema"]["fields"]:
|
|
103
|
-
if item["name"] == "code_departement":
|
|
104
|
-
is_column_dep = True
|
|
105
|
-
assert item["description"] == "Le code INSEE du département"
|
|
106
|
-
assert item["type"] == "string"
|
|
107
|
-
assert item["formatFR"] == "code_departement"
|
|
108
|
-
assert item["constraints"]["pattern"] == "^(([013-9]\\d|2[AB1-9])$|9\\d{2}$)"
|
|
109
|
-
if item["name"] == "code_region":
|
|
110
|
-
is_column_reg = True
|
|
111
|
-
assert item["description"] == "Le code INSEE de la région"
|
|
112
|
-
assert item["type"] == "string"
|
|
113
|
-
assert item["formatFR"] == "code_region"
|
|
114
|
-
assert item["constraints"]["pattern"] == "^\\d{2}$"
|
|
115
|
-
assert is_column_dep
|
|
116
|
-
assert is_column_reg
|
tests/test_labels.py
DELETED
|
File without changes
|