PyPI - csv-detective - Versions diffs - 0.9.3.dev2382__py3-none-any.whl → 0.9.3.dev2400__py3-none-any.whl - Mend

csv-detective 0.9.3.dev2382__py3-none-any.whl → 0.9.3.dev2400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

csv_detective/detection/__init__.py +0 -0
csv_detective/detection/columns.py +0 -0
csv_detective/detection/encoding.py +0 -0
csv_detective/detection/engine.py +0 -0
csv_detective/detection/formats.py +0 -0
csv_detective/detection/headers.py +0 -0
csv_detective/detection/rows.py +0 -0
csv_detective/detection/separator.py +0 -0
csv_detective/detection/variables.py +0 -0
csv_detective/format.py +0 -0
csv_detective/formats/__init__.py +0 -0
csv_detective/formats/adresse.py +0 -0
csv_detective/formats/booleen.py +0 -0
csv_detective/formats/code_commune_insee.py +0 -0
csv_detective/formats/code_csp_insee.py +0 -0
csv_detective/formats/code_departement.py +0 -0
csv_detective/formats/code_fantoir.py +0 -0
csv_detective/formats/code_import.py +0 -0
csv_detective/formats/code_postal.py +0 -0
csv_detective/formats/code_region.py +0 -0
csv_detective/formats/code_rna.py +0 -0
csv_detective/formats/code_waldec.py +0 -0
csv_detective/formats/commune.py +0 -0
csv_detective/formats/csp_insee.py +0 -0
csv_detective/formats/date.py +0 -0
csv_detective/formats/date_fr.py +0 -0
csv_detective/formats/datetime_aware.py +0 -0
csv_detective/formats/datetime_naive.py +0 -0
csv_detective/formats/datetime_rfc822.py +0 -0
csv_detective/formats/departement.py +0 -0
csv_detective/formats/email.py +0 -0
csv_detective/formats/float.py +0 -0
csv_detective/formats/geojson.py +0 -0
csv_detective/formats/insee_ape700.py +0 -0
csv_detective/formats/insee_canton.py +0 -0
csv_detective/formats/int.py +0 -0
csv_detective/formats/iso_country_code_alpha2.py +0 -0
csv_detective/formats/iso_country_code_alpha3.py +0 -0
csv_detective/formats/iso_country_code_numeric.py +0 -0
csv_detective/formats/jour_de_la_semaine.py +0 -0
csv_detective/formats/json.py +0 -0
csv_detective/formats/latitude_l93.py +0 -0
csv_detective/formats/latitude_wgs.py +0 -0
csv_detective/formats/latitude_wgs_fr_metropole.py +0 -0
csv_detective/formats/latlon_wgs.py +0 -0
csv_detective/formats/longitude_l93.py +0 -0
csv_detective/formats/longitude_wgs.py +0 -0
csv_detective/formats/longitude_wgs_fr_metropole.py +0 -0
csv_detective/formats/lonlat_wgs.py +0 -0
csv_detective/formats/mois_de_lannee.py +0 -0
csv_detective/formats/money.py +0 -0
csv_detective/formats/mongo_object_id.py +0 -0
csv_detective/formats/pays.py +0 -0
csv_detective/formats/percent.py +0 -0
csv_detective/formats/region.py +0 -0
csv_detective/formats/sexe.py +0 -0
csv_detective/formats/siren.py +0 -0
csv_detective/formats/siret.py +0 -0
csv_detective/formats/tel_fr.py +0 -0
csv_detective/formats/uai.py +0 -0
csv_detective/formats/url.py +2 -1
csv_detective/formats/username.py +0 -0
csv_detective/formats/uuid.py +0 -0
csv_detective/formats/year.py +0 -0
csv_detective/output/__init__.py +0 -0
csv_detective/output/dataframe.py +0 -0
csv_detective/output/example.py +0 -0
csv_detective/output/profile.py +0 -0
csv_detective/output/schema.py +0 -0
csv_detective/output/utils.py +0 -0
csv_detective/parsing/__init__.py +0 -0
csv_detective/parsing/columns.py +0 -0
csv_detective/parsing/compression.py +0 -0
csv_detective/parsing/csv.py +0 -0
csv_detective/parsing/excel.py +0 -0
csv_detective/parsing/load.py +0 -0
csv_detective/validate.py +0 -0
{csv_detective-0.9.3.dev2382.dist-info → csv_detective-0.9.3.dev2400.dist-info}/METADATA +17 -18
{csv_detective-0.9.3.dev2382.dist-info → csv_detective-0.9.3.dev2400.dist-info}/RECORD +14 -25
csv_detective-0.9.3.dev2400.dist-info/WHEEL +4 -0
{csv_detective-0.9.3.dev2382.dist-info → csv_detective-0.9.3.dev2400.dist-info}/entry_points.txt +1 -0
csv_detective-0.9.3.dev2382.dist-info/WHEEL +0 -5
csv_detective-0.9.3.dev2382.dist-info/licenses/LICENSE +0 -21
csv_detective-0.9.3.dev2382.dist-info/top_level.txt +0 -4
tests/__init__.py +0 -0
tests/test_example.py +0 -67
tests/test_fields.py +0 -167
tests/test_file.py +0 -413
tests/test_labels.py +0 -26
tests/test_structure.py +0 -45
tests/test_validation.py +0 -108
venv/bin/activate_this.py +0 -38
venv/bin/runxlrd.py +0 -410

csv_detective/detection/__init__.py CHANGED Viewed

File without changes

csv_detective/detection/columns.py CHANGED Viewed

File without changes

csv_detective/detection/encoding.py CHANGED Viewed

File without changes

csv_detective/detection/engine.py CHANGED Viewed

File without changes

csv_detective/detection/formats.py CHANGED Viewed

File without changes

csv_detective/detection/headers.py CHANGED Viewed

File without changes

csv_detective/detection/rows.py CHANGED Viewed

File without changes

csv_detective/detection/separator.py CHANGED Viewed

File without changes

csv_detective/detection/variables.py CHANGED Viewed

File without changes

csv_detective/format.py CHANGED Viewed

File without changes

csv_detective/formats/__init__.py CHANGED Viewed

File without changes

csv_detective/formats/adresse.py CHANGED Viewed

File without changes

csv_detective/formats/booleen.py CHANGED Viewed

File without changes

csv_detective/formats/code_commune_insee.py CHANGED Viewed

File without changes

csv_detective/formats/code_csp_insee.py CHANGED Viewed

File without changes

csv_detective/formats/code_departement.py CHANGED Viewed

File without changes

csv_detective/formats/code_fantoir.py CHANGED Viewed

File without changes

csv_detective/formats/code_import.py CHANGED Viewed

File without changes

csv_detective/formats/code_postal.py CHANGED Viewed

File without changes

csv_detective/formats/code_region.py CHANGED Viewed

File without changes

csv_detective/formats/code_rna.py CHANGED Viewed

File without changes

csv_detective/formats/code_waldec.py CHANGED Viewed

File without changes

csv_detective/formats/commune.py CHANGED Viewed

File without changes

csv_detective/formats/csp_insee.py CHANGED Viewed

File without changes

csv_detective/formats/date.py CHANGED Viewed

File without changes

csv_detective/formats/date_fr.py CHANGED Viewed

File without changes

csv_detective/formats/datetime_aware.py CHANGED Viewed

File without changes

csv_detective/formats/datetime_naive.py CHANGED Viewed

File without changes

csv_detective/formats/datetime_rfc822.py CHANGED Viewed

File without changes

csv_detective/formats/departement.py CHANGED Viewed

File without changes

csv_detective/formats/email.py CHANGED Viewed

File without changes

csv_detective/formats/float.py CHANGED Viewed

File without changes

csv_detective/formats/geojson.py CHANGED Viewed

File without changes

csv_detective/formats/insee_ape700.py CHANGED Viewed

File without changes

csv_detective/formats/insee_canton.py CHANGED Viewed

File without changes

csv_detective/formats/int.py CHANGED Viewed

File without changes

csv_detective/formats/iso_country_code_alpha2.py CHANGED Viewed

File without changes

csv_detective/formats/iso_country_code_alpha3.py CHANGED Viewed

File without changes

csv_detective/formats/iso_country_code_numeric.py CHANGED Viewed

File without changes

csv_detective/formats/jour_de_la_semaine.py CHANGED Viewed

File without changes

csv_detective/formats/json.py CHANGED Viewed

File without changes

csv_detective/formats/latitude_l93.py CHANGED Viewed

File without changes

csv_detective/formats/latitude_wgs.py CHANGED Viewed

File without changes

csv_detective/formats/latitude_wgs_fr_metropole.py CHANGED Viewed

File without changes

csv_detective/formats/latlon_wgs.py CHANGED Viewed

File without changes

csv_detective/formats/longitude_l93.py CHANGED Viewed

File without changes

csv_detective/formats/longitude_wgs.py CHANGED Viewed

File without changes

csv_detective/formats/longitude_wgs_fr_metropole.py CHANGED Viewed

File without changes

csv_detective/formats/lonlat_wgs.py CHANGED Viewed

File without changes

csv_detective/formats/mois_de_lannee.py CHANGED Viewed

File without changes

csv_detective/formats/money.py CHANGED Viewed

File without changes

csv_detective/formats/mongo_object_id.py CHANGED Viewed

File without changes

csv_detective/formats/pays.py CHANGED Viewed

File without changes

csv_detective/formats/percent.py CHANGED Viewed

File without changes

csv_detective/formats/region.py CHANGED Viewed

File without changes

csv_detective/formats/sexe.py CHANGED Viewed

File without changes

csv_detective/formats/siren.py CHANGED Viewed

File without changes

csv_detective/formats/siret.py CHANGED Viewed

File without changes

csv_detective/formats/tel_fr.py CHANGED Viewed

File without changes

csv_detective/formats/uai.py CHANGED Viewed

File without changes

csv_detective/formats/url.py CHANGED Viewed

@@ -20,7 +20,7 @@ labels = [
 pattern = re.compile(
     r"^((https?|ftp)://|www\.)(([A-Za-z0-9-]+\.)+[A-Za-z]{2,6})"
-    r"(/[A-Za-z0-9._~:/?#[@!$&'()*+,;=%-]*)?$"
+    r"(/[A-Za-z\u00C0-\u024F\u1E00-\u1EFF0-9\s._~:/?#[@!$&'()*+,;=%-]*)?$"
 )
@@ -40,6 +40,7 @@ _test_values = {
             "aaaaaaaa-1111-bbbb-2222-cccccccccccc/data/"
             "?score__greater=0.9&decompte__exact=13"
         ),
+        "https://une-ville.fr/délibérations/2025/Doc avec espaces et àccëñts.pdf",
     ],
     False: ["tmp@data.gouv.fr"],
 }

csv_detective/formats/username.py CHANGED Viewed

File without changes

csv_detective/formats/uuid.py CHANGED Viewed

File without changes

csv_detective/formats/year.py CHANGED Viewed

File without changes

csv_detective/output/__init__.py CHANGED Viewed

File without changes

csv_detective/output/dataframe.py CHANGED Viewed

File without changes

csv_detective/output/example.py CHANGED Viewed

File without changes

csv_detective/output/profile.py CHANGED Viewed

File without changes

csv_detective/output/schema.py CHANGED Viewed

File without changes

csv_detective/output/utils.py CHANGED Viewed

File without changes

csv_detective/parsing/__init__.py CHANGED Viewed

File without changes

csv_detective/parsing/columns.py CHANGED Viewed

File without changes

csv_detective/parsing/compression.py CHANGED Viewed

File without changes

csv_detective/parsing/csv.py CHANGED Viewed

File without changes

csv_detective/parsing/excel.py CHANGED Viewed

File without changes

csv_detective/parsing/load.py CHANGED Viewed

File without changes

csv_detective/validate.py CHANGED Viewed

File without changes

{csv_detective-0.9.3.dev2382.dist-info → csv_detective-0.9.3.dev2400.dist-info}/METADATA RENAMED Viewed

@@ -1,33 +1,32 @@
-Metadata-Version: 2.4
+Metadata-Version: 2.3
 Name: csv-detective
-Version: 0.9.3.dev2382
+Version: 0.9.3.dev2400
 Summary: Detect tabular files column content
-Author-email: "data.gouv.fr" <opendatateam@data.gouv.fr>
-License: MIT
-Project-URL: Source, https://github.com/datagouv/csv_detective
 Keywords: CSV,data processing,encoding,guess,parser,tabular
-Requires-Python: <3.15,>=3.10
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: dateparser<2,>=1.2.0
+Author: data.gouv.fr
+Author-email: data.gouv.fr <opendatateam@data.gouv.fr>
+License: MIT
+Requires-Dist: dateparser>=1.2.0,<2
 Requires-Dist: faust-cchardet==2.1.19
-Requires-Dist: pandas<3,>=2.2.0
-Requires-Dist: python-dateutil<3,>=2.8.2
-Requires-Dist: Unidecode<2,>=1.3.6
+Requires-Dist: pandas>=2.2.0,<3
+Requires-Dist: python-dateutil>=2.8.2,<3
+Requires-Dist: unidecode>=1.3.6,<2
 Requires-Dist: openpyxl>=3.1.5
 Requires-Dist: xlrd>=2.0.1
 Requires-Dist: odfpy>=1.4.1
-Requires-Dist: requests<3,>=2.32.3
+Requires-Dist: requests>=2.32.3,<3
 Requires-Dist: python-magic>=0.4.27
 Requires-Dist: frformat==0.4.0
-Requires-Dist: Faker>=33.0.0
+Requires-Dist: faker>=33.0.0
 Requires-Dist: rstr>=3.2.2
 Requires-Dist: more-itertools>=10.8.0
+Requires-Dist: pytest>=8.3.0 ; extra == 'dev'
+Requires-Dist: responses>=0.25.0 ; extra == 'dev'
+Requires-Dist: ruff>=0.9.3 ; extra == 'dev'
+Requires-Python: >=3.10, <3.15
+Project-URL: Source, https://github.com/datagouv/csv_detective
 Provides-Extra: dev
-Requires-Dist: pytest>=8.3.0; extra == "dev"
-Requires-Dist: responses>=0.25.0; extra == "dev"
-Requires-Dist: ruff>=0.9.3; extra == "dev"
-Dynamic: license-file
+Description-Content-Type: text/markdown
 # CSV Detective

{csv_detective-0.9.3.dev2382.dist-info → csv_detective-0.9.3.dev2400.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,5 @@
 csv_detective/__init__.py,sha256=zlYElTOp_I2_VG7ZdOTuAu0wuCXSc0cr3sH6gtk2bcg,152
 csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
-csv_detective/explore_csv.py,sha256=-LCHr7vyT0Q0oLtXeOO8pEevJ6-8Ib9JP3D7nVgZM8o,7090
-csv_detective/format.py,sha256=XX_cSTQc0jlsQq3GUqHi7Cz36AiRrpjrwPmeoOTLMvo,2396
-csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
-csv_detective/validate.py,sha256=XldlbGkUlPaIh0y4z9iaWlmmahwCrD1900s5Cxlq5wI,5430
 csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
 csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
@@ -13,6 +9,8 @@ csv_detective/detection/headers.py,sha256=95pTL524Sy5PGxyQ03ofFUaamvlmkxTJQe8u6H
 csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
 csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
 csv_detective/detection/variables.py,sha256=-QtZOB96z3pWbqnZ-c1RU3yzoYqcO61A0JzeS6JbkxY,3576
+csv_detective/explore_csv.py,sha256=-LCHr7vyT0Q0oLtXeOO8pEevJ6-8Ib9JP3D7nVgZM8o,7090
+csv_detective/format.py,sha256=XX_cSTQc0jlsQq3GUqHi7Cz36AiRrpjrwPmeoOTLMvo,2396
 csv_detective/formats/__init__.py,sha256=Egiy29kcG3Oz2eE2maYhD3wP29zOSOWyRlOpGD5LGvU,318
 csv_detective/formats/adresse.py,sha256=jALDpEDAWyAcgqEfNVRg_W1r6XaYuJKD_jAaP2l-bxk,1943
 csv_detective/formats/booleen.py,sha256=AnDDKShkSYpWO4POhwY2V7_C4yPWbmqBu8CJPgQ9Gwc,648
@@ -27,6 +25,11 @@ csv_detective/formats/code_rna.py,sha256=WExlQtlAUfOFT4N3MKsMBhZVxTdNzgexFjmXhZd
 csv_detective/formats/code_waldec.py,sha256=kJEJfikbhMfVwtA8hBpup0tpeSFoY_rWrEdXQxgNwhg,297
 csv_detective/formats/commune.py,sha256=oVpwINGqpwMOT43KkasozipJ9hBeoQ5FrKV_wIeVJGE,532
 csv_detective/formats/csp_insee.py,sha256=HE6NK6Sw91mLFeAAKwWUXZZfXX6fiA0zK4RI4YdkUFY,656
+csv_detective/formats/data/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
+csv_detective/formats/data/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
+csv_detective/formats/data/iso_country_code_alpha2.txt,sha256=YyPlDqCdz65ecf4Wes_r0P4rDSJG35niXtjc4MmctXM,1740
+csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
+csv_detective/formats/data/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
 csv_detective/formats/date.py,sha256=X4ohXaFO8cXPJktUSumc3bfdlbDIWEYTG8S9ugVRcsE,2730
 csv_detective/formats/date_fr.py,sha256=3hTw5RommrhcgECFRSt9KgyB9zyi1j4W3UygEHmRgoE,502
 csv_detective/formats/datetime_aware.py,sha256=-1ZBix6vYlYXTvhXrijP-98AN7iPB0x_DbbwU1QjMCI,1470
@@ -63,15 +66,10 @@ csv_detective/formats/siren.py,sha256=ieLe50vdSnkXadcUI8VXnnId9GFGHyIBWVTP6bJtyM
 csv_detective/formats/siret.py,sha256=ehkZgOH-HggN6IgxF4G0DMut_6giZ3gc4g9wMdwZFHQ,997
 csv_detective/formats/tel_fr.py,sha256=yKCqIlqKO2yKucCoCjYfSjqNKfTjqFcmNXxg6THG0WE,624
 csv_detective/formats/uai.py,sha256=uT5gjdTmoFH9QPZdTFkJgiyuKLW0B6KmT6yqHQeaeOU,711
-csv_detective/formats/url.py,sha256=GYE9j_i4kpEQueBXa1Fla0wk8_sc0n230GL3KaIRvwY,932
+csv_detective/formats/url.py,sha256=j6tCbcEzQw7U53ixeeFfhzueN8syVgQsjmAmY7RRWdU,1049
 csv_detective/formats/username.py,sha256=y38OggfWpEQsGi0JnD9QRM30musa29lO6nz-qybR24U,249
 csv_detective/formats/uuid.py,sha256=ekMEFfzQtz0cLudzmu3AoCM0Yf5pu23qAcFNFgHWJ1A,346
 csv_detective/formats/year.py,sha256=pkAfYPKZdy0g1ZoHGgJNpgTS5y5weGEKXCVMGaxIX8k,472
-csv_detective/formats/data/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
-csv_detective/formats/data/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
-csv_detective/formats/data/iso_country_code_alpha2.txt,sha256=YyPlDqCdz65ecf4Wes_r0P4rDSJG35niXtjc4MmctXM,1740
-csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
-csv_detective/formats/data/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
 csv_detective/output/__init__.py,sha256=ALSq_tgX7rGyh--7rmbKz8wHkmResN0h7mNujndow3w,2103
 csv_detective/output/dataframe.py,sha256=TyBc2ObaVUns_ydJWOMKmCYvuj7ddxag0QN3z37g3GE,3219
 csv_detective/output/example.py,sha256=8LWheSBYCeDFfarbnmzBrdCbTd8Alh1U4pfXMKfabOw,8630
@@ -85,18 +83,9 @@ csv_detective/parsing/csv.py,sha256=0T0gpaXzwJo-sq41IoLQD704GiMUYeDVVASVbat-zWg,
 csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
 csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
 csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
-csv_detective-0.9.3.dev2382.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
-tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
-tests/test_fields.py,sha256=EWHIKwRSdIh74bBSoozYmZBETf7V03JMWpglyxA0ci0,5616
-tests/test_file.py,sha256=MxJOWwhRG2Xm1_m3C9x8CS9FepjUebET-6EsMi3DvmY,13125
-tests/test_labels.py,sha256=kDPerWC3_J3l1p5I3-MHwz7BmhcuxZAws_wSgHCHUuI,536
-tests/test_structure.py,sha256=XDbviuuvk-0Mu9Y9PI6He2e5hry2dXVJ6yBVwEqF_2o,1043
-tests/test_validation.py,sha256=9djBT-PDhu_563OFgWyE20o-wPEWEIQGXp6Pjh0_MQM,3463
-venv/bin/activate_this.py,sha256=wS7qPipy8R-dS_0ICD8PqqUQ8F-PrtcpiJw2DUPngYM,1287
-venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
-csv_detective-0.9.3.dev2382.dist-info/METADATA,sha256=gGotUngB4Ch3dhlapEv97KEq1JUX-xI1NsT51rOCZ1U,11084
-csv_detective-0.9.3.dev2382.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-csv_detective-0.9.3.dev2382.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
-csv_detective-0.9.3.dev2382.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
-csv_detective-0.9.3.dev2382.dist-info/RECORD,,
+csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
+csv_detective/validate.py,sha256=XldlbGkUlPaIh0y4z9iaWlmmahwCrD1900s5Cxlq5wI,5430
+csv_detective-0.9.3.dev2400.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
+csv_detective-0.9.3.dev2400.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
+csv_detective-0.9.3.dev2400.dist-info/METADATA,sha256=XBMZp650BNXuUmMPEw7ffC7tNfMD69JGd0diGhKCIQE,11063
+csv_detective-0.9.3.dev2400.dist-info/RECORD,,

csv_detective-0.9.3.dev2400.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: uv 0.9.15
+Root-Is-Purelib: true
+Tag: py3-none-any

{csv_detective-0.9.3.dev2382.dist-info → csv_detective-0.9.3.dev2400.dist-info}/entry_points.txt RENAMED Viewed

@@ -1,2 +1,3 @@
 [console_scripts]
 csv_detective = csv_detective.cli:run

csv_detective-0.9.3.dev2382.dist-info/WHEEL DELETED Viewed

@@ -1,5 +0,0 @@
-Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
-Root-Is-Purelib: true
-Tag: py3-none-any

csv_detective-0.9.3.dev2382.dist-info/licenses/LICENSE DELETED Viewed

@@ -1,21 +0,0 @@
-MIT License
-Copyright (c) 2025 data.gouv.fr
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

csv_detective-0.9.3.dev2382.dist-info/top_level.txt DELETED Viewed

@@ -1,4 +0,0 @@
-csv_detective
-dist
-tests
-venv

tests/__init__.py DELETED Viewed

File without changes

tests/test_example.py DELETED Viewed

@@ -1,67 +0,0 @@
-import re
-from uuid import UUID
-from csv_detective.output.example import create_example_csv_file
-def test_example_creation():
-    fields = [
-        {
-            "name": "id_unique",
-            "type": "id",
-        },
-        {
-            "name": "nom_modele",
-            "type": "str",
-            "args": {"length": 20},
-        },
-        {
-            "name": "siret",
-            "type": "str",
-            "args": {"pattern": "^\\d{14}$"},
-        },
-        {
-            "name": "type_producteur",
-            "type": "str",
-            "args": {"enum": ["privé", "public", "association"]},
-        },
-        {
-            "name": "date_creation",
-            "type": "date",
-            "args": {
-                "date_range": ["1996-02-13", "2000-01-28"],
-                "format": "%Y-%m-%d",
-            },
-        },
-        {
-            "name": "url_produit",
-            "type": "url",
-        },
-        {
-            "name": "nb_produits",
-            "type": "int",
-        },
-        {"name": "note", "type": "float", "args": {"num_range": [1, 20]}},
-    ]
-    df = create_example_csv_file(
-        fields=fields,
-        file_length=5,
-        output_name=None,
-    )
-    assert len(df) == 5
-    assert all(UUID(_) for _ in df["id_unique"])
-    assert all(len(_) == 20 for _ in df["nom_modele"])
-    assert all(re.match("^\\d{14}$", _) for _ in df["siret"])
-    assert all(_ in ["privé", "public", "association"] for _ in df["type_producteur"])
-    assert all(_ >= "1996-02-13" and _ <= "2000-01-28" for _ in df["date_creation"])
-    assert all(_.startswith("http") for _ in df["url_produit"])
-    assert all(isinstance(_, int) for _ in df["nb_produits"])
-    assert all(_ >= 1 and _ <= 20 for _ in df["note"])
-def test_example_from_tableschema():
-    df = create_example_csv_file(
-        schema_path="https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/2.3.1/schema-statique.json",
-        output_name=None,
-    )
-    assert len(df) == 10

tests/test_fields.py DELETED Viewed

@@ -1,167 +0,0 @@
-from datetime import date as _date
-from datetime import datetime as _datetime
-from unittest.mock import patch
-import pandas as pd
-import pytest
-from numpy import random
-from csv_detective.detection.variables import (
-    detect_categorical_variable,
-    detect_continuous_variable,
-)
-from csv_detective.format import FormatsManager
-from csv_detective.output.dataframe import cast
-from csv_detective.output.utils import prepare_output_dict
-from csv_detective.parsing.columns import test_col as col_test  # to prevent pytest from testing it
-fmtm = FormatsManager()
-def test_all_format_funcs_return_bool():
-    for format in fmtm.formats.values():
-        for tmp in ["a", "9", "3.14", "[]", float("nan"), "2021-06-22 10:20:10"]:
-            assert isinstance(format.func(tmp), bool)
-# categorical
-def test_detect_categorical_variable():
-    categorical_col = ["type_a"] * 33 + ["type_b"] * 33 + ["type_c"] * 34
-    categorical_col2 = [str(k // 20) for k in range(100)]
-    not_categorical_col = [i for i in range(100)]
-    df_dict = {
-        "cat": categorical_col,
-        "cat2": categorical_col2,
-        "not_cat": not_categorical_col,
-    }
-    df = pd.DataFrame(df_dict, dtype=str)
-    res, _ = detect_categorical_variable(df)
-    assert len(res) and all(k in res for k in ["cat", "cat2"])
-# continuous
-def test_detect_continuous_variable():
-    continuous_col = random.random(100)
-    continuous_col_2 = [1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7, 21, 3] * 10
-    not_continuous_col = ["type_a"] * 33 + ["type_b"] * 33 + ["type_c"] * 34
-    df_dict = {"cont": continuous_col, "not_cont": not_continuous_col}
-    df_dict_2 = {"cont": continuous_col_2, "not_cont": not_continuous_col}
-    df = pd.DataFrame(df_dict, dtype=str)
-    df2 = pd.DataFrame(df_dict_2, dtype=str)
-    res = detect_continuous_variable(df)
-    res2 = detect_continuous_variable(df2, continuous_th=0.65)
-    assert res.values and res.values[0] == "cont"
-    assert res2.values and res2.values[0] == "cont"
-# we could also have a function here to add all True values of (almost)
-# each field to the False values of all others (to do when parenthood is added)
-def test_all_fields_have_tests():
-    for format in fmtm.formats.values():
-        valid = format._test_values
-        # checking structure
-        assert all(
-            isinstance(key, bool)
-            and isinstance(vals, list)
-            and all(isinstance(val, str) for val in vals)
-            for key, vals in valid.items()
-        )
-        # checking that we have valid and invalid cases for each
-        assert all(b in valid.keys() for b in [True, False])
-# this is based on the _test_values of each <format>.py file
-@pytest.mark.parametrize(
-    "args",
-    (
-        (format.func, value, valid)
-        for valid in [True, False]
-        for format in fmtm.formats.values()
-        for value in format._test_values[valid]
-    ),
-)
-def test_fields_with_values(args):
-    func, value, valid = args
-    assert func(value) is valid
-@pytest.mark.parametrize(
-    "args",
-    (
-        ("1.9", "float", float),
-        ("oui", "bool", bool),
-        ("[1, 2]", "json", list),
-        ('{"a": 1}', "json", dict),
-        ("2022-08-01", "date", _date),
-        ("2024-09-23 17:32:07", "datetime", _datetime),
-        ("2024-09-23 17:32:07+02:00", "datetime", _datetime),
-    ),
-)
-def test_cast(args):
-    value, detected_type, cast_type = args
-    assert isinstance(cast(value, detected_type), cast_type)
-@pytest.mark.parametrize(
-    "args",
-    (
-        # there is a specific numerical format => specific wins
-        ({"int": 1, "float": 1, "latitude_wgs": 1}, "latitude_wgs"),
-        # scores are equal for related formats => priority wins
-        ({"int": 1, "float": 1}, "int"),
-        # score is lower for priority format => secondary wins
-        ({"int": 0.5, "float": 1}, "float"),
-        # score is lower for priority format, but is 1 => priority wins
-        ({"int": 1, "float": 1.25}, "int"),
-        # two rounds of priority => highest priority wins
-        ({"latlon_wgs": 1, "lonlat_wgs": 1, "json": 1}, "latlon_wgs"),
-        # no detection => default to string
-        ({}, "string"),
-    ),
-)
-def test_priority(args):
-    detections, expected = args
-    col = "col1"
-    output = prepare_output_dict(pd.DataFrame({col: detections}), limited_output=True)
-    assert output[col]["format"] == expected
-@pytest.mark.parametrize(
-    "args",
-    (
-        ("1996-02-13", fmtm.formats["date"]),
-        ("28/01/2000", fmtm.formats["date"]),
-        ("2025-08-20T14:30:00+02:00", fmtm.formats["datetime_aware"]),
-        ("2025/08/20 14:30:00.2763-12:00", fmtm.formats["datetime_aware"]),
-        ("1925_12_20T14:30:00.2763", fmtm.formats["datetime_naive"]),
-        ("1925 12 20 14:30:00Z", fmtm.formats["datetime_aware"]),
-    ),
-)
-def test_early_detection(args):
-    value, format = args
-    with patch("csv_detective.formats.date.date_casting") as mock_func:
-        res = format.func(value)
-        assert res
-        mock_func.assert_not_called()
-def test_all_proportion_1():
-    # building a table that uses only correct values for these formats, except on one row
-    table = pd.DataFrame(
-        {
-            name: (format._test_values[True] * 100)[:100] + ["not_suitable"]
-            for name, format in fmtm.formats.items()
-            if format.proportion == 1
-        }
-    )
-    # testing columns for all formats
-    returned_table = col_test(table, fmtm.formats, limited_output=True)
-    # the analysis should have found no match on any format
-    assert all(returned_table[col].sum() == 0 for col in table.columns)

csv-detective 0.9.3.dev2382__py3-none-any.whl → 0.9.3.dev2400__py3-none-any.whl

csv-detective 0.9.3.dev2382__py3-none-any.whl → 0.9.3.dev2400__py3-none-any.whl