csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. csv_detective/__init__.py +7 -1
  2. csv_detective/cli.py +33 -21
  3. csv_detective/{detect_fields/FR → detection}/__init__.py +0 -0
  4. csv_detective/detection/columns.py +89 -0
  5. csv_detective/detection/encoding.py +29 -0
  6. csv_detective/detection/engine.py +46 -0
  7. csv_detective/detection/formats.py +156 -0
  8. csv_detective/detection/headers.py +28 -0
  9. csv_detective/detection/rows.py +18 -0
  10. csv_detective/detection/separator.py +44 -0
  11. csv_detective/detection/variables.py +97 -0
  12. csv_detective/explore_csv.py +151 -377
  13. csv_detective/format.py +67 -0
  14. csv_detective/formats/__init__.py +9 -0
  15. csv_detective/formats/adresse.py +116 -0
  16. csv_detective/formats/binary.py +26 -0
  17. csv_detective/formats/booleen.py +35 -0
  18. csv_detective/formats/code_commune_insee.py +26 -0
  19. csv_detective/formats/code_csp_insee.py +36 -0
  20. csv_detective/formats/code_departement.py +29 -0
  21. csv_detective/formats/code_fantoir.py +21 -0
  22. csv_detective/formats/code_import.py +17 -0
  23. csv_detective/formats/code_postal.py +25 -0
  24. csv_detective/formats/code_region.py +22 -0
  25. csv_detective/formats/code_rna.py +29 -0
  26. csv_detective/formats/code_waldec.py +17 -0
  27. csv_detective/formats/commune.py +27 -0
  28. csv_detective/formats/csp_insee.py +31 -0
  29. csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
  30. csv_detective/formats/date.py +99 -0
  31. csv_detective/formats/date_fr.py +22 -0
  32. csv_detective/formats/datetime_aware.py +45 -0
  33. csv_detective/formats/datetime_naive.py +48 -0
  34. csv_detective/formats/datetime_rfc822.py +24 -0
  35. csv_detective/formats/departement.py +37 -0
  36. csv_detective/formats/email.py +28 -0
  37. csv_detective/formats/float.py +29 -0
  38. csv_detective/formats/geojson.py +36 -0
  39. csv_detective/formats/insee_ape700.py +31 -0
  40. csv_detective/formats/insee_canton.py +28 -0
  41. csv_detective/formats/int.py +23 -0
  42. csv_detective/formats/iso_country_code_alpha2.py +30 -0
  43. csv_detective/formats/iso_country_code_alpha3.py +30 -0
  44. csv_detective/formats/iso_country_code_numeric.py +31 -0
  45. csv_detective/formats/jour_de_la_semaine.py +41 -0
  46. csv_detective/formats/json.py +20 -0
  47. csv_detective/formats/latitude_l93.py +48 -0
  48. csv_detective/formats/latitude_wgs.py +42 -0
  49. csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
  50. csv_detective/formats/latlon_wgs.py +53 -0
  51. csv_detective/formats/longitude_l93.py +39 -0
  52. csv_detective/formats/longitude_wgs.py +32 -0
  53. csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
  54. csv_detective/formats/lonlat_wgs.py +36 -0
  55. csv_detective/formats/mois_de_lannee.py +48 -0
  56. csv_detective/formats/money.py +18 -0
  57. csv_detective/formats/mongo_object_id.py +14 -0
  58. csv_detective/formats/pays.py +35 -0
  59. csv_detective/formats/percent.py +16 -0
  60. csv_detective/formats/region.py +70 -0
  61. csv_detective/formats/sexe.py +17 -0
  62. csv_detective/formats/siren.py +37 -0
  63. csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -29
  64. csv_detective/formats/tel_fr.py +36 -0
  65. csv_detective/formats/uai.py +36 -0
  66. csv_detective/formats/url.py +46 -0
  67. csv_detective/formats/username.py +14 -0
  68. csv_detective/formats/uuid.py +16 -0
  69. csv_detective/formats/year.py +28 -0
  70. csv_detective/output/__init__.py +65 -0
  71. csv_detective/output/dataframe.py +96 -0
  72. csv_detective/output/example.py +250 -0
  73. csv_detective/output/profile.py +119 -0
  74. csv_detective/{schema_generation.py → output/schema.py} +268 -343
  75. csv_detective/output/utils.py +74 -0
  76. csv_detective/{detect_fields/FR/geo → parsing}/__init__.py +0 -0
  77. csv_detective/parsing/columns.py +235 -0
  78. csv_detective/parsing/compression.py +11 -0
  79. csv_detective/parsing/csv.py +56 -0
  80. csv_detective/parsing/excel.py +167 -0
  81. csv_detective/parsing/load.py +111 -0
  82. csv_detective/parsing/text.py +56 -0
  83. csv_detective/utils.py +23 -196
  84. csv_detective/validate.py +138 -0
  85. csv_detective-0.9.3.dev2438.dist-info/METADATA +267 -0
  86. csv_detective-0.9.3.dev2438.dist-info/RECORD +92 -0
  87. csv_detective-0.9.3.dev2438.dist-info/WHEEL +4 -0
  88. {csv_detective-0.6.7.dist-info → csv_detective-0.9.3.dev2438.dist-info}/entry_points.txt +1 -0
  89. csv_detective/all_packages.txt +0 -104
  90. csv_detective/detect_fields/FR/geo/adresse/__init__.py +0 -100
  91. csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -24
  92. csv_detective/detect_fields/FR/geo/code_commune_insee/code_commune_insee.txt +0 -37600
  93. csv_detective/detect_fields/FR/geo/code_departement/__init__.py +0 -11
  94. csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -15
  95. csv_detective/detect_fields/FR/geo/code_fantoir/code_fantoir.txt +0 -26122
  96. csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -19
  97. csv_detective/detect_fields/FR/geo/code_postal/code_postal.txt +0 -36822
  98. csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -27
  99. csv_detective/detect_fields/FR/geo/commune/__init__.py +0 -21
  100. csv_detective/detect_fields/FR/geo/commune/commune.txt +0 -36745
  101. csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -19
  102. csv_detective/detect_fields/FR/geo/departement/departement.txt +0 -101
  103. csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +0 -20
  104. csv_detective/detect_fields/FR/geo/insee_canton/canton2017.txt +0 -2055
  105. csv_detective/detect_fields/FR/geo/insee_canton/cantons.txt +0 -2055
  106. csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -13
  107. csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
  108. csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -13
  109. csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
  110. csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -17
  111. csv_detective/detect_fields/FR/geo/pays/pays.txt +0 -248
  112. csv_detective/detect_fields/FR/geo/region/__init__.py +0 -16
  113. csv_detective/detect_fields/FR/geo/region/region.txt +0 -44
  114. csv_detective/detect_fields/FR/other/__init__.py +0 -0
  115. csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +0 -26
  116. csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
  117. csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -8
  118. csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -12
  119. csv_detective/detect_fields/FR/other/csp_insee/__init__.py +0 -16
  120. csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
  121. csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +0 -16
  122. csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -9
  123. csv_detective/detect_fields/FR/other/siren/__init__.py +0 -18
  124. csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -15
  125. csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
  126. csv_detective/detect_fields/FR/temp/__init__.py +0 -0
  127. csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +0 -23
  128. csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +0 -37
  129. csv_detective/detect_fields/__init__.py +0 -57
  130. csv_detective/detect_fields/geo/__init__.py +0 -0
  131. csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
  132. csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
  133. csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
  134. csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -22
  135. csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
  136. csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -15
  137. csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
  138. csv_detective/detect_fields/other/__init__.py +0 -0
  139. csv_detective/detect_fields/other/booleen/__init__.py +0 -21
  140. csv_detective/detect_fields/other/email/__init__.py +0 -8
  141. csv_detective/detect_fields/other/float/__init__.py +0 -17
  142. csv_detective/detect_fields/other/int/__init__.py +0 -12
  143. csv_detective/detect_fields/other/json/__init__.py +0 -24
  144. csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
  145. csv_detective/detect_fields/other/twitter/__init__.py +0 -8
  146. csv_detective/detect_fields/other/url/__init__.py +0 -11
  147. csv_detective/detect_fields/other/uuid/__init__.py +0 -11
  148. csv_detective/detect_fields/temp/__init__.py +0 -0
  149. csv_detective/detect_fields/temp/date/__init__.py +0 -62
  150. csv_detective/detect_fields/temp/datetime_iso/__init__.py +0 -18
  151. csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +0 -21
  152. csv_detective/detect_fields/temp/year/__init__.py +0 -10
  153. csv_detective/detect_labels/FR/__init__.py +0 -0
  154. csv_detective/detect_labels/FR/geo/__init__.py +0 -0
  155. csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -40
  156. csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -42
  157. csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -33
  158. csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -33
  159. csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -41
  160. csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -33
  161. csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -33
  162. csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -47
  163. csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -33
  164. csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -54
  165. csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -55
  166. csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -44
  167. csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -45
  168. csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -45
  169. csv_detective/detect_labels/FR/geo/region/__init__.py +0 -45
  170. csv_detective/detect_labels/FR/other/__init__.py +0 -0
  171. csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -33
  172. csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -38
  173. csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -33
  174. csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -37
  175. csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -33
  176. csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -40
  177. csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -33
  178. csv_detective/detect_labels/FR/other/siren/__init__.py +0 -41
  179. csv_detective/detect_labels/FR/other/siret/__init__.py +0 -40
  180. csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -45
  181. csv_detective/detect_labels/FR/other/uai/__init__.py +0 -50
  182. csv_detective/detect_labels/FR/temp/__init__.py +0 -0
  183. csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -41
  184. csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -33
  185. csv_detective/detect_labels/__init__.py +0 -43
  186. csv_detective/detect_labels/geo/__init__.py +0 -0
  187. csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -41
  188. csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -41
  189. csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -41
  190. csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -42
  191. csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -55
  192. csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -67
  193. csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -45
  194. csv_detective/detect_labels/other/__init__.py +0 -0
  195. csv_detective/detect_labels/other/booleen/__init__.py +0 -34
  196. csv_detective/detect_labels/other/email/__init__.py +0 -45
  197. csv_detective/detect_labels/other/float/__init__.py +0 -33
  198. csv_detective/detect_labels/other/int/__init__.py +0 -33
  199. csv_detective/detect_labels/other/money/__init__.py +0 -11
  200. csv_detective/detect_labels/other/money/check_col_name.py +0 -8
  201. csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -33
  202. csv_detective/detect_labels/other/twitter/__init__.py +0 -33
  203. csv_detective/detect_labels/other/url/__init__.py +0 -48
  204. csv_detective/detect_labels/other/uuid/__init__.py +0 -33
  205. csv_detective/detect_labels/temp/__init__.py +0 -0
  206. csv_detective/detect_labels/temp/date/__init__.py +0 -51
  207. csv_detective/detect_labels/temp/datetime_iso/__init__.py +0 -45
  208. csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -44
  209. csv_detective/detect_labels/temp/year/__init__.py +0 -44
  210. csv_detective/detection.py +0 -361
  211. csv_detective/process_text.py +0 -39
  212. csv_detective/s3_utils.py +0 -48
  213. csv_detective-0.6.7.data/data/share/csv_detective/CHANGELOG.md +0 -118
  214. csv_detective-0.6.7.data/data/share/csv_detective/LICENSE.AGPL.txt +0 -661
  215. csv_detective-0.6.7.data/data/share/csv_detective/README.md +0 -247
  216. csv_detective-0.6.7.dist-info/LICENSE.AGPL.txt +0 -661
  217. csv_detective-0.6.7.dist-info/METADATA +0 -23
  218. csv_detective-0.6.7.dist-info/RECORD +0 -150
  219. csv_detective-0.6.7.dist-info/WHEEL +0 -5
  220. csv_detective-0.6.7.dist-info/top_level.txt +0 -2
  221. tests/__init__.py +0 -0
  222. tests/test_fields.py +0 -360
  223. tests/test_file.py +0 -116
  224. tests/test_labels.py +0 -7
  225. /csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
  226. /csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
  227. /csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
  228. /csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0
@@ -1,23 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: csv_detective
3
- Version: 0.6.7
4
- Summary: Detect CSV column content
5
- Home-page: https://github.com/etalab/csv_detective
6
- Author: Etalab
7
- Author-email: opendatateam@data.gouv.fr
8
- License: http://www.fsf.org/licensing/licenses/agpl-3.0.html
9
- Keywords: CSV data processing encoding guess parser tabular
10
- Classifier: Development Status :: 2 - Pre-Alpha
11
- Classifier: License :: OSI Approved :: GNU Affero General Public License v3
12
- Classifier: Operating System :: POSIX
13
- Classifier: Programming Language :: Python :: 3
14
- Classifier: Topic :: Scientific/Engineering :: Information Analysis
15
- Description-Content-Type: text/markdown
16
- License-File: LICENSE.AGPL.txt
17
- Requires-Dist: boto3 ==1.26.65
18
- Requires-Dist: faust-cchardet ==2.1.19
19
- Requires-Dist: pandas ==1.5.3
20
- Requires-Dist: pytest ==7.2.1
21
- Requires-Dist: python-dateutil ==2.8.2
22
- Requires-Dist: Unidecode ==1.3.6
23
-
@@ -1,150 +0,0 @@
1
- csv_detective/__init__.py,sha256=GXnXV3cFmzDnIeFijtPpxojjjKDkoNrbwKLQwhYI7RY,22
2
- csv_detective/all_packages.txt,sha256=MzN0kVnX99LU0PCqrO7JBjrYMngXn7GByhGYwTvbo74,4681
3
- csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
4
- csv_detective/detection.py,sha256=FDC7gLe4CNWrMf9py78JXv1vpvjTrS7UwdCA7TQ7u_Y,12249
5
- csv_detective/explore_csv.py,sha256=KBLMKlyc_5bRJLQH9MYAx9VDTeuD5TzTdjflHR4WnPE,14605
6
- csv_detective/process_text.py,sha256=1KMgoTnfUKghNcmDQp_84ryLwcMP2PR3sTyCnuOeeAI,1236
7
- csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
8
- csv_detective/schema_generation.py,sha256=D1Cq4QRajsKtY8EJSwbRTIB-T_Cb2ZpcmYtCrJ6DvJQ,13135
9
- csv_detective/utils.py,sha256=0cpPlcgB6oOpxPtVv6cSqwJGYOYt8GN8uxviABxLwd0,8263
10
- csv_detective/detect_fields/__init__.py,sha256=CchNbi1vrgIGh_uBexXZTzfjBETDY0kQLjI-PAquU8M,921
11
- csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=s0Tvyjf09DHksFRlbbtHwepIxRJZk328PvZuac7h0Ok,1647
14
- csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=34ugFPPjduSjH466aw3XcMcCi97kAaJX2RiguEHOW5M,705
15
- csv_detective/detect_fields/FR/geo/code_commune_insee/code_commune_insee.txt,sha256=DgMtDlS5tX464k0RgSnSmgbIN9Z3BfK7gOt-k1uLfyk,225600
16
- csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=9jadtQHMDpcdYKEis5BJpQrpWvIubKarUECPcPRyFJY,382
17
- csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=L9poDp6kuZBdpVjVoHqfslDGEXWl8t2U8Hl6FXCz4N4,397
18
- csv_detective/detect_fields/FR/geo/code_fantoir/code_fantoir.txt,sha256=nMQcnFaoTyLnIOSRnv0e30F1plvHeMEzr_ZfMwoh6bM,130610
19
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=2DDZl-AuDRKJKg2AEsICzupEMf1_w6wEbgNUFGMHyAE,464
20
- csv_detective/detect_fields/FR/geo/code_postal/code_postal.txt,sha256=J4XyH2TAYqTtBTyk0ySrmIMXpTlu4H7aFnd861gcf2U,220932
21
- csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=tGFxKHl1sVtFBftSWGi1SXXoDPJcuDcZA_c6YXqjStY,333
22
- csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=uhWvl6NqI-AfE2mPCFrPfuBUKR_MAZM1kn81UabdjmM,540
23
- csv_detective/detect_fields/FR/geo/commune/commune.txt,sha256=IiKhO6-1XmcbAjPXLRvPyR3trPbCAHwCrLDJjRhutT8,468480
24
- csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=E7BdP-5PEW9N6AVuzr4vfccebuvC9M286RB1YvDdZN0,525
25
- csv_detective/detect_fields/FR/geo/departement/departement.txt,sha256=rgNc9QO3ahH5Z-FzDnrp8YaMP140KNi2EYFkoreIAhk,1012
26
- csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=yBjm694LpLphaLoEaLq1D3J52oHiSjc_FgxW-viMaV0,543
27
- csv_detective/detect_fields/FR/geo/insee_canton/canton2017.txt,sha256=foCRrjuobtpKqkjaEKLwUzzk467Lo9Z8sOwFFXrmtj8,98052
28
- csv_detective/detect_fields/FR/geo/insee_canton/cantons.txt,sha256=8eiau0QfU58AWqhl9N8N1kYHWmh1U8D-Nyfb5R7z4kg,25539
29
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=gOGelADhufPOuhuAl_m9wsYyJoWBUjqaN1gZttf-qA8,343
30
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=WjPHg8s0ND6bOwS-yo6FP1dnwD-6SWg9oH1K0avHsbI,344
31
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=Gf8IPsjzBYumGuV5-7_eW8s_Gm2tOMgDBKgwDZY3kcw,344
32
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=d4fLK4IndwllDhsddyTbyRiPfc8O9wT0pLIRI_C3QvQ,344
33
- csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=lDTRA4EkbjcFKoOcWaZ0c8OBcwig4Bovj7hIzWipZvI,402
34
- csv_detective/detect_fields/FR/geo/pays/pays.txt,sha256=prUV8eSVIuxNspIYHDPg9x_MC0DgFnbr29uxFLcGceU,2549
35
- csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=dDjK_TrsdOKgsiS-5Ofs98ND6v1Of_iw6XrfhSnntn4,400
36
- csv_detective/detect_fields/FR/geo/region/region.txt,sha256=wv77qAdQ0FSc33kvnhmg4LBBVKE4tvdgpZV_NDz3o-Y,560
37
- csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=XCegXF9rmf0yYv2_xFt_HemYsEP_GAzk-eNLa1PUVJw,512
39
- csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
40
- csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z4AFN4BeBFO9OAZ4lkE4mybSDyMd7uu1PwhDjq2It_k,120
41
- csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=vfCl1ENCTiJUDF4Dlse5tT2LD9k7Ed674a81VrAe1g0,270
42
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=47r_-CJneGX0C4OB_YHOvMn3TCvg4zKULh1ervIEDdQ,442
43
- csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
44
- csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=xJdMpBJoqe_XDkh-WkH8ZtT2AN43vfAEzueK7faL3GM,259
45
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=G6EliaNJw_RCpHaAGfTTCkdKe_OxPqDUjpcLikmr4eQ,465
46
- csv_detective/detect_fields/FR/other/insee_ape700/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
47
- csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=YkX4vC85oul30H1Qejsnid_WFv2i7CKK8LH83x6SfRk,215
48
- csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=jjCXN5xJPD_pOGNSOc3XDIm5jMX4YS2Nk0nfwt89GWs,388
49
- csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=Yo0YwttbJUZCSIamWBmZmVrAiOLABHywcY5b7CJ_14c,653
50
- csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=8rrNHCs9mjQ7RVpOhjBMMOsgCI3ormvw4LnrUxno4YI,289
51
- csv_detective/detect_fields/FR/other/uai/__init__.py,sha256=6pbVoU5g3Jok4OrWmVITHazXriTpKRPkrv-PHppklI4,299
52
- csv_detective/detect_fields/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
- csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py,sha256=jklVG-8Hcv1bjsKLqC6uN0zwmGkPPTgSa0p4iThz6DE,341
54
- csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py,sha256=dzk4BAszoKO_FAAPYrocWpKg7zpl8JukQTPkfSmt3H4,527
55
- csv_detective/detect_fields/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py,sha256=rwIqUgW0DUOXevg1I-ah6uhm00QcKde2bc2YOmwZ1Jc,405
57
- csv_detective/detect_fields/geo/iso_country_code_alpha2/iso_country_code_alpha2.txt,sha256=YyPlDqCdz65ecf4Wes_r0P4rDSJG35niXtjc4MmctXM,1740
58
- csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=_xnwOcW2G3tBxHBFGPguK0fo-Kc8j9s_KC6mqTyjrbY,381
59
- csv_detective/detect_fields/geo/iso_country_code_alpha3/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
60
- csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=GAHzTvbAgG6kCNFzLh6K-m01CbFLgL_mnCP3jWmzWPY,408
61
- csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
62
- csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=FPHOfTrfXJs62-NgeOcNGOvwPd7I1fEVp8lTdMNfj3w,433
63
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=ArS6PuYEd0atZwSqNDZhXZz1TwzdiwdV8ovRYTOacpg,327
64
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=yvjNFyiF-xbhsL0LzC_mS3-_m74t47tItNxbd_nrQsM,254
65
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=myGcoTQUhVPyHDDXeWu4yj7vuqVkobbu1MbDaw3wS5Q,323
66
- csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
- csv_detective/detect_fields/other/booleen/__init__.py,sha256=wfAa0aPOO63SIjQoPXfB50xl7YfEQ8xwYYhv8K-VOkE,256
68
- csv_detective/detect_fields/other/email/__init__.py,sha256=TDC1XK9a3A-KTAE9hxVy-V3xmMcVoQHjfYSM0NzRt_A,153
69
- csv_detective/detect_fields/other/float/__init__.py,sha256=P9VdTaRHYOxCcr4r5Om0-BtcdjlKiEdQtUJgPKvuWik,427
70
- csv_detective/detect_fields/other/int/__init__.py,sha256=pkYmP_DG5wIB6-tVAbbj4jRVSgzGTRdpOftnkVsX0fw,218
71
- csv_detective/detect_fields/other/json/__init__.py,sha256=CMhfnA0_O3B6FBwsYgjaTPOv_wQB2nBDNWO9jSYyP4Y,527
72
- csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=znlOyispn4k_oFxiVXX7LwhFG5bz63Nhv5_zny7Zbj0,131
73
- csv_detective/detect_fields/other/twitter/__init__.py,sha256=EoRUXII5aoSrPCSLaKcTjH2g0P5ojF2fOlxeqm5NfQs,129
74
- csv_detective/detect_fields/other/url/__init__.py,sha256=ihhkhectA73ovpZ42n0VbgXvtUa6TjhHlEKvrsT4pY0,232
75
- csv_detective/detect_fields/other/uuid/__init__.py,sha256=uqX0DE-KQUHrkhVssU1AeLEFUw-yIisoabQo7uF5a4s,198
76
- csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- csv_detective/detect_fields/temp/date/__init__.py,sha256=Nc-ZYYM-E7Z-MH8flOkG4iBl238l2XcQF95Nh1lFiu8,1891
78
- csv_detective/detect_fields/temp/datetime_iso/__init__.py,sha256=8LrsbnKea64zSLOV6ENHDLlmCWtEUhGqLsV75ftZHHk,430
79
- csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=t7WxeA0ZavES_pvqHoO4JAAErpC-ifGFFncZ-ehjTTM,511
80
- csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRhKTDT-FTqGOBpdartuShA,194
81
- csv_detective/detect_labels/__init__.py,sha256=BJjWlwTnnDe9nomABDUreu9EMu6IFG3T47d7YCJZbRc,878
82
- csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
- csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
- csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=r14SVoVJiaabyr6lTahI_Qsk0EH3F8UVSi6TRnDQS7o,1063
85
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=qJKvP6g98ceSxaFtd37-bI-9uFhQvdwFSh1n3MrzrOo,1096
86
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=FtPwHudArmsgkjCT_IM-I4_wALOsKjiK0-TEsYe9tw4,1025
87
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=irrBwY_TXAGRhOAcH1Xqi9D7P5Ajk2854ee1qXwuTnA,949
88
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=L74MwxadiT_MVrEEWUlMbhUsE_kk7xz_E2BHrZMUxMs,1070
89
- csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=Di9j-AKCogKxavnPgGjA_P8hy8g6JyJ0GBOO0k4l-qY,1012
90
- csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=8Jhx4neUt5iyyK_b1D4WWsdxi3mpz7cNZQ28fFF4xaE,948
91
- csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=N8MYMhqhspoLAUgD25pIrsqDKRuwTGnXXm8Chr8wih8,1229
92
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=8Tcqzjn-dGGjpxzo-2TqmEYpyfEhcqa1XNcQgMnqq88,957
93
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=nUT7SL4sKP_q9LTbiMBaCzJ029yBMP_phAD_CiOVHfc,1386
94
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=DCyN6-k1FH1kfTy4tFZWIH6lyaKeT-vgWnDh8TB7JhU,1381
95
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=Lcqc8Agjxy2dPulu65NRel4uxRLPcQrAGrLsBTYT8EQ,1139
96
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=xd_W-L9pkKpsMT1IZ0fVMdty1dmT75uS8gmekb_InAw,1144
97
- csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=HJ3hNV3xeAN46YP6c-tqQgHMNvltm-tgApfofR5FraE,1169
98
- csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=ZPw8LXIuV8OvFVY_DA3MkvpAFzB6Rs749Ppr0Wc4lao,1164
99
- csv_detective/detect_labels/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=rk5S6UGL0vND2X2ty1HJJH3qswUbaV6ZJHHKAywRa6o,939
101
- csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=MBF9qZIGbX1dP3DJBI71pbqrGcgOR1xXAbGdiHX0pc4,1024
102
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=RDU8jDZgutfxnJl5lQkbqymJmGeeGXpR2i4CuGfqU10,934
103
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=-GoB9i83O3_rg81Ry3ZtLOdMhlrMPZ34he4hn9U6qDc,1043
104
- csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=7eV737iM1X9MTHureWiCpnxAUJ2_YjI14Vs41MGEX2Q,975
105
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=6UDx_6JRG__aSXTKKPq-2oBJR7ZiWg0HhSLDl4pETm4,1124
106
- csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=87BcCHmofOMqfHfHzmwZzIplcBMAm1AUMxVNvigigTQ,956
107
- csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=oWkOquzLLbDwBlAs_hoic_UQu7LFOmFZ76570vwRgdc,1103
108
- csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Xx-oajnzxEe6pEAYafsnZo7S-mLfnB0pP3z5gv3kJy0,1040
109
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=WWglp7xmb_Wz8bxrYYmB46cRyCJKNNqguubziMktZZI,1143
110
- csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=sVcw6fwQi9ocIEmLEJRi9m4WvTLg_ORwaW0KaJqeMB8,1316
111
- csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
112
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=a3fKjduxRIMNu7TF124pG--Mb21PIqZYnQwYU4APLBw,1074
113
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=taNWDz1_0KE4cOS4SeJcC8igMSA2LBbv8TvbCg50-TY,934
114
- csv_detective/detect_labels/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=ev8w1hySEoNiMcU1IhJy72IB5OliCvoUy-ytKWPG3oI,1065
116
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=ev8w1hySEoNiMcU1IhJy72IB5OliCvoUy-ytKWPG3oI,1065
117
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=ev8w1hySEoNiMcU1IhJy72IB5OliCvoUy-ytKWPG3oI,1065
118
- csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=p6mXMb1GMkrs27WmoqRqPE3wCbs3iPL4FWfcc280bGA,1072
119
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=DCyN6-k1FH1kfTy4tFZWIH6lyaKeT-vgWnDh8TB7JhU,1381
120
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=xfzeQ1KXmEZAkpVQT_qAYsC4RnXKl11dTB9PoFExGgQ,1705
121
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=knv3HnIerZ6oUPrzGkW2GJjsiTnCklqZ9_koNJCG91I,1145
122
- csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
123
- csv_detective/detect_labels/other/booleen/__init__.py,sha256=ahxzBUcJYc5f1J8aAeYDhtSc-URJHS6ruGTAbZXaoG8,987
124
- csv_detective/detect_labels/other/email/__init__.py,sha256=2pf0e8o3L57damyi4BLrqA9Opw0trZl2wWDHY88s41E,1148
125
- csv_detective/detect_labels/other/float/__init__.py,sha256=9JC0-B-aVqlLe3FeN8uH5HZjIc2V6hZ7JFStkSLsHW0,926
126
- csv_detective/detect_labels/other/int/__init__.py,sha256=i9xN8TYBy4C5b1vYO1l3Rkvn4uq_tft8Rip_ErSUIt8,933
127
- csv_detective/detect_labels/other/money/__init__.py,sha256=kBEGuUy6kYkOI3vC_a7waBciG2ipyV9bhC330U8WaoI,279
128
- csv_detective/detect_labels/other/money/check_col_name.py,sha256=zgp5eUnf3XRQuxgdEGfxPfUnniO8Pzw19uK0ICr2pf8,414
129
- csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=Y-inIGmeH3lZcN9kR6icE3QypS54qJWv8aE4GQUudpc,927
130
- csv_detective/detect_labels/other/twitter/__init__.py,sha256=D8G4vGsFL9a99OJz-03wp4HbZSvT-y1IxyRJiSsqxFc,959
131
- csv_detective/detect_labels/other/url/__init__.py,sha256=vqUQvn5o6JZU8iRsSG3AYqggjlhzagozVYWwpuSReV8,1202
132
- csv_detective/detect_labels/other/uuid/__init__.py,sha256=OdMUxqvqMdGaY5nph7CbIF_Q0LSxljxE72kCMT4m-Zk,931
133
- csv_detective/detect_labels/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
- csv_detective/detect_labels/temp/date/__init__.py,sha256=GrIbo64WVM3hi7ShBRKKyKUZxkZlVKhpgk41FxkM1VI,1281
135
- csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
136
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
137
- csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
138
- csv_detective-0.6.7.data/data/share/csv_detective/CHANGELOG.md,sha256=urZrWA8jhrqctpQke5NPhzYZINE8UXc7AczcqaxbK3U,4465
139
- csv_detective-0.6.7.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
140
- csv_detective-0.6.7.data/data/share/csv_detective/README.md,sha256=5pxTU1Ljer8Gw0rOZZvi6vyC3QhjFrGaFJloy2N9GMs,9339
141
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
142
- tests/test_fields.py,sha256=xu80qwzZVQIH6dVRf1dE1lru4EzM2XtGc7YoMDPRAX4,7439
143
- tests/test_file.py,sha256=tWULaaHIokkwKXW_9hfbONYDZncbOXCFL0I0sXtQ4YA,3913
144
- tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
145
- csv_detective-0.6.7.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
146
- csv_detective-0.6.7.dist-info/METADATA,sha256=acU2wBZzO6LDCEuDjR49xqX4N0xmqhDR4pM8Uzm-KKg,871
147
- csv_detective-0.6.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
148
- csv_detective-0.6.7.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
149
- csv_detective-0.6.7.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
150
- csv_detective-0.6.7.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.42.0)
3
- Root-Is-Purelib: true
4
- Tag: py3-none-any
5
-
@@ -1,2 +0,0 @@
1
- csv_detective
2
- tests
tests/__init__.py DELETED
File without changes
tests/test_fields.py DELETED
@@ -1,360 +0,0 @@
1
- import pandas as pd
2
- from numpy import random
3
-
4
- from csv_detective.detect_fields.FR.other import (
5
- code_csp_insee,
6
- csp_insee,
7
- sexe,
8
- siren,
9
- tel_fr,
10
- code_rna,
11
- code_waldec,
12
- )
13
- from csv_detective.detect_fields.other import email, url, uuid, mongo_object_id, json
14
-
15
- from csv_detective.detect_fields.FR.geo import (
16
- adresse,
17
- code_commune_insee,
18
- commune,
19
- departement,
20
- pays,
21
- region,
22
- )
23
- from csv_detective.detect_fields.geo import (
24
- iso_country_code_alpha2,
25
- iso_country_code_alpha3,
26
- iso_country_code_numeric,
27
- )
28
-
29
- from csv_detective.detect_fields.FR.temp import jour_de_la_semaine
30
- from csv_detective.detect_fields.temp import year, date, datetime_iso, datetime_rfc822
31
-
32
- from csv_detective.detection import (
33
- detetect_categorical_variable,
34
- detect_continuous_variable,
35
- )
36
-
37
-
38
- # categorical
39
- def test_detetect_categorical_variable():
40
- categorical_col = ["type_a"] * 33 + ["type_b"] * 33 + ["type_c"] * 34
41
- not_categorical_col = [i for i in range(100)]
42
-
43
- df_dict = {"cat": categorical_col, "not_cat": not_categorical_col}
44
- df = pd.DataFrame(df_dict, dtype="unicode")
45
-
46
- res, _ = detetect_categorical_variable(df)
47
- assert res.values and res.values[0] == "cat"
48
-
49
-
50
- # continuous
51
- def test_detect_continuous_variable():
52
- continuous_col = random.random(100)
53
- continuous_col_2 = [1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7, 21, 3] * 10
54
- not_continuous_col = ["type_a"] * 33 + ["type_b"] * 33 + ["type_c"] * 34
55
-
56
- df_dict = {"cont": continuous_col, "not_cont": not_continuous_col}
57
- df_dict_2 = {"cont": continuous_col_2, "not_cont": not_continuous_col}
58
-
59
- df = pd.DataFrame(df_dict, dtype="unicode")
60
- df2 = pd.DataFrame(df_dict_2, dtype="unicode")
61
-
62
- res = detect_continuous_variable(df)
63
- res2 = detect_continuous_variable(df2, continuous_th=0.65)
64
- assert res.values and res.values[0] == "cont"
65
- assert res2.values and res2.values[0] == "cont"
66
-
67
-
68
- # csp_insee
69
- def test_match_csp_insee():
70
- val = "employes de la poste"
71
- assert csp_insee._is(val)
72
-
73
-
74
- def test_do_not_match_csp_insee():
75
- val = "super-heros"
76
- assert not csp_insee._is(val)
77
-
78
-
79
- # code_csp_insee
80
- def test_match_code_csp_insee():
81
- val = "121f"
82
- assert code_csp_insee._is(val)
83
-
84
-
85
- def test_do_not_match_code_csp_insee():
86
- val = "121x"
87
- assert not code_csp_insee._is(val)
88
-
89
-
90
- # sexe
91
- def test_match_sexe():
92
- val = "homme"
93
- assert sexe._is(val)
94
-
95
-
96
- def test_do_not_match_sexe():
97
- val = "hermaphrodite"
98
- assert not sexe._is(val)
99
-
100
-
101
- # tel_fr
102
- def test_match_tel_fr():
103
- val = "0134643467"
104
- assert tel_fr._is(val)
105
-
106
-
107
- def test_do_not_match_tel_fr():
108
- val = "3345689715"
109
- assert not tel_fr._is(val)
110
-
111
-
112
- # email
113
- def test_match_email():
114
- val = "cdo_intern@data.gouv.fr"
115
- assert email._is(val)
116
-
117
-
118
- def test_do_not_match_email():
119
- val = "cdo@@gouv.sfd"
120
- assert not email._is(val)
121
-
122
-
123
- # uuid
124
- def test_match_uuid():
125
- val = "884762be-51f3-44c3-b811-1e14c5d89262"
126
- assert uuid._is(val)
127
-
128
-
129
- def test_do_not_match_uuid():
130
- val = "0610928327"
131
- assert not uuid._is(val)
132
-
133
-
134
- # Mongo ObjectId
135
- def test_match_mongo_object_id():
136
- val = "62320e50f981bc2b57bcc044"
137
- assert mongo_object_id._is(val)
138
-
139
-
140
- def test_do_not_match_mongo_object_id():
141
- val = "884762be-51f3-44c3-b811-1e14c5d89262"
142
- assert not mongo_object_id._is(val)
143
-
144
-
145
- # url
146
- def test_match_url():
147
- val = "www.etalab.data.gouv.fr"
148
- assert url._is(val)
149
-
150
-
151
- def test_do_not_match_url():
152
- val = "c est une phrase"
153
- assert not url._is(val)
154
-
155
-
156
- # adresse
157
- def test_match_adresse():
158
- val = "rue du martyr"
159
- assert adresse._is(val)
160
-
161
-
162
- def test_do_not_match_adresse():
163
- val = "bonjour les amis"
164
- assert not adresse._is(val)
165
-
166
-
167
- # code_commune_insee
168
- def test_match_code_commune_insee():
169
- val = "91471"
170
- assert code_commune_insee._is(val)
171
-
172
-
173
- def test_do_not_match_code_commune_insee():
174
- val = "914712"
175
- assert not code_commune_insee._is(val)
176
-
177
-
178
- # commune
179
- def test_match_commune():
180
- val = "saint denis"
181
- assert commune._is(val)
182
-
183
-
184
- def test_do_not_match_commune():
185
- val = "new york"
186
- assert not commune._is(val)
187
-
188
-
189
- # departement
190
- def test_match_departement():
191
- val = "essonne"
192
- assert departement._is(val)
193
-
194
-
195
- def test_do_not_match_departement():
196
- val = "new york"
197
- assert not departement._is(val)
198
-
199
-
200
- # pays
201
- def test_match_pays():
202
- val = "france"
203
- assert pays._is(val)
204
-
205
-
206
- def test_do_not_match_pays():
207
- val = "new york"
208
- assert not pays._is(val)
209
-
210
-
211
- # region
212
- def test_match_region():
213
- val = "bretagne"
214
- assert region._is(val)
215
-
216
-
217
- def test_do_not_match_region():
218
- val = "jambon beurre"
219
- assert not region._is(val)
220
-
221
-
222
- # iso_country_code
223
- def test_match_iso_country_code():
224
- val = "FR"
225
- assert iso_country_code_alpha2._is(val)
226
-
227
-
228
- def test_do_not_match_iso_country_code():
229
- val = "XX"
230
- assert not iso_country_code_alpha2._is(val)
231
-
232
-
233
- # iso_country_code alpha-3
234
- def test_match_iso_country_code_alpha3():
235
- val = "FRA"
236
- assert iso_country_code_alpha3._is(val)
237
-
238
-
239
- def test_do_not_match_iso_country_code_alpha3():
240
- val = "ABC"
241
- assert not iso_country_code_alpha3._is(val)
242
-
243
-
244
- # iso_country_code numerique
245
- def test_match_iso_country_code_numeric():
246
- val = "250"
247
- print(iso_country_code_numeric._is(val))
248
- assert iso_country_code_numeric._is(val)
249
-
250
-
251
- def test_do_not_match_iso_country_code_numeric():
252
- val = "003"
253
- assert not iso_country_code_numeric._is(val)
254
-
255
-
256
- # jour de la semaine
257
- def test_match_jour_de_la_semaine():
258
- val = "lundi"
259
- assert jour_de_la_semaine._is(val)
260
-
261
-
262
- def test_do_not_match_jour_de_la_semaine():
263
- val = "jour de la biere"
264
- assert not jour_de_la_semaine._is(val)
265
-
266
-
267
- # year
268
- def test_match_year():
269
- val = "2015"
270
- assert year._is(val)
271
-
272
-
273
- def test_do_not_match_year():
274
- val = "20166"
275
- assert not year._is(val)
276
-
277
-
278
- # date
279
- def test_match_date():
280
- val = "1960-08-07"
281
- assert date._is(val)
282
- val = '12/02/2007'
283
- assert date._is(val)
284
- val = '15 jan 1985'
285
- assert date._is(val)
286
- val = '15 décembre 1985'
287
- assert date._is(val)
288
- val = '02052003'
289
- assert date._is(val)
290
- val = '1993-12/02'
291
- assert date._is(val)
292
-
293
-
294
- def test_do_not_match_date():
295
- val = "1993-1993-1993"
296
- assert not date._is(val)
297
- val = '39-10-1993'
298
- assert not date._is(val)
299
- val = '19-15-1993'
300
- assert not date._is(val)
301
- val = '15 tambour 1985'
302
- assert not date._is(val)
303
-
304
-
305
- # datetime
306
- def test_match_datetime():
307
- val = "2021-06-22T10:20:10"
308
- assert datetime_iso._is(val)
309
- val = "2021-06-22T30:20:10"
310
- assert not datetime_iso._is(val)
311
-
312
- val = "Sun, 06 Nov 1994 08:49:37 GMT"
313
- assert datetime_rfc822._is(val)
314
-
315
-
316
- # siren
317
- def test_match_siren():
318
- val = "552 100 554"
319
- assert siren._is(val)
320
-
321
-
322
- def test_do_not_match_siren():
323
- val = "42"
324
- assert not siren._is(val)
325
-
326
-
327
- # rna
328
- def test_match_rna():
329
- val = "W751515517"
330
- assert code_rna._is(val)
331
-
332
-
333
- def test_do_not_match_rna():
334
- val = "W111111111111111111111111111111111111"
335
- assert not code_rna._is(val)
336
-
337
-
338
- def test_match_waldec():
339
- val = "751P00188854"
340
- assert code_waldec._is(val)
341
-
342
-
343
- def test_do_not_match_waldec():
344
- val = "AA751PEE00188854"
345
- assert not code_waldec._is(val)
346
-
347
-
348
- # json
349
- def test_match_json():
350
- val = '{"pomme": "fruit", "reponse": 42}'
351
- assert json._is(val)
352
- val = "[1,2,3,4]"
353
- assert json._is(val)
354
-
355
-
356
- def test_do_not_match_json():
357
- val = '{"coordinates": [45.783753, 3.049342], "citycode": "63870"}'
358
- assert not json._is(val)
359
- val = "666"
360
- assert not json._is(val)
tests/test_file.py DELETED
@@ -1,116 +0,0 @@
1
- from csv_detective import explore_csv
2
- import pytest
3
-
4
-
5
- def test_columns_output_on_file():
6
- output = explore_csv.routine(
7
- csv_file_path="tests/a_test_file.csv",
8
- num_rows=-1,
9
- output_profile=False,
10
- save_results=False,
11
- )
12
- assert isinstance(output, dict)
13
- assert output["separator"] == ";"
14
- assert output["header_row_idx"] == 2
15
- assert output["header"] == [
16
- "NUMCOM",
17
- "NOMCOM",
18
- "NUMDEP",
19
- "NOMDEP",
20
- "NUMEPCI",
21
- "NOMEPCI",
22
- "TXCOUVGLO_COM_2014",
23
- "TXCOUVGLO_DEP_2014",
24
- "TXCOUVGLO_EPCI_2014",
25
- "STRUCTURED_INFO",
26
- "GEO_INFO",
27
- ]
28
- assert output["total_lines"] == 414
29
- assert output["nb_duplicates"] == 7
30
- assert output["columns"]["NOMCOM"]["format"] == "commune"
31
- assert output["columns"]["NOMDEP"]["format"] == "departement"
32
- assert output["columns"]["NUMEPCI"]["format"] == "siren"
33
- assert output["columns"]["STRUCTURED_INFO"]["python_type"] == "json"
34
- assert output["columns"]["STRUCTURED_INFO"]["format"] == "json"
35
- assert output["columns"]["GEO_INFO"]["python_type"] == "json"
36
- assert output["columns"]["GEO_INFO"]["format"] == "json_geojson"
37
- assert output["columns"]["NUMEPCI"]["format"] == "siren"
38
-
39
-
40
- def test_profile_output_on_file():
41
- output = explore_csv.routine(
42
- csv_file_path="tests/a_test_file.csv",
43
- num_rows=-1,
44
- output_profile=True,
45
- save_results=False,
46
- )
47
- assert all(
48
- [
49
- c in list(output["profile"]["NUMCOM"].keys())
50
- for c in [
51
- "min",
52
- "max",
53
- "mean",
54
- "std",
55
- "tops",
56
- "nb_distinct",
57
- "nb_missing_values",
58
- ]
59
- ]
60
- )
61
- assert len(output["profile"]["NOMCOM"].keys()) == 3
62
- assert output["profile"]["NUMCOM"]["min"] == 1001
63
- assert output["profile"]["NUMCOM"]["max"] == 6125
64
- assert round(output["profile"]["NUMCOM"]["mean"]) == 1245
65
- assert round(output["profile"]["NUMCOM"]["std"]) == 363
66
- assert output["profile"]["TXCOUVGLO_COM_2014"]["nb_distinct"] == 296
67
- assert output["profile"]["TXCOUVGLO_COM_2014"]["nb_missing_values"] == 3
68
- assert output["profile"]["GEO_INFO"]["nb_distinct"] == 1
69
-
70
-
71
- def test_exception():
72
- with pytest.raises(Exception):
73
- explore_csv.routine(
74
- csv_file_path="tests/a_test_file.csv",
75
- num_rows=50,
76
- output_profile=True,
77
- save_results=False,
78
- )
79
-
80
-
81
- def test_code_dep_reg_on_file():
82
- output = explore_csv.routine(
83
- csv_file_path="tests/b_test_file.csv",
84
- num_rows=-1,
85
- output_profile=False,
86
- save_results=False,
87
- )
88
- assert isinstance(output, dict)
89
- assert output["columns"]["code_departement"]["format"] == "code_departement"
90
- assert output["columns"]["code_region"]["format"] == "code_region"
91
-
92
-
93
- def test_schema_on_file():
94
- output = explore_csv.routine(
95
- csv_file_path="tests/b_test_file.csv",
96
- num_rows=-1,
97
- output_schema=True,
98
- )
99
- assert isinstance(output, dict)
100
- is_column_dep = False
101
- is_column_reg = False
102
- for item in output["schema"]["fields"]:
103
- if item["name"] == "code_departement":
104
- is_column_dep = True
105
- assert item["description"] == "Le code INSEE du département"
106
- assert item["type"] == "string"
107
- assert item["formatFR"] == "code_departement"
108
- assert item["constraints"]["pattern"] == "^(([013-9]\\d|2[AB1-9])$|9\\d{2}$)"
109
- if item["name"] == "code_region":
110
- is_column_reg = True
111
- assert item["description"] == "Le code INSEE de la région"
112
- assert item["type"] == "string"
113
- assert item["formatFR"] == "code_region"
114
- assert item["constraints"]["pattern"] == "^\\d{2}$"
115
- assert is_column_dep
116
- assert is_column_reg
tests/test_labels.py DELETED
@@ -1,7 +0,0 @@
1
- from csv_detective.detect_labels.other import money
2
-
3
-
4
- # money labels
5
- def test_money_labels():
6
- header = "Montant total"
7
- assert money._is(header) == 1.0