csv-detective 0.8.1.dev1720__py3-none-any.whl → 0.8.1.dev1729__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detect_fields/__init__.py +94 -43
- csv_detective/detect_labels/__init__.py +51 -1
- csv_detective/detection/variables.py +2 -3
- csv_detective/load_tests.py +1 -2
- {csv_detective-0.8.1.dev1720.dist-info → csv_detective-0.8.1.dev1729.dist-info}/METADATA +11 -2
- {csv_detective-0.8.1.dev1720.dist-info → csv_detective-0.8.1.dev1729.dist-info}/RECORD +11 -11
- tests/test_structure.py +1 -2
- {csv_detective-0.8.1.dev1720.dist-info → csv_detective-0.8.1.dev1729.dist-info}/WHEEL +0 -0
- {csv_detective-0.8.1.dev1720.dist-info → csv_detective-0.8.1.dev1729.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1720.dist-info → csv_detective-0.8.1.dev1729.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.8.1.dev1720.dist-info → csv_detective-0.8.1.dev1729.dist-info}/top_level.txt +0 -0
|
@@ -1,61 +1,112 @@
|
|
|
1
|
-
# flake8: noqa
|
|
2
|
-
from .FR.other import (
|
|
3
|
-
code_csp_insee,
|
|
4
|
-
csp_insee,
|
|
5
|
-
sexe,
|
|
6
|
-
siren,
|
|
7
|
-
tel_fr,
|
|
8
|
-
uai,
|
|
9
|
-
siret,
|
|
10
|
-
insee_ape700,
|
|
11
|
-
date_fr,
|
|
12
|
-
code_import,
|
|
13
|
-
code_waldec,
|
|
14
|
-
code_rna,
|
|
15
|
-
)
|
|
16
|
-
|
|
17
|
-
from .other import (
|
|
18
|
-
email,
|
|
19
|
-
url,
|
|
20
|
-
booleen,
|
|
21
|
-
money,
|
|
22
|
-
mongo_object_id,
|
|
23
|
-
percent,
|
|
24
|
-
twitter,
|
|
25
|
-
float,
|
|
26
|
-
int,
|
|
27
|
-
uuid,
|
|
28
|
-
json,
|
|
29
|
-
)
|
|
30
|
-
|
|
31
1
|
from .FR.geo import (
|
|
32
2
|
adresse,
|
|
33
3
|
code_commune_insee,
|
|
34
|
-
code_postal,
|
|
35
|
-
commune,
|
|
36
|
-
departement,
|
|
37
|
-
pays,
|
|
38
|
-
region,
|
|
39
4
|
code_departement,
|
|
40
5
|
code_fantoir,
|
|
41
|
-
|
|
42
|
-
latitude_wgs_fr_metropole,
|
|
6
|
+
code_postal,
|
|
43
7
|
code_region,
|
|
8
|
+
commune,
|
|
9
|
+
departement,
|
|
10
|
+
insee_canton,
|
|
44
11
|
latitude_l93,
|
|
12
|
+
latitude_wgs_fr_metropole,
|
|
45
13
|
longitude_l93,
|
|
46
|
-
|
|
14
|
+
longitude_wgs_fr_metropole,
|
|
15
|
+
pays,
|
|
16
|
+
region,
|
|
47
17
|
)
|
|
48
|
-
|
|
18
|
+
from .FR.other import (
|
|
19
|
+
code_csp_insee,
|
|
20
|
+
code_import,
|
|
21
|
+
code_rna,
|
|
22
|
+
code_waldec,
|
|
23
|
+
csp_insee,
|
|
24
|
+
date_fr,
|
|
25
|
+
insee_ape700,
|
|
26
|
+
sexe,
|
|
27
|
+
siren,
|
|
28
|
+
siret,
|
|
29
|
+
tel_fr,
|
|
30
|
+
uai,
|
|
31
|
+
)
|
|
32
|
+
from .FR.temp import jour_de_la_semaine, mois_de_annee
|
|
49
33
|
from .geo import (
|
|
50
34
|
iso_country_code_alpha2,
|
|
51
35
|
iso_country_code_alpha3,
|
|
52
36
|
iso_country_code_numeric,
|
|
37
|
+
json_geojson,
|
|
53
38
|
latitude_wgs,
|
|
54
|
-
longitude_wgs,
|
|
55
39
|
latlon_wgs,
|
|
40
|
+
longitude_wgs,
|
|
56
41
|
lonlat_wgs,
|
|
57
|
-
json_geojson,
|
|
58
42
|
)
|
|
43
|
+
from .other import (
|
|
44
|
+
booleen,
|
|
45
|
+
email,
|
|
46
|
+
float,
|
|
47
|
+
int,
|
|
48
|
+
json,
|
|
49
|
+
money,
|
|
50
|
+
mongo_object_id,
|
|
51
|
+
percent,
|
|
52
|
+
twitter,
|
|
53
|
+
url,
|
|
54
|
+
uuid,
|
|
55
|
+
)
|
|
56
|
+
from .temp import date, datetime_aware, datetime_naive, datetime_rfc822, year
|
|
59
57
|
|
|
60
|
-
|
|
61
|
-
|
|
58
|
+
__all__ = [
|
|
59
|
+
"adresse",
|
|
60
|
+
"code_commune_insee",
|
|
61
|
+
"code_departement",
|
|
62
|
+
"code_fantoir",
|
|
63
|
+
"code_postal",
|
|
64
|
+
"code_region",
|
|
65
|
+
"commune",
|
|
66
|
+
"departement",
|
|
67
|
+
"insee_canton",
|
|
68
|
+
"latitude_l93",
|
|
69
|
+
"latitude_wgs_fr_metropole",
|
|
70
|
+
"longitude_l93",
|
|
71
|
+
"longitude_wgs_fr_metropole",
|
|
72
|
+
"pays",
|
|
73
|
+
"region",
|
|
74
|
+
"code_csp_insee",
|
|
75
|
+
"code_import",
|
|
76
|
+
"code_rna",
|
|
77
|
+
"code_waldec",
|
|
78
|
+
"csp_insee",
|
|
79
|
+
"date_fr",
|
|
80
|
+
"insee_ape700",
|
|
81
|
+
"sexe",
|
|
82
|
+
"siren",
|
|
83
|
+
"siret",
|
|
84
|
+
"tel_fr",
|
|
85
|
+
"uai",
|
|
86
|
+
"jour_de_la_semaine",
|
|
87
|
+
"mois_de_annee",
|
|
88
|
+
"iso_country_code_alpha2",
|
|
89
|
+
"iso_country_code_alpha3",
|
|
90
|
+
"iso_country_code_numeric",
|
|
91
|
+
"json_geojson",
|
|
92
|
+
"latitude_wgs",
|
|
93
|
+
"latlon_wgs",
|
|
94
|
+
"longitude_wgs",
|
|
95
|
+
"lonlat_wgs",
|
|
96
|
+
"booleen",
|
|
97
|
+
"email",
|
|
98
|
+
"float",
|
|
99
|
+
"int",
|
|
100
|
+
"json",
|
|
101
|
+
"money",
|
|
102
|
+
"mongo_object_id",
|
|
103
|
+
"percent",
|
|
104
|
+
"twitter",
|
|
105
|
+
"url",
|
|
106
|
+
"uuid",
|
|
107
|
+
"date",
|
|
108
|
+
"datetime_aware",
|
|
109
|
+
"datetime_naive",
|
|
110
|
+
"datetime_rfc822",
|
|
111
|
+
"year",
|
|
112
|
+
]
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# flake8: noqa
|
|
2
1
|
from .FR.geo import (
|
|
3
2
|
adresse,
|
|
4
3
|
code_commune_insee,
|
|
@@ -42,3 +41,54 @@ from .geo import (
|
|
|
42
41
|
)
|
|
43
42
|
from .other import booleen, email, float, int, money, mongo_object_id, twitter, url, uuid
|
|
44
43
|
from .temp import date, datetime_rfc822, year
|
|
44
|
+
|
|
45
|
+
__all__ = [
|
|
46
|
+
"adresse",
|
|
47
|
+
"code_commune_insee",
|
|
48
|
+
"code_departement",
|
|
49
|
+
"code_fantoir",
|
|
50
|
+
"code_postal",
|
|
51
|
+
"code_region",
|
|
52
|
+
"commune",
|
|
53
|
+
"departement",
|
|
54
|
+
"insee_canton",
|
|
55
|
+
"latitude_l93",
|
|
56
|
+
"latitude_wgs_fr_metropole",
|
|
57
|
+
"longitude_l93",
|
|
58
|
+
"longitude_wgs_fr_metropole",
|
|
59
|
+
"pays",
|
|
60
|
+
"region",
|
|
61
|
+
"code_csp_insee",
|
|
62
|
+
"code_rna",
|
|
63
|
+
"code_waldec",
|
|
64
|
+
"csp_insee",
|
|
65
|
+
"date_fr",
|
|
66
|
+
"insee_ape700",
|
|
67
|
+
"sexe",
|
|
68
|
+
"siren",
|
|
69
|
+
"siret",
|
|
70
|
+
"tel_fr",
|
|
71
|
+
"uai",
|
|
72
|
+
"iso_country_code_alpha2",
|
|
73
|
+
"iso_country_code_alpha3",
|
|
74
|
+
"iso_country_code_numeric",
|
|
75
|
+
"json_geojson",
|
|
76
|
+
"latitude_wgs",
|
|
77
|
+
"latlon_wgs",
|
|
78
|
+
"longitude_wgs",
|
|
79
|
+
"lonlat_wgs",
|
|
80
|
+
"jour_de_la_semaine",
|
|
81
|
+
"mois_de_annee",
|
|
82
|
+
"booleen",
|
|
83
|
+
"email",
|
|
84
|
+
"float",
|
|
85
|
+
"int",
|
|
86
|
+
"money",
|
|
87
|
+
"mongo_object_id",
|
|
88
|
+
"twitter",
|
|
89
|
+
"url",
|
|
90
|
+
"uuid",
|
|
91
|
+
"date",
|
|
92
|
+
"datetime_rfc822",
|
|
93
|
+
"year",
|
|
94
|
+
]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from ast import literal_eval
|
|
2
1
|
import logging
|
|
2
|
+
from ast import literal_eval
|
|
3
3
|
from time import time
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
@@ -36,8 +36,7 @@ def detect_continuous_variable(
|
|
|
36
36
|
value = value.replace(",", ".")
|
|
37
37
|
value = literal_eval(value)
|
|
38
38
|
return type(value)
|
|
39
|
-
|
|
40
|
-
except:
|
|
39
|
+
except Exception:
|
|
41
40
|
return False
|
|
42
41
|
|
|
43
42
|
if verbose:
|
csv_detective/load_tests.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: csv-detective
|
|
3
|
-
Version: 0.8.1.dev1720
|
|
3
|
+
Version: 0.8.1.dev1729
|
|
4
4
|
Summary: Detect tabular files column content
|
|
5
5
|
Author-email: Etalab <opendatateam@data.gouv.fr>
|
|
6
6
|
License: MIT
|
|
@@ -212,12 +212,21 @@ Organisations such as [data.gouv.fr](http://data.gouv.fr) aggregate huge amounts
|
|
|
212
212
|
|
|
213
213
|
An early version of this analysis of all resources on data.gouv.fr can be found [here](https://github.com/Leobouloc/data.gouv-exploration).
|
|
214
214
|
|
|
215
|
+
## Linting
|
|
216
|
+
|
|
217
|
+
Remember to format, lint, and sort imports with [Ruff](https://docs.astral.sh/ruff/) before committing (checks will remind you anyway):
|
|
218
|
+
```bash
|
|
219
|
+
pip install .[dev]
|
|
220
|
+
ruff check --fix .
|
|
221
|
+
ruff format .
|
|
222
|
+
```
|
|
223
|
+
|
|
215
224
|
## Release
|
|
216
225
|
|
|
217
226
|
The release process uses `bumpx`.
|
|
218
227
|
|
|
219
228
|
```shell
|
|
220
|
-
pip install -
|
|
229
|
+
pip install -e .[dev]
|
|
221
230
|
```
|
|
222
231
|
|
|
223
232
|
### Process
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
csv_detective/__init__.py,sha256=XY7pnoNHlocvyUiK8EQpJYPSQt5BRWWJD8KiPlvI9pU,164
|
|
2
2
|
csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
|
|
3
3
|
csv_detective/explore_csv.py,sha256=sEMza4Z27ac88fGq7tUiK1zlfvuftztHhHVoa0c2EVU,9191
|
|
4
|
-
csv_detective/load_tests.py,sha256=
|
|
4
|
+
csv_detective/load_tests.py,sha256=uVKweLq3cf-yB5ZZI-m9tBVs_SWNcOw8sDJa97TOJGo,2266
|
|
5
5
|
csv_detective/s3_utils.py,sha256=z1KTVVkdurMv21o-rZu7_aluMJnSi-d5uxnQbqT2NoI,1407
|
|
6
6
|
csv_detective/utils.py,sha256=u9I1tsyMfVr2eIYiGCD7Iu30d55H3za44-N3cV2nj8M,1013
|
|
7
7
|
csv_detective/validate.py,sha256=RLHXLrRuynkdcvHUlSEbyglPvdbNYlT1Z4nQI-BdYdA,2898
|
|
8
|
-
csv_detective/detect_fields/__init__.py,sha256=
|
|
8
|
+
csv_detective/detect_fields/__init__.py,sha256=ZZ7u9zsMtCqPC2xxeLp57UTCbqpKFJi6D_LO1ew15BU,1980
|
|
9
9
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=Q5tVRMW5QdFLfiNm42JmIwNRuBR5ZI3dQhzHPzXVnzo,1676
|
|
@@ -72,7 +72,7 @@ csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=bEfWvXx_GNCRU
|
|
|
72
72
|
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=GtQo55SrrXfoT-L7ZXW63jrlAYvNT5m56wMfhuY3pyI,836
|
|
73
73
|
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=-pFdIIPgaLq2_QbFJ9zwy4YIwZuC73F0A_cNDntTuvQ,512
|
|
74
74
|
csv_detective/detect_fields/temp/year/__init__.py,sha256=gHchVciZExbGZLMBcbBaDXB0IgGptkQc4RhfSOMY0Ww,194
|
|
75
|
-
csv_detective/detect_labels/__init__.py,sha256=
|
|
75
|
+
csv_detective/detect_labels/__init__.py,sha256=93s93DRNeFw9fJiGp0rW3iRWZX3WOeVau2PAaF4QlPE,1777
|
|
76
76
|
csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
77
|
csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
78
|
csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=fNWFW-Wo3n6azDBfmi0J0qnzP-p2StLxCc9eNiE9NNE,346
|
|
@@ -136,7 +136,7 @@ csv_detective/detection/formats.py,sha256=dzJPdi2rP2jTHZBk9UHpJL3c5N-PSohCymHs-O
|
|
|
136
136
|
csv_detective/detection/headers.py,sha256=y5iR4jWH5fUtAH_Zg0zxWSVG_INCHlXJFMbhPpI2YMo,1148
|
|
137
137
|
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
138
138
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
139
|
-
csv_detective/detection/variables.py,sha256=
|
|
139
|
+
csv_detective/detection/variables.py,sha256=wfsA_MOk14TPMOY7gkvpTGpo9-USzMnFaAou3MPHqxc,3536
|
|
140
140
|
csv_detective/output/__init__.py,sha256=f-UFv_iULpVF_Fy39H4sfACEnrthjK4N3mCAVPkjnKw,1860
|
|
141
141
|
csv_detective/output/dataframe.py,sha256=UpLuSxx_SFbKpem1n-xY7jF16MXGpKQYEWjaSMIiB4s,2215
|
|
142
142
|
csv_detective/output/example.py,sha256=XrnPS_uC0cICn7tgnLWNctpUbnPzl7fIMzNTzJEWGJc,8655
|
|
@@ -150,19 +150,19 @@ csv_detective/parsing/csv.py,sha256=qZFLOT3YCPoHF0svfVfQBnS8eHtucjDZ7dFITAPgLhc,
|
|
|
150
150
|
csv_detective/parsing/excel.py,sha256=ULUDw76z6hs1Xm2yL9KBM0EOvIsfBLkxwqTZfDEx6aE,7045
|
|
151
151
|
csv_detective/parsing/load.py,sha256=C3M8nvgWenOb8aDFi5dpDGCoAw9EBqr4EB63zbz2M14,3699
|
|
152
152
|
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
153
|
-
csv_detective-0.8.1.
|
|
153
|
+
csv_detective-0.8.1.dev1729.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
154
154
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
155
|
tests/test_example.py,sha256=iO4RxMHZxnBAiKm6fsFar5OVg8hYKnqNZCw0SUnEuQQ,1972
|
|
156
156
|
tests/test_fields.py,sha256=Y2mBfV9ZdxTHYwHnkzGbpo1k_qJRLC8nU-zzAUxFmAE,11964
|
|
157
157
|
tests/test_file.py,sha256=YuVbSfeo_ASPiLT8CyxXqJENcDpj4wAFXzLwu_GzsOA,8437
|
|
158
158
|
tests/test_labels.py,sha256=Y0XlOpztCyV65pk7iAS_nMMfdysoBujlBmz10vHul9A,469
|
|
159
|
-
tests/test_structure.py,sha256=
|
|
159
|
+
tests/test_structure.py,sha256=GRDYKy0UcdqlN4qglzsRC0puFj5cb-SVvONjvcPvtAA,1400
|
|
160
160
|
tests/test_validation.py,sha256=ie-Xf0vk6-M6GQq-x7kY5yse1EmXfxQkbaV7fR3fvYo,3308
|
|
161
161
|
venv/bin/activate_this.py,sha256=NRy3waFmwW1pOaNUp33wNN0vD1Kzkd-zXX-Sgl4EiVI,1286
|
|
162
162
|
venv/bin/jp.py,sha256=7z7dvRg0M7HzpZG4ssQID7nScjvQx7bcYTxJWDOrS6E,1717
|
|
163
163
|
venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
|
|
164
|
-
csv_detective-0.8.1.
|
|
165
|
-
csv_detective-0.8.1.
|
|
166
|
-
csv_detective-0.8.1.
|
|
167
|
-
csv_detective-0.8.1.
|
|
168
|
-
csv_detective-0.8.1.
|
|
164
|
+
csv_detective-0.8.1.dev1729.dist-info/METADATA,sha256=d8206Q0vrz70oOi2MG0ECreuwWkNUcCtkU_bi9HBFMI,9767
|
|
165
|
+
csv_detective-0.8.1.dev1729.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
166
|
+
csv_detective-0.8.1.dev1729.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
167
|
+
csv_detective-0.8.1.dev1729.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
|
|
168
|
+
csv_detective-0.8.1.dev1729.dist-info/RECORD,,
|
tests/test_structure.py
CHANGED
|
File without changes
|
{csv_detective-0.8.1.dev1720.dist-info → csv_detective-0.8.1.dev1729.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1720.dist-info → csv_detective-0.8.1.dev1729.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1720.dist-info → csv_detective-0.8.1.dev1729.dist-info}/top_level.txt
RENAMED
|
File without changes
|