csv-detective 0.7.5.dev1298__py3-none-any.whl → 0.7.5.dev1320__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detect_fields/FR/other/code_import/__init__.py +9 -0
- csv_detective/detect_fields/FR/other/code_waldec/__init__.py +2 -5
- csv_detective/detect_fields/__init__.py +1 -0
- csv_detective/validate.py +4 -2
- {csv_detective-0.7.5.dev1298.data → csv_detective-0.7.5.dev1320.data}/data/share/csv_detective/CHANGELOG.md +1 -0
- {csv_detective-0.7.5.dev1298.dist-info → csv_detective-0.7.5.dev1320.dist-info}/METADATA +1 -1
- {csv_detective-0.7.5.dev1298.dist-info → csv_detective-0.7.5.dev1320.dist-info}/RECORD +14 -13
- {csv_detective-0.7.5.dev1298.dist-info → csv_detective-0.7.5.dev1320.dist-info}/WHEEL +1 -1
- tests/test_fields.py +6 -1
- {csv_detective-0.7.5.dev1298.data → csv_detective-0.7.5.dev1320.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1298.data → csv_detective-0.7.5.dev1320.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.7.5.dev1298.dist-info → csv_detective-0.7.5.dev1320.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.7.5.dev1298.dist-info → csv_detective-0.7.5.dev1320.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1298.dist-info → csv_detective-0.7.5.dev1320.dist-info}/top_level.txt +0 -0
|
@@ -1,12 +1,9 @@
|
|
|
1
1
|
import re
|
|
2
2
|
|
|
3
3
|
PROPORTION = 0.9
|
|
4
|
-
regex =
|
|
5
|
-
r'^\d{3}\D\d{1,10}$|^\d\D\d\D\d{10}$|^\d{3}\D{3}\d{1,10}$|^\d{3}\D\d{4}\D\d{1,10}'
|
|
6
|
-
r'$|^\d{3}\D\d{2}[-]\d{3}$|^\d\D\d\D\d{2}\D\d{1,8}$'
|
|
7
|
-
)
|
|
4
|
+
regex = r"^W\d[\dA-Z]\d{7}$"
|
|
8
5
|
|
|
9
6
|
|
|
10
7
|
def _is(val):
|
|
11
|
-
|
|
8
|
+
"""Repere le code Waldec"""
|
|
12
9
|
return isinstance(val, str) and bool(re.match(regex, val))
|
csv_detective/validate.py
CHANGED
|
@@ -42,7 +42,8 @@ def validate(
|
|
|
42
42
|
any(col_name not in list(table.columns) for col_name in previous_analysis["columns"])
|
|
43
43
|
or any(col_name not in list(previous_analysis["columns"].keys()) for col_name in table.columns)
|
|
44
44
|
):
|
|
45
|
-
|
|
45
|
+
if verbose:
|
|
46
|
+
logging.warning("> Columns do not match, proceeding with full analysis")
|
|
46
47
|
return False, table, analysis
|
|
47
48
|
for col_name, args in previous_analysis["columns"].items():
|
|
48
49
|
if verbose:
|
|
@@ -55,7 +56,8 @@ def validate(
|
|
|
55
56
|
if skipna:
|
|
56
57
|
col_data = col_data.loc[~col_data.isna()]
|
|
57
58
|
if not col_data.apply(test_func).all():
|
|
58
|
-
|
|
59
|
+
if verbose:
|
|
60
|
+
logging.warning("> Test failed, proceeding with full analysis")
|
|
59
61
|
return False, table, analysis
|
|
60
62
|
if verbose:
|
|
61
63
|
logging.info("> All checks successful")
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
- Add validation function and associated flow [#112](https://github.com/datagouv/csv-detective/pull/112)
|
|
17
17
|
- Better float detection [#113](https://github.com/datagouv/csv-detective/pull/113)
|
|
18
18
|
- Refactor fields tests [#114](https://github.com/datagouv/csv-detective/pull/114)
|
|
19
|
+
- Better code waldec and add code import [#116](https://github.com/datagouv/csv-detective/pull/116)
|
|
19
20
|
|
|
20
21
|
## 0.7.4 (2024-11-15)
|
|
21
22
|
|
|
@@ -4,8 +4,8 @@ csv_detective/explore_csv.py,sha256=ocWlUEtuwZ-6bjDc6gfhC2-6DljMVhvXhHrfICCXGfQ,
|
|
|
4
4
|
csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
|
|
5
5
|
csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
|
|
6
6
|
csv_detective/utils.py,sha256=Bx_1k4Sdpd5PCjuAy4AeayCmmw7TMR_zgtKIHNLi5g0,1157
|
|
7
|
-
csv_detective/validate.py,sha256=
|
|
8
|
-
csv_detective/detect_fields/__init__.py,sha256=
|
|
7
|
+
csv_detective/validate.py,sha256=0wSi5GgKPRW3m66413a-9Uti1vBRam5pQxVA9Dc5jQ8,2368
|
|
8
|
+
csv_detective/detect_fields/__init__.py,sha256=7Tz0Niaz0BboA3YVsp_6WPA6ywciwDN4-lOy_Ie_0Y8,976
|
|
9
9
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
|
|
@@ -26,8 +26,9 @@ csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=JbFKDd4jAnd9yb7YqP3
|
|
|
26
26
|
csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
27
|
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=SRWJvg3Ikyjmop9iL14igTjxNGpO-QB3fpADI_bLYEY,566
|
|
28
28
|
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
|
|
29
|
+
csv_detective/detect_fields/FR/other/code_import/__init__.py,sha256=zJ9YfPa5p--uHNQFeO1gTjxDy2Um_r-MxQd29VBNjFw,243
|
|
29
30
|
csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z0RjMBt1--ZL7Jd1RsHAQCCbTAQk_BnlnTq8VF1o_VA,146
|
|
30
|
-
csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=
|
|
31
|
+
csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=41SYNzCzUFh4trQlwG-9UC0-1Wi4fTcv8Byi_dd9Lq4,168
|
|
31
32
|
csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=lvcaVKgOPrCaZb-Y1-wYCbLYB_CQjCJFNAzfWDwtTVE,496
|
|
32
33
|
csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
|
|
33
34
|
csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=kMV52djlG0y4o0ELEZuvTv_FvooYOgTnV1aWhycFJDc,284
|
|
@@ -147,19 +148,19 @@ csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,
|
|
|
147
148
|
csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
|
|
148
149
|
csv_detective/parsing/load.py,sha256=SpP0pfxswOAPPpwbZfoP1blh0EKV5VMs0TpTgQJKzjs,3621
|
|
149
150
|
csv_detective/parsing/text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
|
|
150
|
-
csv_detective-0.7.5.
|
|
151
|
-
csv_detective-0.7.5.
|
|
152
|
-
csv_detective-0.7.5.
|
|
153
|
-
csv_detective-0.7.5.
|
|
151
|
+
csv_detective-0.7.5.dev1320.data/data/share/csv_detective/CHANGELOG.md,sha256=aFDguybPGcPheztzpQNq-YVZZW1n8prG1txK4b32DhM,8084
|
|
152
|
+
csv_detective-0.7.5.dev1320.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
153
|
+
csv_detective-0.7.5.dev1320.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
|
|
154
|
+
csv_detective-0.7.5.dev1320.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
154
155
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
156
|
tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
|
|
156
|
-
tests/test_fields.py,sha256=
|
|
157
|
+
tests/test_fields.py,sha256=E6kEsp6_W56WW6FXWUl7hggsJv-vsKuOaJ9JLoFmrUw,9964
|
|
157
158
|
tests/test_file.py,sha256=9APE1d43lQ8Dk8lwJFNUK_YekYYsQ0ae2_fgpcPE9mk,8116
|
|
158
159
|
tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
|
|
159
160
|
tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
|
|
160
161
|
tests/test_validation.py,sha256=VwtBcnGAQ_eSFrBibWnMSTDjuy6y2JLlqvc3Zb667NY,479
|
|
161
|
-
csv_detective-0.7.5.
|
|
162
|
-
csv_detective-0.7.5.
|
|
163
|
-
csv_detective-0.7.5.
|
|
164
|
-
csv_detective-0.7.5.
|
|
165
|
-
csv_detective-0.7.5.
|
|
162
|
+
csv_detective-0.7.5.dev1320.dist-info/METADATA,sha256=lxx-TBya3ciYiOlxVY6YGAd7MVv7D6ChKGtl6gJDkRE,1386
|
|
163
|
+
csv_detective-0.7.5.dev1320.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
|
|
164
|
+
csv_detective-0.7.5.dev1320.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
165
|
+
csv_detective-0.7.5.dev1320.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
166
|
+
csv_detective-0.7.5.dev1320.dist-info/RECORD,,
|
tests/test_fields.py
CHANGED
|
@@ -24,6 +24,7 @@ from csv_detective.detect_fields.FR.geo import (
|
|
|
24
24
|
from csv_detective.detect_fields.FR.other import (
|
|
25
25
|
code_csp_insee,
|
|
26
26
|
code_rna,
|
|
27
|
+
code_import,
|
|
27
28
|
code_waldec,
|
|
28
29
|
csp_insee,
|
|
29
30
|
date_fr,
|
|
@@ -184,8 +185,12 @@ fields = {
|
|
|
184
185
|
"Wa1#89sf&h",
|
|
185
186
|
],
|
|
186
187
|
},
|
|
188
|
+
code_import: {
|
|
189
|
+
True: ["123S1871092288"],
|
|
190
|
+
False: ["AA751PEE00188854", "W123456789"],
|
|
191
|
+
},
|
|
187
192
|
code_waldec: {
|
|
188
|
-
True: ["
|
|
193
|
+
True: ["W123456789", "W2D1234567"],
|
|
189
194
|
False: ["AA751PEE00188854"],
|
|
190
195
|
},
|
|
191
196
|
csp_insee: {
|
|
File without changes
|
|
File without changes
|
{csv_detective-0.7.5.dev1298.dist-info → csv_detective-0.7.5.dev1320.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
{csv_detective-0.7.5.dev1298.dist-info → csv_detective-0.7.5.dev1320.dist-info}/top_level.txt
RENAMED
|
File without changes
|