csv-detective 0.7.5.dev1277__py3-none-any.whl → 0.7.5.dev1298__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +1 -1
- csv_detective/detect_fields/__init__.py +6 -4
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py +7 -7
- csv_detective/detect_fields/other/float/__init__.py +4 -4
- csv_detective/detect_fields/other/money/__init__.py +11 -0
- csv_detective/detect_fields/other/percent/__init__.py +9 -0
- csv_detective/detection/formats.py +145 -0
- csv_detective/explore_csv.py +94 -222
- csv_detective/load_tests.py +62 -0
- csv_detective/output/__init__.py +64 -0
- csv_detective/output/dataframe.py +0 -0
- csv_detective/output/example.py +77 -77
- csv_detective/output/profile.py +0 -0
- csv_detective/output/schema.py +0 -0
- csv_detective/output/utils.py +0 -0
- csv_detective/utils.py +2 -0
- csv_detective/validate.py +70 -0
- {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/CHANGELOG.md +2 -0
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/METADATA +1 -1
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/RECORD +27 -20
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/WHEEL +1 -1
- tests/test_example.py +10 -10
- tests/test_fields.py +270 -415
- tests/test_file.py +19 -9
- tests/test_structure.py +6 -0
- tests/test_validation.py +18 -0
- {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/top_level.txt +0 -0
tests/test_fields.py
CHANGED
|
@@ -15,7 +15,9 @@ from csv_detective.detect_fields.FR.geo import (
|
|
|
15
15
|
departement,
|
|
16
16
|
insee_canton,
|
|
17
17
|
latitude_l93,
|
|
18
|
+
latitude_wgs_fr_metropole,
|
|
18
19
|
longitude_l93,
|
|
20
|
+
longitude_wgs_fr_metropole,
|
|
19
21
|
pays,
|
|
20
22
|
region,
|
|
21
23
|
)
|
|
@@ -24,31 +26,43 @@ from csv_detective.detect_fields.FR.other import (
|
|
|
24
26
|
code_rna,
|
|
25
27
|
code_waldec,
|
|
26
28
|
csp_insee,
|
|
29
|
+
date_fr,
|
|
30
|
+
insee_ape700,
|
|
27
31
|
sexe,
|
|
28
32
|
siren,
|
|
33
|
+
siret,
|
|
29
34
|
tel_fr,
|
|
35
|
+
uai,
|
|
30
36
|
)
|
|
31
|
-
from csv_detective.detect_fields.FR.temp import jour_de_la_semaine
|
|
37
|
+
from csv_detective.detect_fields.FR.temp import jour_de_la_semaine, mois_de_annee
|
|
32
38
|
from csv_detective.detect_fields.geo import (
|
|
33
39
|
iso_country_code_alpha2,
|
|
34
40
|
iso_country_code_alpha3,
|
|
35
41
|
iso_country_code_numeric,
|
|
42
|
+
json_geojson,
|
|
43
|
+
latitude_wgs,
|
|
44
|
+
latlon_wgs,
|
|
45
|
+
longitude_wgs,
|
|
36
46
|
)
|
|
37
47
|
from csv_detective.detect_fields.other import (
|
|
48
|
+
booleen,
|
|
38
49
|
email,
|
|
39
50
|
json,
|
|
51
|
+
money,
|
|
40
52
|
mongo_object_id,
|
|
53
|
+
percent,
|
|
54
|
+
twitter,
|
|
41
55
|
url,
|
|
42
56
|
uuid,
|
|
43
57
|
int as test_int,
|
|
44
58
|
float as test_float,
|
|
45
59
|
)
|
|
46
|
-
from csv_detective.detect_fields.temp import date, datetime_iso, datetime_rfc822, year
|
|
60
|
+
from csv_detective.detect_fields.temp import date, datetime, datetime_iso, datetime_rfc822, year
|
|
47
61
|
from csv_detective.detection.variables import (
|
|
48
62
|
detect_continuous_variable,
|
|
49
63
|
detect_categorical_variable,
|
|
50
64
|
)
|
|
51
|
-
from csv_detective.
|
|
65
|
+
from csv_detective.load_tests import return_all_tests
|
|
52
66
|
from csv_detective.output.dataframe import cast
|
|
53
67
|
|
|
54
68
|
|
|
@@ -94,420 +108,261 @@ def test_detect_continuous_variable():
|
|
|
94
108
|
assert res2.values and res2.values[0] == "cont"
|
|
95
109
|
|
|
96
110
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
assert iso_country_code_alpha2._is(val)
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
def test_do_not_match_iso_country_code():
|
|
342
|
-
val = "XX"
|
|
343
|
-
assert not iso_country_code_alpha2._is(val)
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
# iso_country_code alpha-3
|
|
347
|
-
def test_match_iso_country_code_alpha3():
|
|
348
|
-
val = "FRA"
|
|
349
|
-
assert iso_country_code_alpha3._is(val)
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
def test_do_not_match_iso_country_code_alpha3():
|
|
353
|
-
val = "ABC"
|
|
354
|
-
assert not iso_country_code_alpha3._is(val)
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
# iso_country_code numerique
|
|
358
|
-
def test_match_iso_country_code_numeric():
|
|
359
|
-
val = "250"
|
|
360
|
-
assert iso_country_code_numeric._is(val)
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
def test_do_not_match_iso_country_code_numeric():
|
|
364
|
-
val = "003"
|
|
365
|
-
assert not iso_country_code_numeric._is(val)
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
# jour de la semaine
|
|
369
|
-
def test_match_jour_de_la_semaine():
|
|
370
|
-
val = "lundi"
|
|
371
|
-
assert jour_de_la_semaine._is(val)
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
def test_do_not_match_jour_de_la_semaine():
|
|
375
|
-
val = "jour de la biere"
|
|
376
|
-
assert not jour_de_la_semaine._is(val)
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
# year
|
|
380
|
-
def test_match_year():
|
|
381
|
-
val = "2015"
|
|
382
|
-
assert year._is(val)
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
def test_do_not_match_year():
|
|
386
|
-
val = "20166"
|
|
387
|
-
assert not year._is(val)
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
# date
|
|
391
|
-
def test_match_date():
|
|
392
|
-
val = "1960-08-07"
|
|
393
|
-
assert date._is(val)
|
|
394
|
-
val = "12/02/2007"
|
|
395
|
-
assert date._is(val)
|
|
396
|
-
val = "15 jan 1985"
|
|
397
|
-
assert date._is(val)
|
|
398
|
-
val = "15 décembre 1985"
|
|
399
|
-
assert date._is(val)
|
|
400
|
-
val = "02 05 2003"
|
|
401
|
-
assert date._is(val)
|
|
402
|
-
val = "20030502"
|
|
403
|
-
assert date._is(val)
|
|
404
|
-
val = "1993-12/02"
|
|
405
|
-
assert date._is(val)
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
def test_do_not_match_date():
|
|
409
|
-
val = "1993-1993-1993"
|
|
410
|
-
assert not date._is(val)
|
|
411
|
-
val = "39-10-1993"
|
|
412
|
-
assert not date._is(val)
|
|
413
|
-
val = "19-15-1993"
|
|
414
|
-
assert not date._is(val)
|
|
415
|
-
val = "15 tambour 1985"
|
|
416
|
-
assert not date._is(val)
|
|
417
|
-
val = "12152003"
|
|
418
|
-
assert not date._is(val)
|
|
419
|
-
val = "20031512"
|
|
420
|
-
assert not date._is(val)
|
|
421
|
-
val = "02052003"
|
|
422
|
-
assert not date._is(val)
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
# datetime
|
|
426
|
-
def test_match_datetime():
|
|
427
|
-
val = "2021-06-22T10:20:10"
|
|
428
|
-
assert datetime_iso._is(val)
|
|
429
|
-
val = "2021-06-22T30:20:10"
|
|
430
|
-
assert not datetime_iso._is(val)
|
|
431
|
-
|
|
432
|
-
val = "Sun, 06 Nov 1994 08:49:37 GMT"
|
|
433
|
-
assert datetime_rfc822._is(val)
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
# siren
|
|
437
|
-
def test_match_siren():
|
|
438
|
-
val = "552 100 554"
|
|
439
|
-
assert siren._is(val)
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
def test_do_not_match_siren():
|
|
443
|
-
val = "42"
|
|
444
|
-
assert not siren._is(val)
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
# rna
|
|
448
|
-
def test_match_rna():
|
|
449
|
-
val = "W751515517"
|
|
450
|
-
assert code_rna._is(val)
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
def test_do_not_match_rna():
|
|
454
|
-
vals = [
|
|
455
|
-
"W111111111111111111111111111111111111",
|
|
456
|
-
"w143788974",
|
|
457
|
-
"W12",
|
|
458
|
-
"678W23456",
|
|
459
|
-
"165789325",
|
|
460
|
-
"Wa1#89sf&h",
|
|
461
|
-
]
|
|
462
|
-
for val in vals:
|
|
463
|
-
assert not code_rna._is(val)
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
def test_match_waldec():
|
|
467
|
-
val = "751P00188854"
|
|
468
|
-
assert code_waldec._is(val)
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
def test_do_not_match_waldec():
|
|
472
|
-
val = "AA751PEE00188854"
|
|
473
|
-
assert not code_waldec._is(val)
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
# json
|
|
477
|
-
def test_match_json():
|
|
478
|
-
val = '{"pomme": "fruit", "reponse": 42}'
|
|
479
|
-
assert json._is(val)
|
|
480
|
-
val = "[1,2,3,4]"
|
|
481
|
-
assert json._is(val)
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
def test_do_not_match_json():
|
|
485
|
-
val = '{"coordinates": [45.783753, 3.049342], "citycode": "63870"}'
|
|
486
|
-
assert not json._is(val)
|
|
487
|
-
val = "666"
|
|
488
|
-
assert not json._is(val)
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
# int
|
|
492
|
-
def test_match_int():
|
|
493
|
-
for val in ["1", "0", "1764", "-24"]:
|
|
494
|
-
assert test_int._is(val)
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
def test_not_match_int():
|
|
498
|
-
for val in ["01053", "1.2", "123_456", "+35"]:
|
|
499
|
-
assert not test_int._is(val)
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
# float
|
|
503
|
-
def test_match_float():
|
|
504
|
-
for val in ["1", "0", "1764", "-24", "1.2", "1863.23", "-12.7", "0.1"]:
|
|
505
|
-
assert test_float._is(val)
|
|
111
|
+
fields = {
|
|
112
|
+
adresse: {
|
|
113
|
+
True: ["rue du martyr"],
|
|
114
|
+
False: ["un batiment"],
|
|
115
|
+
},
|
|
116
|
+
code_commune_insee: {
|
|
117
|
+
True: ["91471", "01053"],
|
|
118
|
+
False: ["914712", "01000"],
|
|
119
|
+
},
|
|
120
|
+
code_departement: {
|
|
121
|
+
True: ["75", "2A", "2b", "974", "01"],
|
|
122
|
+
False: ["00", "96", "101"],
|
|
123
|
+
},
|
|
124
|
+
code_fantoir: {
|
|
125
|
+
True: ["7755A", "B150B", "ZA04C", "ZB03D"],
|
|
126
|
+
False: ["7755", "ZA99A"],
|
|
127
|
+
},
|
|
128
|
+
code_postal: {
|
|
129
|
+
True: ["75020", "01000"],
|
|
130
|
+
False: ["77777", "018339"],
|
|
131
|
+
},
|
|
132
|
+
code_region: {
|
|
133
|
+
True: ["32"],
|
|
134
|
+
False: ["55"],
|
|
135
|
+
},
|
|
136
|
+
commune: {
|
|
137
|
+
True: ["saint denis"],
|
|
138
|
+
False: ["new york", "lion"],
|
|
139
|
+
},
|
|
140
|
+
departement: {
|
|
141
|
+
True: ["essonne"],
|
|
142
|
+
False: ["alabama", "auvergne"],
|
|
143
|
+
},
|
|
144
|
+
insee_canton: {
|
|
145
|
+
True: ["nantua"],
|
|
146
|
+
False: ["california"],
|
|
147
|
+
},
|
|
148
|
+
latitude_l93: {
|
|
149
|
+
True: ["6037008", "7123528.5", "7124528,5"],
|
|
150
|
+
False: ["0", "-6734529.6", "7245669.8", "3422674,78", "32_34"],
|
|
151
|
+
},
|
|
152
|
+
longitude_l93: {
|
|
153
|
+
True: ["0", "-154", "1265783,45", "34723.4"],
|
|
154
|
+
False: ["1456669.8", "-776225", "346_3214"],
|
|
155
|
+
},
|
|
156
|
+
latitude_wgs_fr_metropole: {
|
|
157
|
+
True: ["42.5"],
|
|
158
|
+
False: ["22.5", "62.5"],
|
|
159
|
+
},
|
|
160
|
+
longitude_wgs_fr_metropole: {
|
|
161
|
+
True: ["-2.5"],
|
|
162
|
+
False: ["12.8"],
|
|
163
|
+
},
|
|
164
|
+
pays: {
|
|
165
|
+
True: ["france", "italie"],
|
|
166
|
+
False: ["amerique", "paris"],
|
|
167
|
+
},
|
|
168
|
+
region: {
|
|
169
|
+
True: ["bretagne", "ile-de-france"],
|
|
170
|
+
False: ["baviere", "overgne"],
|
|
171
|
+
},
|
|
172
|
+
code_csp_insee: {
|
|
173
|
+
True: ["121f"],
|
|
174
|
+
False: ["121x"],
|
|
175
|
+
},
|
|
176
|
+
code_rna: {
|
|
177
|
+
True: ["W751515517"],
|
|
178
|
+
False: [
|
|
179
|
+
"W111111111111111111111111111111111111",
|
|
180
|
+
"w143788974",
|
|
181
|
+
"W12",
|
|
182
|
+
"678W23456",
|
|
183
|
+
"165789325",
|
|
184
|
+
"Wa1#89sf&h",
|
|
185
|
+
],
|
|
186
|
+
},
|
|
187
|
+
code_waldec: {
|
|
188
|
+
True: ["751P00188854"],
|
|
189
|
+
False: ["AA751PEE00188854"],
|
|
190
|
+
},
|
|
191
|
+
csp_insee: {
|
|
192
|
+
True: ["employes de la poste"],
|
|
193
|
+
False: ["super-heros"],
|
|
194
|
+
},
|
|
195
|
+
sexe: {
|
|
196
|
+
True: ["homme"],
|
|
197
|
+
False: ["hermaphrodite"],
|
|
198
|
+
},
|
|
199
|
+
siren: {
|
|
200
|
+
True: ["552 100 554", "552100554"],
|
|
201
|
+
False: ["42"],
|
|
202
|
+
},
|
|
203
|
+
siret: {
|
|
204
|
+
True: ["13002526500013", "130 025 265 00013"],
|
|
205
|
+
False: ["13002526500012"],
|
|
206
|
+
},
|
|
207
|
+
uai: {
|
|
208
|
+
True: ["0422170F"],
|
|
209
|
+
False: ["04292E"],
|
|
210
|
+
},
|
|
211
|
+
date_fr: {
|
|
212
|
+
True: ["13 fevrier 1996"],
|
|
213
|
+
False: ["44 march 2025"],
|
|
214
|
+
},
|
|
215
|
+
insee_ape700: {
|
|
216
|
+
True: ["0116Z"],
|
|
217
|
+
False: ["0116A"]
|
|
218
|
+
},
|
|
219
|
+
tel_fr: {
|
|
220
|
+
True: ["0134643467"],
|
|
221
|
+
False: ["6625388263", "01288398"],
|
|
222
|
+
},
|
|
223
|
+
jour_de_la_semaine: {
|
|
224
|
+
True: ["lundi"],
|
|
225
|
+
False: ["jour de la biere"],
|
|
226
|
+
},
|
|
227
|
+
mois_de_annee: {
|
|
228
|
+
True: ["juin", "décembre"],
|
|
229
|
+
False: ["november"],
|
|
230
|
+
},
|
|
231
|
+
iso_country_code_alpha2: {
|
|
232
|
+
True: ["FR"],
|
|
233
|
+
False: ["XX", "A", "FRA"],
|
|
234
|
+
},
|
|
235
|
+
iso_country_code_alpha3: {
|
|
236
|
+
True: ["FRA"],
|
|
237
|
+
False: ["XXX", "FR", "A"],
|
|
238
|
+
},
|
|
239
|
+
iso_country_code_numeric: {
|
|
240
|
+
True: ["250"],
|
|
241
|
+
False: ["003"],
|
|
242
|
+
},
|
|
243
|
+
json_geojson: {
|
|
244
|
+
True: [
|
|
245
|
+
'{"coordinates": [45.783753, 3.049342], "type": "63870"}',
|
|
246
|
+
'{"geometry": {"coordinates": [45.783753, 3.049342]}}',
|
|
247
|
+
],
|
|
248
|
+
False: ['{"pomme": "fruit", "reponse": 42}'],
|
|
249
|
+
},
|
|
250
|
+
latitude_wgs: {
|
|
251
|
+
True: ["43.2", "-22"],
|
|
252
|
+
False: ["100"],
|
|
253
|
+
},
|
|
254
|
+
latlon_wgs: {
|
|
255
|
+
True: ["43.2,-22.6", "-10.7,140", "-40.7, 10.8"],
|
|
256
|
+
False: ["0.1,192", "-102, 92"],
|
|
257
|
+
},
|
|
258
|
+
longitude_wgs: {
|
|
259
|
+
True: ["120", "-20.2"],
|
|
260
|
+
False: ["-200"],
|
|
261
|
+
},
|
|
262
|
+
booleen: {
|
|
263
|
+
True: ["oui", "0", "1", "yes", "false", "True"],
|
|
264
|
+
False: ["nein", "ja", "2", "-0"],
|
|
265
|
+
},
|
|
266
|
+
email: {
|
|
267
|
+
True: ["cdo_intern@data.gouv.fr"],
|
|
268
|
+
False: ["cdo@@gouv.sfd"],
|
|
269
|
+
},
|
|
270
|
+
json: {
|
|
271
|
+
True: ['{"pomme": "fruit", "reponse": 42}', "[1,2,3,4]"],
|
|
272
|
+
False: ['{"coordinates": [45.783753, 3.049342], "citycode": "63870"}', "{zefib:"],
|
|
273
|
+
},
|
|
274
|
+
money: {
|
|
275
|
+
True: ["120€", "-20.2$"],
|
|
276
|
+
False: ["200", "100 euros"],
|
|
277
|
+
},
|
|
278
|
+
mongo_object_id: {
|
|
279
|
+
True: ["62320e50f981bc2b57bcc044"],
|
|
280
|
+
False: ["884762be-51f3-44c3-b811-1e14c5d89262", "0230240284a66e"],
|
|
281
|
+
},
|
|
282
|
+
percent: {
|
|
283
|
+
True: ["120%", "-20.2%"],
|
|
284
|
+
False: ["200", "100 pourcents"],
|
|
285
|
+
},
|
|
286
|
+
twitter: {
|
|
287
|
+
True: ["@accueil1"],
|
|
288
|
+
False: ["adresse@mail"],
|
|
289
|
+
},
|
|
290
|
+
url: {
|
|
291
|
+
True: ["www.etalab.data.gouv.fr"],
|
|
292
|
+
False: ["une phrase avec un @ dedans"],
|
|
293
|
+
},
|
|
294
|
+
uuid: {
|
|
295
|
+
True: ["884762be-51f3-44c3-b811-1e14c5d89262"],
|
|
296
|
+
False: ["0610928327"],
|
|
297
|
+
},
|
|
298
|
+
test_int: {
|
|
299
|
+
True: ["1", "0", "1764", "-24"],
|
|
300
|
+
False: ["01053", "1.2", "123_456", "+35"],
|
|
301
|
+
},
|
|
302
|
+
test_float: {
|
|
303
|
+
True: ["1", "0", "1764", "-24", "1.2", "1863.23", "-12.7", "0.1"],
|
|
304
|
+
False: ["01053", "01053.89", "1e3", "123_456", "123_456.78", "+35", "+35.9"],
|
|
305
|
+
},
|
|
306
|
+
date: {
|
|
307
|
+
True: [
|
|
308
|
+
"1960-08-07",
|
|
309
|
+
"12/02/2007",
|
|
310
|
+
"15 jan 1985",
|
|
311
|
+
"15 décembre 1985",
|
|
312
|
+
"02 05 2003",
|
|
313
|
+
"20030502",
|
|
314
|
+
"1993-12/02",
|
|
315
|
+
],
|
|
316
|
+
False: [
|
|
317
|
+
"1993-1993-1993",
|
|
318
|
+
"39-10-1993",
|
|
319
|
+
"19-15-1993",
|
|
320
|
+
"15 tambour 1985",
|
|
321
|
+
"12152003",
|
|
322
|
+
"20031512",
|
|
323
|
+
"02052003",
|
|
324
|
+
],
|
|
325
|
+
},
|
|
326
|
+
datetime: {
|
|
327
|
+
True: ["2021-06-22T10:20:10"],
|
|
328
|
+
False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT"],
|
|
329
|
+
},
|
|
330
|
+
datetime_iso: {
|
|
331
|
+
True: ["2021-06-22T10:20:10"],
|
|
332
|
+
False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT"],
|
|
333
|
+
},
|
|
334
|
+
datetime_rfc822: {
|
|
335
|
+
True: ["Sun, 06 Nov 1994 08:49:37 GMT"],
|
|
336
|
+
False: ["2021-06-22T10:20:10"],
|
|
337
|
+
},
|
|
338
|
+
year: {
|
|
339
|
+
True: ["2015"],
|
|
340
|
+
False: ["20166"],
|
|
341
|
+
},
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
# we could also have a function here to add all True values of (almost)
|
|
345
|
+
# each field to the False values of all others
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def test_all_fields_have_tests():
|
|
349
|
+
all_tests = return_all_tests("ALL", "detect_fields")
|
|
350
|
+
for test in all_tests:
|
|
351
|
+
assert fields.get(test)
|
|
506
352
|
|
|
507
353
|
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
354
|
+
@pytest.mark.parametrize(
|
|
355
|
+
"args",
|
|
356
|
+
(
|
|
357
|
+
(field, value, valid)
|
|
358
|
+
for field in fields
|
|
359
|
+
for valid in [True, False]
|
|
360
|
+
for value in fields[field][valid]
|
|
361
|
+
),
|
|
362
|
+
)
|
|
363
|
+
def test_fields_with_values(args):
|
|
364
|
+
field, value, valid = args
|
|
365
|
+
assert field._is(value) is valid
|
|
511
366
|
|
|
512
367
|
|
|
513
368
|
@pytest.mark.parametrize(
|