csv-detective 0.8.1.dev1380__py3-none-any.whl → 0.8.1.dev1416__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detect_fields/other/url/__init__.py +7 -6
- csv_detective/utils.py +1 -0
- {csv_detective-0.8.1.dev1380.data → csv_detective-0.8.1.dev1416.data}/data/share/csv_detective/CHANGELOG.md +1 -0
- {csv_detective-0.8.1.dev1380.dist-info → csv_detective-0.8.1.dev1416.dist-info}/METADATA +1 -1
- {csv_detective-0.8.1.dev1380.dist-info → csv_detective-0.8.1.dev1416.dist-info}/RECORD +12 -12
- {csv_detective-0.8.1.dev1380.dist-info → csv_detective-0.8.1.dev1416.dist-info}/WHEEL +1 -1
- tests/test_fields.py +11 -2
- {csv_detective-0.8.1.dev1380.data → csv_detective-0.8.1.dev1416.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.8.1.dev1380.data → csv_detective-0.8.1.dev1416.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.8.1.dev1380.dist-info → csv_detective-0.8.1.dev1416.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1380.dist-info → csv_detective-0.8.1.dev1416.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.8.1.dev1380.dist-info → csv_detective-0.8.1.dev1416.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,14 @@
|
|
|
1
|
+
import re
|
|
1
2
|
|
|
2
3
|
PROPORTION = 1
|
|
4
|
+
url_pattern = re.compile(
|
|
5
|
+
r"^((https?|ftp)://|www\.)(([A-Za-z0-9-]+\.)+[A-Za-z]{2,6})"
|
|
6
|
+
r"(/[A-Za-z0-9._~:/?#[@!$&'()*+,;=%-]*)?$"
|
|
7
|
+
)
|
|
3
8
|
|
|
4
9
|
|
|
5
10
|
def _is(val):
|
|
6
|
-
|
|
11
|
+
"""Detects urls"""
|
|
7
12
|
if not isinstance(val, str):
|
|
8
13
|
return False
|
|
9
|
-
|
|
10
|
-
b = 'www.' in val
|
|
11
|
-
c = any([x in val for x in ['.fr', '.com', '.org', '.gouv', '.net']])
|
|
12
|
-
d = not ('@' in val)
|
|
13
|
-
return (a or b or c) and d
|
|
14
|
+
return bool(url_pattern.match(val))
|
csv_detective/utils.py
CHANGED
|
@@ -25,6 +25,7 @@ def display_logs_depending_process_time(prompt: str, duration: float):
|
|
|
25
25
|
|
|
26
26
|
def is_url(file_path: str) -> bool:
|
|
27
27
|
# could be more sophisticated if needed
|
|
28
|
+
# using the URL detection test was considered but too broad (schema required to use requests)
|
|
28
29
|
return file_path.startswith('http')
|
|
29
30
|
|
|
30
31
|
|
|
@@ -3,7 +3,7 @@ csv_detective/cli.py,sha256=itooHtpyfC6DUsL_DchPKe1xo7m0MYJIp1L4R8eqoTk,1401
|
|
|
3
3
|
csv_detective/explore_csv.py,sha256=IT1-9TbS78p6oeDpQ5T6DQ93xQbobcscyBQb6nh86H4,9082
|
|
4
4
|
csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
|
|
5
5
|
csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
|
|
6
|
-
csv_detective/utils.py,sha256
|
|
6
|
+
csv_detective/utils.py,sha256=-tIs9yV7RJPGj65lQ7LjRGch6Iws9UeuIPQsd2uUUJM,1025
|
|
7
7
|
csv_detective/validate.py,sha256=4e7f8bNXPU9GqNx4QXXiaoINyotozbL52JB6psVAjyY,2631
|
|
8
8
|
csv_detective/detect_fields/__init__.py,sha256=7Tz0Niaz0BboA3YVsp_6WPA6ywciwDN4-lOy_Ie_0Y8,976
|
|
9
9
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -63,7 +63,7 @@ csv_detective/detect_fields/other/money/__init__.py,sha256=g_ZwBZXl9LhldwFYQotC5
|
|
|
63
63
|
csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=7fcrHsOZAqXp2_N0IjPskYJ_qi4xRlo9iyNNDQVLzsU,156
|
|
64
64
|
csv_detective/detect_fields/other/percent/__init__.py,sha256=vgpekNOPBRuunoVBXMi81rwHv4uSOhe78pbVtQ5SBO8,177
|
|
65
65
|
csv_detective/detect_fields/other/twitter/__init__.py,sha256=qbwLKsTBRFQ4PyTNVeEZ5Hkf5Wwi3ZKclLER_V0YO3g,154
|
|
66
|
-
csv_detective/detect_fields/other/url/__init__.py,sha256=
|
|
66
|
+
csv_detective/detect_fields/other/url/__init__.py,sha256=L7h9fZldh1w86XwCx0x3Q1TXSJ_nIId1C-l1yFzZYrA,299
|
|
67
67
|
csv_detective/detect_fields/other/uuid/__init__.py,sha256=3-z0fDax29SJc57zPjNGR6DPICJu6gfuNGC5L3jh4d0,223
|
|
68
68
|
csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
69
69
|
csv_detective/detect_fields/temp/date/__init__.py,sha256=1a_Ra9fmT4wgGMrcknXP7eN7A2QiaMF0Yjy0-BMihtA,987
|
|
@@ -147,19 +147,19 @@ csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,
|
|
|
147
147
|
csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
|
|
148
148
|
csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
|
|
149
149
|
csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
|
|
150
|
-
csv_detective-0.8.1.
|
|
151
|
-
csv_detective-0.8.1.
|
|
152
|
-
csv_detective-0.8.1.
|
|
153
|
-
csv_detective-0.8.1.
|
|
150
|
+
csv_detective-0.8.1.dev1416.data/data/share/csv_detective/CHANGELOG.md,sha256=Ar1X9WX1CVoStDzDEOo5O3P0DgRtUUmo70KAYlWLJyQ,8443
|
|
151
|
+
csv_detective-0.8.1.dev1416.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
152
|
+
csv_detective-0.8.1.dev1416.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
|
|
153
|
+
csv_detective-0.8.1.dev1416.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
154
154
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
155
|
tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
|
|
156
|
-
tests/test_fields.py,sha256=
|
|
156
|
+
tests/test_fields.py,sha256=d2tNvjtal6ZbO646x1GDbp_CGgp-EIcdg2SgMG72J6E,10270
|
|
157
157
|
tests/test_file.py,sha256=9APE1d43lQ8Dk8lwJFNUK_YekYYsQ0ae2_fgpcPE9mk,8116
|
|
158
158
|
tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
|
|
159
159
|
tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
|
|
160
160
|
tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
|
|
161
|
-
csv_detective-0.8.1.
|
|
162
|
-
csv_detective-0.8.1.
|
|
163
|
-
csv_detective-0.8.1.
|
|
164
|
-
csv_detective-0.8.1.
|
|
165
|
-
csv_detective-0.8.1.
|
|
161
|
+
csv_detective-0.8.1.dev1416.dist-info/METADATA,sha256=aCmQVKUNFvJLzTS8DHELQme0GS9jwrHGod4JLWIGt1o,1386
|
|
162
|
+
csv_detective-0.8.1.dev1416.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
163
|
+
csv_detective-0.8.1.dev1416.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
164
|
+
csv_detective-0.8.1.dev1416.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
165
|
+
csv_detective-0.8.1.dev1416.dist-info/RECORD,,
|
tests/test_fields.py
CHANGED
|
@@ -293,8 +293,17 @@ fields = {
|
|
|
293
293
|
False: ["adresse@mail"],
|
|
294
294
|
},
|
|
295
295
|
url: {
|
|
296
|
-
True: [
|
|
297
|
-
|
|
296
|
+
True: [
|
|
297
|
+
"www.data.gouv.fr",
|
|
298
|
+
"http://data.gouv.fr",
|
|
299
|
+
"https://www.youtube.com/@data-gouv-fr",
|
|
300
|
+
(
|
|
301
|
+
"https://tabular-api.data.gouv.fr/api/resources/"
|
|
302
|
+
"aaaaaaaa-1111-bbbb-2222-cccccccccccc/data/"
|
|
303
|
+
"?score__greater=0.9&decompte__exact=13"
|
|
304
|
+
),
|
|
305
|
+
],
|
|
306
|
+
False: ["tmp@data.gouv.fr"],
|
|
298
307
|
},
|
|
299
308
|
uuid: {
|
|
300
309
|
True: ["884762be-51f3-44c3-b811-1e14c5d89262"],
|
|
File without changes
|
|
File without changes
|
{csv_detective-0.8.1.dev1380.dist-info → csv_detective-0.8.1.dev1416.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
{csv_detective-0.8.1.dev1380.dist-info → csv_detective-0.8.1.dev1416.dist-info}/top_level.txt
RENAMED
|
File without changes
|