csv-detective 0.9.3.dev2057__py3-none-any.whl → 0.9.3.dev2123__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detect_fields/temp/date/__init__.py +1 -2
- csv_detective/detect_fields/temp/datetime_aware/__init__.py +7 -6
- csv_detective/detect_fields/temp/datetime_naive/__init__.py +3 -3
- csv_detective/detection/engine.py +1 -2
- csv_detective/detection/formats.py +1 -2
- csv_detective/detection/headers.py +2 -2
- csv_detective/explore_csv.py +8 -9
- csv_detective/load_tests.py +1 -2
- csv_detective/output/__init__.py +3 -4
- csv_detective/output/dataframe.py +1 -2
- csv_detective/output/example.py +12 -12
- csv_detective/output/schema.py +1 -2
- csv_detective/parsing/excel.py +2 -3
- csv_detective/parsing/load.py +3 -4
- csv_detective/utils.py +1 -2
- csv_detective/validate.py +4 -5
- {csv_detective-0.9.3.dev2057.dist-info → csv_detective-0.9.3.dev2123.dist-info}/METADATA +2 -2
- {csv_detective-0.9.3.dev2057.dist-info → csv_detective-0.9.3.dev2123.dist-info}/RECORD +24 -24
- tests/test_fields.py +4 -3
- venv/bin/activate_this.py +1 -1
- {csv_detective-0.9.3.dev2057.dist-info → csv_detective-0.9.3.dev2123.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.3.dev2057.dist-info → csv_detective-0.9.3.dev2123.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.9.3.dev2057.dist-info → csv_detective-0.9.3.dev2123.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.9.3.dev2057.dist-info → csv_detective-0.9.3.dev2123.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from datetime import datetime
|
|
3
|
-
from typing import Optional
|
|
4
3
|
|
|
5
4
|
from dateparser import parse as date_parser
|
|
6
5
|
from dateutil.parser import ParserError
|
|
@@ -10,7 +9,7 @@ PROPORTION = 1
|
|
|
10
9
|
# /!\ this is only for dates, not datetimes which are handled by other utils
|
|
11
10
|
|
|
12
11
|
|
|
13
|
-
def date_casting(val: str) ->
|
|
12
|
+
def date_casting(val: str) -> datetime | None:
|
|
14
13
|
"""For performance reasons, we try first with dateutil and fallback on dateparser"""
|
|
15
14
|
try:
|
|
16
15
|
return dateutil_parser(val)
|
|
@@ -1,24 +1,25 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
from csv_detective.detect_fields.temp.date import aaaammjj_pattern, date_casting
|
|
5
5
|
|
|
6
6
|
PROPORTION = 1
|
|
7
7
|
threshold = 0.7
|
|
8
8
|
|
|
9
|
-
# matches AAAA-MM-JJTHH:MM:SS(.dddddd)±HH:MM with any of the listed separators for the date OR NO SEPARATOR
|
|
9
|
+
# matches AAAA-MM-JJTHH:MM:SS(.dddddd)(±HH:MM|Z) with any of the listed separators for the date OR NO SEPARATOR
|
|
10
10
|
pat = (
|
|
11
11
|
aaaammjj_pattern.replace("$", "")
|
|
12
|
-
+ r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})
|
|
12
|
+
+ r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})"
|
|
13
|
+
+ r"?(([+-](0\d|1[0-9]|2[0-3]):([0-5][0-9]))|Z)$"
|
|
13
14
|
)
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
def _is(val:
|
|
17
|
+
def _is(val: Any | None) -> bool:
|
|
17
18
|
"""Detects timezone-aware datetimes only"""
|
|
18
19
|
# early stops, to cut processing time
|
|
19
|
-
#
|
|
20
|
+
# 16 is the minimal length of a datetime format YYMMDDTHH:MM:SSZ
|
|
20
21
|
# 32 is the maximal length of an ISO datetime format YYYY-MM-DDTHH:MM:SS.dddddd+HH:MM, keeping some slack
|
|
21
|
-
if not isinstance(val, str) or len(val) > 35 or len(val) <
|
|
22
|
+
if not isinstance(val, str) or len(val) > 35 or len(val) < 16:
|
|
22
23
|
return False
|
|
23
24
|
# if usual format, no need to parse
|
|
24
25
|
if bool(re.match(pat, val)):
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
from csv_detective.detect_fields.temp.date import aaaammjj_pattern, date_casting
|
|
5
5
|
|
|
@@ -9,11 +9,11 @@ threshold = 0.7
|
|
|
9
9
|
# matches AAAA-MM-JJTHH:MM:SS(.dddddd)Z with any of the listed separators for the date OR NO SEPARATOR
|
|
10
10
|
pat = (
|
|
11
11
|
aaaammjj_pattern.replace("$", "")
|
|
12
|
-
+ r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})
|
|
12
|
+
+ r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})?$"
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def _is(val:
|
|
16
|
+
def _is(val: Any | None) -> bool:
|
|
17
17
|
"""Detects naive datetimes only"""
|
|
18
18
|
# early stops, to cut processing time
|
|
19
19
|
# 15 is the minimal length of a datetime format YYMMDDTHH:MM:SS
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from time import time
|
|
2
|
-
from typing import Optional
|
|
3
2
|
|
|
4
3
|
import magic
|
|
5
4
|
import requests
|
|
@@ -16,7 +15,7 @@ engine_to_file = {
|
|
|
16
15
|
}
|
|
17
16
|
|
|
18
17
|
|
|
19
|
-
def detect_engine(file_path: str, verbose=False) ->
|
|
18
|
+
def detect_engine(file_path: str, verbose=False) -> str | None:
|
|
20
19
|
if verbose:
|
|
21
20
|
start = time()
|
|
22
21
|
mapping = {
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections import defaultdict
|
|
3
|
-
from typing import Union
|
|
4
3
|
|
|
5
4
|
import numpy as np
|
|
6
5
|
import pandas as pd
|
|
@@ -22,7 +21,7 @@ def detect_formats(
|
|
|
22
21
|
table: pd.DataFrame,
|
|
23
22
|
analysis: dict,
|
|
24
23
|
file_path: str,
|
|
25
|
-
user_input_tests:
|
|
24
|
+
user_input_tests: str | list[str] = "ALL",
|
|
26
25
|
limited_output: bool = True,
|
|
27
26
|
skipna: bool = True,
|
|
28
27
|
verbose: bool = False,
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from time import time
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import TextIO
|
|
4
4
|
|
|
5
5
|
from csv_detective.utils import display_logs_depending_process_time
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
def detect_headers(file: TextIO, sep: str, verbose: bool = False) -> tuple[int,
|
|
8
|
+
def detect_headers(file: TextIO, sep: str, verbose: bool = False) -> tuple[int, list | None]:
|
|
9
9
|
"""Tests 10 first rows for possible header (in case header is not 1st row)"""
|
|
10
10
|
if verbose:
|
|
11
11
|
start = time()
|
csv_detective/explore_csv.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from time import time
|
|
3
|
-
from typing import Optional, Union
|
|
4
3
|
|
|
5
4
|
import pandas as pd
|
|
6
5
|
|
|
@@ -16,19 +15,19 @@ logging.basicConfig(level=logging.INFO)
|
|
|
16
15
|
def routine(
|
|
17
16
|
file_path: str,
|
|
18
17
|
num_rows: int = 500,
|
|
19
|
-
user_input_tests:
|
|
18
|
+
user_input_tests: str | list[str] = "ALL",
|
|
20
19
|
limited_output: bool = True,
|
|
21
|
-
save_results:
|
|
22
|
-
encoding:
|
|
23
|
-
sep:
|
|
20
|
+
save_results: bool | str = True,
|
|
21
|
+
encoding: str | None = None,
|
|
22
|
+
sep: str | None = None,
|
|
24
23
|
skipna: bool = True,
|
|
25
24
|
output_profile: bool = False,
|
|
26
25
|
output_schema: bool = False,
|
|
27
26
|
output_df: bool = False,
|
|
28
27
|
cast_json: bool = True,
|
|
29
28
|
verbose: bool = False,
|
|
30
|
-
sheet_name:
|
|
31
|
-
) ->
|
|
29
|
+
sheet_name: str | int | None = None,
|
|
30
|
+
) -> dict | tuple[dict, pd.DataFrame]:
|
|
32
31
|
"""Returns a dict with information about the table and possible
|
|
33
32
|
column contents, and if requested the DataFrame with columns cast according to analysis.
|
|
34
33
|
|
|
@@ -107,9 +106,9 @@ def validate_then_detect(
|
|
|
107
106
|
file_path: str,
|
|
108
107
|
previous_analysis: dict,
|
|
109
108
|
num_rows: int = 500,
|
|
110
|
-
user_input_tests:
|
|
109
|
+
user_input_tests: str | list[str] = "ALL",
|
|
111
110
|
limited_output: bool = True,
|
|
112
|
-
save_results:
|
|
111
|
+
save_results: bool | str = True,
|
|
113
112
|
skipna: bool = True,
|
|
114
113
|
output_profile: bool = False,
|
|
115
114
|
output_schema: bool = False,
|
csv_detective/load_tests.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import Union
|
|
3
2
|
|
|
4
3
|
from csv_detective import detect_fields, detect_labels # noqa
|
|
5
4
|
|
|
@@ -18,7 +17,7 @@ def get_all_packages(detect_type) -> list:
|
|
|
18
17
|
|
|
19
18
|
|
|
20
19
|
def return_all_tests(
|
|
21
|
-
user_input_tests:
|
|
20
|
+
user_input_tests: str | list,
|
|
22
21
|
detect_type: str,
|
|
23
22
|
) -> list:
|
|
24
23
|
"""
|
csv_detective/output/__init__.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
|
-
from typing import Optional, Union
|
|
4
3
|
|
|
5
4
|
import pandas as pd
|
|
6
5
|
|
|
@@ -17,14 +16,14 @@ def generate_output(
|
|
|
17
16
|
file_path: str,
|
|
18
17
|
num_rows: int = 500,
|
|
19
18
|
limited_output: bool = True,
|
|
20
|
-
save_results:
|
|
19
|
+
save_results: bool | str = True,
|
|
21
20
|
output_profile: bool = False,
|
|
22
21
|
output_schema: bool = False,
|
|
23
22
|
output_df: bool = False,
|
|
24
23
|
cast_json: bool = True,
|
|
25
24
|
verbose: bool = False,
|
|
26
|
-
sheet_name:
|
|
27
|
-
) ->
|
|
25
|
+
sheet_name: str | int | None = None,
|
|
26
|
+
) -> dict | tuple[dict, pd.DataFrame]:
|
|
28
27
|
if output_profile:
|
|
29
28
|
analysis["profile"] = create_profile(
|
|
30
29
|
table=table,
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from datetime import date, datetime
|
|
3
3
|
from time import time
|
|
4
|
-
from typing import Optional, Union
|
|
5
4
|
|
|
6
5
|
import pandas as pd
|
|
7
6
|
|
|
@@ -11,7 +10,7 @@ from csv_detective.detect_fields.temp.date import date_casting
|
|
|
11
10
|
from csv_detective.utils import display_logs_depending_process_time
|
|
12
11
|
|
|
13
12
|
|
|
14
|
-
def cast(value: str, _type: str) ->
|
|
13
|
+
def cast(value: str, _type: str) -> str | float | bool | date | datetime | None:
|
|
15
14
|
if not isinstance(value, str) or not value:
|
|
16
15
|
# None is the current default value in hydra, should we keep this?
|
|
17
16
|
return None
|
csv_detective/output/example.py
CHANGED
|
@@ -3,7 +3,7 @@ import random
|
|
|
3
3
|
import string
|
|
4
4
|
import uuid
|
|
5
5
|
from datetime import datetime
|
|
6
|
-
from typing import Any,
|
|
6
|
+
from typing import Any, Type
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
import requests
|
|
@@ -14,10 +14,10 @@ fake = Faker()
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def create_example_csv_file(
|
|
17
|
-
fields:
|
|
18
|
-
schema_path:
|
|
17
|
+
fields: dict | None = None,
|
|
18
|
+
schema_path: str | None = None,
|
|
19
19
|
file_length: int = 10,
|
|
20
|
-
output_name:
|
|
20
|
+
output_name: str | None = "example_file.csv",
|
|
21
21
|
output_sep: str = ";",
|
|
22
22
|
encoding: str = "utf-8",
|
|
23
23
|
ignore_required: bool = False,
|
|
@@ -49,8 +49,8 @@ def create_example_csv_file(
|
|
|
49
49
|
def _string(
|
|
50
50
|
length: int = 10,
|
|
51
51
|
required: bool = True,
|
|
52
|
-
pattern:
|
|
53
|
-
enum:
|
|
52
|
+
pattern: str | None = None,
|
|
53
|
+
enum: str | None = None,
|
|
54
54
|
) -> str:
|
|
55
55
|
if potential_skip(required):
|
|
56
56
|
return ""
|
|
@@ -70,7 +70,7 @@ def create_example_csv_file(
|
|
|
70
70
|
return str(uuid.uuid4())
|
|
71
71
|
|
|
72
72
|
def _date(
|
|
73
|
-
date_range:
|
|
73
|
+
date_range: list[str] | None = None,
|
|
74
74
|
format: str = "%Y-%m-%d",
|
|
75
75
|
required: bool = True,
|
|
76
76
|
) -> str:
|
|
@@ -99,7 +99,7 @@ def create_example_csv_file(
|
|
|
99
99
|
return fake.time(format)
|
|
100
100
|
|
|
101
101
|
def _datetime(
|
|
102
|
-
datetime_range:
|
|
102
|
+
datetime_range: list[str] | None = None,
|
|
103
103
|
format: str = "%Y-%m-%d %H-%M-%S",
|
|
104
104
|
required: bool = True,
|
|
105
105
|
) -> str:
|
|
@@ -123,11 +123,11 @@ def create_example_csv_file(
|
|
|
123
123
|
return f"http://{rstr.domainsafe()}.{rstr.letters(3)}/{rstr.urlsafe()}"
|
|
124
124
|
|
|
125
125
|
def _number(
|
|
126
|
-
num_type: Type[
|
|
127
|
-
num_range:
|
|
128
|
-
enum:
|
|
126
|
+
num_type: Type[int | float] = int,
|
|
127
|
+
num_range: list[float] | None = None,
|
|
128
|
+
enum: list | None = None,
|
|
129
129
|
required: bool = True,
|
|
130
|
-
) ->
|
|
130
|
+
) -> int | float:
|
|
131
131
|
assert num_range is None or len(num_range) == 2
|
|
132
132
|
if potential_skip(required):
|
|
133
133
|
return ""
|
csv_detective/output/schema.py
CHANGED
|
@@ -2,7 +2,6 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from time import time
|
|
5
|
-
from typing import Union
|
|
6
5
|
|
|
7
6
|
from csv_detective.utils import display_logs_depending_process_time
|
|
8
7
|
|
|
@@ -197,7 +196,7 @@ def get_constraints(format: str) -> dict:
|
|
|
197
196
|
|
|
198
197
|
def generate_table_schema(
|
|
199
198
|
analysis_report: dict,
|
|
200
|
-
save_results:
|
|
199
|
+
save_results: bool | str = True,
|
|
201
200
|
verbose: bool = False,
|
|
202
201
|
) -> dict:
|
|
203
202
|
"""Generates a table schema from the analysis report
|
csv_detective/parsing/excel.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from io import BytesIO
|
|
2
2
|
from time import time
|
|
3
|
-
from typing import Optional
|
|
4
3
|
|
|
5
4
|
import openpyxl
|
|
6
5
|
import pandas as pd
|
|
@@ -23,8 +22,8 @@ XLS_LIKE_EXT = NEW_EXCEL_EXT + OLD_EXCEL_EXT + OPEN_OFFICE_EXT
|
|
|
23
22
|
def parse_excel(
|
|
24
23
|
file_path: str,
|
|
25
24
|
num_rows: int = -1,
|
|
26
|
-
engine:
|
|
27
|
-
sheet_name:
|
|
25
|
+
engine: str | None = None,
|
|
26
|
+
sheet_name: str | None = None,
|
|
28
27
|
random_state: int = 42,
|
|
29
28
|
verbose: bool = False,
|
|
30
29
|
) -> tuple[pd.DataFrame, int, int, str, str, int]:
|
csv_detective/parsing/load.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from io import BytesIO, StringIO
|
|
2
|
-
from typing import Optional, Union
|
|
3
2
|
|
|
4
3
|
import pandas as pd
|
|
5
4
|
import requests
|
|
@@ -26,10 +25,10 @@ from .excel import (
|
|
|
26
25
|
def load_file(
|
|
27
26
|
file_path: str,
|
|
28
27
|
num_rows: int = 500,
|
|
29
|
-
encoding:
|
|
30
|
-
sep:
|
|
28
|
+
encoding: str | None = None,
|
|
29
|
+
sep: str | None = None,
|
|
31
30
|
verbose: bool = False,
|
|
32
|
-
sheet_name:
|
|
31
|
+
sheet_name: str | int | None = None,
|
|
33
32
|
) -> tuple[pd.DataFrame, dict]:
|
|
34
33
|
file_name = file_path.split("/")[-1]
|
|
35
34
|
engine = None
|
csv_detective/utils.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Optional, Union
|
|
3
2
|
|
|
4
3
|
import pandas as pd
|
|
5
4
|
|
|
@@ -31,7 +30,7 @@ def is_url(file_path: str) -> bool:
|
|
|
31
30
|
return file_path.startswith("http")
|
|
32
31
|
|
|
33
32
|
|
|
34
|
-
def cast_prevent_nan(value: float, _type: str) ->
|
|
33
|
+
def cast_prevent_nan(value: float, _type: str) -> float | int | None:
|
|
35
34
|
if _type not in {"int", "float"}:
|
|
36
35
|
raise ValueError(f"Invalid type was passed: {_type}")
|
|
37
36
|
return None if pd.isna(value) else eval(_type)(value)
|
csv_detective/validate.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Optional, Union
|
|
3
2
|
|
|
4
3
|
import pandas as pd
|
|
5
4
|
|
|
@@ -22,12 +21,12 @@ def validate(
|
|
|
22
21
|
file_path: str,
|
|
23
22
|
previous_analysis: dict,
|
|
24
23
|
num_rows: int = 500,
|
|
25
|
-
encoding:
|
|
26
|
-
sep:
|
|
24
|
+
encoding: str | None = None,
|
|
25
|
+
sep: str | None = None,
|
|
27
26
|
verbose: bool = False,
|
|
28
27
|
skipna: bool = True,
|
|
29
|
-
sheet_name:
|
|
30
|
-
) -> tuple[bool,
|
|
28
|
+
sheet_name: str | int | None = None,
|
|
29
|
+
) -> tuple[bool, pd.DataFrame | None, dict | None]:
|
|
31
30
|
"""
|
|
32
31
|
Verify is the given file has the same fields and types as in the previous analysis.
|
|
33
32
|
"""
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: csv-detective
|
|
3
|
-
Version: 0.9.3.
|
|
3
|
+
Version: 0.9.3.dev2123
|
|
4
4
|
Summary: Detect tabular files column content
|
|
5
5
|
Author-email: Etalab <opendatateam@data.gouv.fr>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Source, https://github.com/datagouv/csv_detective
|
|
8
8
|
Keywords: CSV,data processing,encoding,guess,parser,tabular
|
|
9
|
-
Requires-Python: <3.14,>=3.
|
|
9
|
+
Requires-Python: <3.14,>=3.10
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
12
|
Requires-Dist: dateparser<2,>=1.2.0
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
csv_detective/__init__.py,sha256=qvjDQBcw1ZIpapIrdGg1IUjBJ1q5KPhQda_05fevleg,126
|
|
2
2
|
csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
|
|
3
|
-
csv_detective/explore_csv.py,sha256=
|
|
4
|
-
csv_detective/load_tests.py,sha256=
|
|
5
|
-
csv_detective/utils.py,sha256=
|
|
6
|
-
csv_detective/validate.py,sha256=
|
|
3
|
+
csv_detective/explore_csv.py,sha256=uXMFu_IIsRh8ky_PfdPTDVco_j4jSDahzMW6rnjXveE,5726
|
|
4
|
+
csv_detective/load_tests.py,sha256=75iCxSlIeLUT-nH1fTaSjLofIPJ2AIBczkIZWaO_mkw,2234
|
|
5
|
+
csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
|
|
6
|
+
csv_detective/validate.py,sha256=bC9-OWH9lU45Ibma-QryvOdmcncDUBiNk0G2NADrjmQ,2841
|
|
7
7
|
csv_detective/detect_fields/__init__.py,sha256=ZZ7u9zsMtCqPC2xxeLp57UTCbqpKFJi6D_LO1ew15BU,1980
|
|
8
8
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -66,9 +66,9 @@ csv_detective/detect_fields/other/twitter/__init__.py,sha256=Npu6ZbyNfHq1y7xn0Gd
|
|
|
66
66
|
csv_detective/detect_fields/other/url/__init__.py,sha256=L7h9fZldh1w86XwCx0x3Q1TXSJ_nIId1C-l1yFzZYrA,299
|
|
67
67
|
csv_detective/detect_fields/other/uuid/__init__.py,sha256=XFxbIsdIhRw0dtFxBXQBhicE4yy7P4jmwYXeJhq6FVY,215
|
|
68
68
|
csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
69
|
-
csv_detective/detect_fields/temp/date/__init__.py,sha256=
|
|
70
|
-
csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=
|
|
71
|
-
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=
|
|
69
|
+
csv_detective/detect_fields/temp/date/__init__.py,sha256=j066luXADCti4Mbb-jvznrL1jf3p5TpEpVzW8vThRDE,2124
|
|
70
|
+
csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=oDaZIhkW0SXSYeuK5R5TIzajvSmu-XjUn8GpqITFLnY,1250
|
|
71
|
+
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=z5wpuHiDl8j7ZeQjfZ5wO9lG6H9Ps6X218ANNw19Dag,1073
|
|
72
72
|
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=-pFdIIPgaLq2_QbFJ9zwy4YIwZuC73F0A_cNDntTuvQ,512
|
|
73
73
|
csv_detective/detect_fields/temp/year/__init__.py,sha256=gHchVciZExbGZLMBcbBaDXB0IgGptkQc4RhfSOMY0Ww,194
|
|
74
74
|
csv_detective/detect_labels/__init__.py,sha256=93s93DRNeFw9fJiGp0rW3iRWZX3WOeVau2PAaF4QlPE,1777
|
|
@@ -130,37 +130,37 @@ csv_detective/detect_labels/temp/year/__init__.py,sha256=7uWaCZY7dOG7nolW46IgBWm
|
|
|
130
130
|
csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
131
|
csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
|
|
132
132
|
csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
|
|
133
|
-
csv_detective/detection/engine.py,sha256=
|
|
134
|
-
csv_detective/detection/formats.py,sha256=
|
|
135
|
-
csv_detective/detection/headers.py,sha256=
|
|
133
|
+
csv_detective/detection/engine.py,sha256=NpWUgqsNXogBnVclPYccqJZVtDd780houVY-YIMr5c0,1511
|
|
134
|
+
csv_detective/detection/formats.py,sha256=QXdxdECU5uC_ytLBT_6-xe0VAiaMptXF4KYiShRUVCA,7702
|
|
135
|
+
csv_detective/detection/headers.py,sha256=hvYU13Nq8GWci5skc5vVUOxM0DwOUwbjVMlmY94lWhA,1135
|
|
136
136
|
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
137
137
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
138
138
|
csv_detective/detection/variables.py,sha256=wfsA_MOk14TPMOY7gkvpTGpo9-USzMnFaAou3MPHqxc,3536
|
|
139
|
-
csv_detective/output/__init__.py,sha256=
|
|
140
|
-
csv_detective/output/dataframe.py,sha256=
|
|
141
|
-
csv_detective/output/example.py,sha256=
|
|
139
|
+
csv_detective/output/__init__.py,sha256=3g6aR6tg1WM-bPFrAdSPSFbNEj2y7tnZiAC_DAhw9_Q,1876
|
|
140
|
+
csv_detective/output/dataframe.py,sha256=Ao7hyfkyQxpmQ9PGBq4bFYJnJaURczl10H7q0oUcYEw,2097
|
|
141
|
+
csv_detective/output/example.py,sha256=R7nxBBawM6KT9nipO7PAAc2zaIXjY-YxzWTd1NqK4xA,8599
|
|
142
142
|
csv_detective/output/profile.py,sha256=thckCcfy9cES5yYNW6TDGV82gP1OFWJuLhInT1g7JpI,2814
|
|
143
|
-
csv_detective/output/schema.py,sha256=
|
|
143
|
+
csv_detective/output/schema.py,sha256=vXPlEw44zRR4GcYd-PQ_R_qXeCaefEDxW2XmprdNP_c,10453
|
|
144
144
|
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
145
145
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
146
146
|
csv_detective/parsing/columns.py,sha256=HRHJBJ1gftuheegJHzhQmg-u83pVAXXuQ9GKR34mKgk,5696
|
|
147
147
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
148
148
|
csv_detective/parsing/csv.py,sha256=fJkjKvyk7InkNnYKtmivyi48mmcwvrha7gvZ5J4-86A,1588
|
|
149
|
-
csv_detective/parsing/excel.py,sha256=
|
|
150
|
-
csv_detective/parsing/load.py,sha256
|
|
149
|
+
csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
|
|
150
|
+
csv_detective/parsing/load.py,sha256=-pQlwOPTYVpvgt21ERa4K9ObcLozWBJbZ3kWO1U0wkE,3648
|
|
151
151
|
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
152
|
-
csv_detective-0.9.3.
|
|
152
|
+
csv_detective-0.9.3.dev2123.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
153
153
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
154
154
|
tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
|
|
155
|
-
tests/test_fields.py,sha256=
|
|
155
|
+
tests/test_fields.py,sha256=R6r6dcUwPx9XWIoc1xH4z0HlCnTj_bmxw91H5Gfqq5I,13762
|
|
156
156
|
tests/test_file.py,sha256=QEBv69P0bAKWBzhQ3KKOR1Z1RQSf5CVEilqBojwP2Yc,10791
|
|
157
157
|
tests/test_labels.py,sha256=Y0XlOpztCyV65pk7iAS_nMMfdysoBujlBmz10vHul9A,469
|
|
158
158
|
tests/test_structure.py,sha256=GRDYKy0UcdqlN4qglzsRC0puFj5cb-SVvONjvcPvtAA,1400
|
|
159
159
|
tests/test_validation.py,sha256=ie-Xf0vk6-M6GQq-x7kY5yse1EmXfxQkbaV7fR3fvYo,3308
|
|
160
|
-
venv/bin/activate_this.py,sha256=
|
|
160
|
+
venv/bin/activate_this.py,sha256=wS7qPipy8R-dS_0ICD8PqqUQ8F-PrtcpiJw2DUPngYM,1287
|
|
161
161
|
venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
|
|
162
|
-
csv_detective-0.9.3.
|
|
163
|
-
csv_detective-0.9.3.
|
|
164
|
-
csv_detective-0.9.3.
|
|
165
|
-
csv_detective-0.9.3.
|
|
166
|
-
csv_detective-0.9.3.
|
|
162
|
+
csv_detective-0.9.3.dev2123.dist-info/METADATA,sha256=R1suLzB-agHz3ejK3iOo9I8YrEEeRo9hvqKlSjN61eI,9736
|
|
163
|
+
csv_detective-0.9.3.dev2123.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
164
|
+
csv_detective-0.9.3.dev2123.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
165
|
+
csv_detective-0.9.3.dev2123.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
|
|
166
|
+
csv_detective-0.9.3.dev2123.dist-info/RECORD,,
|
tests/test_fields.py
CHANGED
|
@@ -357,6 +357,7 @@ fields = {
|
|
|
357
357
|
True: [
|
|
358
358
|
"2021-06-22 10:20:10-04:00",
|
|
359
359
|
"2030-06-22 00:00:00.0028+02:00",
|
|
360
|
+
"2000-12-21 10:20:10.1Z",
|
|
360
361
|
"2024-12-19T10:53:36.428000+00:00",
|
|
361
362
|
"1996/06/22 10:20:10 GMT",
|
|
362
363
|
],
|
|
@@ -365,7 +366,6 @@ fields = {
|
|
|
365
366
|
datetime_naive: {
|
|
366
367
|
True: [
|
|
367
368
|
"2021-06-22 10:20:10",
|
|
368
|
-
"1999-12-01T00:00:00Z",
|
|
369
369
|
"2030/06-22 00:00:00",
|
|
370
370
|
"2030/06/22 00:00:00.0028",
|
|
371
371
|
],
|
|
@@ -373,6 +373,7 @@ fields = {
|
|
|
373
373
|
"2021-06-22T30:20:10",
|
|
374
374
|
"Sun, 06 Nov 1994 08:49:37 GMT",
|
|
375
375
|
"2021-06-44 10:20:10+02:00",
|
|
376
|
+
"1999-12-01T00:00:00Z",
|
|
376
377
|
"2021-06-44",
|
|
377
378
|
"15 décembre 1985",
|
|
378
379
|
],
|
|
@@ -459,8 +460,8 @@ def test_priority(args):
|
|
|
459
460
|
("28/01/2000", date),
|
|
460
461
|
("2025-08-20T14:30:00+02:00", datetime_aware),
|
|
461
462
|
("2025/08/20 14:30:00.2763-12:00", datetime_aware),
|
|
462
|
-
("1925_12_20T14:30:00.
|
|
463
|
-
("1925 12 20 14:30:00Z",
|
|
463
|
+
("1925_12_20T14:30:00.2763", datetime_naive),
|
|
464
|
+
("1925 12 20 14:30:00Z", datetime_aware),
|
|
464
465
|
),
|
|
465
466
|
)
|
|
466
467
|
def test_early_detection(args):
|
venv/bin/activate_this.py
CHANGED
|
@@ -29,7 +29,7 @@ os.environ["VIRTUAL_ENV_PROMPT"] = '' or os.path.basename(base)
|
|
|
29
29
|
|
|
30
30
|
# add the virtual environments libraries to the host python import mechanism
|
|
31
31
|
prev_length = len(sys.path)
|
|
32
|
-
for lib in '../lib/python3.
|
|
32
|
+
for lib in '../lib/python3.11/site-packages'.split(os.pathsep):
|
|
33
33
|
path = os.path.realpath(os.path.join(bin_dir, lib))
|
|
34
34
|
site.addsitedir(path.decode("utf-8") if '' else path)
|
|
35
35
|
sys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]
|
|
File without changes
|
{csv_detective-0.9.3.dev2057.dist-info → csv_detective-0.9.3.dev2123.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.9.3.dev2057.dist-info → csv_detective-0.9.3.dev2123.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{csv_detective-0.9.3.dev2057.dist-info → csv_detective-0.9.3.dev2123.dist-info}/top_level.txt
RENAMED
|
File without changes
|