csv-detective 0.8.1.dev1469__py3-none-any.whl → 0.8.1.dev1491__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
csv_detective/__init__.py CHANGED
@@ -1,4 +1,4 @@
- from .explore_csv import routine, routine_minio, validate_then_detect  # noqa
- from .output.example import create_example_csv_file  # noqa
+ from csv_detective.explore_csv import routine, routine_minio, validate_then_detect  # noqa
+ from csv_detective.output.example import create_example_csv_file  # noqa

  __version__ = '0.8.1.dev'
csv_detective/cli.py CHANGED
@@ -4,7 +4,7 @@ Command line client for csv_detective

  import argparse
  import json
- from .explore_csv import routine
+ from csv_detective.explore_csv import routine


  def run():
csv_detective/explore_csv.py CHANGED
@@ -7,12 +7,12 @@ from typing import Optional, Union

  import pandas as pd

- from .detection.formats import detect_formats
- from .output import generate_output, generate_table_schema
- from .parsing.load import load_file
- from .s3_utils import download_from_minio, upload_to_minio
- from .utils import display_logs_depending_process_time, is_url
- from .validate import validate
+ from csv_detective.detection.formats import detect_formats
+ from csv_detective.output import generate_output, generate_table_schema
+ from csv_detective.parsing.load import load_file
+ from csv_detective.s3_utils import download_from_minio, upload_to_minio
+ from csv_detective.utils import display_logs_depending_process_time, is_url
+ from csv_detective.validate import validate

  logging.basicConfig(level=logging.INFO)

csv_detective/parsing/columns.py ADDED
@@ -0,0 +1,141 @@
+ import logging
+ from time import time
+ from typing import Callable
+
+ import pandas as pd
+
+ from csv_detective.utils import display_logs_depending_process_time
+
+ MAX_ROWS_ANALYSIS = 1e5
+
+
+ def test_col_val(
+     serie: pd.Series,
+     test_func: Callable,
+     proportion: float = 0.9,
+     skipna: bool = True,
+     limited_output: bool = False,
+     verbose: bool = False,
+ ):
+     """Tests values of the serie using test_func.
+     - skipna : if True indicates that NaNs are not counted as False
+     - proportion : indicates the proportion of values that have to pass the test
+       for the serie to be detected as a certain format
+     """
+     if verbose:
+         start = time()
+
+     # TODO : change for a cleaner method and only test columns in modules labels
+     def apply_test_func(serie: pd.Series, test_func: Callable, _range: int):
+         return serie.sample(n=_range).apply(test_func)
+     try:
+         if skipna:
+             serie = serie[serie.notnull()]
+         ser_len = len(serie)
+         if ser_len == 0:
+             return 0.0
+         if not limited_output:
+             result = apply_test_func(serie, test_func, ser_len).sum() / ser_len
+             return result if result >= proportion else 0.0
+         else:
+             if proportion == 1:  # Then try first 1 value, then 5, then all
+                 for _range in [
+                     min(1, ser_len),
+                     min(5, ser_len),
+                     ser_len,
+                 ]:  # Pour ne pas faire d'opérations inutiles, on commence par 1,
+                     # puis 5 valeurs puis la serie complète
+                     if all(apply_test_func(serie, test_func, _range)):
+                         # print(serie.name, ': check OK')
+                         pass
+                     else:
+                         return 0.0
+                 return 1.0
+             else:
+                 # if we have a proportion, statistically it's OK to analyse up to 10k rows
+                 # (arbitrary number) and get a significant result
+                 to_analyse = min(ser_len, MAX_ROWS_ANALYSIS)
+                 result = apply_test_func(serie, test_func, to_analyse).sum() / to_analyse
+                 return result if result >= proportion else 0.0
+     finally:
+         if verbose and time() - start > 3:
+             display_logs_depending_process_time(
+                 f"\t/!\\ Column '{serie.name}' took too long ({round(time() - start, 3)}s)",
+                 time() - start
+             )
+
+
+ def test_col_label(label: str, test_func: Callable, proportion: float = 1, limited_output: bool = False):
+     """Tests label (from header) using test_func.
+     - proportion : indicates the minimum score to pass the test for the serie
+       to be detected as a certain format
+     """
+     if not limited_output:
+         return test_func(label)
+     else:
+         result = test_func(label)
+         return result if result >= proportion else 0
+
+
+ def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna: bool = True, verbose: bool = False):
+     if verbose:
+         start = time()
+         logging.info("Testing columns to get types")
+     test_funcs = dict()
+     for test in all_tests:
+         name = test.__name__.split(".")[-1]
+         test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
+     return_table = pd.DataFrame(columns=table.columns)
+     for idx, (key, value) in enumerate(test_funcs.items()):
+         if verbose:
+             start_type = time()
+             logging.info(f"\t- Starting with type '{key}'")
+         # improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
+         # => the following needs to change, "apply" means all columns are tested for one type at once
+         return_table.loc[key] = table.apply(
+             lambda serie: test_col_val(
+                 serie,
+                 value["func"],
+                 value["prop"],
+                 skipna=skipna,
+                 limited_output=limited_output,
+                 verbose=verbose,
+             )
+         )
+         if verbose:
+             display_logs_depending_process_time(
+                 f'\t> Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
+                 time() - start_type
+             )
+     if verbose:
+         display_logs_depending_process_time(f"Done testing columns in {round(time() - start, 3)}s", time() - start)
+     return return_table
+
+
+ def test_label(table: pd.DataFrame, all_tests: list, limited_output: bool, verbose: bool = False):
+     if verbose:
+         start = time()
+         logging.info("Testing labels to get types")
+     test_funcs = dict()
+     for test in all_tests:
+         name = test.__name__.split(".")[-1]
+         test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
+
+     return_table = pd.DataFrame(columns=table.columns)
+     for idx, (key, value) in enumerate(test_funcs.items()):
+         if verbose:
+             start_type = time()
+         return_table.loc[key] = [
+             test_col_label(
+                 col_name, value["func"], value["prop"], limited_output=limited_output
+             )
+             for col_name in table.columns
+         ]
+         if verbose:
+             display_logs_depending_process_time(
+                 f'\t- Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
+                 time() - start_type
+             )
+     if verbose:
+         display_logs_depending_process_time(f"Done testing labels in {round(time() - start, 3)}s", time() - start)
+     return return_table
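As an aside, here is a minimal sketch of how the new `test_col_val` helper behaves when called on its own; the `is_int` test function and the sample series below are illustrative and not part of the package:

```python
import pandas as pd

from csv_detective.parsing.columns import test_col_val

# hypothetical value test: does the cell parse as an integer?
def is_int(val) -> bool:
    try:
        int(val)
        return True
    except (TypeError, ValueError):
        return False

serie = pd.Series(["1", "2", "x", None, "4"])
# with skipna=True the None is dropped; 3 of the 4 remaining values pass,
# so 0.75 is below the 0.9 threshold and the function returns 0.0
print(test_col_val(serie, is_int, proportion=0.9))
# lowering the threshold lets the column qualify and the score (0.75) is returned
print(test_col_val(serie, is_int, proportion=0.7))
```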
csv_detective/parsing/compression.py ADDED
@@ -0,0 +1,11 @@
+ import gzip
+ from io import BytesIO
+
+
+ def unzip(binary_file: BytesIO, engine: str) -> BytesIO:
+     if engine == "gzip":
+         with gzip.open(binary_file, mode="rb") as binary_file:
+             file_content = binary_file.read()
+     else:
+         raise NotImplementedError(f"{engine} is not yet supported")
+     return BytesIO(file_content)
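A short usage sketch for the new `unzip` helper, assuming an in-memory gzip payload built on the spot (the CSV bytes are illustrative):

```python
import gzip
from io import BytesIO

from csv_detective.parsing.compression import unzip

# build a small gzip-compressed CSV in memory
payload = BytesIO(gzip.compress(b"id;name\n1;alice\n2;bob\n"))

# returns a fresh BytesIO with the decompressed content
decompressed = unzip(binary_file=payload, engine="gzip")
print(decompressed.read().decode("utf-8"))

# any other engine raises NotImplementedError, per the function above
```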
csv_detective/parsing/csv.py ADDED
@@ -0,0 +1,55 @@
+ import logging
+ from time import time
+ from typing import TextIO
+
+ import pandas as pd
+
+ from csv_detective.utils import display_logs_depending_process_time
+
+
+ def parse_csv(
+     the_file: TextIO,
+     encoding: str,
+     sep: str,
+     num_rows: int,
+     skiprows: int,
+     random_state: int = 42,
+     verbose: bool = False,
+ ) -> tuple[pd.DataFrame, int, int]:
+     if verbose:
+         start = time()
+         logging.info("Parsing table")
+     table = None
+
+     if not isinstance(the_file, str):
+         the_file.seek(0)
+
+     total_lines = None
+     for encoding in [encoding, "ISO-8859-1", "utf-8"]:
+         if encoding is None:
+             continue
+
+         if "ISO-8859" in encoding:
+             encoding = "ISO-8859-1"
+         try:
+             table = pd.read_csv(
+                 the_file, sep=sep, dtype="unicode", encoding=encoding, skiprows=skiprows
+             )
+             total_lines = len(table)
+             nb_duplicates = len(table.loc[table.duplicated()])
+             if num_rows > 0:
+                 num_rows = min(num_rows - 1, total_lines)
+                 table = table.sample(num_rows, random_state=random_state)
+             # else : table is unchanged
+             break
+         except TypeError:
+             print("Trying encoding : {encoding}".format(encoding=encoding))
+
+     if table is None:
+         raise ValueError("Could not load file")
+     if verbose:
+         display_logs_depending_process_time(
+             f'Table parsed successfully in {round(time() - start, 3)}s',
+             time() - start,
+         )
+     return table, total_lines, nb_duplicates
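A sketch of calling the new `parse_csv` directly on an in-memory buffer; the sample CSV, separator and parameters below are illustrative, and in the package the buffer is normally prepared by `parsing.load.load_file`:

```python
from io import StringIO

from csv_detective.parsing.csv import parse_csv

# illustrative in-memory CSV with a deliberate duplicated row
raw = StringIO("id;label\n1;foo\n2;bar\n2;bar\n")

table, total_lines, nb_duplicates = parse_csv(
    raw,
    encoding="utf-8",
    sep=";",
    num_rows=-1,   # non-positive: keep every row instead of sampling
    skiprows=0,    # header is on the first line
)
print(total_lines, nb_duplicates)  # 3 data rows read, 1 duplicated row
```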
csv_detective/parsing/excel.py ADDED
@@ -0,0 +1,169 @@
+ from io import BytesIO
+ from time import time
+ from typing import Optional
+
+ import openpyxl
+ import pandas as pd
+ import requests
+ import xlrd
+
+ from csv_detective.detection.engine import engine_to_file
+ from csv_detective.detection.rows import remove_empty_first_rows
+ from csv_detective.utils import (
+     display_logs_depending_process_time,
+     is_url,
+ )
+
+ NEW_EXCEL_EXT = [".xlsx", ".xlsm", ".xltx", ".xltm"]
+ OLD_EXCEL_EXT = [".xls"]
+ OPEN_OFFICE_EXT = [".odf", ".ods", ".odt"]
+ XLS_LIKE_EXT = NEW_EXCEL_EXT + OLD_EXCEL_EXT + OPEN_OFFICE_EXT
+
+
+ def parse_excel(
+     file_path: str,
+     num_rows: int = -1,
+     engine: Optional[str] = None,
+     sheet_name: Optional[str] = None,
+     random_state: int = 42,
+     verbose: bool = False,
+ ) -> tuple[pd.DataFrame, int, int, str, str, int]:
+     """"Excel-like parsing is really slow, could be a good improvement for future development"""
+     if verbose:
+         start = time()
+     no_sheet_specified = sheet_name is None
+
+     if (
+         engine in ['openpyxl', 'xlrd'] or
+         any([file_path.endswith(k) for k in NEW_EXCEL_EXT + OLD_EXCEL_EXT])
+     ):
+         remote_content = None
+         if is_url(file_path):
+             r = requests.get(file_path)
+             r.raise_for_status()
+             remote_content = BytesIO(r.content)
+         if not engine:
+             if any([file_path.endswith(k) for k in NEW_EXCEL_EXT]):
+                 engine = "openpyxl"
+             else:
+                 engine = "xlrd"
+         if sheet_name is None:
+             if verbose:
+                 display_logs_depending_process_time(
+                     f'Detected {engine_to_file[engine]} file, no sheet specified, reading the largest one',
+                     time() - start,
+                 )
+             try:
+                 if engine == "openpyxl":
+                     # openpyxl doesn't want to open files that don't have a valid extension
+                     # see: https://foss.heptapod.net/openpyxl/openpyxl/-/issues/2157
+                     # if the file is remote, we have a remote content anyway so it's fine
+                     if not remote_content and '.' not in file_path.split('/')[-1]:
+                         with open(file_path, 'rb') as f:
+                             remote_content = BytesIO(f.read())
+                     # faster than loading all sheets
+                     wb = openpyxl.load_workbook(remote_content or file_path, read_only=True)
+                     try:
+                         sizes = {s.title: s.max_row * s.max_column for s in wb.worksheets}
+                     except TypeError:
+                         # sometimes read_only can't get the info, so we have to open the file for real
+                         # this takes more time but it's for a limited number of files
+                         # and it's this or nothing
+                         wb = openpyxl.load_workbook(remote_content or file_path)
+                         sizes = {s.title: s.max_row * s.max_column for s in wb.worksheets}
+                 else:
+                     if remote_content:
+                         wb = xlrd.open_workbook(file_contents=remote_content.read())
+                     else:
+                         wb = xlrd.open_workbook(file_path)
+                     sizes = {s.name: s.nrows * s.ncols for s in wb.sheets()}
+                 sheet_name = max(sizes, key=sizes.get)
+             except xlrd.biffh.XLRDError:
+                 # sometimes a xls file is recognized as ods
+                 if verbose:
+                     display_logs_depending_process_time(
+                         'Could not read file with classic xls reader, trying with ODS',
+                         time() - start,
+                     )
+                 engine = "odf"
+
+     if engine == "odf" or any([file_path.endswith(k) for k in OPEN_OFFICE_EXT]):
+         # for ODS files, no way to get sheets' sizes without
+         # loading the file one way or another (pandas or pure odfpy)
+         # so all in one
+         engine = "odf"
+         if sheet_name is None:
+             if verbose:
+                 display_logs_depending_process_time(
+                     f'Detected {engine_to_file[engine]} file, no sheet specified, reading the largest one',
+                     time() - start,
+                 )
+             tables = pd.read_excel(
+                 file_path,
+                 engine="odf",
+                 sheet_name=None,
+                 dtype="unicode",
+             )
+             sizes = {sheet_name: table.size for sheet_name, table in tables.items()}
+             sheet_name = max(sizes, key=sizes.get)
+             if verbose:
+                 display_logs_depending_process_time(
+                     f'Going forwards with sheet "{sheet_name}"',
+                     time() - start,
+                 )
+             table = tables[sheet_name]
+         else:
+             if verbose:
+                 display_logs_depending_process_time(
+                     f'Detected {engine_to_file[engine]} file, reading sheet "{sheet_name}"',
+                     time() - start,
+                 )
+             table = pd.read_excel(
+                 file_path,
+                 engine="odf",
+                 sheet_name=sheet_name,
+                 dtype="unicode",
+             )
+         table, header_row_idx = remove_empty_first_rows(table)
+         total_lines = len(table)
+         nb_duplicates = len(table.loc[table.duplicated()])
+         if num_rows > 0:
+             num_rows = min(num_rows - 1, total_lines)
+             table = table.sample(num_rows, random_state=random_state)
+         if verbose:
+             display_logs_depending_process_time(
+                 f'Table parsed successfully in {round(time() - start, 3)}s',
+                 time() - start,
+             )
+         return table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx
+
+     # so here we end up with (old and new) excel files only
+     if verbose:
+         if no_sheet_specified:
+             display_logs_depending_process_time(
+                 f'Going forwards with sheet "{sheet_name}"',
+                 time() - start,
+             )
+         else:
+             display_logs_depending_process_time(
+                 f'Detected {engine_to_file[engine]} file, reading sheet "{sheet_name}"',
+                 time() - start,
+             )
+     table = pd.read_excel(
+         file_path,
+         engine=engine,
+         sheet_name=sheet_name,
+         dtype="unicode",
+     )
+     table, header_row_idx = remove_empty_first_rows(table)
+     total_lines = len(table)
+     nb_duplicates = len(table.loc[table.duplicated()])
+     if num_rows > 0:
+         num_rows = min(num_rows - 1, total_lines)
+         table = table.sample(num_rows, random_state=random_state)
+     if verbose:
+         display_logs_depending_process_time(
+             f'Table parsed successfully in {round(time() - start, 3)}s',
+             time() - start,
+         )
+     return table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx
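A sketch of how `parse_excel` might be called on its own; `data/example.xlsx` is a placeholder path, not a file shipped with the package:

```python
from csv_detective.parsing.excel import parse_excel

# placeholder path: point this at any local .xlsx/.xls/.ods file
table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx = parse_excel(
    file_path="data/example.xlsx",
    num_rows=-1,       # non-positive: keep every row instead of sampling
    sheet_name=None,   # let the function pick the largest sheet
    verbose=True,
)
print(engine, sheet_name, total_lines, nb_duplicates)
```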
csv_detective/parsing/load.py ADDED
@@ -0,0 +1,97 @@
+ from io import BytesIO, StringIO
+ from typing import Optional, Union
+
+ import pandas as pd
+ import requests
+
+ from csv_detective.detection.columns import detect_heading_columns, detect_trailing_columns
+ from csv_detective.detection.encoding import detect_encoding
+ from csv_detective.detection.engine import (
+     COMPRESSION_ENGINES,
+     EXCEL_ENGINES,
+     detect_engine,
+ )
+ from csv_detective.detection.headers import detect_headers
+ from csv_detective.detection.separator import detect_separator
+ from csv_detective.utils import is_url
+ from .compression import unzip
+ from .csv import parse_csv
+ from .excel import (
+     XLS_LIKE_EXT,
+     parse_excel,
+ )
+
+
+ def load_file(
+     file_path: str,
+     num_rows: int = 500,
+     encoding: Optional[str] = None,
+     sep: Optional[str] = None,
+     verbose: bool = False,
+     sheet_name: Optional[Union[str, int]] = None,
+ ) -> tuple[pd.DataFrame, dict]:
+     file_name = file_path.split('/')[-1]
+     engine = None
+     if '.' not in file_name or not file_name.endswith("csv"):
+         # file has no extension, we'll investigate how to read it
+         engine = detect_engine(file_path, verbose=verbose)
+
+     if engine in EXCEL_ENGINES or any([file_path.endswith(k) for k in XLS_LIKE_EXT]):
+         table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx = parse_excel(
+             file_path=file_path,
+             num_rows=num_rows,
+             engine=engine,
+             sheet_name=sheet_name,
+             verbose=verbose,
+         )
+         header = table.columns.to_list()
+         analysis = {
+             "engine": engine,
+             "sheet_name": sheet_name,
+         }
+     else:
+         # fetching or reading file as binary
+         if is_url(file_path):
+             r = requests.get(file_path, allow_redirects=True)
+             r.raise_for_status()
+             binary_file = BytesIO(r.content)
+         else:
+             binary_file = open(file_path, "rb")
+         # handling compression
+         if engine in COMPRESSION_ENGINES:
+             binary_file: BytesIO = unzip(binary_file=binary_file, engine=engine)
+         # detecting encoding if not specified
+         if encoding is None:
+             encoding: str = detect_encoding(binary_file, verbose=verbose)
+             binary_file.seek(0)
+         # decoding and reading file
+         if is_url(file_path) or engine in COMPRESSION_ENGINES:
+             str_file = StringIO(binary_file.read().decode(encoding=encoding))
+         else:
+             str_file = open(file_path, "r", encoding=encoding)
+         if sep is None:
+             sep = detect_separator(str_file, verbose=verbose)
+         header_row_idx, header = detect_headers(str_file, sep, verbose=verbose)
+         if header is None:
+             return {"error": True}
+         elif isinstance(header, list):
+             if any([x is None for x in header]):
+                 return {"error": True}
+         heading_columns = detect_heading_columns(str_file, sep, verbose=verbose)
+         trailing_columns = detect_trailing_columns(str_file, sep, heading_columns, verbose=verbose)
+         table, total_lines, nb_duplicates = parse_csv(
+             str_file, encoding, sep, num_rows, header_row_idx, verbose=verbose
+         )
+         analysis = {
+             "encoding": encoding,
+             "separator": sep,
+             "heading_columns": heading_columns,
+             "trailing_columns": trailing_columns,
+         }
+     analysis.update({
+         "header_row_idx": header_row_idx,
+         "header": header,
+         "total_lines": total_lines,
+         "nb_duplicates": nb_duplicates,
+     })
+     return table, analysis
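A sketch of the typical entry point into this module, assuming a local CSV at a placeholder path; the keys read from `analysis` are the ones populated in the CSV branch above:

```python
from csv_detective.parsing.load import load_file

# "data/example.csv" is a placeholder; a URL should work the same way
table, analysis = load_file("data/example.csv", num_rows=500, verbose=True)

print(analysis["encoding"], analysis["separator"])
print(analysis["header"], analysis["total_lines"], analysis["nb_duplicates"])
```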
csv_detective/parsing/text.py ADDED
@@ -0,0 +1,61 @@
+ from re import finditer
+
+
+ def camel_case_split(identifier: str):
+     matches = finditer(
+         ".+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)", identifier
+     )
+     return " ".join([m.group(0) for m in matches])
+
+
+ translate_dict = {
+     " ": ["-", "_", "'", ",", " "],
+     "a": ["à", "â"],
+     "c": ["ç"],
+     "e": ["é", "è", "ê", "é"],
+     "i": ["î", "ï"],
+     "o": ["ô", "ö"],
+     "u": ["ù", "û", "ü"],
+ }
+
+
+ # Process text
+ def _process_text(val: str):
+     """Traitement des chaînes de caractères pour les standardiser.
+     Plusieurs alternatives ont été testées : .translate, unidecode.unidecode,
+     des méthodes hybrides, mais aucune ne s'est avérée plus performante."""
+     val = camel_case_split(val)
+     val = val.lower()
+     for target in translate_dict:
+         for source in translate_dict[target]:
+             val = val.replace(source, target)
+     val = val.strip()
+     return val
+
+
+ def is_word_in_string(word: str, string: str):
+     # if the substring is too short, the test can become irrelevant
+     return len(word) > 2 and word in string
+
+
+ def header_score(header: str, words_combinations_list: list[str]) -> float:
+     """Returns:
+     - 1 if the header is exactly in the specified list
+     - 0.5 if any of the words is within the header
+     - 0 otherwise"""
+     processed_header = _process_text(header)
+
+     header_matches_words_combination = float(
+         any(
+             words_combination == processed_header for words_combination in words_combinations_list
+         )
+     )
+     words_combination_in_header = 0.5 * (
+         any(
+             is_word_in_string(
+                 words_combination, processed_header
+             ) for words_combination in words_combinations_list
+         )
+     )
+
+     return max(header_matches_words_combination, words_combination_in_header)
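A quick sketch of how these text helpers score headers; the labels below are illustrative:

```python
from csv_detective.parsing.text import _process_text, header_score

# camel case is split, accents and separators are normalised
print(_process_text("CodePostal"))  # -> "code postal"

# exact match after processing -> 1.0
print(header_score("Code_Postal", ["code postal"]))
# the candidate appears inside the header -> 0.5
print(header_score("code postal etablissement", ["code postal"]))
# no match -> 0.0
print(header_score("ville", ["code postal"]))
```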
csv_detective/validate.py CHANGED
@@ -4,7 +4,7 @@ from typing import Optional, Union
  import pandas as pd

  from csv_detective.load_tests import return_all_tests
- from .parsing.load import load_file
+ from csv_detective.parsing.load import load_file

  logging.basicConfig(level=logging.INFO)

CHANGELOG.md CHANGED
@@ -6,7 +6,7 @@
  - Refactor repo metadata and requirements [#120](https://github.com/datagouv/csv-detective/pull/120) [#122](https://github.com/datagouv/csv-detective/pull/122)
  - Better URL detection [#121](https://github.com/datagouv/csv-detective/pull/121)
  - For big files, analyse on sample then validate on whole file [#124](https://github.com/datagouv/csv-detective/pull/124)
- - Fix imports [#125](https://github.com/datagouv/csv-detective/pull/125)
+ - Fix imports [#125](https://github.com/datagouv/csv-detective/pull/125) [#126](https://github.com/datagouv/csv-detective/pull/126) [#127](https://github.com/datagouv/csv-detective/pull/127)

  ## 0.8.0 (2025-05-20)

dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: csv_detective
- Version: 0.8.1.dev1469
+ Version: 0.8.1.dev1491
  Summary: Detect tabular files column content
  Home-page: https://github.com/datagouv/csv_detective
  Author: Etalab
dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
- csv_detective/__init__.py,sha256=fxctDlEyUexNk_ePriWu6V05xZEeirMV0v_StnEZ8vQ,165
- csv_detective/cli.py,sha256=itooHtpyfC6DUsL_DchPKe1xo7m0MYJIp1L4R8eqoTk,1401
- csv_detective/explore_csv.py,sha256=YxXgaUqUNdAGsU8bC-cs_TVvSza4wc4aMJQjWRkRT5s,9144
+ csv_detective/__init__.py,sha256=TwRP1gozmEmweSbK-lqihSsb-EqmCFSKUnJXz2x-dHE,191
+ csv_detective/cli.py,sha256=VNztFz2nc90E3zkghF8PYtXTEZ6TrBSCQMi9v1ljkJs,1414
+ csv_detective/explore_csv.py,sha256=VEeAJaz3FPOmGmQ-Yuf3FuSRRPULM03FrTf3qwZX52s,9222
  csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
  csv_detective/utils.py,sha256=-tIs9yV7RJPGj65lQ7LjRGch6Iws9UeuIPQsd2uUUJM,1025
- csv_detective/validate.py,sha256=4e7f8bNXPU9GqNx4QXXiaoINyotozbL52JB6psVAjyY,2631
+ csv_detective/validate.py,sha256=d_4Phmjk6Y0Z0YYVw4vpoZy8E79K370reGgkpzx1mcQ,2644
  csv_detective/detect_fields/__init__.py,sha256=7Tz0Niaz0BboA3YVsp_6WPA6ywciwDN4-lOy_Ie_0Y8,976
  csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -136,10 +136,17 @@ csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5
  csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
  csv_detective/output/schema.py,sha256=ZDBWDOD8IYp7rcB0_n8l9JXGIhOQ6bTZHFWfTmnNNEQ,13480
  csv_detective/output/utils.py,sha256=HbmvCCCmFo7NJxhD_UsJIveuw-rrfhrvYckv1CJn_10,2301
- csv_detective-0.8.1.dev1469.data/data/share/csv_detective/CHANGELOG.md,sha256=-Ut6d9FycTm_ax8QNjBEATCH9NOWOq3fwVLeSgjRTDU,8798
- csv_detective-0.8.1.dev1469.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
- csv_detective-0.8.1.dev1469.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
- csv_detective-0.8.1.dev1469.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
+ csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ csv_detective/parsing/columns.py,sha256=e0xVmeXNvSC3su5HTFSNClgkz8PlFkoHmNwRYdS57mk,5670
+ csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
+ csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
+ csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
+ csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
+ csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
+ csv_detective-0.8.1.dev1491.data/data/share/csv_detective/CHANGELOG.md,sha256=cfs5oHz9y-jeXsxyJ8tImHbpUVxtRdLmB03om8a0rco,8916
+ csv_detective-0.8.1.dev1491.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
+ csv_detective-0.8.1.dev1491.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
+ csv_detective-0.8.1.dev1491.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
  tests/test_fields.py,sha256=d2tNvjtal6ZbO646x1GDbp_CGgp-EIcdg2SgMG72J6E,10270
@@ -147,8 +154,8 @@ tests/test_file.py,sha256=FWVtYHlD5uU7tPeYsqlQg6O4lpU8Ct35vddkbzhvvjA,8508
  tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
  tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
  tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
- csv_detective-0.8.1.dev1469.dist-info/METADATA,sha256=J9fGXJjtRLS17DxyfwmzjteKpx23J01Cr3oNZaw0DSg,10443
- csv_detective-0.8.1.dev1469.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- csv_detective-0.8.1.dev1469.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
- csv_detective-0.8.1.dev1469.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
- csv_detective-0.8.1.dev1469.dist-info/RECORD,,
+ csv_detective-0.8.1.dev1491.dist-info/METADATA,sha256=x0WDskrI4p-HHHSGpBBXmYgF010VKmFUG59dadKSXYI,10443
+ csv_detective-0.8.1.dev1491.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ csv_detective-0.8.1.dev1491.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+ csv_detective-0.8.1.dev1491.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
+ csv_detective-0.8.1.dev1491.dist-info/RECORD,,