PyPI - csv-detective - Versions diffs - 0.7.5.dev980__py3-none-any.whl → 0.7.5.dev1052__py3-none-any.whl - Mend

csv-detective 0.7.5.dev980__py3-none-any.whl → 0.7.5.dev1052__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

csv_detective/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from .explore_csv import routine, routine_minio  # noqa
+from .create_example import create_example_csv_file  # noqa
 __version__ = '0.7.5.dev'

csv_detective/create_example.py ADDED Viewed

@@ -0,0 +1,247 @@
+import random
+import uuid
+import string
+from datetime import datetime
+import pandas as pd
+from typing import List, Union, Optional, Any, Type
+import json
+import requests
+import rstr
+from faker import Faker
+fake = Faker()
+def create_example_csv_file(
+    fields: Optional[List[dict]] = None,
+    schema_path: Optional[str] = None,
+    file_length: int = 10,
+    output_name: str = 'example_file.csv',
+    output_sep: str = ';',
+    encoding: str = 'utf-8',
+    ignore_required: bool = False,
+) -> pd.DataFrame:
+    '''
+    Create an example file based on a list of dicts like follows:
+    fields = [
+        {
+            "name": "column_name",
+            "type": "column_type",
+            "args": {dict_of_args}  # optional
+        },
+        ...
+    ]
+    Or from a TableSchema
+    Supported column types are: int, float, year, date, time, datetime, str, url, id,
+    bool and array; any other type is generated as a random string.
+    Args:
+        fields: column definitions as described above (ignored if schema_path is given).
+        schema_path: URL (starting with "http") or local path of a TableSchema JSON file.
+        file_length: number of rows to generate.
+        output_name: path of the csv to write; nothing is written if it is empty.
+        output_sep: column separator of the written csv.
+        encoding: encoding used to read a local schema file.
+        ignore_required: if True, optional values are never left empty.
+    Returns:
+        The generated rows as a DataFrame, one column per field.
+    Raises:
+        ValueError: if neither fields nor schema_path is given, if the schema has no
+            "fields" key, or if a date/datetime range does not have two bounds.
+    '''
+    # need to make a CLI command
+    if not (fields or schema_path):
+        raise ValueError("At least fields or schema_path must be specified.")
+    def potential_skip(required: bool) -> bool:
+        # tells whether the current value should be left empty
+        if ignore_required or required:
+            return False
+        # for now an optional value is left empty 30% of the time, this could go as an argument
+        return random.randint(1, 100) <= 30
+    def _string(
+        length: int = 10,
+        required: bool = True,
+        pattern: Optional[str] = None,
+        enum: Optional[List[str]] = None,
+    ) -> str:
+        if potential_skip(required):
+            return ''
+        # a pattern takes precedence over an enum, which takes precedence over the length
+        if pattern is not None:
+            return rstr.xeger(pattern)
+        if enum is not None:
+            return random.choice(enum)
+        letters = string.ascii_lowercase
+        return ''.join(random.choice(letters) for _ in range(length))
+    def _id(
+        required: bool = True,
+    ) -> str:
+        if potential_skip(required):
+            return ''
+        return str(uuid.uuid4())
+    def _date(
+        date_range: Union[None, List[str]] = None,
+        format: str = '%Y-%m-%d',
+        required: bool = True,
+    ) -> str:
+        # the bounds specified in date_range are expected in the same format as the desired output format
+        assert all(k in format for k in ['%d', '%m', '%Y'])
+        if potential_skip(required):
+            return ''
+        if date_range is None:
+            return fake.date(format)
+        if len(date_range) != 2:
+            raise ValueError('"date_range" must have exactly two elements.')
+        return fake.date_between_dates(
+            datetime.strptime(date_range[0], format),
+            datetime.strptime(date_range[1], format),
+        ).strftime(format)
+    def _time(
+        format: str = '%H:%M:%S',
+        required: bool = True,
+    ) -> str:
+        assert all(k in format for k in ['%H', '%M', '%S'])
+        if potential_skip(required):
+            return ''
+        # maybe add a time_range argument?
+        return fake.time(format)
+    def _datetime(
+        datetime_range: Optional[List[str]] = None,
+        format: str = '%Y-%m-%d %H-%M-%S',
+        required: bool = True,
+    ) -> str:
+        # the bounds specified in datetime_range are expected in the same format as the desired output format
+        assert all(k in format for k in ['%d', '%m', '%Y', '%H', '%M', '%S'])
+        if potential_skip(required):
+            return ''
+        if datetime_range is None:
+            return fake.date_time().strftime(format)
+        if len(datetime_range) != 2:
+            # the message names the actual argument of this function
+            raise ValueError('"datetime_range" must have exactly two elements.')
+        return fake.date_time_between(
+            datetime.strptime(datetime_range[0], format),
+            datetime.strptime(datetime_range[1], format),
+        ).strftime(format)
+    def _url(required: bool = True) -> str:
+        if potential_skip(required):
+            return ''
+        return f'http://{rstr.domainsafe()}.{rstr.letters(3)}/{rstr.urlsafe()}'
+    def _number(
+        num_type: Type[Union[int, float]] = int,
+        num_range: Optional[List[float]] = None,
+        enum: Optional[list] = None,
+        required: bool = True,
+    ) -> Union[int, float, str]:
+        # returns '' for a skipped optional value, hence str in the return type
+        assert num_range is None or len(num_range) == 2
+        if potential_skip(required):
+            return ''
+        if enum:
+            return random.choice(enum)
+        if num_range is None:
+            num_range = [0, 1000]
+        if num_type == int:
+            return random.randint(num_range[0], num_range[1])
+        return round(random.uniform(num_range[0], num_range[1]), 1)
+    def _bool(required: bool = True) -> Union[bool, str]:
+        # returns '' for a skipped optional value, hence str in the return type
+        if potential_skip(required):
+            return ''
+        return random.randint(0, 1) == 0
+    def _array(enum: List[Any], required: bool = True) -> str:
+        if potential_skip(required):
+            return ''
+        picked = random.sample(enum, random.randint(1, len(enum)))
+        # items are cast to str so that non-string enums (e.g. numbers) can be joined too
+        return f"[{','.join(str(item) for item in picked)}]"
+    def build_args_from_constraints(constraints: dict) -> dict:
+        # translates TableSchema constraints into the keyword arguments of the generators above
+        args = {}
+        args['required'] = constraints.get('required', False)
+        for key in ['pattern', 'enum', 'format']:
+            if key in constraints:
+                args[key] = constraints[key]
+        if 'minimum' in constraints and 'maximum' in constraints:
+            args['num_range'] = [constraints['minimum'], constraints['maximum']]
+        # maybe there are better values than these?
+        elif 'minimum' in constraints:
+            args['num_range'] = [constraints['minimum'], 10 + constraints['minimum']]
+        elif 'maximum' in constraints:
+            args['num_range'] = [constraints['maximum'] - 10, constraints['maximum']]
+        # when both are specified, maxLength wins over minLength
+        if 'minLength' in constraints:
+            args['length'] = constraints['minLength']
+        if 'maxLength' in constraints:
+            args['length'] = constraints['maxLength']
+        return args
+    def build_args_from_schema_field(field: dict) -> dict:
+        # constraints of the field itself take precedence over the ones of its array items
+        if 'constraints' in field:
+            return build_args_from_constraints(field['constraints'])
+        if 'constraints' in field.get('arrayItem', {}):
+            return build_args_from_constraints(field['arrayItem']['constraints'])
+        return {}
+    schema_types_to_python = {
+        'number': 'float',
+        'integer': 'int',
+        'string': 'str',
+        'year': 'year',
+        'boolean': 'bool',
+        'date': 'date',
+        'yearmonth': 'date',
+        'time': 'time',
+        'datetime': 'datetime',
+        'array': 'array'
+    }
+    if schema_path:
+        if schema_path.startswith('http'):
+            schema = requests.get(schema_path).json()
+        else:
+            with open(schema_path, encoding=encoding) as jsonfile:
+                schema = json.load(jsonfile)
+        if 'fields' not in schema:
+            raise ValueError('The schema must have a "fields" key.')
+        # the fields of the schema replace the "fields" argument, if any
+        fields = [
+            {
+                'name': f['name'],
+                'type': schema_types_to_python.get(f['type'], 'str'),
+                # when frformat is supported in TableSchema, we can build args for French standards
+                # linked to https://github.com/datagouv/fr-format/issues/26
+                'args': build_args_from_schema_field(f),
+            } for f in schema['fields']
+        ]
+    # work on copies so that the field definitions given by the caller are not modified
+    fields = [{**f, 'args': dict(f.get('args') or {})} for f in fields]
+    for field in fields:
+        if field['type'] == 'float':
+            field['args']['num_type'] = float
+        elif field['type'] == 'int':
+            field['args']['num_type'] = int
+        elif field['type'] == 'year':
+            field['args']['num_type'] = int
+            field['args']['num_range'] = [1990, 2050]
+    types_to_func = {
+        'int': _number,
+        'float': _number,
+        'date': _date,
+        'time': _time,
+        'str': _string,
+        'url': _url,
+        'id': _id,
+        'year': _number,
+        'bool': _bool,
+        'datetime': _datetime,
+        'array': _array,
+    }
+    # unknown types fall back to the string generator (a callable, not the name of the type)
+    generators = [(types_to_func.get(f['type'], _string), f['args']) for f in fields]
+    # would it be better to create by column or by row (as for now)?
+    output = pd.DataFrame(
+        [
+            [generate(**args) for generate, args in generators]
+            for _ in range(file_length)
+        ],
+        columns=[f["name"] for f in fields],
+    )
+    if output_name:
+        output.to_csv(output_name, sep=output_sep, index=False)
+    return output

csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py CHANGED Viewed

@@ -1,5 +1,9 @@
-from frformat import CodeCommuneInsee
+from frformat import CodeCommuneInsee, Millesime
 PROPORTION = 0.75
-_is = CodeCommuneInsee.is_valid
+_code_commune_insee = CodeCommuneInsee(Millesime.LATEST)
+def _is(val):
+    '''Return True if val is a string accepted by the CodeCommuneInsee validator.'''
+    # same string guard as the other detectors: non-string values are rejected up front
+    return isinstance(val, str) and _code_commune_insee.is_valid(val)

csv_detective/detect_fields/FR/geo/code_departement/__init__.py CHANGED Viewed

@@ -1,7 +1,15 @@
-from frformat import NumeroDepartement
+from frformat import NumeroDepartement, Options, Millesime
 PROPORTION = 1
+_options = Options(
+    ignore_case=True,
+    ignore_accents=True,
+    replace_non_alphanumeric_with_space=True,
+    ignore_extra_whitespace=True
+)
+_numero_departement = NumeroDepartement(Millesime.LATEST, _options)
 def _is(val):
-    return isinstance(val, str) and NumeroDepartement.is_valid(val, strict=False)
+    return isinstance(val, str) and _numero_departement.is_valid(val)

csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py CHANGED Viewed

@@ -2,6 +2,8 @@ from frformat import CodeFantoir
 PROPORTION = 1
+_code_fantoir = CodeFantoir()
 def _is(val):
-    return isinstance(val, str) and CodeFantoir.is_valid(val)
+    return isinstance(val, str) and _code_fantoir.is_valid(val)

csv_detective/detect_fields/FR/geo/code_postal/__init__.py CHANGED Viewed

@@ -2,4 +2,9 @@ from frformat import CodePostal
 PROPORTION = 0.9
-_is = CodePostal.is_valid
+_code_postal = CodePostal()
+def _is(val):
+    '''Return True if val is a string accepted by the CodePostal validator.'''
+    # same string guard as the other detectors: non-string values are rejected up front
+    return isinstance(val, str) and _code_postal.is_valid(val)

csv_detective/detect_fields/FR/geo/code_region/__init__.py CHANGED Viewed

@@ -1,8 +1,10 @@
-from frformat import CodeRegion
+from frformat import CodeRegion, Millesime
 PROPORTION = 1
+_code_region = CodeRegion(Millesime.LATEST)
 def _is(val):
     '''Renvoie True si val peut être un code_région, False sinon'''
-    return isinstance(val, str) and CodeRegion.is_valid(val)
+    return isinstance(val, str) and _code_region.is_valid(val)

csv_detective/detect_fields/FR/geo/commune/__init__.py CHANGED Viewed

@@ -1,8 +1,16 @@
-from frformat import Commune
+from frformat import Commune, Options, Millesime
 PROPORTION = 0.9
+_options = Options(
+    ignore_case=True,
+    ignore_accents=True,
+    replace_non_alphanumeric_with_space=True,
+    ignore_extra_whitespace=True
+)
+_commune = Commune(Millesime.LATEST, _options)
 def _is(val):
     """Match avec le nom des communes"""
-    return isinstance(val, str) and Commune.is_valid(val, strict=False)
+    return isinstance(val, str) and _commune.is_valid(val)

csv_detective/detect_fields/FR/geo/departement/__init__.py CHANGED Viewed

@@ -1,8 +1,16 @@
-from frformat import Departement
+from frformat import Departement, Options, Millesime
 PROPORTION = 0.9
+_options = Options(
+    ignore_case=True,
+    ignore_accents=True,
+    replace_non_alphanumeric_with_space=True,
+    ignore_extra_whitespace=True
+)
+_departement = Departement(Millesime.LATEST, _options)
 def _is(val):
     """Match avec le nom des departements"""
-    return isinstance(val, str) and Departement.is_valid(val, strict=False)
+    return isinstance(val, str) and _departement.is_valid(val)

csv_detective/detect_fields/FR/geo/insee_canton/__init__.py CHANGED Viewed

@@ -1,8 +1,15 @@
-from frformat import Canton
+from frformat import Canton, Options, Millesime
 PROPORTION = 0.9
+_options = Options(
+    ignore_case=True,
+    ignore_accents=True,
+    replace_non_alphanumeric_with_space=True,
+    ignore_extra_whitespace=True
+)
+_canton = Canton(Millesime.LATEST, _options)
 def _is(val):
     """Match avec le nom des cantons"""
-    return isinstance(val, str) and Canton.is_valid(val, strict=False)
+    return isinstance(val, str) and _canton.is_valid(val)

csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py CHANGED Viewed

@@ -1,18 +1,18 @@
 from frformat import LatitudeL93
 from csv_detective.detect_fields.other.float import _is as is_float
 from csv_detective.detect_fields.other.float import float_casting
 PROPORTION = 0.9
+_latitudel93 = LatitudeL93()
 def _is(val):
     try:
-        if isinstance(val, (float, int)):
-            return LatitudeL93.is_valid(val)
-        elif isinstance(val, str) and is_float(val):
-            return LatitudeL93.is_valid(float_casting(val))
+        if isinstance(val, str) and is_float(val):
+            return _latitudel93.is_valid(float_casting(val))
         return False

csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py CHANGED Viewed

@@ -1,18 +1,18 @@
 from frformat import LongitudeL93
 from csv_detective.detect_fields.other.float import _is as is_float
 from csv_detective.detect_fields.other.float import float_casting
 PROPORTION = 0.9
+_longitudel93 = LongitudeL93()
 def _is(val):
     try:
-        if isinstance(val, (float, int)):
-            return LongitudeL93.is_valid(val)
-        elif isinstance(val, str) and is_float(val):
-            return LongitudeL93.is_valid(float_casting(val))
+        if isinstance(val, str) and is_float(val):
+            return _longitudel93.is_valid(float_casting(val))
         return False

csv_detective/detect_fields/FR/geo/pays/__init__.py CHANGED Viewed

@@ -1,8 +1,16 @@
-from frformat import Pays
+from frformat import Pays, Options, Millesime
 PROPORTION = 0.6
+_options = Options(
+        ignore_case=True,
+        ignore_accents=True,
+        replace_non_alphanumeric_with_space=True,
+        ignore_extra_whitespace=True
+    )
+_pays = Pays(Millesime.LATEST, _options)
 def _is(val):
     """Match avec le nom des pays"""
-    return isinstance(val, str) and Pays.is_valid(val, strict=False)
+    return isinstance(val, str) and _pays.is_valid(val)

csv_detective/detect_fields/FR/geo/region/__init__.py CHANGED Viewed

@@ -1,8 +1,48 @@
-from frformat import Region
+from frformat import Region, Options, Millesime
 PROPORTION = 1
+_extra_valid_values_set = frozenset({
+        "alsace",
+        "aquitaine",
+        "ara",
+        "aura",
+        "auvergne",
+        "auvergne et rhone alpes",
+        "basse normandie",
+        "bfc",
+        "bourgogne",
+        "bourgogne et franche comte",
+        "centre",
+        "champagne ardenne",
+        "franche comte",
+        "ge",
+        "haute normandie",
+        "hdf",
+        "languedoc roussillon",
+        "limousin",
+        "lorraine",
+        "midi pyrenees",
+        "nord pas de calais",
+        "npdc",
+        "paca",
+        "picardie",
+        "poitou charentes",
+        "reunion",
+        "rhone alpes",
+        })
+_options = Options(
+    ignore_case=True,
+    ignore_accents=True,
+    replace_non_alphanumeric_with_space=True,
+    ignore_extra_whitespace=True,
+    extra_valid_values=_extra_valid_values_set
+)
+_region = Region(Millesime.LATEST, _options)
 def _is(val):
     """Match avec le nom des regions"""
-    return isinstance(val, str) and Region.is_valid(val, strict=False)
+    return isinstance(val, str) and _region.is_valid(val)

csv_detective/detect_fields/FR/other/code_rna/__init__.py CHANGED Viewed

@@ -2,6 +2,8 @@ from frformat import CodeRNA
 PROPORTION = 0.9
+_code_rna = CodeRNA()
 def _is(val):
-    return isinstance(val, str) and CodeRNA.is_valid(val)
+    return isinstance(val, str) and _code_rna.is_valid(val)

{csv_detective-0.7.5.dev980.data → csv_detective-0.7.5.dev1052.data}/data/share/csv_detective/CHANGELOG.md RENAMED Viewed

@@ -2,7 +2,7 @@
 ## Current (in progress)
-- Nothing yet
+- New function that creates a csv from a list of fields and constraints, or from a TableSchema [#100](https://github.com/datagouv/csv-detective/pull/100)
 ## 0.7.4 (2024-11-15)

{csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: csv_detective
-Version: 0.7.5.dev980
+Version: 0.7.5.dev1052
 Summary: Detect CSV column content
 Home-page: https://github.com/etalab/csv_detective
 Author: Etalab
@@ -26,5 +26,7 @@ Requires-Dist: odfpy==1.4.1
 Requires-Dist: requests==2.32.3
 Requires-Dist: responses==0.25.0
 Requires-Dist: python-magic==0.4.27
-Requires-Dist: frformat==0.3.0
+Requires-Dist: frformat==0.4.0
+Requires-Dist: faker==33.0.0
+Requires-Dist: rstr==3.2.2

{csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,6 @@
-csv_detective/__init__.py,sha256=giVhs0g13y4U2H0WiVBLcrvytcMxQ1LiCd2i03XITwQ,83
+csv_detective/__init__.py,sha256=Au4bNJ_Gi6P6o0uO4R56nYdshG7M6-7Rg_xX4whLmLI,143
 csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
+csv_detective/create_example.py,sha256=358e7Q7RWMrY_eEo3pUteJWmg2smFb5edJ_AzcQPrqA,8646
 csv_detective/detection.py,sha256=AuXlPOZfzqznZY2ybAAgaXIq6qVITYd3MXf2CoigI3I,22097
 csv_detective/explore_csv.py,sha256=X5yZS3WCUsafUMcs5tOnDTeMGzMnfr0iB9vEDx7xiqg,16977
 csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
@@ -10,24 +11,24 @@ csv_detective/detect_fields/__init__.py,sha256=CchNbi1vrgIGh_uBexXZTzfjBETDY0kQL
 csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=e5JqMNOPxx0Ivju3zAHCGMopZroCpR4vr3DJKlQhMz4,1675
-csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=l3-4WlLacNVngWWcgNhxwYMACFEKQRky_KJo_M7g5fc,90
-csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=cv53Vw0uqsXu1zl47JR7WPGP0PKjWgNJ_2ibrKAc3tU,153
-csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=rRnOsK5Ax8Dy1MyGUq-o7Kcs89okCXpwllJcOF79Ns0,127
-csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=DHFC0I05Iywt9eVgflLtsmK78PYomI2KNiAuGQRm4CM,77
-csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=AEirwDhkjCpymsaB09Nc8OmcexN2eaLNyipizYe3m4Q,195
-csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=mfLJu1elZiNGF5Uh565HvOTUjRrAKqj45QjYQ41uw0w,176
-csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=9TUR7YnYhkJDxpUjlK2BRudDCsvHuH57sXpne7Kjb1g,188
-csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=k09WqKkB-RgR1Dr0nvO8iaxyvROj2wcV3t8Vc4JJSdQ,173
-csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=15g9DJBvJUXQwOS3vz9l-77as3e1AC7sTaMQVyj5xHg,496
+csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=tfHdqUnCQ0cv-fBo3Cy--8UNXzgjld4kseI5eQ_sR4E,187
+csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=unr-Y4zquKSM5PVUiQGnOm-zQvaN8qd3v_XHf0W2VH8,378
+csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=27bCkZP5w7tpsKUdOIXuiAG90DTdw066CWg3G5HtsKE,160
+csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=e1SdnW8zVSxrRMm-CeK9tlkLzORP2C6KOInTWnB7h3o,134
+csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=y-TPljkf-W209tp7V0RnJ34936XxB6FA2-XPYK3DV8I,253
+csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=tZ4d1BQd9Xow0SWBcmuGlnX-RKHDzCstdY9AsXM6-Nk,379
+csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=je2zLsPlK_X189bbmKzf4BJSEoFShxMz2eQNXB7hsh0,399
+csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=3uNN_Iha6dFfm24CluUmkHFg6nj7kRQaXrHDEcLfyjY,373
+csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=5v07RJbi12eoPa-e_-q8xlWBew80FPMxsggcMgZQiI8,438
 csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=WjPHg8s0ND6bOwS-yo6FP1dnwD-6SWg9oH1K0avHsbI,344
-csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=4ZJZRIyr4RSaLzMoxoVNME-HrA2_mF1V1CluxgTGp_0,499
+csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=ZwThPSfbRwNHA_anuplxTPYHK-WMduc_np2Xw9XsApM,442
 csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=d4fLK4IndwllDhsddyTbyRiPfc8O9wT0pLIRI_C3QvQ,344
-csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=kFXGruWjn5EfKVQIfjiLEhNc73p_N2VgZCl-l8DIHqs,166
-csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=pajL5nr2zWnzCiVsC9SZcfMfjXLUJXm0QKLcyMecVYg,171
+csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=2q5T4SmCK6ZFF1mrv7d-q9tOIQKBcROI24y_UYIuvz0,383
+csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=JbFKDd4jAnd9yb7YqP36MoLdO1JFPm1cg60fGXt6ZvI,1074
 csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=X0NT6YbBg9PrxIcBwzUCQuBiv_QdDdqb3CJnrlent28,566
 csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
-csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=7bQiT-Mx7e7lW2MSydKXCIk_D8xjWLdWhQIxT7q4fG4,121
+csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z0RjMBt1--ZL7Jd1RsHAQCCbTAQk_BnlnTq8VF1o_VA,146
 csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=g9n5sOjRlk4I9YFZjdaTYrXf8ftXRDunGZOUpYhN4fA,295
 csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=XacU_3rwXqtdbw_ULTSnu0OOtx0w_rKlviCrLmNdHjc,496
 csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
@@ -125,17 +126,18 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=GrIbo64WVM3hi7ShBRKKyKU
 csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
 csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
 csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
-csv_detective-0.7.5.dev980.data/data/share/csv_detective/CHANGELOG.md,sha256=4ABp5UF2L6tPg-eK7Dj6NWgnFnkU74BwhrMzrRGJ2Lw,6585
-csv_detective-0.7.5.dev980.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
-csv_detective-0.7.5.dev980.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
+csv_detective-0.7.5.dev1052.data/data/share/csv_detective/CHANGELOG.md,sha256=oDqKO3qTo-cUSJB4fMbsyQY2O4pEQhOwWeHsZwaGkxM,6725
+csv_detective-0.7.5.dev1052.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+csv_detective-0.7.5.dev1052.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/test_fields.py,sha256=Uq9eJaK3D8b_lDd_4Q3aMGUHP4NkrpY6g07LUnJcDDc,10587
+tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
+tests/test_fields.py,sha256=kXel-hiyQYrJ3OLmwUMg1K3DKbbwBLvUplxZWxpp18I,10605
 tests/test_file.py,sha256=1fEOu3bArGBaarRKAoTXAF3cSIGJfFN3UIwOW6esWRs,6399
 tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
 tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
-csv_detective-0.7.5.dev980.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
-csv_detective-0.7.5.dev980.dist-info/METADATA,sha256=U3bGCQBrGNtgHc5kIuteE4nRrMG6G__xyuko0mWdmJY,1089
-csv_detective-0.7.5.dev980.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
-csv_detective-0.7.5.dev980.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
-csv_detective-0.7.5.dev980.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
-csv_detective-0.7.5.dev980.dist-info/RECORD,,
+csv_detective-0.7.5.dev1052.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+csv_detective-0.7.5.dev1052.dist-info/METADATA,sha256=mgg54BtjqPn-L_G2a4JU0SyorK8uYzUyp64cxOAIe6A,1146
+csv_detective-0.7.5.dev1052.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+csv_detective-0.7.5.dev1052.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+csv_detective-0.7.5.dev1052.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
+csv_detective-0.7.5.dev1052.dist-info/RECORD,,

tests/test_example.py ADDED Viewed

@@ -0,0 +1,71 @@
+import re
+from uuid import UUID
+from csv_detective import create_example_csv_file
+def test_example_creation():
+    '''Generate 5 rows from an explicit list of fields and check every column against its spec.'''
+    siret_pattern = '^\\d{14}$'
+    producer_types = ['privé', 'public', 'association']
+    date_bounds = ['1996-02-13', '2000-01-28']
+    fields = [
+        {"name": "id_unique", "type": "id"},
+        {"name": "nom_modele", "type": "str", "args": {'length': 20}},
+        {"name": "siret", "type": "str", "args": {'pattern': siret_pattern}},
+        {"name": "type_producteur", "type": "str", "args": {'enum': producer_types}},
+        {
+            "name": "date_creation",
+            "type": "date",
+            "args": {'date_range': date_bounds, 'format': '%Y-%m-%d'},
+        },
+        {"name": "url_produit", "type": "url"},
+        {"name": "nb_produits", "type": "int"},
+        {"name": "note", "type": "float", "args": {'num_range': [1, 20]}},
+    ]
+    # an empty output_name means that no file is written, only the dataframe is returned
+    df = create_example_csv_file(fields=fields, file_length=5, output_name="")
+    assert len(df) == 5
+    for value in df["id_unique"]:
+        # UUID() raises if the value is not a valid uuid
+        assert UUID(value)
+    for value in df["nom_modele"]:
+        assert len(value) == 20
+    for value in df["siret"]:
+        assert re.match(siret_pattern, value)
+    for value in df["type_producteur"]:
+        assert value in producer_types
+    for value in df["date_creation"]:
+        # ISO dates compare chronologically as strings
+        assert date_bounds[0] <= value <= date_bounds[1]
+    for value in df["url_produit"]:
+        assert value.startswith("http")
+    for value in df["nb_produits"]:
+        assert isinstance(value, int)
+    for value in df["note"]:
+        assert 1 <= value <= 20
+def test_example_from_tableschema():
+    '''Generate the default number of rows (10) from a TableSchema fetched over http.'''
+    schema_url = "https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/2.3.1/schema-statique.json"
+    # an empty output_name means that no file is written, only the dataframe is returned
+    generated = create_example_csv_file(schema_path=schema_url, output_name="")
+    assert len(generated) == 10

tests/test_fields.py CHANGED Viewed

@@ -282,26 +282,26 @@ def test_do_not_match_canton():
 # latitude_l93
 def test_match_latitude_l93():
-    vals = [6037008, 7123528.5, "7124528,5"]
+    vals = ["6037008", "7123528.5", "7124528,5"]
     for val in vals:
         assert latitude_l93._is(val)
 def test_do_not_match_latitude_93():
-    vals = [0, -6734529.6, 7245669.8, "3422674,78", "32_34"]
+    vals = ["0", "-6734529.6", "7245669.8", "3422674,78", "32_34"]
     for val in vals:
         assert not latitude_l93._is(val)
 # longitude_l93
 def test_match_longitude_l93():
-    vals = [0, -154, "1265783,45", 34723.4]
+    vals = ["0", "-154", "1265783,45", "34723.4"]
     for val in vals:
         assert longitude_l93._is(val)
 def test_do_not_match_longitude_93():
-    vals = [1456669.8, "-776225", "346_3214"]
+    vals = ["1456669.8", "-776225", "346_3214"]
     for val in vals:
         assert not longitude_l93._is(val)

{csv_detective-0.7.5.dev980.data → csv_detective-0.7.5.dev1052.data}/data/share/csv_detective/LICENSE.AGPL.txt RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev980.data → csv_detective-0.7.5.dev1052.data}/data/share/csv_detective/README.md RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/LICENSE.AGPL.txt RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/WHEEL RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/top_level.txt RENAMED Viewed

File without changes

csv-detective 0.7.5.dev980__py3-none-any.whl → 0.7.5.dev1052__py3-none-any.whl

csv-detective 0.7.5.dev980__py3-none-any.whl → 0.7.5.dev1052__py3-none-any.whl