csv-detective 0.7.5.dev980__py3-none-any.whl → 0.7.5.dev1052__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. csv_detective/__init__.py +1 -0
  2. csv_detective/create_example.py +247 -0
  3. csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +6 -2
  4. csv_detective/detect_fields/FR/geo/code_departement/__init__.py +10 -2
  5. csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +3 -1
  6. csv_detective/detect_fields/FR/geo/code_postal/__init__.py +6 -1
  7. csv_detective/detect_fields/FR/geo/code_region/__init__.py +4 -2
  8. csv_detective/detect_fields/FR/geo/commune/__init__.py +10 -2
  9. csv_detective/detect_fields/FR/geo/departement/__init__.py +10 -2
  10. csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +9 -2
  11. csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +5 -5
  12. csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +5 -5
  13. csv_detective/detect_fields/FR/geo/pays/__init__.py +10 -2
  14. csv_detective/detect_fields/FR/geo/region/__init__.py +42 -2
  15. csv_detective/detect_fields/FR/other/code_rna/__init__.py +3 -1
  16. {csv_detective-0.7.5.dev980.data → csv_detective-0.7.5.dev1052.data}/data/share/csv_detective/CHANGELOG.md +1 -1
  17. {csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/METADATA +4 -2
  18. {csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/RECORD +26 -24
  19. tests/test_example.py +71 -0
  20. tests/test_fields.py +4 -4
  21. {csv_detective-0.7.5.dev980.data → csv_detective-0.7.5.dev1052.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
  22. {csv_detective-0.7.5.dev980.data → csv_detective-0.7.5.dev1052.data}/data/share/csv_detective/README.md +0 -0
  23. {csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/LICENSE.AGPL.txt +0 -0
  24. {csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/WHEEL +0 -0
  25. {csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/entry_points.txt +0 -0
  26. {csv_detective-0.7.5.dev980.dist-info → csv_detective-0.7.5.dev1052.dist-info}/top_level.txt +0 -0
csv_detective/__init__.py CHANGED
@@ -1,3 +1,4 @@
1
1
  from .explore_csv import routine, routine_minio # noqa
2
+ from .create_example import create_example_csv_file # noqa
2
3
 
3
4
  __version__ = '0.7.5.dev'
@@ -0,0 +1,247 @@
1
+ import random
2
+ import uuid
3
+ import string
4
+ from datetime import datetime
5
+ import pandas as pd
6
+ from typing import List, Union, Optional, Any, Type
7
+ import json
8
+ import requests
9
+ import rstr
10
+ from faker import Faker
11
+
12
+ fake = Faker()
13
+
14
+
15
def create_example_csv_file(
    fields: Optional[List[dict]] = None,
    schema_path: Optional[str] = None,
    file_length: int = 10,
    output_name: str = 'example_file.csv',
    output_sep: str = ';',
    encoding: str = 'utf-8',
    ignore_required: bool = False,
) -> pd.DataFrame:
    '''
    Create an example file based on a list of dicts like follows:
    fields = [
        {
            "name": "column_name",
            "type": "column_type",
            "args": {dict_of_args} # optional
        },
        ...
    ]
    Or from a TableSchema (local path or http(s) URL given in schema_path).

    Args:
        fields: column specifications (see above); superseded by schema_path.
        schema_path: local path or URL of a TableSchema JSON file.
        file_length: number of rows to generate.
        output_name: CSV file to write; a falsy value skips writing.
        output_sep: separator used in the output CSV.
        encoding: encoding used when reading a local schema file.
        ignore_required: if True, no cell is ever randomly left empty.

    Returns:
        The generated data as a pandas DataFrame.

    Raises:
        ValueError: if neither fields nor schema_path is given, if the schema
            lacks a "fields" key, or if a *_range argument is malformed.
    '''
    # need to make a CLI command

    if not (fields or schema_path):
        raise ValueError("At least fields or schema_path must be specified.")

    def potential_skip(required: bool) -> bool:
        # Decide whether a cell should be left empty for an optional field.
        if ignore_required:
            return False
        if not required:
            # for now 30% chance to have an optional value, this could go as an argument
            return random.randint(1, 100) <= 30
        # Required fields are never skipped (was an implicit None before).
        return False

    def _string(
        length: int = 10,
        required: bool = True,
        pattern: Optional[str] = None,
        enum: Optional[list] = None,
    ) -> str:
        # Random string: from a regex pattern, an enum, or plain lowercase letters.
        if potential_skip(required):
            return ''
        if pattern is not None:
            return rstr.xeger(pattern)
        elif enum is not None:
            return random.choice(enum)
        else:
            letters = string.ascii_lowercase
            return ''.join(random.choice(letters) for i in range(length))

    def _id(
        required: bool = True,
    ) -> str:
        # Random UUID4 rendered as a string.
        if potential_skip(required):
            return ''
        return str(uuid.uuid4())

    def _date(
        date_range: Union[None, List[str]] = None,
        format: str = '%Y-%m-%d',
        required: bool = True,
    ) -> str:
        # the bounds specified in date_range are expected in the same format as the desired output format
        assert all([k in format for k in ['%d', '%m', '%Y']])
        if potential_skip(required):
            return ''
        if date_range is None:
            return fake.date(format)
        else:
            if len(date_range) != 2:
                raise ValueError('"date_range" must have exactly two elements.')
            return fake.date_between_dates(
                datetime.strptime(date_range[0], format),
                datetime.strptime(date_range[1], format),
            ).strftime(format)

    def _time(
        format: str = '%H:%M:%S',
        required: bool = True,
    ) -> str:
        assert all([k in format for k in ['%H', '%M', '%S']])
        if potential_skip(required):
            return ''
        # maybe add a time_range argument?
        return fake.time(format)

    def _datetime(
        datetime_range: Optional[List[str]] = None,
        format: str = '%Y-%m-%d %H-%M-%S',
        required: bool = True,
    ) -> str:
        # the bounds specified in datetime_range are expected in the same format as the desired output format
        assert all([k in format for k in ['%d', '%m', '%Y', '%H', '%M', '%S']])
        if potential_skip(required):
            return ''
        if datetime_range is None:
            return fake.date_time().strftime(format)
        else:
            if len(datetime_range) != 2:
                # Fixed: the message used to wrongly say "date_range".
                raise ValueError('"datetime_range" must have exactly two elements.')
            return fake.date_time_between(
                datetime.strptime(datetime_range[0], format),
                datetime.strptime(datetime_range[1], format),
            ).strftime(format)

    def _url(required: bool = True) -> str:
        # Plausible-looking random URL.
        if potential_skip(required):
            return ''
        return f'http://{rstr.domainsafe()}.{rstr.letters(3)}/{rstr.urlsafe()}'

    def _number(
        num_type: Type[Union[int, float]] = int,
        num_range: Optional[List[float]] = None,
        enum: Optional[list] = None,
        required: bool = True,
    ) -> Union[int, float, str]:
        # Random number within num_range (default [0, 1000]) or picked from enum.
        assert num_range is None or len(num_range) == 2
        if potential_skip(required):
            return ''
        if enum:
            return random.choice(enum)
        if num_range is None:
            num_range = [0, 1000]
        if num_type == int:
            return random.randint(num_range[0], num_range[1])
        else:
            return round(random.uniform(num_range[0], num_range[1]), 1)

    def _bool(required: bool = True) -> Union[bool, str]:
        if potential_skip(required):
            return ''
        return random.randint(0, 1) == 0

    def _array(enum: List[Any], required: bool = True) -> str:
        # Bracketed, comma-joined random sample of enum (assumes string items).
        if potential_skip(required):
            return ''
        return f"[{','.join(random.sample(enum, random.randint(1, len(enum))))}]"

    def build_args_from_constraints(constraints: dict) -> dict:
        # Map TableSchema constraints onto the generators' keyword arguments.
        # NOTE(review): a string field with a 'format' constraint would reach
        # _string, which has no 'format' parameter — confirm against real schemas.
        args = {}
        args['required'] = constraints.get('required', False)
        for _ in ['pattern', 'enum', 'format']:
            if _ in constraints:
                args[_] = constraints[_]
        if 'minimum' in constraints and 'maximum' in constraints:
            args['num_range'] = [constraints['minimum'], constraints['maximum']]
        # maybe there are better values than these?
        elif 'minimum' in constraints:
            args['num_range'] = [constraints['minimum'], 10 + constraints['minimum']]
        elif 'maximum' in constraints:
            args['num_range'] = [constraints['maximum'] - 10, constraints['maximum']]
        if 'minLength' in constraints:
            args['length'] = constraints['minLength']
        if 'maxLength' in constraints:
            args['length'] = constraints['maxLength']
        return args

    schema_types_to_python = {
        'number': 'float',
        'integer': 'int',
        'string': 'str',
        'year': 'year',
        'boolean': 'bool',
        'date': 'date',
        'yearmonth': 'date',
        'time': 'time',
        'datetime': 'datetime',
        'array': 'array'
    }

    if schema_path:
        if schema_path.startswith('http'):
            schema = requests.get(schema_path).json()
        else:
            with open(schema_path, encoding=encoding) as jsonfile:
                schema = json.load(jsonfile)
        if not ('fields' in schema.keys()):
            raise ValueError('The schema must have a "fields" key.')
        else:
            fields = [
                {
                    'name': f['name'],
                    'type': schema_types_to_python.get(f['type'], 'str'),
                    # when frformat is supported in TableSchema, we can build args for French standards
                    # linked to https://github.com/datagouv/fr-format/issues/26
                    'args': (
                        build_args_from_constraints(f['constraints']) if 'constraints' in f.keys()
                        else build_args_from_constraints(f['arrayItem']['constraints'])
                        if 'arrayItem' in f.keys() and 'constraints' in f['arrayItem'].keys()
                        else {}
                    )
                } for f in schema['fields']
            ]

    # Normalize field specs: ensure 'args' exists and set numeric types.
    for k in range(len(fields)):
        if 'args' not in fields[k]:
            fields[k]['args'] = {}
        if fields[k]['type'] == 'float':
            fields[k]['args']['num_type'] = float
        elif fields[k]['type'] == 'int':
            fields[k]['args']['num_type'] = int
        elif fields[k]['type'] == 'year':
            fields[k]['args']['num_type'] = int
            fields[k]['args']['num_range'] = [1990, 2050]

    types_to_func = {
        'int': _number,
        'float': _number,
        'date': _date,
        'time': _time,
        'str': _string,
        'url': _url,
        'id': _id,
        'year': _number,
        'bool': _bool,
        'datetime': _datetime,
        'array': _array,
    }

    # would it be better to create by column or by row (as for now)?
    # Fixed: the fallback for unknown types used to be the *string* 'str',
    # which crashed with "'str' object is not callable"; fall back to _string.
    output = pd.DataFrame(
        [
            [
                types_to_func.get(f['type'], _string)(**f['args'])
                for f in fields
            ] for _ in range(file_length)
        ],
        columns=[f["name"] for f in fields],
    )

    if output_name:
        output.to_csv(output_name, sep=output_sep, index=False)

    return output
@@ -1,5 +1,9 @@
1
- from frformat import CodeCommuneInsee
1
+ from frformat import CodeCommuneInsee, Millesime
2
2
 
3
3
  PROPORTION = 0.75
4
4
 
5
- _is = CodeCommuneInsee.is_valid
5
+ _code_commune_insee = CodeCommuneInsee(Millesime.LATEST)
6
+
7
+
8
+ def _is(val):
9
+ return _code_commune_insee.is_valid(val)
@@ -1,7 +1,15 @@
1
- from frformat import NumeroDepartement
1
+ from frformat import NumeroDepartement, Options, Millesime
2
2
 
3
3
  PROPORTION = 1
4
4
 
5
+ _options = Options(
6
+ ignore_case=True,
7
+ ignore_accents=True,
8
+ replace_non_alphanumeric_with_space=True,
9
+ ignore_extra_whitespace=True
10
+ )
11
+ _numero_departement = NumeroDepartement(Millesime.LATEST, _options)
12
+
5
13
 
6
14
  def _is(val):
7
- return isinstance(val, str) and NumeroDepartement.is_valid(val, strict=False)
15
+ return isinstance(val, str) and _numero_departement.is_valid(val)
@@ -2,6 +2,8 @@ from frformat import CodeFantoir
2
2
 
3
3
  PROPORTION = 1
4
4
 
5
+ _code_fantoir = CodeFantoir()
6
+
5
7
 
6
8
  def _is(val):
7
- return isinstance(val, str) and CodeFantoir.is_valid(val)
9
+ return isinstance(val, str) and _code_fantoir.is_valid(val)
@@ -2,4 +2,9 @@ from frformat import CodePostal
2
2
 
3
3
  PROPORTION = 0.9
4
4
 
5
- _is = CodePostal.is_valid
5
+ _code_postal = CodePostal()
6
+
7
+
8
+ def _is(val):
9
+
10
+ return _code_postal.is_valid(val)
@@ -1,8 +1,10 @@
1
- from frformat import CodeRegion
1
+ from frformat import CodeRegion, Millesime
2
2
 
3
3
  PROPORTION = 1
4
4
 
5
+ _code_region = CodeRegion(Millesime.LATEST)
6
+
5
7
 
6
8
  def _is(val):
7
9
  '''Renvoie True si val peut être un code_région, False sinon'''
8
- return isinstance(val, str) and CodeRegion.is_valid(val)
10
+ return isinstance(val, str) and _code_region.is_valid(val)
@@ -1,8 +1,16 @@
1
- from frformat import Commune
1
+ from frformat import Commune, Options, Millesime
2
2
 
3
3
  PROPORTION = 0.9
4
4
 
5
+ _options = Options(
6
+ ignore_case=True,
7
+ ignore_accents=True,
8
+ replace_non_alphanumeric_with_space=True,
9
+ ignore_extra_whitespace=True
10
+ )
11
+ _commune = Commune(Millesime.LATEST, _options)
12
+
5
13
 
6
14
  def _is(val):
7
15
  """Match avec le nom des communes"""
8
- return isinstance(val, str) and Commune.is_valid(val, strict=False)
16
+ return isinstance(val, str) and _commune.is_valid(val)
@@ -1,8 +1,16 @@
1
- from frformat import Departement
1
+ from frformat import Departement, Options, Millesime
2
2
 
3
3
  PROPORTION = 0.9
4
4
 
5
+ _options = Options(
6
+ ignore_case=True,
7
+ ignore_accents=True,
8
+ replace_non_alphanumeric_with_space=True,
9
+ ignore_extra_whitespace=True
10
+ )
11
+ _departement = Departement(Millesime.LATEST, _options)
12
+
5
13
 
6
14
  def _is(val):
7
15
  """Match avec le nom des departements"""
8
- return isinstance(val, str) and Departement.is_valid(val, strict=False)
16
+ return isinstance(val, str) and _departement.is_valid(val)
@@ -1,8 +1,15 @@
1
- from frformat import Canton
1
+ from frformat import Canton, Options, Millesime
2
2
 
3
3
  PROPORTION = 0.9
4
+ _options = Options(
5
+ ignore_case=True,
6
+ ignore_accents=True,
7
+ replace_non_alphanumeric_with_space=True,
8
+ ignore_extra_whitespace=True
9
+ )
10
+ _canton = Canton(Millesime.LATEST, _options)
4
11
 
5
12
 
6
13
  def _is(val):
7
14
  """Match avec le nom des cantons"""
8
- return isinstance(val, str) and Canton.is_valid(val, strict=False)
15
+ return isinstance(val, str) and _canton.is_valid(val)
@@ -1,18 +1,18 @@
1
1
  from frformat import LatitudeL93
2
2
  from csv_detective.detect_fields.other.float import _is as is_float
3
+
3
4
  from csv_detective.detect_fields.other.float import float_casting
4
5
 
5
6
 
6
7
  PROPORTION = 0.9
7
8
 
9
+ _latitudel93 = LatitudeL93()
10
+
8
11
 
9
12
  def _is(val):
10
13
  try:
11
- if isinstance(val, (float, int)):
12
- return LatitudeL93.is_valid(val)
13
-
14
- elif isinstance(val, str) and is_float(val):
15
- return LatitudeL93.is_valid(float_casting(val))
14
+ if isinstance(val, str) and is_float(val):
15
+ return _latitudel93.is_valid(float_casting(val))
16
16
 
17
17
  return False
18
18
 
@@ -1,18 +1,18 @@
1
1
  from frformat import LongitudeL93
2
2
  from csv_detective.detect_fields.other.float import _is as is_float
3
+
3
4
  from csv_detective.detect_fields.other.float import float_casting
4
5
 
5
6
 
6
7
  PROPORTION = 0.9
7
8
 
9
+ _longitudel93 = LongitudeL93()
10
+
8
11
 
9
12
  def _is(val):
10
13
  try:
11
- if isinstance(val, (float, int)):
12
- return LongitudeL93.is_valid(val)
13
-
14
- elif isinstance(val, str) and is_float(val):
15
- return LongitudeL93.is_valid(float_casting(val))
14
+ if isinstance(val, str) and is_float(val):
15
+ return _longitudel93.is_valid(float_casting(val))
16
16
 
17
17
  return False
18
18
 
@@ -1,8 +1,16 @@
1
- from frformat import Pays
1
+ from frformat import Pays, Options, Millesime
2
2
 
3
3
  PROPORTION = 0.6
4
4
 
5
+ _options = Options(
6
+ ignore_case=True,
7
+ ignore_accents=True,
8
+ replace_non_alphanumeric_with_space=True,
9
+ ignore_extra_whitespace=True
10
+ )
11
+ _pays = Pays(Millesime.LATEST, _options)
12
+
5
13
 
6
14
  def _is(val):
7
15
  """Match avec le nom des pays"""
8
- return isinstance(val, str) and Pays.is_valid(val, strict=False)
16
+ return isinstance(val, str) and _pays.is_valid(val)
@@ -1,8 +1,48 @@
1
- from frformat import Region
1
+ from frformat import Region, Options, Millesime
2
2
 
3
3
  PROPORTION = 1
4
4
 
5
+ _extra_valid_values_set = frozenset({
6
+ "alsace",
7
+ "aquitaine",
8
+ "ara",
9
+ "aura",
10
+ "auvergne",
11
+ "auvergne et rhone alpes",
12
+ "basse normandie",
13
+ "bfc",
14
+ "bourgogne",
15
+ "bourgogne et franche comte",
16
+ "centre",
17
+ "champagne ardenne",
18
+ "franche comte",
19
+ "ge",
20
+ "haute normandie",
21
+ "hdf",
22
+ "languedoc roussillon",
23
+ "limousin",
24
+ "lorraine",
25
+ "midi pyrenees",
26
+ "nord pas de calais",
27
+ "npdc",
28
+ "paca",
29
+ "picardie",
30
+ "poitou charentes",
31
+ "reunion",
32
+ "rhone alpes",
33
+ })
34
+
35
+
36
+ _options = Options(
37
+ ignore_case=True,
38
+ ignore_accents=True,
39
+ replace_non_alphanumeric_with_space=True,
40
+ ignore_extra_whitespace=True,
41
+ extra_valid_values=_extra_valid_values_set
42
+ )
43
+ _region = Region(Millesime.LATEST, _options)
44
+
5
45
 
6
46
  def _is(val):
7
47
  """Match avec le nom des regions"""
8
- return isinstance(val, str) and Region.is_valid(val, strict=False)
48
+ return isinstance(val, str) and _region.is_valid(val)
@@ -2,6 +2,8 @@ from frformat import CodeRNA
2
2
 
3
3
  PROPORTION = 0.9
4
4
 
5
+ _code_rna = CodeRNA()
6
+
5
7
 
6
8
  def _is(val):
7
- return isinstance(val, str) and CodeRNA.is_valid(val)
9
+ return isinstance(val, str) and _code_rna.is_valid(val)
@@ -2,7 +2,7 @@
2
2
 
3
3
  ## Current (in progress)
4
4
 
5
- - Nothing yet
5
+ - New function that creates a csv from a list of fields and constraints, or from a TableSchema [#100](https://github.com/datagouv/csv-detective/pull/100)
6
6
 
7
7
  ## 0.7.4 (2024-11-15)
8
8
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: csv_detective
3
- Version: 0.7.5.dev980
3
+ Version: 0.7.5.dev1052
4
4
  Summary: Detect CSV column content
5
5
  Home-page: https://github.com/etalab/csv_detective
6
6
  Author: Etalab
@@ -26,5 +26,7 @@ Requires-Dist: odfpy==1.4.1
26
26
  Requires-Dist: requests==2.32.3
27
27
  Requires-Dist: responses==0.25.0
28
28
  Requires-Dist: python-magic==0.4.27
29
- Requires-Dist: frformat==0.3.0
29
+ Requires-Dist: frformat==0.4.0
30
+ Requires-Dist: faker==33.0.0
31
+ Requires-Dist: rstr==3.2.2
30
32
 
@@ -1,5 +1,6 @@
1
- csv_detective/__init__.py,sha256=giVhs0g13y4U2H0WiVBLcrvytcMxQ1LiCd2i03XITwQ,83
1
+ csv_detective/__init__.py,sha256=Au4bNJ_Gi6P6o0uO4R56nYdshG7M6-7Rg_xX4whLmLI,143
2
2
  csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
3
+ csv_detective/create_example.py,sha256=358e7Q7RWMrY_eEo3pUteJWmg2smFb5edJ_AzcQPrqA,8646
3
4
  csv_detective/detection.py,sha256=AuXlPOZfzqznZY2ybAAgaXIq6qVITYd3MXf2CoigI3I,22097
4
5
  csv_detective/explore_csv.py,sha256=X5yZS3WCUsafUMcs5tOnDTeMGzMnfr0iB9vEDx7xiqg,16977
5
6
  csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
@@ -10,24 +11,24 @@ csv_detective/detect_fields/__init__.py,sha256=CchNbi1vrgIGh_uBexXZTzfjBETDY0kQL
10
11
  csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
12
  csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
13
  csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=e5JqMNOPxx0Ivju3zAHCGMopZroCpR4vr3DJKlQhMz4,1675
13
- csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=l3-4WlLacNVngWWcgNhxwYMACFEKQRky_KJo_M7g5fc,90
14
- csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=cv53Vw0uqsXu1zl47JR7WPGP0PKjWgNJ_2ibrKAc3tU,153
15
- csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=rRnOsK5Ax8Dy1MyGUq-o7Kcs89okCXpwllJcOF79Ns0,127
16
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=DHFC0I05Iywt9eVgflLtsmK78PYomI2KNiAuGQRm4CM,77
17
- csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=AEirwDhkjCpymsaB09Nc8OmcexN2eaLNyipizYe3m4Q,195
18
- csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=mfLJu1elZiNGF5Uh565HvOTUjRrAKqj45QjYQ41uw0w,176
19
- csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=9TUR7YnYhkJDxpUjlK2BRudDCsvHuH57sXpne7Kjb1g,188
20
- csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=k09WqKkB-RgR1Dr0nvO8iaxyvROj2wcV3t8Vc4JJSdQ,173
21
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=15g9DJBvJUXQwOS3vz9l-77as3e1AC7sTaMQVyj5xHg,496
14
+ csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=tfHdqUnCQ0cv-fBo3Cy--8UNXzgjld4kseI5eQ_sR4E,187
15
+ csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=unr-Y4zquKSM5PVUiQGnOm-zQvaN8qd3v_XHf0W2VH8,378
16
+ csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=27bCkZP5w7tpsKUdOIXuiAG90DTdw066CWg3G5HtsKE,160
17
+ csv_detective/detect_fields/FR/geo/code_postal/__init__.py,sha256=e1SdnW8zVSxrRMm-CeK9tlkLzORP2C6KOInTWnB7h3o,134
18
+ csv_detective/detect_fields/FR/geo/code_region/__init__.py,sha256=y-TPljkf-W209tp7V0RnJ34936XxB6FA2-XPYK3DV8I,253
19
+ csv_detective/detect_fields/FR/geo/commune/__init__.py,sha256=tZ4d1BQd9Xow0SWBcmuGlnX-RKHDzCstdY9AsXM6-Nk,379
20
+ csv_detective/detect_fields/FR/geo/departement/__init__.py,sha256=je2zLsPlK_X189bbmKzf4BJSEoFShxMz2eQNXB7hsh0,399
21
+ csv_detective/detect_fields/FR/geo/insee_canton/__init__.py,sha256=3uNN_Iha6dFfm24CluUmkHFg6nj7kRQaXrHDEcLfyjY,373
22
+ csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py,sha256=5v07RJbi12eoPa-e_-q8xlWBew80FPMxsggcMgZQiI8,438
22
23
  csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=WjPHg8s0ND6bOwS-yo6FP1dnwD-6SWg9oH1K0avHsbI,344
23
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=4ZJZRIyr4RSaLzMoxoVNME-HrA2_mF1V1CluxgTGp_0,499
24
+ csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py,sha256=ZwThPSfbRwNHA_anuplxTPYHK-WMduc_np2Xw9XsApM,442
24
25
  csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=d4fLK4IndwllDhsddyTbyRiPfc8O9wT0pLIRI_C3QvQ,344
25
- csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=kFXGruWjn5EfKVQIfjiLEhNc73p_N2VgZCl-l8DIHqs,166
26
- csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=pajL5nr2zWnzCiVsC9SZcfMfjXLUJXm0QKLcyMecVYg,171
26
+ csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=2q5T4SmCK6ZFF1mrv7d-q9tOIQKBcROI24y_UYIuvz0,383
27
+ csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=JbFKDd4jAnd9yb7YqP36MoLdO1JFPm1cg60fGXt6ZvI,1074
27
28
  csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
29
  csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=X0NT6YbBg9PrxIcBwzUCQuBiv_QdDdqb3CJnrlent28,566
29
30
  csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
30
- csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=7bQiT-Mx7e7lW2MSydKXCIk_D8xjWLdWhQIxT7q4fG4,121
31
+ csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z0RjMBt1--ZL7Jd1RsHAQCCbTAQk_BnlnTq8VF1o_VA,146
31
32
  csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=g9n5sOjRlk4I9YFZjdaTYrXf8ftXRDunGZOUpYhN4fA,295
32
33
  csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=XacU_3rwXqtdbw_ULTSnu0OOtx0w_rKlviCrLmNdHjc,496
33
34
  csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
@@ -125,17 +126,18 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=GrIbo64WVM3hi7ShBRKKyKU
125
126
  csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
126
127
  csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
127
128
  csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
128
- csv_detective-0.7.5.dev980.data/data/share/csv_detective/CHANGELOG.md,sha256=4ABp5UF2L6tPg-eK7Dj6NWgnFnkU74BwhrMzrRGJ2Lw,6585
129
- csv_detective-0.7.5.dev980.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
130
- csv_detective-0.7.5.dev980.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
129
+ csv_detective-0.7.5.dev1052.data/data/share/csv_detective/CHANGELOG.md,sha256=oDqKO3qTo-cUSJB4fMbsyQY2O4pEQhOwWeHsZwaGkxM,6725
130
+ csv_detective-0.7.5.dev1052.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
131
+ csv_detective-0.7.5.dev1052.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
131
132
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
- tests/test_fields.py,sha256=Uq9eJaK3D8b_lDd_4Q3aMGUHP4NkrpY6g07LUnJcDDc,10587
133
+ tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
134
+ tests/test_fields.py,sha256=kXel-hiyQYrJ3OLmwUMg1K3DKbbwBLvUplxZWxpp18I,10605
133
135
  tests/test_file.py,sha256=1fEOu3bArGBaarRKAoTXAF3cSIGJfFN3UIwOW6esWRs,6399
134
136
  tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
135
137
  tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
136
- csv_detective-0.7.5.dev980.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
137
- csv_detective-0.7.5.dev980.dist-info/METADATA,sha256=U3bGCQBrGNtgHc5kIuteE4nRrMG6G__xyuko0mWdmJY,1089
138
- csv_detective-0.7.5.dev980.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
139
- csv_detective-0.7.5.dev980.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
140
- csv_detective-0.7.5.dev980.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
141
- csv_detective-0.7.5.dev980.dist-info/RECORD,,
138
+ csv_detective-0.7.5.dev1052.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
139
+ csv_detective-0.7.5.dev1052.dist-info/METADATA,sha256=mgg54BtjqPn-L_G2a4JU0SyorK8uYzUyp64cxOAIe6A,1146
140
+ csv_detective-0.7.5.dev1052.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
141
+ csv_detective-0.7.5.dev1052.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
142
+ csv_detective-0.7.5.dev1052.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
143
+ csv_detective-0.7.5.dev1052.dist-info/RECORD,,
tests/test_example.py ADDED
@@ -0,0 +1,71 @@
1
+
2
+ import re
3
+ from uuid import UUID
4
+ from csv_detective import create_example_csv_file
5
+
6
+
7
def test_example_creation():
    """Generate a small in-memory example file and verify that every
    generated column honours its field specification."""
    enum_values = ['privé', 'public', 'association']
    siret_pattern = '^\\d{14}$'
    date_low, date_high = '1996-02-13', '2000-01-28'
    fields = [
        {"name": "id_unique", "type": "id"},
        {"name": "nom_modele", "type": "str", "args": {'length': 20}},
        {"name": "siret", "type": "str", "args": {'pattern': siret_pattern}},
        {"name": "type_producteur", "type": "str", "args": {'enum': enum_values}},
        {
            "name": "date_creation",
            "type": "date",
            "args": {
                'date_range': [date_low, date_high],
                'format': '%Y-%m-%d',
            },
        },
        {"name": "url_produit", "type": "url"},
        {"name": "nb_produits", "type": "int"},
        {"name": "note", "type": "float", "args": {'num_range': [1, 20]}},
    ]
    df = create_example_csv_file(
        fields=fields,
        file_length=5,
        output_name="",  # falsy: nothing is written to disk
    )
    assert len(df) == 5
    # Each check mirrors one field's constraints, row by row.
    for uid in df["id_unique"]:
        assert UUID(uid)
    for label in df["nom_modele"]:
        assert len(label) == 20
    for siret in df["siret"]:
        assert re.match(siret_pattern, siret)
    for producer in df["type_producteur"]:
        assert producer in enum_values
    for day in df["date_creation"]:
        assert date_low <= day <= date_high
    for url in df["url_produit"]:
        assert url.startswith("http")
    for count in df["nb_produits"]:
        assert isinstance(count, int)
    for grade in df["note"]:
        assert 1 <= grade <= 20
+
65
+
66
def test_example_from_tableschema():
    # Integration test: downloads a real TableSchema (IRVE statique 2.3.1)
    # over HTTP, so it needs network access to schema.data.gouv.fr.
    df = create_example_csv_file(
        schema_path="https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/2.3.1/schema-statique.json",
        output_name="",  # falsy: no CSV file is written to disk
    )
    # file_length defaults to 10 rows.
    assert len(df) == 10
tests/test_fields.py CHANGED
@@ -282,26 +282,26 @@ def test_do_not_match_canton():
282
282
 
283
283
  # latitude_l93
284
284
  def test_match_latitude_l93():
285
- vals = [6037008, 7123528.5, "7124528,5"]
285
+ vals = ["6037008", "7123528.5", "7124528,5"]
286
286
  for val in vals:
287
287
  assert latitude_l93._is(val)
288
288
 
289
289
 
290
290
  def test_do_not_match_latitude_93():
291
- vals = [0, -6734529.6, 7245669.8, "3422674,78", "32_34"]
291
+ vals = ["0", "-6734529.6", "7245669.8", "3422674,78", "32_34"]
292
292
  for val in vals:
293
293
  assert not latitude_l93._is(val)
294
294
 
295
295
 
296
296
  # longitude_l93
297
297
  def test_match_longitude_l93():
298
- vals = [0, -154, "1265783,45", 34723.4]
298
+ vals = ["0", "-154", "1265783,45", "34723.4"]
299
299
  for val in vals:
300
300
  assert longitude_l93._is(val)
301
301
 
302
302
 
303
303
  def test_do_not_match_longitude_93():
304
- vals = [1456669.8, "-776225", "346_3214"]
304
+ vals = ["1456669.8", "-776225", "346_3214"]
305
305
  for val in vals:
306
306
  assert not longitude_l93._is(val)
307
307