csv-detective 0.7.5.dev1286__py3-none-any.whl → 0.7.5.dev1307__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,19 +10,21 @@ from .FR.other import (
10
10
  insee_ape700,
11
11
  date_fr,
12
12
  code_waldec,
13
- code_rna
13
+ code_rna,
14
14
  )
15
15
 
16
16
  from .other import (
17
17
  email,
18
18
  url,
19
19
  booleen,
20
+ money,
20
21
  mongo_object_id,
22
+ percent,
21
23
  twitter,
22
24
  float,
23
25
  int,
24
26
  uuid,
25
- json
27
+ json,
26
28
  )
27
29
 
28
30
  from .FR.geo import (
@@ -40,7 +42,7 @@ from .FR.geo import (
40
42
  code_region,
41
43
  latitude_l93,
42
44
  longitude_l93,
43
- insee_canton
45
+ insee_canton,
44
46
  )
45
47
 
46
48
  from .geo import (
@@ -50,7 +52,7 @@ from .geo import (
50
52
  latitude_wgs,
51
53
  longitude_wgs,
52
54
  latlon_wgs,
53
- json_geojson
55
+ json_geojson,
54
56
  )
55
57
 
56
58
  from .FR.temp import jour_de_la_semaine, mois_de_annee
@@ -1,13 +1,13 @@
1
- import re
1
+ from ..latitude_wgs import _is as is_lat
2
+ from ..longitude_wgs import _is as is_lon
2
3
 
3
- PROPORTION = 0.9
4
+ PROPORTION = 1
4
5
 
5
6
 
6
7
  def _is(val):
7
8
  '''Renvoie True si val peut etre une latitude,longitude'''
8
9
 
9
- return isinstance(val, str) and bool(
10
- re.match(
11
- r'^\[?[\+\-]?[0-8]?\d\.\d* ?, ?[\+\-]?(1[0-7]\d|\d{1,2})\.\d+\]?$', val
12
- )
13
- )
10
+ if not isinstance(val, str) or val.count(",") != 1:
11
+ return False
12
+ lat, lon = val.split(",")
13
+ return is_lat(lat) and is_lon(lon.replace(" ", ""))
@@ -0,0 +1,11 @@
1
+ from ..float import _is as is_float
2
+
3
+ currencies = set(["€", "$", "£", "¥"])
4
+
5
+ PROPORTION = 0.8
6
+
7
+
8
+ def _is(val: str):
9
+ if not isinstance(val, str) or val[-1] not in currencies:
10
+ return False
11
+ return is_float(val[:-1])
@@ -0,0 +1,9 @@
1
+ from ..float import _is as is_float
2
+
3
+ PROPORTION = 0.8
4
+
5
+
6
+ def _is(val: str):
7
+ if not isinstance(val, str) or val[-1] != "%":
8
+ return False
9
+ return is_float(val[:-1])
@@ -17,12 +17,12 @@ def create_example_csv_file(
17
17
  fields: Optional[dict] = None,
18
18
  schema_path: Optional[str] = None,
19
19
  file_length: int = 10,
20
- output_name: str = 'example_file.csv',
21
- output_sep: str = ';',
22
- encoding: str = 'utf-8',
20
+ output_name: Optional[str] = "example_file.csv",
21
+ output_sep: str = ";",
22
+ encoding: str = "utf-8",
23
23
  ignore_required: bool = False,
24
24
  ) -> pd.DataFrame:
25
- '''
25
+ """
26
26
  Create an example file based on a list of dicts like follows:
27
27
  fields = [
28
28
  {
@@ -33,7 +33,7 @@ def create_example_csv_file(
33
33
  ...
34
34
  ]
35
35
  Or from a TableSchema
36
- '''
36
+ """
37
37
  # need to make a CLI command
38
38
 
39
39
  if not (fields or schema_path):
@@ -53,65 +53,65 @@ def create_example_csv_file(
53
53
  enum: Optional[str] = None,
54
54
  ) -> str:
55
55
  if potential_skip(required):
56
- return ''
56
+ return ""
57
57
  if pattern is not None:
58
58
  return rstr.xeger(pattern)
59
59
  elif enum is not None:
60
60
  return random.choice(enum)
61
61
  else:
62
62
  letters = string.ascii_lowercase
63
- return ''.join(random.choice(letters) for i in range(length))
63
+ return "".join(random.choice(letters) for i in range(length))
64
64
 
65
65
  def _id(
66
66
  required: bool = True,
67
67
  ) -> str:
68
68
  if potential_skip(required):
69
- return ''
69
+ return ""
70
70
  return str(uuid.uuid4())
71
71
 
72
72
  def _date(
73
73
  date_range: Union[None, list[str]] = None,
74
- format: str = '%Y-%m-%d',
74
+ format: str = "%Y-%m-%d",
75
75
  required: bool = True,
76
76
  ) -> str:
77
77
  # the bounds specified in date_range are expected in the same format as the desired output format
78
- assert all([k in format for k in ['%d', '%m', '%Y']])
78
+ assert all([k in format for k in ["%d", "%m", "%Y"]])
79
79
  if potential_skip(required):
80
- return ''
80
+ return ""
81
81
  if date_range is None:
82
82
  return fake.date(format)
83
83
  else:
84
84
  if len(date_range) != 2:
85
- raise ValueError('"date_range" must have exactly two elements.')
85
+ raise ValueError("'date_range' must have exactly two elements.")
86
86
  return fake.date_between_dates(
87
87
  datetime.strptime(date_range[0], format),
88
88
  datetime.strptime(date_range[1], format),
89
89
  ).strftime(format)
90
90
 
91
91
  def _time(
92
- format: str = '%H:%M:%S',
92
+ format: str = "%H:%M:%S",
93
93
  required: bool = True,
94
94
  ) -> str:
95
- assert all([k in format for k in ['%H', '%M', '%S']])
95
+ assert all([k in format for k in ["%H", "%M", "%S"]])
96
96
  if potential_skip(required):
97
- return ''
97
+ return ""
98
98
  # maybe add a time_range argument?
99
99
  return fake.time(format)
100
100
 
101
101
  def _datetime(
102
102
  datetime_range: Optional[list[str]] = None,
103
- format: str = '%Y-%m-%d %H-%M-%S',
103
+ format: str = "%Y-%m-%d %H-%M-%S",
104
104
  required: bool = True,
105
105
  ) -> str:
106
106
  # the bounds specified in datetime_range are expected in the same format as the desired output format
107
- assert all([k in format for k in ['%d', '%m', '%Y', '%H', '%M', '%S']])
107
+ assert all([k in format for k in ["%d", "%m", "%Y", "%H", "%M", "%S"]])
108
108
  if potential_skip(required):
109
- return ''
109
+ return ""
110
110
  if datetime_range is None:
111
111
  return fake.date_time().strftime(format)
112
112
  else:
113
113
  if len(datetime_range) != 2:
114
- raise ValueError('"date_range" must have exactly two elements.')
114
+ raise ValueError("'date_range' must have exactly two elements.")
115
115
  return fake.date_time_between(
116
116
  datetime.strptime(datetime_range[0], format),
117
117
  datetime.strptime(datetime_range[1], format),
@@ -119,8 +119,8 @@ def create_example_csv_file(
119
119
 
120
120
  def _url(required: bool = True) -> str:
121
121
  if potential_skip(required):
122
- return ''
123
- return f'http://{rstr.domainsafe()}.{rstr.letters(3)}/{rstr.urlsafe()}'
122
+ return ""
123
+ return f"http://{rstr.domainsafe()}.{rstr.letters(3)}/{rstr.urlsafe()}"
124
124
 
125
125
  def _number(
126
126
  num_type: Type[Union[int, float]] = int,
@@ -130,7 +130,7 @@ def create_example_csv_file(
130
130
  ) -> Union[int, float]:
131
131
  assert num_range is None or len(num_range) == 2
132
132
  if potential_skip(required):
133
- return ''
133
+ return ""
134
134
  if enum:
135
135
  return random.choice(enum)
136
136
  if num_range is None:
@@ -142,100 +142,100 @@ def create_example_csv_file(
142
142
 
143
143
  def _bool(required: bool = True) -> bool:
144
144
  if potential_skip(required):
145
- return ''
145
+ return ""
146
146
  return random.randint(0, 1) == 0
147
147
 
148
148
  def _array(enum: list[Any], required: bool = True) -> str:
149
149
  if potential_skip(required):
150
- return ''
150
+ return ""
151
151
  return f"[{','.join(random.sample(enum, random.randint(1, len(enum))))}]"
152
152
 
153
153
  def build_args_from_constraints(constraints: dict) -> dict:
154
154
  args = {}
155
- args['required'] = constraints.get('required', False)
156
- for _ in ['pattern', 'enum', 'format']:
155
+ args["required"] = constraints.get("required", False)
156
+ for _ in ["pattern", "enum", "format"]:
157
157
  if _ in constraints:
158
158
  args[_] = constraints[_]
159
- if 'minimum' in constraints and 'maximum' in constraints:
160
- args['num_range'] = [constraints['minimum'], constraints['maximum']]
159
+ if "minimum" in constraints and "maximum" in constraints:
160
+ args["num_range"] = [constraints["minimum"], constraints["maximum"]]
161
161
  # maybe there are better values than these?
162
- elif 'minimum' in constraints:
163
- args['num_range'] = [constraints['minimum'], 10 + constraints['minimum']]
164
- elif 'maximum' in constraints:
165
- args['num_range'] = [constraints['maximum'] - 10, constraints['maximum']]
166
- if 'minLength' in constraints:
167
- args['length'] = constraints['minLength']
168
- if 'maxLength' in constraints:
169
- args['length'] = constraints['maxLength']
162
+ elif "minimum" in constraints:
163
+ args["num_range"] = [constraints["minimum"], 10 + constraints["minimum"]]
164
+ elif "maximum" in constraints:
165
+ args["num_range"] = [constraints["maximum"] - 10, constraints["maximum"]]
166
+ if "minLength" in constraints:
167
+ args["length"] = constraints["minLength"]
168
+ if "maxLength" in constraints:
169
+ args["length"] = constraints["maxLength"]
170
170
  return args
171
171
 
172
172
  schema_types_to_python = {
173
- 'number': 'float',
174
- 'integer': 'int',
175
- 'string': 'str',
176
- 'year': 'year',
177
- 'boolean': 'bool',
178
- 'date': 'date',
179
- 'yearmonth': 'date',
180
- 'time': 'time',
181
- 'datetime': 'datetime',
182
- 'array': 'array'
173
+ "number": "float",
174
+ "integer": "int",
175
+ "string": "str",
176
+ "year": "year",
177
+ "boolean": "bool",
178
+ "date": "date",
179
+ "yearmonth": "date",
180
+ "time": "time",
181
+ "datetime": "datetime",
182
+ "array": "array"
183
183
  }
184
184
 
185
185
  if schema_path:
186
- if schema_path.startswith('http'):
186
+ if schema_path.startswith("http"):
187
187
  schema = requests.get(schema_path).json()
188
188
  else:
189
189
  with open(schema_path, encoding=encoding) as jsonfile:
190
190
  schema = json.load(jsonfile)
191
- if not ('fields' in schema.keys()):
192
- raise ValueError('The schema must have a "fields" key.')
191
+ if not ("fields" in schema.keys()):
192
+ raise ValueError("The schema must have a 'fields' key.")
193
193
  else:
194
194
  fields = [
195
195
  {
196
- 'name': f['name'],
197
- 'type': schema_types_to_python.get(f['type'], 'str'),
196
+ "name": f["name"],
197
+ "type": schema_types_to_python.get(f["type"], "str"),
198
198
  # when frformat is supported in TableSchema, we can build args for French standards
199
199
  # linked to https://github.com/datagouv/fr-format/issues/26
200
- 'args': (
201
- build_args_from_constraints(f['constraints']) if 'constraints' in f.keys()
202
- else build_args_from_constraints(f['arrayItem']['constraints'])
203
- if 'arrayItem' in f.keys() and 'constraints' in f['arrayItem'].keys()
200
+ "args": (
201
+ build_args_from_constraints(f["constraints"]) if "constraints" in f.keys()
202
+ else build_args_from_constraints(f["arrayItem"]["constraints"])
203
+ if "arrayItem" in f.keys() and "constraints" in f["arrayItem"].keys()
204
204
  else {}
205
205
  )
206
- } for f in schema['fields']
206
+ } for f in schema["fields"]
207
207
  ]
208
208
 
209
209
  for k in range(len(fields)):
210
- if 'args' not in fields[k]:
211
- fields[k]['args'] = {}
212
- if fields[k]['type'] == 'float':
213
- fields[k]['args']['num_type'] = float
214
- elif fields[k]['type'] == 'int':
215
- fields[k]['args']['num_type'] = int
216
- elif fields[k]['type'] == 'year':
217
- fields[k]['args']['num_type'] = int
218
- fields[k]['args']['num_range'] = [1990, 2050]
210
+ if "args" not in fields[k]:
211
+ fields[k]["args"] = {}
212
+ if fields[k]["type"] == "float":
213
+ fields[k]["args"]["num_type"] = float
214
+ elif fields[k]["type"] == "int":
215
+ fields[k]["args"]["num_type"] = int
216
+ elif fields[k]["type"] == "year":
217
+ fields[k]["args"]["num_type"] = int
218
+ fields[k]["args"]["num_range"] = [1990, 2050]
219
219
 
220
220
  types_to_func = {
221
- 'int': _number,
222
- 'float': _number,
223
- 'date': _date,
224
- 'time': _time,
225
- 'str': _string,
226
- 'url': _url,
227
- 'id': _id,
228
- 'year': _number,
229
- 'bool': _bool,
230
- 'datetime': _datetime,
231
- 'array': _array,
221
+ "int": _number,
222
+ "float": _number,
223
+ "date": _date,
224
+ "time": _time,
225
+ "str": _string,
226
+ "url": _url,
227
+ "id": _id,
228
+ "year": _number,
229
+ "bool": _bool,
230
+ "datetime": _datetime,
231
+ "array": _array,
232
232
  }
233
233
 
234
234
  # would it be better to create by column or by row (as for now)?
235
235
  output = pd.DataFrame(
236
236
  [
237
237
  [
238
- types_to_func.get(f['type'], 'str')(**f['args'])
238
+ types_to_func.get(f["type"], "str")(**f["args"])
239
239
  for f in fields
240
240
  ] for _ in range(file_length)
241
241
  ],
csv_detective/validate.py CHANGED
@@ -42,7 +42,8 @@ def validate(
42
42
  any(col_name not in list(table.columns) for col_name in previous_analysis["columns"])
43
43
  or any(col_name not in list(previous_analysis["columns"].keys()) for col_name in table.columns)
44
44
  ):
45
- logging.warning("> Columns do not match, proceeding with full analysis")
45
+ if verbose:
46
+ logging.warning("> Columns do not match, proceeding with full analysis")
46
47
  return False, table, analysis
47
48
  for col_name, args in previous_analysis["columns"].items():
48
49
  if verbose:
@@ -55,7 +56,8 @@ def validate(
55
56
  if skipna:
56
57
  col_data = col_data.loc[~col_data.isna()]
57
58
  if not col_data.apply(test_func).all():
58
- logging.warning("> Test failed, proceeding with full analysis")
59
+ if verbose:
60
+ logging.warning("> Test failed, proceeding with full analysis")
59
61
  return False, table, analysis
60
62
  if verbose:
61
63
  logging.info("> All checks successful")
@@ -15,6 +15,7 @@
15
15
  - Restructure repo (breaking changes) [#111](https://github.com/datagouv/csv-detective/pull/111)
16
16
  - Add validation function and associated flow [#112](https://github.com/datagouv/csv-detective/pull/112)
17
17
  - Better float detection [#113](https://github.com/datagouv/csv-detective/pull/113)
18
+ - Refactor fields tests [#114](https://github.com/datagouv/csv-detective/pull/114)
18
19
 
19
20
  ## 0.7.4 (2024-11-15)
20
21
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv_detective
3
- Version: 0.7.5.dev1286
3
+ Version: 0.7.5.dev1307
4
4
  Summary: Detect CSV column content
5
5
  Home-page: https://github.com/etalab/csv_detective
6
6
  Author: Etalab
@@ -4,8 +4,8 @@ csv_detective/explore_csv.py,sha256=ocWlUEtuwZ-6bjDc6gfhC2-6DljMVhvXhHrfICCXGfQ,
4
4
  csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
5
5
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
6
6
  csv_detective/utils.py,sha256=Bx_1k4Sdpd5PCjuAy4AeayCmmw7TMR_zgtKIHNLi5g0,1157
7
- csv_detective/validate.py,sha256=o4Qulf8E-x1zsWT9OD4Fpw83Gku1WA3JlX83j7bu0DA,2314
8
- csv_detective/detect_fields/__init__.py,sha256=NVfE3BQVExgXb-BPbhDvlkM5-0naEVLpZ4aM_OGHYfE,931
7
+ csv_detective/validate.py,sha256=0wSi5GgKPRW3m66413a-9Uti1vBRam5pQxVA9Dc5jQ8,2368
8
+ csv_detective/detect_fields/__init__.py,sha256=qkwT_o_S7qvLEsRssICpoGmCc3h5y2MVy1XI56LFcV0,959
9
9
  csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
@@ -50,7 +50,7 @@ csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=wJAy
50
50
  csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
51
51
  csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=FPHOfTrfXJs62-NgeOcNGOvwPd7I1fEVp8lTdMNfj3w,433
52
52
  csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=ArS6PuYEd0atZwSqNDZhXZz1TwzdiwdV8ovRYTOacpg,327
53
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=3nlBqFYD4kVSVxw4b9DTPcxW59oL0T3Kj0OxPlyP9og,268
53
+ csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=7_mnO9uC_kI7e2WR8xIer7Kqw8zi-v-JKaAD4zcoGbE,342
54
54
  csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=G7afWOKiGh_Tv7gwDNGt1a4B_A8hkCBkIxn3THDCUFk,330
55
55
  csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
56
  csv_detective/detect_fields/other/booleen/__init__.py,sha256=wn_yyTAmGxqo0l0b7JRpGb0da_E27iGxES9zWCrnsqc,497
@@ -58,7 +58,9 @@ csv_detective/detect_fields/other/email/__init__.py,sha256=O9tgJmq0O8Q-8iin63NqE
58
58
  csv_detective/detect_fields/other/float/__init__.py,sha256=AT4Kpgwoz5PuAoLx00u0SL8DjjXZxsE8zSRbN18uAv4,578
59
59
  csv_detective/detect_fields/other/int/__init__.py,sha256=QN3kQJLYqLRBiubUK7g4Xq03PlA5wqVwx2pPPIO9FdI,320
60
60
  csv_detective/detect_fields/other/json/__init__.py,sha256=DhzyvT12kOqgum89silIu3uoSYXmC_s_AaxLtXAD4eU,540
61
+ csv_detective/detect_fields/other/money/__init__.py,sha256=g_ZwBZXl9LhldwFYQotC5WqLiE8qQCZHtoI9eJvl_9M,232
61
62
  csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=7fcrHsOZAqXp2_N0IjPskYJ_qi4xRlo9iyNNDQVLzsU,156
63
+ csv_detective/detect_fields/other/percent/__init__.py,sha256=vgpekNOPBRuunoVBXMi81rwHv4uSOhe78pbVtQ5SBO8,177
62
64
  csv_detective/detect_fields/other/twitter/__init__.py,sha256=qbwLKsTBRFQ4PyTNVeEZ5Hkf5Wwi3ZKclLER_V0YO3g,154
63
65
  csv_detective/detect_fields/other/url/__init__.py,sha256=9WaTqCglEsw_lJG_xZsBMdxJXg2yuQ92_fkX6CXWNV0,286
64
66
  csv_detective/detect_fields/other/uuid/__init__.py,sha256=3-z0fDax29SJc57zPjNGR6DPICJu6gfuNGC5L3jh4d0,223
@@ -135,7 +137,7 @@ csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_p
135
137
  csv_detective/detection/variables.py,sha256=3qEMtjZ_zyIFXvTnFgK7ZMDx8C12uQXKfFjEj2moyJc,3558
136
138
  csv_detective/output/__init__.py,sha256=XDS4Dgvv6oloIao9JquHa0m1nnlQ_q2gHuEPGlaETic,1890
137
139
  csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
138
- csv_detective/output/example.py,sha256=i8PkdXxidF7qR_9aK8vh12JpZdJQryhBgyrMS8iy5rk,8642
140
+ csv_detective/output/example.py,sha256=26rY7XNXK47e9xJMl-Js8jJwFIuv7V7B7e256VecKuk,8652
139
141
  csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
140
142
  csv_detective/output/schema.py,sha256=ZDBWDOD8IYp7rcB0_n8l9JXGIhOQ6bTZHFWfTmnNNEQ,13480
141
143
  csv_detective/output/utils.py,sha256=HbmvCCCmFo7NJxhD_UsJIveuw-rrfhrvYckv1CJn_10,2301
@@ -145,19 +147,19 @@ csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,
145
147
  csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
146
148
  csv_detective/parsing/load.py,sha256=SpP0pfxswOAPPpwbZfoP1blh0EKV5VMs0TpTgQJKzjs,3621
147
149
  csv_detective/parsing/text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
148
- csv_detective-0.7.5.dev1286.data/data/share/csv_detective/CHANGELOG.md,sha256=Gqw7W41bXK_JgIYi80vdOPR6JLY5rgABeNsiDStE4XA,7901
149
- csv_detective-0.7.5.dev1286.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
150
- csv_detective-0.7.5.dev1286.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
151
- csv_detective-0.7.5.dev1286.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
150
+ csv_detective-0.7.5.dev1307.data/data/share/csv_detective/CHANGELOG.md,sha256=Y8aL18x5EGGvA9AqukEi4tn78se_Lzisa2J32kOSer8,7984
151
+ csv_detective-0.7.5.dev1307.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
152
+ csv_detective-0.7.5.dev1307.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
153
+ csv_detective-0.7.5.dev1307.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
152
154
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
153
- tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
154
- tests/test_fields.py,sha256=53kiUQiqGt4_fnyCoxhNLeEsuN1LRDB-7HGT3p_Ed9I,11147
155
+ tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
156
+ tests/test_fields.py,sha256=0hce2XtDHY9dTLCYhrm2s4I41OeKsQbbaKmDZ4XctUw,9824
155
157
  tests/test_file.py,sha256=9APE1d43lQ8Dk8lwJFNUK_YekYYsQ0ae2_fgpcPE9mk,8116
156
158
  tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
157
159
  tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
158
160
  tests/test_validation.py,sha256=VwtBcnGAQ_eSFrBibWnMSTDjuy6y2JLlqvc3Zb667NY,479
159
- csv_detective-0.7.5.dev1286.dist-info/METADATA,sha256=rLptgL-FkLZzfkxPt7_0I-k7EKPKbEHhd3Ei2qt54KI,1386
160
- csv_detective-0.7.5.dev1286.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
161
- csv_detective-0.7.5.dev1286.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
162
- csv_detective-0.7.5.dev1286.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
163
- csv_detective-0.7.5.dev1286.dist-info/RECORD,,
161
+ csv_detective-0.7.5.dev1307.dist-info/METADATA,sha256=RaSc6oAUAB9KsfbjOi5xRdyM8d127pL_GKYEU0195mA,1386
162
+ csv_detective-0.7.5.dev1307.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
163
+ csv_detective-0.7.5.dev1307.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
164
+ csv_detective-0.7.5.dev1307.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
165
+ csv_detective-0.7.5.dev1307.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (80.4.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
tests/test_example.py CHANGED
@@ -13,24 +13,24 @@ def test_example_creation():
13
13
  {
14
14
  "name": "nom_modele",
15
15
  "type": "str",
16
- "args": {'length': 20},
16
+ "args": {"length": 20},
17
17
  },
18
18
  {
19
19
  "name": "siret",
20
20
  "type": "str",
21
- "args": {'pattern': '^\\d{14}$'},
21
+ "args": {"pattern": "^\\d{14}$"},
22
22
  },
23
23
  {
24
24
  "name": "type_producteur",
25
25
  "type": "str",
26
- "args": {'enum': ['privé', 'public', 'association']},
26
+ "args": {"enum": ["privé", "public", "association"]},
27
27
  },
28
28
  {
29
29
  "name": "date_creation",
30
30
  "type": "date",
31
31
  "args": {
32
- 'date_range': ['1996-02-13', '2000-01-28'],
33
- 'format': '%Y-%m-%d',
32
+ "date_range": ["1996-02-13", "2000-01-28"],
33
+ "format": "%Y-%m-%d",
34
34
  },
35
35
  },
36
36
  {
@@ -44,20 +44,20 @@ def test_example_creation():
44
44
  {
45
45
  "name": "note",
46
46
  "type": "float",
47
- "args": {'num_range': [1, 20]}
47
+ "args": {"num_range": [1, 20]}
48
48
  },
49
49
  ]
50
50
  df = create_example_csv_file(
51
51
  fields=fields,
52
52
  file_length=5,
53
- output_name="",
53
+ output_name=None,
54
54
  )
55
55
  assert len(df) == 5
56
56
  assert all(UUID(_) for _ in df["id_unique"])
57
57
  assert all(len(_) == 20 for _ in df["nom_modele"])
58
58
  assert all(re.match("^\\d{14}$", _) for _ in df["siret"])
59
- assert all(_ in ['privé', 'public', 'association'] for _ in df["type_producteur"])
60
- assert all(_ >= '1996-02-13' and _ <= '2000-01-28' for _ in df["date_creation"])
59
+ assert all(_ in ["privé", "public", "association"] for _ in df["type_producteur"])
60
+ assert all(_ >= "1996-02-13" and _ <= "2000-01-28" for _ in df["date_creation"])
61
61
  assert all(_.startswith("http") for _ in df["url_produit"])
62
62
  assert all(isinstance(_, int) for _ in df["nb_produits"])
63
63
  assert all(_ >= 1 and _ <= 20 for _ in df["note"])
@@ -66,6 +66,6 @@ def test_example_creation():
66
66
  def test_example_from_tableschema():
67
67
  df = create_example_csv_file(
68
68
  schema_path="https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/2.3.1/schema-statique.json",
69
- output_name="",
69
+ output_name=None,
70
70
  )
71
71
  assert len(df) == 10
tests/test_fields.py CHANGED
@@ -15,7 +15,9 @@ from csv_detective.detect_fields.FR.geo import (
15
15
  departement,
16
16
  insee_canton,
17
17
  latitude_l93,
18
+ latitude_wgs_fr_metropole,
18
19
  longitude_l93,
20
+ longitude_wgs_fr_metropole,
19
21
  pays,
20
22
  region,
21
23
  )
@@ -24,26 +26,38 @@ from csv_detective.detect_fields.FR.other import (
24
26
  code_rna,
25
27
  code_waldec,
26
28
  csp_insee,
29
+ date_fr,
30
+ insee_ape700,
27
31
  sexe,
28
32
  siren,
33
+ siret,
29
34
  tel_fr,
35
+ uai,
30
36
  )
31
- from csv_detective.detect_fields.FR.temp import jour_de_la_semaine
37
+ from csv_detective.detect_fields.FR.temp import jour_de_la_semaine, mois_de_annee
32
38
  from csv_detective.detect_fields.geo import (
33
39
  iso_country_code_alpha2,
34
40
  iso_country_code_alpha3,
35
41
  iso_country_code_numeric,
42
+ json_geojson,
43
+ latitude_wgs,
44
+ latlon_wgs,
45
+ longitude_wgs,
36
46
  )
37
47
  from csv_detective.detect_fields.other import (
48
+ booleen,
38
49
  email,
39
50
  json,
51
+ money,
40
52
  mongo_object_id,
53
+ percent,
54
+ twitter,
41
55
  url,
42
56
  uuid,
43
57
  int as test_int,
44
58
  float as test_float,
45
59
  )
46
- from csv_detective.detect_fields.temp import date, datetime_iso, datetime_rfc822, year
60
+ from csv_detective.detect_fields.temp import date, datetime, datetime_iso, datetime_rfc822, year
47
61
  from csv_detective.detection.variables import (
48
62
  detect_continuous_variable,
49
63
  detect_categorical_variable,
@@ -94,420 +108,261 @@ def test_detect_continuous_variable():
94
108
  assert res2.values and res2.values[0] == "cont"
95
109
 
96
110
 
97
- # csp_insee
98
- def test_match_csp_insee():
99
- val = "employes de la poste"
100
- assert csp_insee._is(val)
101
-
102
-
103
- def test_do_not_match_csp_insee():
104
- val = "super-heros"
105
- assert not csp_insee._is(val)
106
-
107
-
108
- # code_csp_insee
109
- def test_match_code_csp_insee():
110
- val = "121f"
111
- assert code_csp_insee._is(val)
112
-
113
-
114
- def test_do_not_match_code_csp_insee():
115
- val = "121x"
116
- assert not code_csp_insee._is(val)
117
-
118
-
119
- # sexe
120
- def test_match_sexe():
121
- val = "homme"
122
- assert sexe._is(val)
123
-
124
-
125
- def test_do_not_match_sexe():
126
- val = "hermaphrodite"
127
- assert not sexe._is(val)
128
-
129
-
130
- # tel_fr
131
- def test_match_tel_fr():
132
- val = "0134643467"
133
- assert tel_fr._is(val)
134
-
135
-
136
- def test_do_not_match_tel_fr():
137
- val = "3345689715"
138
- assert not tel_fr._is(val)
139
-
140
-
141
- # email
142
- def test_match_email():
143
- val = "cdo_intern@data.gouv.fr"
144
- assert email._is(val)
145
-
146
-
147
- def test_do_not_match_email():
148
- val = "cdo@@gouv.sfd"
149
- assert not email._is(val)
150
-
151
-
152
- # uuid
153
- def test_match_uuid():
154
- val = "884762be-51f3-44c3-b811-1e14c5d89262"
155
- assert uuid._is(val)
156
-
157
-
158
- def test_do_not_match_uuid():
159
- val = "0610928327"
160
- assert not uuid._is(val)
161
-
162
-
163
- # Mongo ObjectId
164
- def test_match_mongo_object_id():
165
- val = "62320e50f981bc2b57bcc044"
166
- assert mongo_object_id._is(val)
167
-
168
-
169
- def test_do_not_match_mongo_object_id():
170
- val = "884762be-51f3-44c3-b811-1e14c5d89262"
171
- assert not mongo_object_id._is(val)
172
-
173
-
174
- # url
175
- def test_match_url():
176
- val = "www.etalab.data.gouv.fr"
177
- assert url._is(val)
178
-
179
-
180
- def test_do_not_match_url():
181
- val = "c est une phrase"
182
- assert not url._is(val)
183
-
184
-
185
- # adresse
186
- def test_match_adresse():
187
- val = "rue du martyr"
188
- assert adresse._is(val)
189
-
190
-
191
- def test_do_not_match_adresse():
192
- val = "bonjour les amis"
193
- assert not adresse._is(val)
194
-
195
-
196
- # code_commune_insee
197
- def test_match_code_commune_insee():
198
- val = "91471"
199
- assert code_commune_insee._is(val)
200
-
201
-
202
- def test_do_not_match_code_commune_insee():
203
- val = "914712"
204
- assert not code_commune_insee._is(val)
205
-
206
-
207
- # code_postal
208
- def test_match_code_postal():
209
- val = "75020"
210
- assert code_postal._is(val)
211
-
212
-
213
- def test_do_not_match_code_postal():
214
- val = "77777"
215
- assert not code_postal._is(val)
216
-
217
-
218
- # code_departement
219
- def test_match_code_departement():
220
- vals = ["75", "2A", "2a", "974"]
221
- for val in vals:
222
- assert code_departement._is(val)
223
-
224
-
225
- def test_do_not_match_code_departement():
226
- val = "00"
227
- assert not code_departement._is(val)
228
-
229
-
230
- # code_fantoir
231
- def test_match_code_fantoir():
232
- vals = ["7755A", "B150B", "ZA04C", "ZB03D"]
233
- for val in vals:
234
- assert code_fantoir._is(val)
235
-
236
-
237
- def test_do_not_match_code_fantoir():
238
- vals = ["7755", "ZA99A"]
239
- for val in vals:
240
- assert not code_fantoir._is(val)
241
-
242
-
243
- # code_region
244
- def test_match_code_region():
245
- val = "32"
246
- assert code_region._is(val)
247
-
248
-
249
- def test_do_not_match_code_region():
250
- val = "55"
251
- assert not code_region._is(val)
252
-
253
-
254
- # commune
255
- def test_match_commune():
256
- val = "saint denis"
257
- assert commune._is(val)
258
-
259
-
260
- def test_do_not_match_commune():
261
- val = "new york"
262
- assert not commune._is(val)
263
-
264
-
265
- # departement
266
- def test_match_departement():
267
- val = "essonne"
268
- assert departement._is(val)
269
-
270
-
271
- def test_do_not_match_departement():
272
- val = "new york"
273
- assert not departement._is(val)
274
-
275
-
276
- # insee_canton
277
- def test_match_canton():
278
- val = "nantua"
279
- assert insee_canton._is(val)
280
-
281
-
282
- def test_do_not_match_canton():
283
- val = "new york"
284
- assert not departement._is(val)
285
-
286
-
287
- # latitude_l93
288
- def test_match_latitude_l93():
289
- vals = ["6037008", "7123528.5", "7124528,5"]
290
- for val in vals:
291
- assert latitude_l93._is(val)
292
-
293
-
294
- def test_do_not_match_latitude_93():
295
- vals = ["0", "-6734529.6", "7245669.8", "3422674,78", "32_34"]
296
- for val in vals:
297
- assert not latitude_l93._is(val)
298
-
299
-
300
- # longitude_l93
301
- def test_match_longitude_l93():
302
- vals = ["0", "-154", "1265783,45", "34723.4"]
303
- for val in vals:
304
- assert longitude_l93._is(val)
305
-
306
-
307
- def test_do_not_match_longitude_93():
308
- vals = ["1456669.8", "-776225", "346_3214"]
309
- for val in vals:
310
- assert not longitude_l93._is(val)
311
-
312
-
313
- # pays
314
- def test_match_pays():
315
- val = "france"
316
- assert pays._is(val)
317
-
318
-
319
- def test_do_not_match_pays():
320
- val = "new york"
321
- assert not pays._is(val)
322
-
323
-
324
- # region
325
- def test_match_region():
326
- val = "bretagne"
327
- assert region._is(val)
328
-
329
-
330
- def test_do_not_match_region():
331
- val = "jambon beurre"
332
- assert not region._is(val)
333
-
334
-
335
- # iso_country_code
336
- def test_match_iso_country_code():
337
- val = "FR"
338
- assert iso_country_code_alpha2._is(val)
339
-
340
-
341
- def test_do_not_match_iso_country_code():
342
- val = "XX"
343
- assert not iso_country_code_alpha2._is(val)
344
-
345
-
346
- # iso_country_code alpha-3
347
- def test_match_iso_country_code_alpha3():
348
- val = "FRA"
349
- assert iso_country_code_alpha3._is(val)
350
-
351
-
352
- def test_do_not_match_iso_country_code_alpha3():
353
- val = "ABC"
354
- assert not iso_country_code_alpha3._is(val)
355
-
356
-
357
- # iso_country_code numerique
358
- def test_match_iso_country_code_numeric():
359
- val = "250"
360
- assert iso_country_code_numeric._is(val)
361
-
362
-
363
- def test_do_not_match_iso_country_code_numeric():
364
- val = "003"
365
- assert not iso_country_code_numeric._is(val)
366
-
367
-
368
- # jour de la semaine
369
- def test_match_jour_de_la_semaine():
370
- val = "lundi"
371
- assert jour_de_la_semaine._is(val)
372
-
373
-
374
- def test_do_not_match_jour_de_la_semaine():
375
- val = "jour de la biere"
376
- assert not jour_de_la_semaine._is(val)
377
-
378
-
379
- # year
380
- def test_match_year():
381
- val = "2015"
382
- assert year._is(val)
383
-
384
-
385
- def test_do_not_match_year():
386
- val = "20166"
387
- assert not year._is(val)
388
-
389
-
390
- # date
391
- def test_match_date():
392
- val = "1960-08-07"
393
- assert date._is(val)
394
- val = "12/02/2007"
395
- assert date._is(val)
396
- val = "15 jan 1985"
397
- assert date._is(val)
398
- val = "15 décembre 1985"
399
- assert date._is(val)
400
- val = "02 05 2003"
401
- assert date._is(val)
402
- val = "20030502"
403
- assert date._is(val)
404
- val = "1993-12/02"
405
- assert date._is(val)
406
-
407
-
408
- def test_do_not_match_date():
409
- val = "1993-1993-1993"
410
- assert not date._is(val)
411
- val = "39-10-1993"
412
- assert not date._is(val)
413
- val = "19-15-1993"
414
- assert not date._is(val)
415
- val = "15 tambour 1985"
416
- assert not date._is(val)
417
- val = "12152003"
418
- assert not date._is(val)
419
- val = "20031512"
420
- assert not date._is(val)
421
- val = "02052003"
422
- assert not date._is(val)
423
-
424
-
425
- # datetime
426
- def test_match_datetime():
427
- val = "2021-06-22T10:20:10"
428
- assert datetime_iso._is(val)
429
- val = "2021-06-22T30:20:10"
430
- assert not datetime_iso._is(val)
431
-
432
- val = "Sun, 06 Nov 1994 08:49:37 GMT"
433
- assert datetime_rfc822._is(val)
434
-
435
-
436
- # siren
437
- def test_match_siren():
438
- val = "552 100 554"
439
- assert siren._is(val)
440
-
441
-
442
- def test_do_not_match_siren():
443
- val = "42"
444
- assert not siren._is(val)
445
-
446
-
447
- # rna
448
- def test_match_rna():
449
- val = "W751515517"
450
- assert code_rna._is(val)
451
-
452
-
453
- def test_do_not_match_rna():
454
- vals = [
455
- "W111111111111111111111111111111111111",
456
- "w143788974",
457
- "W12",
458
- "678W23456",
459
- "165789325",
460
- "Wa1#89sf&h",
461
- ]
462
- for val in vals:
463
- assert not code_rna._is(val)
464
-
465
-
466
- def test_match_waldec():
467
- val = "751P00188854"
468
- assert code_waldec._is(val)
469
-
470
-
471
- def test_do_not_match_waldec():
472
- val = "AA751PEE00188854"
473
- assert not code_waldec._is(val)
474
-
475
-
476
- # json
477
- def test_match_json():
478
- val = '{"pomme": "fruit", "reponse": 42}'
479
- assert json._is(val)
480
- val = "[1,2,3,4]"
481
- assert json._is(val)
482
-
483
-
484
- def test_do_not_match_json():
485
- val = '{"coordinates": [45.783753, 3.049342], "citycode": "63870"}'
486
- assert not json._is(val)
487
- val = "666"
488
- assert not json._is(val)
489
-
490
-
491
- # int
492
- def test_match_int():
493
- for val in ["1", "0", "1764", "-24"]:
494
- assert test_int._is(val)
495
-
496
-
497
- def test_not_match_int():
498
- for val in ["01053", "1.2", "123_456", "+35"]:
499
- assert not test_int._is(val)
500
-
501
-
502
- # float
503
- def test_match_float():
504
- for val in ["1", "0", "1764", "-24", "1.2", "1863.23", "-12.7", "0.1"]:
505
- assert test_float._is(val)
111
+ fields = {
112
+ adresse: {
113
+ True: ["rue du martyr"],
114
+ False: ["un batiment"],
115
+ },
116
+ code_commune_insee: {
117
+ True: ["91471", "01053"],
118
+ False: ["914712", "01000"],
119
+ },
120
+ code_departement: {
121
+ True: ["75", "2A", "2b", "974", "01"],
122
+ False: ["00", "96", "101"],
123
+ },
124
+ code_fantoir: {
125
+ True: ["7755A", "B150B", "ZA04C", "ZB03D"],
126
+ False: ["7755", "ZA99A"],
127
+ },
128
+ code_postal: {
129
+ True: ["75020", "01000"],
130
+ False: ["77777", "018339"],
131
+ },
132
+ code_region: {
133
+ True: ["32"],
134
+ False: ["55"],
135
+ },
136
+ commune: {
137
+ True: ["saint denis"],
138
+ False: ["new york", "lion"],
139
+ },
140
+ departement: {
141
+ True: ["essonne"],
142
+ False: ["alabama", "auvergne"],
143
+ },
144
+ insee_canton: {
145
+ True: ["nantua"],
146
+ False: ["california"],
147
+ },
148
+ latitude_l93: {
149
+ True: ["6037008", "7123528.5", "7124528,5"],
150
+ False: ["0", "-6734529.6", "7245669.8", "3422674,78", "32_34"],
151
+ },
152
+ longitude_l93: {
153
+ True: ["0", "-154", "1265783,45", "34723.4"],
154
+ False: ["1456669.8", "-776225", "346_3214"],
155
+ },
156
+ latitude_wgs_fr_metropole: {
157
+ True: ["42.5"],
158
+ False: ["22.5", "62.5"],
159
+ },
160
+ longitude_wgs_fr_metropole: {
161
+ True: ["-2.5"],
162
+ False: ["12.8"],
163
+ },
164
+ pays: {
165
+ True: ["france", "italie"],
166
+ False: ["amerique", "paris"],
167
+ },
168
+ region: {
169
+ True: ["bretagne", "ile-de-france"],
170
+ False: ["baviere", "overgne"],
171
+ },
172
+ code_csp_insee: {
173
+ True: ["121f"],
174
+ False: ["121x"],
175
+ },
176
+ code_rna: {
177
+ True: ["W751515517"],
178
+ False: [
179
+ "W111111111111111111111111111111111111",
180
+ "w143788974",
181
+ "W12",
182
+ "678W23456",
183
+ "165789325",
184
+ "Wa1#89sf&h",
185
+ ],
186
+ },
187
+ code_waldec: {
188
+ True: ["751P00188854"],
189
+ False: ["AA751PEE00188854"],
190
+ },
191
+ csp_insee: {
192
+ True: ["employes de la poste"],
193
+ False: ["super-heros"],
194
+ },
195
+ sexe: {
196
+ True: ["homme"],
197
+ False: ["hermaphrodite"],
198
+ },
199
+ siren: {
200
+ True: ["552 100 554", "552100554"],
201
+ False: ["42"],
202
+ },
203
+ siret: {
204
+ True: ["13002526500013", "130 025 265 00013"],
205
+ False: ["13002526500012"],
206
+ },
207
+ uai: {
208
+ True: ["0422170F"],
209
+ False: ["04292E"],
210
+ },
211
+ date_fr: {
212
+ True: ["13 fevrier 1996"],
213
+ False: ["44 march 2025"],
214
+ },
215
+ insee_ape700: {
216
+ True: ["0116Z"],
217
+ False: ["0116A"]
218
+ },
219
+ tel_fr: {
220
+ True: ["0134643467"],
221
+ False: ["6625388263", "01288398"],
222
+ },
223
+ jour_de_la_semaine: {
224
+ True: ["lundi"],
225
+ False: ["jour de la biere"],
226
+ },
227
+ mois_de_annee: {
228
+ True: ["juin", "décembre"],
229
+ False: ["november"],
230
+ },
231
+ iso_country_code_alpha2: {
232
+ True: ["FR"],
233
+ False: ["XX", "A", "FRA"],
234
+ },
235
+ iso_country_code_alpha3: {
236
+ True: ["FRA"],
237
+ False: ["XXX", "FR", "A"],
238
+ },
239
+ iso_country_code_numeric: {
240
+ True: ["250"],
241
+ False: ["003"],
242
+ },
243
+ json_geojson: {
244
+ True: [
245
+ '{"coordinates": [45.783753, 3.049342], "type": "63870"}',
246
+ '{"geometry": {"coordinates": [45.783753, 3.049342]}}',
247
+ ],
248
+ False: ['{"pomme": "fruit", "reponse": 42}'],
249
+ },
250
+ latitude_wgs: {
251
+ True: ["43.2", "-22"],
252
+ False: ["100"],
253
+ },
254
+ latlon_wgs: {
255
+ True: ["43.2,-22.6", "-10.7,140", "-40.7, 10.8"],
256
+ False: ["0.1,192", "-102, 92"],
257
+ },
258
+ longitude_wgs: {
259
+ True: ["120", "-20.2"],
260
+ False: ["-200"],
261
+ },
262
+ booleen: {
263
+ True: ["oui", "0", "1", "yes", "false", "True"],
264
+ False: ["nein", "ja", "2", "-0"],
265
+ },
266
+ email: {
267
+ True: ["cdo_intern@data.gouv.fr"],
268
+ False: ["cdo@@gouv.sfd"],
269
+ },
270
+ json: {
271
+ True: ['{"pomme": "fruit", "reponse": 42}', "[1,2,3,4]"],
272
+ False: ['{"coordinates": [45.783753, 3.049342], "citycode": "63870"}', "{zefib:"],
273
+ },
274
+ money: {
275
+ True: ["120€", "-20.2$"],
276
+ False: ["200", "100 euros"],
277
+ },
278
+ mongo_object_id: {
279
+ True: ["62320e50f981bc2b57bcc044"],
280
+ False: ["884762be-51f3-44c3-b811-1e14c5d89262", "0230240284a66e"],
281
+ },
282
+ percent: {
283
+ True: ["120%", "-20.2%"],
284
+ False: ["200", "100 pourcents"],
285
+ },
286
+ twitter: {
287
+ True: ["@accueil1"],
288
+ False: ["adresse@mail"],
289
+ },
290
+ url: {
291
+ True: ["www.etalab.data.gouv.fr"],
292
+ False: ["une phrase avec un @ dedans"],
293
+ },
294
+ uuid: {
295
+ True: ["884762be-51f3-44c3-b811-1e14c5d89262"],
296
+ False: ["0610928327"],
297
+ },
298
+ test_int: {
299
+ True: ["1", "0", "1764", "-24"],
300
+ False: ["01053", "1.2", "123_456", "+35"],
301
+ },
302
+ test_float: {
303
+ True: ["1", "0", "1764", "-24", "1.2", "1863.23", "-12.7", "0.1"],
304
+ False: ["01053", "01053.89", "1e3", "123_456", "123_456.78", "+35", "+35.9"],
305
+ },
306
+ date: {
307
+ True: [
308
+ "1960-08-07",
309
+ "12/02/2007",
310
+ "15 jan 1985",
311
+ "15 décembre 1985",
312
+ "02 05 2003",
313
+ "20030502",
314
+ "1993-12/02",
315
+ ],
316
+ False: [
317
+ "1993-1993-1993",
318
+ "39-10-1993",
319
+ "19-15-1993",
320
+ "15 tambour 1985",
321
+ "12152003",
322
+ "20031512",
323
+ "02052003",
324
+ ],
325
+ },
326
+ datetime: {
327
+ True: ["2021-06-22T10:20:10"],
328
+ False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT"],
329
+ },
330
+ datetime_iso: {
331
+ True: ["2021-06-22T10:20:10"],
332
+ False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT"],
333
+ },
334
+ datetime_rfc822: {
335
+ True: ["Sun, 06 Nov 1994 08:49:37 GMT"],
336
+ False: ["2021-06-22T10:20:10"],
337
+ },
338
+ year: {
339
+ True: ["2015"],
340
+ False: ["20166"],
341
+ },
342
+ }
343
+
344
+ # we could also have a function here to add all True values of (almost)
345
+ # each field to the False values of all others
346
+
347
+
348
+ def test_all_fields_have_tests():
349
+ all_tests = return_all_tests("ALL", "detect_fields")
350
+ for test in all_tests:
351
+ assert fields.get(test)
506
352
 
507
353
 
508
- def test_not_match_float():
509
- for val in ["01053", "01053.89", "1e3", "123_456", "123_456.78", "+35", "+35.9"]:
510
- assert not test_float._is(val)
354
+ @pytest.mark.parametrize(
355
+ "args",
356
+ (
357
+ (field, value, valid)
358
+ for field in fields
359
+ for valid in [True, False]
360
+ for value in fields[field][valid]
361
+ ),
362
+ )
363
+ def test_fields_with_values(args):
364
+ field, value, valid = args
365
+ assert field._is(value) is valid
511
366
 
512
367
 
513
368
  @pytest.mark.parametrize(