csv-detective 0.7.5.dev1277__py3-none-any.whl → 0.7.5.dev1298__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
Files changed (31)
  1. csv_detective/__init__.py +1 -1
  2. csv_detective/detect_fields/__init__.py +6 -4
  3. csv_detective/detect_fields/geo/latlon_wgs/__init__.py +7 -7
  4. csv_detective/detect_fields/other/float/__init__.py +4 -4
  5. csv_detective/detect_fields/other/money/__init__.py +11 -0
  6. csv_detective/detect_fields/other/percent/__init__.py +9 -0
  7. csv_detective/detection/formats.py +145 -0
  8. csv_detective/explore_csv.py +94 -222
  9. csv_detective/load_tests.py +62 -0
  10. csv_detective/output/__init__.py +64 -0
  11. csv_detective/output/dataframe.py +0 -0
  12. csv_detective/output/example.py +77 -77
  13. csv_detective/output/profile.py +0 -0
  14. csv_detective/output/schema.py +0 -0
  15. csv_detective/output/utils.py +0 -0
  16. csv_detective/utils.py +2 -0
  17. csv_detective/validate.py +70 -0
  18. {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/CHANGELOG.md +2 -0
  19. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/METADATA +1 -1
  20. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/RECORD +27 -20
  21. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/WHEEL +1 -1
  22. tests/test_example.py +10 -10
  23. tests/test_fields.py +270 -415
  24. tests/test_file.py +19 -9
  25. tests/test_structure.py +6 -0
  26. tests/test_validation.py +18 -0
  27. {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
  28. {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/README.md +0 -0
  29. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/entry_points.txt +0 -0
  30. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
  31. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/top_level.txt +0 -0
csv_detective/load_tests.py ADDED
@@ -0,0 +1,62 @@
+ import os
+ from typing import Union
+
+ # flake8: noqa
+ from csv_detective import detect_fields, detect_labels
+
+
+ def get_all_packages(detect_type) -> list:
+     root_dir = os.path.dirname(os.path.abspath(__file__)) + "/" + detect_type
+     modules = []
+     for dirpath, _, filenames in os.walk(root_dir):
+         for filename in filenames:
+             file = os.path.join(dirpath, filename).replace(root_dir, "")
+             if file.endswith("__init__.py"):
+                 module = (
+                     file.replace("__init__.py", "")
+                     .replace("/", ".").replace("\\", ".")[:-1]
+                 )
+                 if module:
+                     modules.append(detect_type + module)
+     return modules
+
+
+ def return_all_tests(
+     user_input_tests: Union[str, list],
+     detect_type: str,
+ ) -> list:
+     """
+     returns all tests that have a method _is and are listed in the user_input_tests
+     the function can select a sub_package from csv_detective
+     user_input_tests may look like this:
+     - "ALL": all possible tests are made
+     - "FR.other.siren" (or any other path-like string to one of the tests, or a group of tests, like "FR.geo"):
+       this specifc (group of) test(s) only
+     - ["FR.temp.mois_de_annee", "geo", ...]: only the specified tests will be made ; you may also skip
+       specific (groups of) tests by add "-" at the start (e.g "-temp.date")
+     """
+     assert detect_type in ["detect_fields", "detect_labels"]
+     all_packages = get_all_packages(detect_type=detect_type)
+
+     if isinstance(user_input_tests, str):
+         user_input_tests = [user_input_tests]
+     if "ALL" in user_input_tests or all(x[0] == "-" for x in user_input_tests):
+         tests_to_do = [detect_type]
+     else:
+         tests_to_do = [
+             f"{detect_type}.{x}" for x in user_input_tests if x[0] != "-"
+         ]
+     tests_skipped = [
+         f"{detect_type}.{x[1:]}" for x in user_input_tests if x[0] == "-"
+     ]
+     all_tests = [
+         # this is why we need to import detect_fields/labels
+         eval(x) for x in all_packages
+         if any([y == x[: len(y)] for y in tests_to_do])
+         and all([y != x[: len(y)] for y in tests_skipped])
+     ]
+     # to remove groups of tests
+     all_tests = [
+         test for test in all_tests if "_is" in dir(test)
+     ]
+     return all_tests
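As a reading aid (not part of the diff), a usage sketch of the new helper; the selectors shown are the ones given as examples in the docstring above:

```python
from csv_detective.load_tests import return_all_tests

# Every field detector module that exposes an `_is` method:
all_tests = return_all_tests("ALL", detect_type="detect_fields")

# Only the French geo detectors:
geo_tests = return_all_tests("FR.geo", detect_type="detect_fields")

# Everything except the date detectors (a leading "-" skips a group):
no_dates = return_all_tests("-temp.date", detect_type="detect_fields")
```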
csv_detective/output/__init__.py ADDED
@@ -0,0 +1,64 @@
+ import json
+ import os
+ from typing import Union
+
+ import pandas as pd
+
+ from csv_detective.utils import is_url
+ from .dataframe import cast_df
+ from .profile import create_profile
+ from .schema import generate_table_schema
+
+
+ def generate_output(
+     table: pd.DataFrame,
+     analysis: dict,
+     file_path: str,
+     num_rows: int = 500,
+     limited_output: bool = True,
+     save_results: Union[bool, str] = True,
+     output_profile: bool = False,
+     output_schema: bool = False,
+     output_df: bool = False,
+     cast_json: bool = True,
+     verbose: bool = False,
+     sheet_name: Union[str, int] = None,
+ ) -> Union[dict, tuple[dict, pd.DataFrame]]:
+
+     if output_profile:
+         analysis["profile"] = create_profile(
+             table=table,
+             dict_cols_fields=analysis["columns"],
+             num_rows=num_rows,
+             limited_output=limited_output,
+             verbose=verbose,
+         )
+
+     if save_results:
+         if isinstance(save_results, str):
+             output_path = save_results
+         else:
+             output_path = os.path.splitext(file_path)[0]
+             if is_url(output_path):
+                 output_path = output_path.split('/')[-1]
+             if analysis.get("sheet_name"):
+                 output_path += "_sheet-" + str(sheet_name)
+             output_path += ".json"
+         with open(output_path, "w", encoding="utf8") as fp:
+             json.dump(analysis, fp, indent=4, separators=(",", ": "), ensure_ascii=False)
+
+     if output_schema:
+         analysis["schema"] = generate_table_schema(
+             analysis,
+             save_file=False,
+             verbose=verbose
+         )
+
+     if output_df:
+         return analysis, cast_df(
+             df=table,
+             columns=analysis["columns"],
+             cast_json=cast_json,
+             verbose=verbose,
+         )
+     return analysis
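A hypothetical call sketch (not from the diff) showing how the new generate_output entry point combines the optional outputs; `table` and `analysis` are assumed to come from an earlier detection run on the same file:

```python
from csv_detective.output import generate_output

# Assumes `table` (pd.DataFrame) and `analysis` (dict with a "columns" key)
# were produced by a prior analysis of data/my_file.csv.
analysis, df = generate_output(
    table=table,
    analysis=analysis,
    file_path="data/my_file.csv",
    output_profile=True,  # adds analysis["profile"]
    output_df=True,       # also returns the cast DataFrame
    save_results=True,    # writes data/my_file.json next to the input
)
```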
csv_detective/output/dataframe.py (file without changes)
csv_detective/output/example.py CHANGED
@@ -17,12 +17,12 @@ def create_example_csv_file(
      fields: Optional[dict] = None,
      schema_path: Optional[str] = None,
      file_length: int = 10,
-     output_name: str = 'example_file.csv',
-     output_sep: str = ';',
-     encoding: str = 'utf-8',
+     output_name: Optional[str] = "example_file.csv",
+     output_sep: str = ";",
+     encoding: str = "utf-8",
      ignore_required: bool = False,
  ) -> pd.DataFrame:
-     '''
+     """
      Create an example file based on a list of dicts like follows:
      fields = [
          {
@@ -33,7 +33,7 @@ def create_example_csv_file(
          ...
      ]
      Or from a TableSchema
-     '''
+     """
      # need to make a CLI command

      if not (fields or schema_path):
@@ -53,65 +53,65 @@ def create_example_csv_file(
          enum: Optional[str] = None,
      ) -> str:
          if potential_skip(required):
-             return ''
+             return ""
          if pattern is not None:
              return rstr.xeger(pattern)
          elif enum is not None:
              return random.choice(enum)
          else:
              letters = string.ascii_lowercase
-             return ''.join(random.choice(letters) for i in range(length))
+             return "".join(random.choice(letters) for i in range(length))

      def _id(
          required: bool = True,
      ) -> str:
          if potential_skip(required):
-             return ''
+             return ""
          return str(uuid.uuid4())

      def _date(
          date_range: Union[None, list[str]] = None,
-         format: str = '%Y-%m-%d',
+         format: str = "%Y-%m-%d",
          required: bool = True,
      ) -> str:
          # the bounds specified in date_range are expected in the same format as the desired output format
-         assert all([k in format for k in ['%d', '%m', '%Y']])
+         assert all([k in format for k in ["%d", "%m", "%Y"]])
          if potential_skip(required):
-             return ''
+             return ""
          if date_range is None:
              return fake.date(format)
          else:
              if len(date_range) != 2:
-                 raise ValueError('"date_range" must have exactly two elements.')
+                 raise ValueError("'date_range' must have exactly two elements.")
              return fake.date_between_dates(
                  datetime.strptime(date_range[0], format),
                  datetime.strptime(date_range[1], format),
              ).strftime(format)

      def _time(
-         format: str = '%H:%M:%S',
+         format: str = "%H:%M:%S",
          required: bool = True,
      ) -> str:
-         assert all([k in format for k in ['%H', '%M', '%S']])
+         assert all([k in format for k in ["%H", "%M", "%S"]])
          if potential_skip(required):
-             return ''
+             return ""
          # maybe add a time_range argument?
          return fake.time(format)

      def _datetime(
          datetime_range: Optional[list[str]] = None,
-         format: str = '%Y-%m-%d %H-%M-%S',
+         format: str = "%Y-%m-%d %H-%M-%S",
          required: bool = True,
      ) -> str:
          # the bounds specified in datetime_range are expected in the same format as the desired output format
-         assert all([k in format for k in ['%d', '%m', '%Y', '%H', '%M', '%S']])
+         assert all([k in format for k in ["%d", "%m", "%Y", "%H", "%M", "%S"]])
          if potential_skip(required):
-             return ''
+             return ""
          if datetime_range is None:
              return fake.date_time().strftime(format)
          else:
              if len(datetime_range) != 2:
-                 raise ValueError('"date_range" must have exactly two elements.')
+                 raise ValueError("'date_range' must have exactly two elements.")
              return fake.date_time_between(
                  datetime.strptime(datetime_range[0], format),
                  datetime.strptime(datetime_range[1], format),
@@ -119,8 +119,8 @@ def create_example_csv_file(

      def _url(required: bool = True) -> str:
          if potential_skip(required):
-             return ''
-         return f'http://{rstr.domainsafe()}.{rstr.letters(3)}/{rstr.urlsafe()}'
+             return ""
+         return f"http://{rstr.domainsafe()}.{rstr.letters(3)}/{rstr.urlsafe()}"

      def _number(
          num_type: Type[Union[int, float]] = int,
@@ -130,7 +130,7 @@ def create_example_csv_file(
      ) -> Union[int, float]:
          assert num_range is None or len(num_range) == 2
          if potential_skip(required):
-             return ''
+             return ""
          if enum:
              return random.choice(enum)
          if num_range is None:
@@ -142,100 +142,100 @@ def create_example_csv_file(

      def _bool(required: bool = True) -> bool:
          if potential_skip(required):
-             return ''
+             return ""
          return random.randint(0, 1) == 0

      def _array(enum: list[Any], required: bool = True) -> str:
          if potential_skip(required):
-             return ''
+             return ""
          return f"[{','.join(random.sample(enum, random.randint(1, len(enum))))}]"

      def build_args_from_constraints(constraints: dict) -> dict:
          args = {}
-         args['required'] = constraints.get('required', False)
-         for _ in ['pattern', 'enum', 'format']:
+         args["required"] = constraints.get("required", False)
+         for _ in ["pattern", "enum", "format"]:
              if _ in constraints:
                  args[_] = constraints[_]
-         if 'minimum' in constraints and 'maximum' in constraints:
-             args['num_range'] = [constraints['minimum'], constraints['maximum']]
+         if "minimum" in constraints and "maximum" in constraints:
+             args["num_range"] = [constraints["minimum"], constraints["maximum"]]
          # maybe there are better values than these?
-         elif 'minimum' in constraints:
-             args['num_range'] = [constraints['minimum'], 10 + constraints['minimum']]
-         elif 'maximum' in constraints:
-             args['num_range'] = [constraints['maximum'] - 10, constraints['maximum']]
-         if 'minLength' in constraints:
-             args['length'] = constraints['minLength']
-         if 'maxLength' in constraints:
-             args['length'] = constraints['maxLength']
+         elif "minimum" in constraints:
+             args["num_range"] = [constraints["minimum"], 10 + constraints["minimum"]]
+         elif "maximum" in constraints:
+             args["num_range"] = [constraints["maximum"] - 10, constraints["maximum"]]
+         if "minLength" in constraints:
+             args["length"] = constraints["minLength"]
+         if "maxLength" in constraints:
+             args["length"] = constraints["maxLength"]
          return args

      schema_types_to_python = {
-         'number': 'float',
-         'integer': 'int',
-         'string': 'str',
-         'year': 'year',
-         'boolean': 'bool',
-         'date': 'date',
-         'yearmonth': 'date',
-         'time': 'time',
-         'datetime': 'datetime',
-         'array': 'array'
+         "number": "float",
+         "integer": "int",
+         "string": "str",
+         "year": "year",
+         "boolean": "bool",
+         "date": "date",
+         "yearmonth": "date",
+         "time": "time",
+         "datetime": "datetime",
+         "array": "array"
      }

      if schema_path:
-         if schema_path.startswith('http'):
+         if schema_path.startswith("http"):
              schema = requests.get(schema_path).json()
          else:
              with open(schema_path, encoding=encoding) as jsonfile:
                  schema = json.load(jsonfile)
-         if not ('fields' in schema.keys()):
-             raise ValueError('The schema must have a "fields" key.')
+         if not ("fields" in schema.keys()):
+             raise ValueError("The schema must have a 'fields' key.")
          else:
              fields = [
                  {
-                     'name': f['name'],
-                     'type': schema_types_to_python.get(f['type'], 'str'),
+                     "name": f["name"],
+                     "type": schema_types_to_python.get(f["type"], "str"),
                      # when frformat is supported in TableSchema, we can build args for French standards
                      # linked to https://github.com/datagouv/fr-format/issues/26
-                     'args': (
-                         build_args_from_constraints(f['constraints']) if 'constraints' in f.keys()
-                         else build_args_from_constraints(f['arrayItem']['constraints'])
-                         if 'arrayItem' in f.keys() and 'constraints' in f['arrayItem'].keys()
+                     "args": (
+                         build_args_from_constraints(f["constraints"]) if "constraints" in f.keys()
+                         else build_args_from_constraints(f["arrayItem"]["constraints"])
+                         if "arrayItem" in f.keys() and "constraints" in f["arrayItem"].keys()
                          else {}
                      )
-                 } for f in schema['fields']
+                 } for f in schema["fields"]
              ]

      for k in range(len(fields)):
-         if 'args' not in fields[k]:
-             fields[k]['args'] = {}
-         if fields[k]['type'] == 'float':
-             fields[k]['args']['num_type'] = float
-         elif fields[k]['type'] == 'int':
-             fields[k]['args']['num_type'] = int
-         elif fields[k]['type'] == 'year':
-             fields[k]['args']['num_type'] = int
-             fields[k]['args']['num_range'] = [1990, 2050]
+         if "args" not in fields[k]:
+             fields[k]["args"] = {}
+         if fields[k]["type"] == "float":
+             fields[k]["args"]["num_type"] = float
+         elif fields[k]["type"] == "int":
+             fields[k]["args"]["num_type"] = int
+         elif fields[k]["type"] == "year":
+             fields[k]["args"]["num_type"] = int
+             fields[k]["args"]["num_range"] = [1990, 2050]

      types_to_func = {
-         'int': _number,
-         'float': _number,
-         'date': _date,
-         'time': _time,
-         'str': _string,
-         'url': _url,
-         'id': _id,
-         'year': _number,
-         'bool': _bool,
-         'datetime': _datetime,
-         'array': _array,
+         "int": _number,
+         "float": _number,
+         "date": _date,
+         "time": _time,
+         "str": _string,
+         "url": _url,
+         "id": _id,
+         "year": _number,
+         "bool": _bool,
+         "datetime": _datetime,
+         "array": _array,
      }

      # would it be better to create by column or by row (as for now)?
      output = pd.DataFrame(
          [
              [
-                 types_to_func.get(f['type'], 'str')(**f['args'])
+                 types_to_func.get(f["type"], "str")(**f["args"])
                  for f in fields
              ] for _ in range(file_length)
          ],
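For context (not part of the diff, which only switches quote styles in this file), a hypothetical call built from the docstring's fields format; the field names and args shown are illustrative and map to the generators in types_to_func above:

```python
from csv_detective.output.example import create_example_csv_file

# Each field's "args" dict is forwarded as keyword arguments to the
# generator selected by its "type".
df = create_example_csv_file(
    fields=[
        {"name": "id", "type": "id", "args": {}},
        {"name": "amount", "type": "float", "args": {"num_range": [0, 100]}},
        {"name": "created", "type": "date", "args": {"format": "%Y-%m-%d"}},
    ],
    file_length=5,
)
```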
csv_detective/output/profile.py (file without changes)
csv_detective/output/schema.py (file without changes)
csv_detective/output/utils.py (file without changes)
csv_detective/utils.py CHANGED
@@ -2,6 +2,8 @@ import logging
  import math
  from typing import Optional

+ logging.basicConfig(level=logging.INFO)
+

  def display_logs_depending_process_time(prompt: str, duration: float):
      '''
csv_detective/validate.py ADDED
@@ -0,0 +1,70 @@
+ import logging
+ from typing import Union
+
+ import pandas as pd
+
+ from csv_detective.load_tests import return_all_tests
+ from .parsing.load import load_file
+
+ logging.basicConfig(level=logging.INFO)
+
+ tests = {
+     t.__name__.split(".")[-1]: t._is
+     for t in return_all_tests("ALL", "detect_fields")
+ }
+
+
+ def validate(
+     file_path: str,
+     previous_analysis: dict,
+     num_rows: int = 500,
+     encoding: str = None,
+     sep: str = None,
+     verbose: bool = False,
+     skipna: bool = True,
+     sheet_name: Union[str, int] = None,
+ ) -> tuple[bool, pd.DataFrame, dict]:
+     """
+     Verify is the given file has the same fields and types as in the previous analysis.
+     """
+     table, analysis = load_file(
+         file_path=file_path,
+         num_rows=num_rows,
+         encoding=encoding,
+         sep=sep,
+         verbose=verbose,
+         sheet_name=sheet_name,
+     )
+     if verbose:
+         logging.info("Comparing table with the previous analysis")
+         logging.info("- Checking if all columns match")
+     if (
+         any(col_name not in list(table.columns) for col_name in previous_analysis["columns"])
+         or any(col_name not in list(previous_analysis["columns"].keys()) for col_name in table.columns)
+     ):
+         logging.warning("> Columns do not match, proceeding with full analysis")
+         return False, table, analysis
+     for col_name, args in previous_analysis["columns"].items():
+         if verbose:
+             logging.info(f"- Testing {col_name} for {args['format']}")
+         if args["format"] == "string":
+             # no test for columns that have not been recognized as a specific format
+             continue
+         test_func = tests[args["format"]]
+         col_data = table[col_name]
+         if skipna:
+             col_data = col_data.loc[~col_data.isna()]
+         if not col_data.apply(test_func).all():
+             logging.warning("> Test failed, proceeding with full analysis")
+             return False, table, analysis
+     if verbose:
+         logging.info("> All checks successful")
+     return True, table, analysis | {
+         k: previous_analysis[k] for k in [
+             "categorical",
+             "columns",
+             "columns_fields",
+             "columns_labels",
+             "formats",
+         ]
+     }
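A sketch of the validation flow this enables (assumed usage, not shown in the diff): reuse a stored analysis when the file still conforms, and fall back to a full analysis otherwise; `previous_analysis` is assumed to be the dict produced by an earlier full run:

```python
from csv_detective.validate import validate

ok, table, analysis = validate(
    file_path="data/my_file.csv",
    previous_analysis=previous_analysis,
    verbose=True,
)
if not ok:
    # Columns or formats changed: a full analysis is needed; the already
    # loaded `table` and partial `analysis` are returned to avoid reloading.
    ...
```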
{csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/CHANGELOG.md RENAMED
@@ -13,7 +13,9 @@
  - Handle csv.gz files [#110](https://github.com/datagouv/csv-detective/pull/110)
  - Refactor file tests [#110](https://github.com/datagouv/csv-detective/pull/110)
  - Restructure repo (breaking changes) [#111](https://github.com/datagouv/csv-detective/pull/111)
+ - Add validation function and associated flow [#112](https://github.com/datagouv/csv-detective/pull/112)
  - Better float detection [#113](https://github.com/datagouv/csv-detective/pull/113)
+ - Refactor fields tests [#114](https://github.com/datagouv/csv-detective/pull/114)

  ## 0.7.4 (2024-11-15)

{csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: csv_detective
- Version: 0.7.5.dev1277
+ Version: 0.7.5.dev1298
  Summary: Detect CSV column content
  Home-page: https://github.com/etalab/csv_detective
  Author: Etalab
{csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/RECORD RENAMED
@@ -1,9 +1,11 @@
- csv_detective/__init__.py,sha256=GCHgu0BhH5ACV7cf-1gDr9nRyvSoeQ1vRw9SjEHeMT4,143
+ csv_detective/__init__.py,sha256=vpK7WMkIQbcJzu6HKOwcn7PpHsNCCaXZ1YLMS5Wq9tM,165
  csv_detective/cli.py,sha256=itooHtpyfC6DUsL_DchPKe1xo7m0MYJIp1L4R8eqoTk,1401
- csv_detective/explore_csv.py,sha256=FmgJ2h1SxV8b_wOWia4xsswyVJTlCCW66e0nhltz-0s,14511
+ csv_detective/explore_csv.py,sha256=ocWlUEtuwZ-6bjDc6gfhC2-6DljMVhvXhHrfICCXGfQ,8986
+ csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
- csv_detective/utils.py,sha256=KAYfSJXnPuAXnSc38Jm57oQ_JP_0kUkmI1OV6gN5_ys,1116
- csv_detective/detect_fields/__init__.py,sha256=NVfE3BQVExgXb-BPbhDvlkM5-0naEVLpZ4aM_OGHYfE,931
+ csv_detective/utils.py,sha256=Bx_1k4Sdpd5PCjuAy4AeayCmmw7TMR_zgtKIHNLi5g0,1157
+ csv_detective/validate.py,sha256=o4Qulf8E-x1zsWT9OD4Fpw83Gku1WA3JlX83j7bu0DA,2314
+ csv_detective/detect_fields/__init__.py,sha256=qkwT_o_S7qvLEsRssICpoGmCc3h5y2MVy1XI56LFcV0,959
  csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
@@ -48,15 +50,17 @@ csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=wJAy
  csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
  csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=FPHOfTrfXJs62-NgeOcNGOvwPd7I1fEVp8lTdMNfj3w,433
  csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=ArS6PuYEd0atZwSqNDZhXZz1TwzdiwdV8ovRYTOacpg,327
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=3nlBqFYD4kVSVxw4b9DTPcxW59oL0T3Kj0OxPlyP9og,268
+ csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=7_mnO9uC_kI7e2WR8xIer7Kqw8zi-v-JKaAD4zcoGbE,342
  csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=G7afWOKiGh_Tv7gwDNGt1a4B_A8hkCBkIxn3THDCUFk,330
  csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  csv_detective/detect_fields/other/booleen/__init__.py,sha256=wn_yyTAmGxqo0l0b7JRpGb0da_E27iGxES9zWCrnsqc,497
  csv_detective/detect_fields/other/email/__init__.py,sha256=O9tgJmq0O8Q-8iin63NqEEDhlsUJjxFZNaNFM4GZaws,178
- csv_detective/detect_fields/other/float/__init__.py,sha256=7bXuPAmBuIhKJEhq7d20B60WVol1AUpqRkWhreQpWfU,578
+ csv_detective/detect_fields/other/float/__init__.py,sha256=AT4Kpgwoz5PuAoLx00u0SL8DjjXZxsE8zSRbN18uAv4,578
  csv_detective/detect_fields/other/int/__init__.py,sha256=QN3kQJLYqLRBiubUK7g4Xq03PlA5wqVwx2pPPIO9FdI,320
  csv_detective/detect_fields/other/json/__init__.py,sha256=DhzyvT12kOqgum89silIu3uoSYXmC_s_AaxLtXAD4eU,540
+ csv_detective/detect_fields/other/money/__init__.py,sha256=g_ZwBZXl9LhldwFYQotC5WqLiE8qQCZHtoI9eJvl_9M,232
  csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=7fcrHsOZAqXp2_N0IjPskYJ_qi4xRlo9iyNNDQVLzsU,156
+ csv_detective/detect_fields/other/percent/__init__.py,sha256=vgpekNOPBRuunoVBXMi81rwHv4uSOhe78pbVtQ5SBO8,177
  csv_detective/detect_fields/other/twitter/__init__.py,sha256=qbwLKsTBRFQ4PyTNVeEZ5Hkf5Wwi3ZKclLER_V0YO3g,154
  csv_detective/detect_fields/other/url/__init__.py,sha256=9WaTqCglEsw_lJG_xZsBMdxJXg2yuQ92_fkX6CXWNV0,286
  csv_detective/detect_fields/other/uuid/__init__.py,sha256=3-z0fDax29SJc57zPjNGR6DPICJu6gfuNGC5L3jh4d0,223
@@ -126,12 +130,14 @@ csv_detective/detect_labels/temp/year/__init__.py,sha256=3U9j8Hux432KdGtIyArq_-v
  csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
  csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
  csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
+ csv_detective/detection/formats.py,sha256=VwFazRAFJN6eaYUK7IauVU88vuUBHccESY4UD8EgGUo,5386
  csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41cnQmc,1172
  csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
  csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
  csv_detective/detection/variables.py,sha256=3qEMtjZ_zyIFXvTnFgK7ZMDx8C12uQXKfFjEj2moyJc,3558
+ csv_detective/output/__init__.py,sha256=XDS4Dgvv6oloIao9JquHa0m1nnlQ_q2gHuEPGlaETic,1890
  csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
- csv_detective/output/example.py,sha256=i8PkdXxidF7qR_9aK8vh12JpZdJQryhBgyrMS8iy5rk,8642
+ csv_detective/output/example.py,sha256=26rY7XNXK47e9xJMl-Js8jJwFIuv7V7B7e256VecKuk,8652
  csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
  csv_detective/output/schema.py,sha256=ZDBWDOD8IYp7rcB0_n8l9JXGIhOQ6bTZHFWfTmnNNEQ,13480
  csv_detective/output/utils.py,sha256=HbmvCCCmFo7NJxhD_UsJIveuw-rrfhrvYckv1CJn_10,2301
@@ -141,18 +147,19 @@ csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,
  csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
  csv_detective/parsing/load.py,sha256=SpP0pfxswOAPPpwbZfoP1blh0EKV5VMs0TpTgQJKzjs,3621
  csv_detective/parsing/text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
- csv_detective-0.7.5.dev1277.data/data/share/csv_detective/CHANGELOG.md,sha256=tgIIm6s4qoP4RGJK1cmqf-Cm5aHmXmBrwi37NVIYedg,7796
- csv_detective-0.7.5.dev1277.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
- csv_detective-0.7.5.dev1277.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
- csv_detective-0.7.5.dev1277.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+ csv_detective-0.7.5.dev1298.data/data/share/csv_detective/CHANGELOG.md,sha256=Y8aL18x5EGGvA9AqukEi4tn78se_Lzisa2J32kOSer8,7984
+ csv_detective-0.7.5.dev1298.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+ csv_detective-0.7.5.dev1298.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
+ csv_detective-0.7.5.dev1298.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
- tests/test_fields.py,sha256=LPLx09cX5u9XHAh65XvTgIqzKylToiHZxXzKhpV0wsk,11148
- tests/test_file.py,sha256=EleTssys5fCP4N0W1eTZN35uijzoF15e3dIcuIlrMsk,7865
+ tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
+ tests/test_fields.py,sha256=0hce2XtDHY9dTLCYhrm2s4I41OeKsQbbaKmDZ4XctUw,9824
+ tests/test_file.py,sha256=9APE1d43lQ8Dk8lwJFNUK_YekYYsQ0ae2_fgpcPE9mk,8116
  tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
- tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
- csv_detective-0.7.5.dev1277.dist-info/METADATA,sha256=RgcnqpKqQ1us0lmVf6McKYJs38DC1sqvAh10XgnJOY8,1386
- csv_detective-0.7.5.dev1277.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
- csv_detective-0.7.5.dev1277.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
- csv_detective-0.7.5.dev1277.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
- csv_detective-0.7.5.dev1277.dist-info/RECORD,,
+ tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
+ tests/test_validation.py,sha256=VwtBcnGAQ_eSFrBibWnMSTDjuy6y2JLlqvc3Zb667NY,479
+ csv_detective-0.7.5.dev1298.dist-info/METADATA,sha256=cy8kKhsbQVd8DQ2UMJe7z1nyxoGEvmFnQfsTdCTwXXc,1386
+ csv_detective-0.7.5.dev1298.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+ csv_detective-0.7.5.dev1298.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+ csv_detective-0.7.5.dev1298.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
+ csv_detective-0.7.5.dev1298.dist-info/RECORD,,
{csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (79.0.0)
+ Generator: setuptools (79.0.1)
  Root-Is-Purelib: true
  Tag: py3-none-any
