csv-detective 0.9.3.dev2010__py3-none-any.whl → 0.9.3.dev2052__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +1 -2
- csv_detective/detect_fields/temp/datetime_naive/__init__.py +2 -6
- csv_detective/explore_csv.py +3 -110
- csv_detective/output/__init__.py +1 -1
- csv_detective/output/schema.py +8 -86
- {csv_detective-0.9.3.dev2010.dist-info → csv_detective-0.9.3.dev2052.dist-info}/METADATA +1 -2
- {csv_detective-0.9.3.dev2010.dist-info → csv_detective-0.9.3.dev2052.dist-info}/RECORD +12 -14
- tests/test_fields.py +8 -1
- csv_detective/s3_utils.py +0 -44
- venv/bin/jp.py +0 -54
- {csv_detective-0.9.3.dev2010.dist-info → csv_detective-0.9.3.dev2052.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.3.dev2010.dist-info → csv_detective-0.9.3.dev2052.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.9.3.dev2010.dist-info → csv_detective-0.9.3.dev2052.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.9.3.dev2010.dist-info → csv_detective-0.9.3.dev2052.dist-info}/top_level.txt +0 -0
csv_detective/__init__.py
CHANGED
|
@@ -9,7 +9,7 @@ threshold = 0.7
|
|
|
9
9
|
# matches AAAA-MM-JJTHH:MM:SS(.dddddd)Z with any of the listed separators for the date OR NO SEPARATOR
|
|
10
10
|
pat = (
|
|
11
11
|
aaaammjj_pattern.replace("$", "")
|
|
12
|
-
+ r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})?Z
|
|
12
|
+
+ r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})?Z?$"
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
|
|
@@ -26,8 +26,4 @@ def _is(val: Optional[Any]) -> bool:
|
|
|
26
26
|
if sum([char.isdigit() or char in {"-", "/", ":", " "} for char in val]) / len(val) < threshold:
|
|
27
27
|
return False
|
|
28
28
|
res = date_casting(val)
|
|
29
|
-
return (
|
|
30
|
-
res is not None
|
|
31
|
-
and bool(res.hour or res.minute or res.second or res.microsecond)
|
|
32
|
-
and not bool(res.tzinfo)
|
|
33
|
-
)
|
|
29
|
+
return res is not None and not bool(res.tzinfo)
|
csv_detective/explore_csv.py
CHANGED
|
@@ -1,16 +1,12 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import logging
|
|
3
|
-
import os
|
|
4
|
-
import tempfile
|
|
5
2
|
from time import time
|
|
6
3
|
from typing import Optional, Union
|
|
7
4
|
|
|
8
5
|
import pandas as pd
|
|
9
6
|
|
|
10
7
|
from csv_detective.detection.formats import detect_formats
|
|
11
|
-
from csv_detective.output import generate_output
|
|
8
|
+
from csv_detective.output import generate_output
|
|
12
9
|
from csv_detective.parsing.load import load_file
|
|
13
|
-
from csv_detective.s3_utils import download_from_minio, upload_to_minio
|
|
14
10
|
from csv_detective.utils import display_logs_depending_process_time, is_url
|
|
15
11
|
from csv_detective.validate import validate
|
|
16
12
|
|
|
@@ -33,11 +29,11 @@ def routine(
|
|
|
33
29
|
verbose: bool = False,
|
|
34
30
|
sheet_name: Optional[Union[str, int]] = None,
|
|
35
31
|
) -> Union[dict, tuple[dict, pd.DataFrame]]:
|
|
36
|
-
"""Returns a dict with information about the
|
|
32
|
+
"""Returns a dict with information about the table and possible
|
|
37
33
|
column contents, and if requested the DataFrame with columns cast according to analysis.
|
|
38
34
|
|
|
39
35
|
Args:
|
|
40
|
-
file_path: local path to
|
|
36
|
+
file_path: local path or URL to file
|
|
41
37
|
num_rows: number of rows to sample from the file for analysis ; -1 for analysis
|
|
42
38
|
of the whole file
|
|
43
39
|
user_input_tests: tests to run on the file
|
|
@@ -173,106 +169,3 @@ def validate_then_detect(
|
|
|
173
169
|
display_logs_depending_process_time(
|
|
174
170
|
f"Process completed in {round(time() - start_routine, 3)}s", time() - start_routine
|
|
175
171
|
)
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
def routine_minio(
|
|
179
|
-
csv_minio_location: dict[str, str],
|
|
180
|
-
output_minio_location: dict[str, str],
|
|
181
|
-
tableschema_minio_location: dict[str, str],
|
|
182
|
-
minio_user: str,
|
|
183
|
-
minio_pwd: str,
|
|
184
|
-
**kwargs,
|
|
185
|
-
):
|
|
186
|
-
"""Returns a dict with information about the csv table and possible
|
|
187
|
-
column contents.
|
|
188
|
-
|
|
189
|
-
Args:
|
|
190
|
-
csv_minio_location: dict with Minio URL, bucket and key of the CSV file
|
|
191
|
-
output_minio_location: Minio URL, bucket and key to store output file. None if
|
|
192
|
-
not uploading to Minio.
|
|
193
|
-
tableschema_minio_location: Minio URL, bucket and key to store tableschema file.
|
|
194
|
-
None if not uploading the tableschema to Minio.
|
|
195
|
-
minio_user: user name for the minio instance
|
|
196
|
-
minio_pwd: password for the minio instance
|
|
197
|
-
kwargs: arguments for routine
|
|
198
|
-
|
|
199
|
-
Returns:
|
|
200
|
-
dict: a dict with information about the csv and possible types for each column
|
|
201
|
-
"""
|
|
202
|
-
|
|
203
|
-
if (
|
|
204
|
-
(
|
|
205
|
-
any(
|
|
206
|
-
[
|
|
207
|
-
location_dict is not None
|
|
208
|
-
for location_dict in [
|
|
209
|
-
csv_minio_location,
|
|
210
|
-
output_minio_location,
|
|
211
|
-
tableschema_minio_location,
|
|
212
|
-
]
|
|
213
|
-
]
|
|
214
|
-
)
|
|
215
|
-
)
|
|
216
|
-
and (minio_user is None)
|
|
217
|
-
or (minio_pwd is None)
|
|
218
|
-
):
|
|
219
|
-
raise ValueError("Minio credentials are required if using Minio")
|
|
220
|
-
|
|
221
|
-
for location_dict in [
|
|
222
|
-
csv_minio_location,
|
|
223
|
-
output_minio_location,
|
|
224
|
-
tableschema_minio_location,
|
|
225
|
-
]:
|
|
226
|
-
if location_dict is not None:
|
|
227
|
-
if any(
|
|
228
|
-
[
|
|
229
|
-
(location_key not in location_dict) or (location_dict[location_key] is None)
|
|
230
|
-
for location_key in ["netloc", "bucket", "key"]
|
|
231
|
-
]
|
|
232
|
-
):
|
|
233
|
-
raise ValueError("Minio location dict must contain url, bucket and key")
|
|
234
|
-
|
|
235
|
-
file_path = tempfile.NamedTemporaryFile(delete=False).name
|
|
236
|
-
download_from_minio(
|
|
237
|
-
netloc=csv_minio_location["netloc"],
|
|
238
|
-
bucket=csv_minio_location["bucket"],
|
|
239
|
-
key=csv_minio_location["key"],
|
|
240
|
-
filepath=file_path,
|
|
241
|
-
minio_user=minio_user,
|
|
242
|
-
minio_pwd=minio_pwd,
|
|
243
|
-
)
|
|
244
|
-
|
|
245
|
-
analysis = routine(
|
|
246
|
-
file_path,
|
|
247
|
-
save_results=True,
|
|
248
|
-
**kwargs,
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
# Write report JSON file.
|
|
252
|
-
output_path_to_store_minio_file = os.path.splitext(file_path)[0] + ".json"
|
|
253
|
-
with open(output_path_to_store_minio_file, "w", encoding="utf8") as fp:
|
|
254
|
-
json.dump(analysis, fp, indent=4, separators=(",", ": "))
|
|
255
|
-
|
|
256
|
-
upload_to_minio(
|
|
257
|
-
netloc=output_minio_location["netloc"],
|
|
258
|
-
bucket=output_minio_location["bucket"],
|
|
259
|
-
key=output_minio_location["key"],
|
|
260
|
-
filepath=output_path_to_store_minio_file,
|
|
261
|
-
minio_user=minio_user,
|
|
262
|
-
minio_pwd=minio_pwd,
|
|
263
|
-
)
|
|
264
|
-
|
|
265
|
-
os.remove(output_path_to_store_minio_file)
|
|
266
|
-
os.remove(file_path)
|
|
267
|
-
|
|
268
|
-
generate_table_schema(
|
|
269
|
-
analysis_report=analysis,
|
|
270
|
-
save_file=True,
|
|
271
|
-
netloc=tableschema_minio_location["netloc"],
|
|
272
|
-
bucket=tableschema_minio_location["bucket"],
|
|
273
|
-
key=tableschema_minio_location["key"],
|
|
274
|
-
minio_user=minio_user,
|
|
275
|
-
minio_pwd=minio_pwd,
|
|
276
|
-
)
|
|
277
|
-
|
|
278
|
-
return analysis
|
csv_detective/output/__init__.py
CHANGED
|
@@ -51,7 +51,7 @@ def generate_output(
|
|
|
51
51
|
)
|
|
52
52
|
|
|
53
53
|
if output_schema:
|
|
54
|
-
analysis["schema"] = generate_table_schema(analysis,
|
|
54
|
+
analysis["schema"] = generate_table_schema(analysis, save_results=False, verbose=verbose)
|
|
55
55
|
|
|
56
56
|
if output_df:
|
|
57
57
|
return analysis, cast_df(
|
csv_detective/output/schema.py
CHANGED
|
@@ -1,14 +1,9 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
import os
|
|
4
|
-
import tempfile
|
|
5
3
|
from datetime import datetime
|
|
6
4
|
from time import time
|
|
7
|
-
from typing import
|
|
5
|
+
from typing import Union
|
|
8
6
|
|
|
9
|
-
from botocore.exceptions import ClientError
|
|
10
|
-
|
|
11
|
-
from csv_detective.s3_utils import download_from_minio, get_s3_client, upload_to_minio
|
|
12
7
|
from csv_detective.utils import display_logs_depending_process_time
|
|
13
8
|
|
|
14
9
|
|
|
@@ -202,25 +197,14 @@ def get_constraints(format: str) -> dict:
|
|
|
202
197
|
|
|
203
198
|
def generate_table_schema(
|
|
204
199
|
analysis_report: dict,
|
|
205
|
-
|
|
206
|
-
netloc: Optional[str] = None,
|
|
207
|
-
bucket: Optional[str] = None,
|
|
208
|
-
key: Optional[str] = None,
|
|
209
|
-
minio_user: Optional[str] = None,
|
|
210
|
-
minio_pwd: Optional[str] = None,
|
|
200
|
+
save_results: Union[bool, str] = True,
|
|
211
201
|
verbose: bool = False,
|
|
212
202
|
) -> dict:
|
|
213
203
|
"""Generates a table schema from the analysis report
|
|
214
204
|
|
|
215
205
|
Args:
|
|
216
206
|
analysis_report (dict): The analysis report from csv_detective
|
|
217
|
-
|
|
218
|
-
netloc (str): The netloc of the minio instance to upload the tableschema
|
|
219
|
-
bucket (str): The bucket to save the schema in
|
|
220
|
-
key (str): The key to save the schema in (without extension as we will append
|
|
221
|
-
version number and extension)
|
|
222
|
-
minio_user (str): The minio user
|
|
223
|
-
minio_pwd (str): The minio password
|
|
207
|
+
save_results (bool or str): whether and where to save the results
|
|
224
208
|
|
|
225
209
|
Returns:
|
|
226
210
|
"""
|
|
@@ -277,71 +261,9 @@ def generate_table_schema(
|
|
|
277
261
|
f"Created schema in {round(time() - start, 3)}s", time() - start
|
|
278
262
|
)
|
|
279
263
|
|
|
280
|
-
if
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
if not all([netloc, key, bucket, minio_user, minio_pwd]):
|
|
285
|
-
raise Exception(
|
|
286
|
-
"To save schema into minio, parameters : netloc, key, bucket, "
|
|
287
|
-
"minio_user, minio_pwd should be provided"
|
|
288
|
-
)
|
|
289
|
-
|
|
290
|
-
# Create bucket if does not exist
|
|
291
|
-
client = get_s3_client(netloc, minio_user, minio_pwd)
|
|
292
|
-
try:
|
|
293
|
-
client.head_bucket(Bucket=bucket)
|
|
294
|
-
except ClientError:
|
|
295
|
-
client.create_bucket(Bucket=bucket)
|
|
296
|
-
|
|
297
|
-
tableschema_objects = client.list_objects(Bucket=bucket, Prefix=key, Delimiter="/")
|
|
298
|
-
if "Contents" in tableschema_objects:
|
|
299
|
-
tableschema_keys = [
|
|
300
|
-
tableschema["Key"]
|
|
301
|
-
for tableschema in client.list_objects(Bucket=bucket, Prefix=key, Delimiter="/")[
|
|
302
|
-
"Contents"
|
|
303
|
-
]
|
|
304
|
-
]
|
|
305
|
-
tableschema_versions = [
|
|
306
|
-
os.path.splitext(tableschema_key)[0].split("_")[-1]
|
|
307
|
-
for tableschema_key in tableschema_keys
|
|
308
|
-
]
|
|
309
|
-
latest_version = max(tableschema_versions)
|
|
264
|
+
if save_results:
|
|
265
|
+
output_path = save_results if isinstance(save_results, str) else "schema.json"
|
|
266
|
+
with open(output_path, "w", encoding="utf8") as fp:
|
|
267
|
+
json.dump(schema, fp, indent=4, separators=(",", ": "), ensure_ascii=False, default=str)
|
|
310
268
|
|
|
311
|
-
|
|
312
|
-
with open(latest_schema_file.name, "w") as fp:
|
|
313
|
-
download_from_minio(
|
|
314
|
-
netloc,
|
|
315
|
-
bucket,
|
|
316
|
-
f"{key}_{latest_version}.json",
|
|
317
|
-
latest_schema_file.name,
|
|
318
|
-
minio_user,
|
|
319
|
-
minio_pwd,
|
|
320
|
-
)
|
|
321
|
-
# Check if files are different
|
|
322
|
-
with open(latest_schema_file.name, "r") as fp:
|
|
323
|
-
latest_schema = json.load(fp)
|
|
324
|
-
if latest_schema["fields"] != fields:
|
|
325
|
-
latest_version_split = latest_version.split(".")
|
|
326
|
-
new_version = (
|
|
327
|
-
latest_version_split[0]
|
|
328
|
-
+ "."
|
|
329
|
-
+ latest_version_split[1]
|
|
330
|
-
+ "."
|
|
331
|
-
+ str(int(latest_version_split[2]) + 1)
|
|
332
|
-
)
|
|
333
|
-
else:
|
|
334
|
-
return None
|
|
335
|
-
|
|
336
|
-
schema["version"] = new_version
|
|
337
|
-
|
|
338
|
-
tableschema_file = tempfile.NamedTemporaryFile(delete=False)
|
|
339
|
-
with open(tableschema_file.name, "w") as fp:
|
|
340
|
-
json.dump(schema, fp, indent=4)
|
|
341
|
-
|
|
342
|
-
new_version_key = f"{key}_{new_version}.json"
|
|
343
|
-
upload_to_minio(
|
|
344
|
-
netloc, bucket, new_version_key, tableschema_file.name, minio_user, minio_pwd
|
|
345
|
-
)
|
|
346
|
-
os.unlink(tableschema_file.name)
|
|
347
|
-
return {"netloc": netloc, "bucket": bucket, "key": new_version_key}
|
|
269
|
+
return schema
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: csv-detective
|
|
3
|
-
Version: 0.9.3.
|
|
3
|
+
Version: 0.9.3.dev2052
|
|
4
4
|
Summary: Detect tabular files column content
|
|
5
5
|
Author-email: Etalab <opendatateam@data.gouv.fr>
|
|
6
6
|
License: MIT
|
|
@@ -9,7 +9,6 @@ Keywords: CSV,data processing,encoding,guess,parser,tabular
|
|
|
9
9
|
Requires-Python: <3.14,>=3.9
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
|
-
Requires-Dist: boto3<2,>=1.34.0
|
|
13
12
|
Requires-Dist: dateparser<2,>=1.2.0
|
|
14
13
|
Requires-Dist: faust-cchardet==2.1.19
|
|
15
14
|
Requires-Dist: pandas<3,>=2.2.0
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
csv_detective/__init__.py,sha256=
|
|
1
|
+
csv_detective/__init__.py,sha256=qvjDQBcw1ZIpapIrdGg1IUjBJ1q5KPhQda_05fevleg,126
|
|
2
2
|
csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
|
|
3
|
-
csv_detective/explore_csv.py,sha256=
|
|
3
|
+
csv_detective/explore_csv.py,sha256=1q9ZGGLZWwDwHRancdWwSypk0b_mQwpc2LNvcXMeiKQ,5806
|
|
4
4
|
csv_detective/load_tests.py,sha256=uVKweLq3cf-yB5ZZI-m9tBVs_SWNcOw8sDJa97TOJGo,2266
|
|
5
|
-
csv_detective/s3_utils.py,sha256=z1KTVVkdurMv21o-rZu7_aluMJnSi-d5uxnQbqT2NoI,1407
|
|
6
5
|
csv_detective/utils.py,sha256=xiIO7ZDqkTm9Rnhnq6RaDdnrPIfoG0JV9AsmaOG6plA,1162
|
|
7
6
|
csv_detective/validate.py,sha256=RLHXLrRuynkdcvHUlSEbyglPvdbNYlT1Z4nQI-BdYdA,2898
|
|
8
7
|
csv_detective/detect_fields/__init__.py,sha256=ZZ7u9zsMtCqPC2xxeLp57UTCbqpKFJi6D_LO1ew15BU,1980
|
|
@@ -69,7 +68,7 @@ csv_detective/detect_fields/other/uuid/__init__.py,sha256=XFxbIsdIhRw0dtFxBXQBhi
|
|
|
69
68
|
csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
69
|
csv_detective/detect_fields/temp/date/__init__.py,sha256=JtWaK8hkzBaIUc-fu0G7lIFpWqCfraRh6l0Mo65U3b0,2155
|
|
71
70
|
csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=ZDNUcbU0ZJzaxUt0Utc1Y9dRrq4HHW9uCbcnOuz5Sfk,1247
|
|
72
|
-
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=
|
|
71
|
+
csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=U1mlQdbvEeJNZgMiYY7xv-_EIBRqzxHsKUgU3ZoF0FM,1088
|
|
73
72
|
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=-pFdIIPgaLq2_QbFJ9zwy4YIwZuC73F0A_cNDntTuvQ,512
|
|
74
73
|
csv_detective/detect_fields/temp/year/__init__.py,sha256=gHchVciZExbGZLMBcbBaDXB0IgGptkQc4RhfSOMY0Ww,194
|
|
75
74
|
csv_detective/detect_labels/__init__.py,sha256=93s93DRNeFw9fJiGp0rW3iRWZX3WOeVau2PAaF4QlPE,1777
|
|
@@ -137,11 +136,11 @@ csv_detective/detection/headers.py,sha256=y5iR4jWH5fUtAH_Zg0zxWSVG_INCHlXJFMbhPp
|
|
|
137
136
|
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
138
137
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
139
138
|
csv_detective/detection/variables.py,sha256=wfsA_MOk14TPMOY7gkvpTGpo9-USzMnFaAou3MPHqxc,3536
|
|
140
|
-
csv_detective/output/__init__.py,sha256=
|
|
139
|
+
csv_detective/output/__init__.py,sha256=Vo7hK5fq6hfK5019K4fEnv-LcfeRuNPQubQFkZAMszs,1933
|
|
141
140
|
csv_detective/output/dataframe.py,sha256=pjxvpzIWVUW9_xvT3JjoPnOIVUUHnzL7kZo1xQdMDxQ,2139
|
|
142
141
|
csv_detective/output/example.py,sha256=XrnPS_uC0cICn7tgnLWNctpUbnPzl7fIMzNTzJEWGJc,8655
|
|
143
142
|
csv_detective/output/profile.py,sha256=thckCcfy9cES5yYNW6TDGV82gP1OFWJuLhInT1g7JpI,2814
|
|
144
|
-
csv_detective/output/schema.py,sha256=
|
|
143
|
+
csv_detective/output/schema.py,sha256=YUt9c33mzP2fHoj-NwW7kBcANyrkU3lIBWvXRbugtyU,10485
|
|
145
144
|
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
146
145
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
146
|
csv_detective/parsing/columns.py,sha256=HRHJBJ1gftuheegJHzhQmg-u83pVAXXuQ9GKR34mKgk,5696
|
|
@@ -150,19 +149,18 @@ csv_detective/parsing/csv.py,sha256=fJkjKvyk7InkNnYKtmivyi48mmcwvrha7gvZ5J4-86A,
|
|
|
150
149
|
csv_detective/parsing/excel.py,sha256=sKD5PRN1TlzPPOKFnZ3VRb0r1yIjPLlpxVWmZQeLYFk,7027
|
|
151
150
|
csv_detective/parsing/load.py,sha256=C3M8nvgWenOb8aDFi5dpDGCoAw9EBqr4EB63zbz2M14,3699
|
|
152
151
|
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
153
|
-
csv_detective-0.9.3.
|
|
152
|
+
csv_detective-0.9.3.dev2052.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
154
153
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
154
|
tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
|
|
156
|
-
tests/test_fields.py,sha256=
|
|
155
|
+
tests/test_fields.py,sha256=IjUvDz155fl0XRw2ENEy5j_auyB3LTidXFi1bEWOCHg,13725
|
|
157
156
|
tests/test_file.py,sha256=QEBv69P0bAKWBzhQ3KKOR1Z1RQSf5CVEilqBojwP2Yc,10791
|
|
158
157
|
tests/test_labels.py,sha256=Y0XlOpztCyV65pk7iAS_nMMfdysoBujlBmz10vHul9A,469
|
|
159
158
|
tests/test_structure.py,sha256=GRDYKy0UcdqlN4qglzsRC0puFj5cb-SVvONjvcPvtAA,1400
|
|
160
159
|
tests/test_validation.py,sha256=ie-Xf0vk6-M6GQq-x7kY5yse1EmXfxQkbaV7fR3fvYo,3308
|
|
161
160
|
venv/bin/activate_this.py,sha256=NRy3waFmwW1pOaNUp33wNN0vD1Kzkd-zXX-Sgl4EiVI,1286
|
|
162
|
-
venv/bin/jp.py,sha256=7z7dvRg0M7HzpZG4ssQID7nScjvQx7bcYTxJWDOrS6E,1717
|
|
163
161
|
venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
|
|
164
|
-
csv_detective-0.9.3.
|
|
165
|
-
csv_detective-0.9.3.
|
|
166
|
-
csv_detective-0.9.3.
|
|
167
|
-
csv_detective-0.9.3.
|
|
168
|
-
csv_detective-0.9.3.
|
|
162
|
+
csv_detective-0.9.3.dev2052.dist-info/METADATA,sha256=Kn9PGSyDQEZfC9cFHyAB_nyyUupDB45IGnz2Ec2RktE,9735
|
|
163
|
+
csv_detective-0.9.3.dev2052.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
164
|
+
csv_detective-0.9.3.dev2052.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
165
|
+
csv_detective-0.9.3.dev2052.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
|
|
166
|
+
csv_detective-0.9.3.dev2052.dist-info/RECORD,,
|
tests/test_fields.py
CHANGED
|
@@ -363,11 +363,18 @@ fields = {
|
|
|
363
363
|
False: ["2021-06-22T30:20:10", "Sun, 06 Nov 1994 08:49:37 GMT", "2021-06-44 10:20:10"],
|
|
364
364
|
},
|
|
365
365
|
datetime_naive: {
|
|
366
|
-
True: [
|
|
366
|
+
True: [
|
|
367
|
+
"2021-06-22 10:20:10",
|
|
368
|
+
"1999-12-01T00:00:00Z",
|
|
369
|
+
"2030/06-22 00:00:00",
|
|
370
|
+
"2030/06/22 00:00:00.0028",
|
|
371
|
+
],
|
|
367
372
|
False: [
|
|
368
373
|
"2021-06-22T30:20:10",
|
|
369
374
|
"Sun, 06 Nov 1994 08:49:37 GMT",
|
|
370
375
|
"2021-06-44 10:20:10+02:00",
|
|
376
|
+
"2021-06-44",
|
|
377
|
+
"15 décembre 1985",
|
|
371
378
|
],
|
|
372
379
|
},
|
|
373
380
|
datetime_rfc822: {
|
csv_detective/s3_utils.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
|
|
3
|
-
import boto3
|
|
4
|
-
from botocore.client import Config
|
|
5
|
-
from botocore.exceptions import ClientError
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def get_minio_url(netloc: str, bucket: str, key: str) -> str:
|
|
9
|
-
"""Returns location of given resource in minio once it is saved"""
|
|
10
|
-
return netloc + "/" + bucket + "/" + key
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def get_s3_client(url: str, minio_user: str, minio_pwd: str) -> boto3.client:
|
|
14
|
-
return boto3.client(
|
|
15
|
-
"s3",
|
|
16
|
-
endpoint_url=url,
|
|
17
|
-
aws_access_key_id=minio_user,
|
|
18
|
-
aws_secret_access_key=minio_pwd,
|
|
19
|
-
config=Config(signature_version="s3v4"),
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def download_from_minio(
|
|
24
|
-
netloc: str, bucket: str, key: str, filepath: str, minio_user: str, minio_pwd: str
|
|
25
|
-
) -> None:
|
|
26
|
-
logging.info("Downloading from minio")
|
|
27
|
-
s3 = get_s3_client(netloc, minio_user, minio_pwd)
|
|
28
|
-
try:
|
|
29
|
-
s3.download_file(bucket, key, filepath)
|
|
30
|
-
logging.info(f"Resource downloaded from minio at {get_minio_url(netloc, bucket, key)}")
|
|
31
|
-
except ClientError as e:
|
|
32
|
-
logging.error(e)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def upload_to_minio(
|
|
36
|
-
netloc: str, bucket: str, key: str, filepath: str, minio_user: str, minio_pwd: str
|
|
37
|
-
) -> None:
|
|
38
|
-
logging.info("Saving to minio")
|
|
39
|
-
s3 = get_s3_client(netloc, minio_user, minio_pwd)
|
|
40
|
-
try:
|
|
41
|
-
s3.upload_file(filepath, bucket, key)
|
|
42
|
-
logging.info(f"Resource saved into minio at {get_minio_url(netloc, bucket, key)}")
|
|
43
|
-
except ClientError as e:
|
|
44
|
-
logging.error(e)
|
venv/bin/jp.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
#!/home/circleci/project/venv/bin/python
|
|
2
|
-
|
|
3
|
-
import sys
|
|
4
|
-
import json
|
|
5
|
-
import argparse
|
|
6
|
-
from pprint import pformat
|
|
7
|
-
|
|
8
|
-
import jmespath
|
|
9
|
-
from jmespath import exceptions
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def main():
|
|
13
|
-
parser = argparse.ArgumentParser()
|
|
14
|
-
parser.add_argument('expression')
|
|
15
|
-
parser.add_argument('-f', '--filename',
|
|
16
|
-
help=('The filename containing the input data. '
|
|
17
|
-
'If a filename is not given then data is '
|
|
18
|
-
'read from stdin.'))
|
|
19
|
-
parser.add_argument('--ast', action='store_true',
|
|
20
|
-
help=('Pretty print the AST, do not search the data.'))
|
|
21
|
-
args = parser.parse_args()
|
|
22
|
-
expression = args.expression
|
|
23
|
-
if args.ast:
|
|
24
|
-
# Only print the AST
|
|
25
|
-
expression = jmespath.compile(args.expression)
|
|
26
|
-
sys.stdout.write(pformat(expression.parsed))
|
|
27
|
-
sys.stdout.write('\n')
|
|
28
|
-
return 0
|
|
29
|
-
if args.filename:
|
|
30
|
-
with open(args.filename, 'r') as f:
|
|
31
|
-
data = json.load(f)
|
|
32
|
-
else:
|
|
33
|
-
data = sys.stdin.read()
|
|
34
|
-
data = json.loads(data)
|
|
35
|
-
try:
|
|
36
|
-
sys.stdout.write(json.dumps(
|
|
37
|
-
jmespath.search(expression, data), indent=4, ensure_ascii=False))
|
|
38
|
-
sys.stdout.write('\n')
|
|
39
|
-
except exceptions.ArityError as e:
|
|
40
|
-
sys.stderr.write("invalid-arity: %s\n" % e)
|
|
41
|
-
return 1
|
|
42
|
-
except exceptions.JMESPathTypeError as e:
|
|
43
|
-
sys.stderr.write("invalid-type: %s\n" % e)
|
|
44
|
-
return 1
|
|
45
|
-
except exceptions.UnknownFunctionError as e:
|
|
46
|
-
sys.stderr.write("unknown-function: %s\n" % e)
|
|
47
|
-
return 1
|
|
48
|
-
except exceptions.ParseError as e:
|
|
49
|
-
sys.stderr.write("syntax-error: %s\n" % e)
|
|
50
|
-
return 1
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if __name__ == '__main__':
|
|
54
|
-
sys.exit(main())
|
|
File without changes
|
{csv_detective-0.9.3.dev2010.dist-info → csv_detective-0.9.3.dev2052.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.9.3.dev2010.dist-info → csv_detective-0.9.3.dev2052.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{csv_detective-0.9.3.dev2010.dist-info → csv_detective-0.9.3.dev2052.dist-info}/top_level.txt
RENAMED
|
File without changes
|