csv-detective 0.7.5.dev1197__py3-none-any.whl → 0.7.5.dev1228__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. csv_detective/__init__.py +1 -1
  2. csv_detective/detect_fields/FR/geo/adresse/__init__.py +1 -1
  3. csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +1 -1
  4. csv_detective/detect_fields/FR/other/csp_insee/__init__.py +1 -1
  5. csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +1 -1
  6. csv_detective/detect_fields/FR/other/sexe/__init__.py +1 -1
  7. csv_detective/detect_fields/other/float/__init__.py +1 -1
  8. csv_detective/detect_labels/FR/geo/adresse/__init__.py +1 -1
  9. csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +1 -1
  10. csv_detective/detect_labels/FR/geo/code_departement/__init__.py +1 -1
  11. csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +1 -1
  12. csv_detective/detect_labels/FR/geo/code_postal/__init__.py +1 -1
  13. csv_detective/detect_labels/FR/geo/code_region/__init__.py +1 -1
  14. csv_detective/detect_labels/FR/geo/commune/__init__.py +1 -1
  15. csv_detective/detect_labels/FR/geo/departement/__init__.py +1 -1
  16. csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +1 -1
  17. csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +1 -1
  18. csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +1 -1
  19. csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +1 -1
  20. csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +1 -1
  21. csv_detective/detect_labels/FR/geo/pays/__init__.py +1 -1
  22. csv_detective/detect_labels/FR/geo/region/__init__.py +1 -1
  23. csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +1 -1
  24. csv_detective/detect_labels/FR/other/code_rna/__init__.py +1 -1
  25. csv_detective/detect_labels/FR/other/code_waldec/__init__.py +1 -1
  26. csv_detective/detect_labels/FR/other/csp_insee/__init__.py +1 -1
  27. csv_detective/detect_labels/FR/other/date_fr/__init__.py +1 -1
  28. csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +1 -1
  29. csv_detective/detect_labels/FR/other/sexe/__init__.py +1 -1
  30. csv_detective/detect_labels/FR/other/siren/__init__.py +1 -1
  31. csv_detective/detect_labels/FR/other/siret/__init__.py +1 -1
  32. csv_detective/detect_labels/FR/other/tel_fr/__init__.py +1 -1
  33. csv_detective/detect_labels/FR/other/uai/__init__.py +1 -1
  34. csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +1 -1
  35. csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +1 -1
  36. csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +1 -1
  37. csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +1 -1
  38. csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +1 -1
  39. csv_detective/detect_labels/geo/json_geojson/__init__.py +1 -1
  40. csv_detective/detect_labels/geo/latitude_wgs/__init__.py +1 -1
  41. csv_detective/detect_labels/geo/latlon_wgs/__init__.py +1 -1
  42. csv_detective/detect_labels/geo/longitude_wgs/__init__.py +1 -1
  43. csv_detective/detect_labels/other/booleen/__init__.py +1 -1
  44. csv_detective/detect_labels/other/email/__init__.py +1 -1
  45. csv_detective/detect_labels/other/float/__init__.py +1 -1
  46. csv_detective/detect_labels/other/int/__init__.py +1 -1
  47. csv_detective/detect_labels/other/mongo_object_id/__init__.py +1 -1
  48. csv_detective/detect_labels/other/twitter/__init__.py +1 -1
  49. csv_detective/detect_labels/other/url/__init__.py +1 -1
  50. csv_detective/detect_labels/other/uuid/__init__.py +1 -1
  51. csv_detective/detect_labels/temp/date/__init__.py +1 -1
  52. csv_detective/detect_labels/temp/datetime_iso/__init__.py +1 -1
  53. csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +1 -1
  54. csv_detective/detect_labels/temp/year/__init__.py +1 -1
  55. csv_detective/detection/columns.py +89 -0
  56. csv_detective/detection/encoding.py +27 -0
  57. csv_detective/detection/engine.py +46 -0
  58. csv_detective/detection/headers.py +32 -0
  59. csv_detective/detection/rows.py +18 -0
  60. csv_detective/detection/separator.py +44 -0
  61. csv_detective/detection/variables.py +98 -0
  62. csv_detective/explore_csv.py +40 -124
  63. csv_detective/output/dataframe.py +55 -0
  64. csv_detective/{create_example.py → output/example.py} +10 -9
  65. csv_detective/output/profile.py +87 -0
  66. csv_detective/{schema_generation.py → output/schema.py} +344 -343
  67. csv_detective/output/utils.py +51 -0
  68. csv_detective/parsing/columns.py +141 -0
  69. csv_detective/parsing/compression.py +11 -0
  70. csv_detective/parsing/csv.py +55 -0
  71. csv_detective/parsing/excel.py +169 -0
  72. csv_detective/parsing/load.py +97 -0
  73. csv_detective/utils.py +10 -236
  74. {csv_detective-0.7.5.dev1197.data → csv_detective-0.7.5.dev1228.data}/data/share/csv_detective/CHANGELOG.md +3 -0
  75. {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/METADATA +1 -1
  76. {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/RECORD +85 -71
  77. tests/test_fields.py +8 -7
  78. tests/test_file.py +15 -14
  79. csv_detective/detection.py +0 -633
  80. csv_detective/{process_text.py → parsing/text.py} +0 -0
  81. {csv_detective-0.7.5.dev1197.data → csv_detective-0.7.5.dev1228.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
  82. {csv_detective-0.7.5.dev1197.data → csv_detective-0.7.5.dev1228.data}/data/share/csv_detective/README.md +0 -0
  83. {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/WHEEL +0 -0
  84. {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/entry_points.txt +0 -0
  85. {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
  86. {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/top_level.txt +0 -0
tests/test_file.py CHANGED
@@ -1,12 +1,13 @@
1
- from csv_detective import routine
1
+ import pandas as pd
2
2
  import pytest
3
3
  import responses
4
- import pandas as pd
4
+
5
+ from csv_detective import routine
5
6
 
6
7
 
7
8
  def test_columns_output_on_file():
8
9
  output = routine(
9
- csv_file_path="tests/a_test_file.csv",
10
+ file_path="tests/data/a_test_file.csv",
10
11
  num_rows=-1,
11
12
  output_profile=False,
12
13
  save_results=False,
@@ -40,7 +41,7 @@ def test_columns_output_on_file():
40
41
 
41
42
  def test_profile_output_on_file():
42
43
  output = routine(
43
- csv_file_path="tests/a_test_file.csv",
44
+ file_path="tests/data/a_test_file.csv",
44
45
  num_rows=-1,
45
46
  output_profile=True,
46
47
  save_results=False,
@@ -72,7 +73,7 @@ def test_profile_output_on_file():
72
73
  def test_profile_with_num_rows():
73
74
  with pytest.raises(ValueError):
74
75
  routine(
75
- csv_file_path="tests/a_test_file.csv",
76
+ file_path="tests/data/a_test_file.csv",
76
77
  num_rows=50,
77
78
  output_profile=True,
78
79
  save_results=False,
@@ -85,7 +86,7 @@ def test_exception_different_number_of_columns():
85
86
  """
86
87
  with pytest.raises(ValueError):
87
88
  routine(
88
- csv_file_path="tests/c_test_file.csv",
89
+ file_path="tests/data/c_test_file.csv",
89
90
  num_rows=-1,
90
91
  output_profile=True,
91
92
  save_results=False,
@@ -94,7 +95,7 @@ def test_exception_different_number_of_columns():
94
95
 
95
96
  def test_code_dep_reg_on_file():
96
97
  output = routine(
97
- csv_file_path="tests/b_test_file.csv",
98
+ file_path="tests/data/b_test_file.csv",
98
99
  num_rows=-1,
99
100
  output_profile=False,
100
101
  save_results=False,
@@ -106,7 +107,7 @@ def test_code_dep_reg_on_file():
106
107
 
107
108
  def test_schema_on_file():
108
109
  output = routine(
109
- csv_file_path="tests/b_test_file.csv",
110
+ file_path="tests/data/b_test_file.csv",
110
111
  num_rows=-1,
111
112
  output_schema=True,
112
113
  save_results=False,
@@ -149,7 +150,7 @@ params_others = [
149
150
  def test_non_csv_files(params):
150
151
  file_name, checks = params
151
152
  _ = routine(
152
- csv_file_path=f"tests/{file_name}",
153
+ file_path=f"tests/data/{file_name}",
153
154
  num_rows=-1,
154
155
  output_profile=False,
155
156
  save_results=False,
@@ -181,11 +182,11 @@ def test_urls(mocked_responses, params):
181
182
  url = f"http://example.com/{file_name}"
182
183
  mocked_responses.get(
183
184
  url,
184
- body=open(f"tests/{file_name}", "rb").read(),
185
+ body=open(f"tests/data/{file_name}", "rb").read(),
185
186
  status=200,
186
187
  )
187
188
  _ = routine(
188
- csv_file_path=url,
189
+ file_path=url,
189
190
  num_rows=-1,
190
191
  output_profile=False,
191
192
  save_results=False,
@@ -211,7 +212,7 @@ def test_nan_values(expected_type):
211
212
  # if skipping NaN, the column contains only ints
212
213
  skipna, expected_type = expected_type
213
214
  output = routine(
214
- csv_file_path="tests/b_test_file.csv",
215
+ file_path="tests/data/b_test_file.csv",
215
216
  num_rows=-1,
216
217
  save_results=False,
217
218
  skipna=skipna,
@@ -221,7 +222,7 @@ def test_nan_values(expected_type):
221
222
 
222
223
  def test_output_df():
223
224
  output, df = routine(
224
- csv_file_path="tests/b_test_file.csv",
225
+ file_path="tests/data/b_test_file.csv",
225
226
  num_rows=-1,
226
227
  output_profile=False,
227
228
  save_results=False,
@@ -249,7 +250,7 @@ def test_cast_json(mocked_responses, cast_json):
249
250
  status=200,
250
251
  )
251
252
  analysis, df = routine(
252
- csv_file_path='http://example.com/test.csv',
253
+ file_path='http://example.com/test.csv',
253
254
  num_rows=-1,
254
255
  output_profile=False,
255
256
  save_results=False,