csv-detective 0.9.3.dev2241__py3-none-any.whl → 0.9.3.dev2319__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. csv_detective/detection/formats.py +12 -15
  2. csv_detective/detection/headers.py +6 -8
  3. csv_detective/explore_csv.py +28 -9
  4. csv_detective/format.py +67 -0
  5. csv_detective/formats/__init__.py +9 -0
  6. csv_detective/{detect_fields/FR/geo/adresse/__init__.py → formats/adresse.py} +116 -100
  7. csv_detective/{detect_fields/other/booleen/__init__.py → formats/booleen.py} +35 -27
  8. csv_detective/formats/code_commune_insee.py +26 -0
  9. csv_detective/{detect_fields/FR/other/code_csp_insee/__init__.py → formats/code_csp_insee.py} +36 -29
  10. csv_detective/{detect_fields/FR/geo/code_departement/__init__.py → formats/code_departement.py} +29 -15
  11. csv_detective/formats/code_fantoir.py +21 -0
  12. csv_detective/{detect_fields/FR/other/code_import/__init__.py → formats/code_import.py} +17 -9
  13. csv_detective/formats/code_postal.py +25 -0
  14. csv_detective/formats/code_region.py +22 -0
  15. csv_detective/formats/code_rna.py +29 -0
  16. csv_detective/formats/code_waldec.py +17 -0
  17. csv_detective/{detect_fields/FR/geo/commune/__init__.py → formats/commune.py} +27 -16
  18. csv_detective/{detect_fields/FR/other/csp_insee/__init__.py → formats/csp_insee.py} +31 -19
  19. csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
  20. csv_detective/{detect_fields/temp/date/__init__.py → formats/date.py} +99 -62
  21. csv_detective/formats/date_fr.py +22 -0
  22. csv_detective/{detect_fields/temp/datetime_aware/__init__.py → formats/datetime_aware.py} +18 -7
  23. csv_detective/{detect_fields/temp/datetime_naive/__init__.py → formats/datetime_naive.py} +21 -2
  24. csv_detective/{detect_fields/temp/datetime_rfc822/__init__.py → formats/datetime_rfc822.py} +24 -18
  25. csv_detective/formats/departement.py +37 -0
  26. csv_detective/formats/email.py +28 -0
  27. csv_detective/{detect_fields/other/float/__init__.py → formats/float.py} +29 -21
  28. csv_detective/formats/geojson.py +36 -0
  29. csv_detective/{detect_fields/FR/other/insee_ape700/__init__.py → formats/insee_ape700.py} +31 -19
  30. csv_detective/{detect_fields/FR/geo/insee_canton/__init__.py → formats/insee_canton.py} +28 -15
  31. csv_detective/{detect_fields/other/int/__init__.py → formats/int.py} +23 -16
  32. csv_detective/formats/iso_country_code_alpha2.py +30 -0
  33. csv_detective/formats/iso_country_code_alpha3.py +30 -0
  34. csv_detective/formats/iso_country_code_numeric.py +31 -0
  35. csv_detective/{detect_fields/FR/temp/jour_de_la_semaine/__init__.py → formats/jour_de_la_semaine.py} +41 -25
  36. csv_detective/{detect_fields/other/json/__init__.py → formats/json.py} +20 -14
  37. csv_detective/formats/latitude_l93.py +48 -0
  38. csv_detective/formats/latitude_wgs.py +42 -0
  39. csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
  40. csv_detective/formats/latlon_wgs.py +53 -0
  41. csv_detective/formats/longitude_l93.py +39 -0
  42. csv_detective/formats/longitude_wgs.py +32 -0
  43. csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
  44. csv_detective/formats/lonlat_wgs.py +36 -0
  45. csv_detective/{detect_fields/FR/temp/mois_de_annee/__init__.py → formats/mois_de_lannee.py} +48 -39
  46. csv_detective/formats/money.py +18 -0
  47. csv_detective/formats/mongo_object_id.py +14 -0
  48. csv_detective/formats/pays.py +35 -0
  49. csv_detective/formats/percent.py +16 -0
  50. csv_detective/{detect_fields/FR/geo/region/__init__.py → formats/region.py} +70 -50
  51. csv_detective/formats/sexe.py +17 -0
  52. csv_detective/{detect_fields/FR/other/siren/__init__.py → formats/siren.py} +37 -20
  53. csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -31
  54. csv_detective/formats/tel_fr.py +36 -0
  55. csv_detective/formats/uai.py +36 -0
  56. csv_detective/formats/url.py +45 -0
  57. csv_detective/formats/username.py +14 -0
  58. csv_detective/formats/uuid.py +16 -0
  59. csv_detective/formats/year.py +28 -0
  60. csv_detective/output/__init__.py +3 -4
  61. csv_detective/output/dataframe.py +3 -3
  62. csv_detective/output/profile.py +2 -3
  63. csv_detective/output/schema.py +2 -2
  64. csv_detective/parsing/columns.py +35 -50
  65. csv_detective/parsing/csv.py +2 -2
  66. csv_detective/parsing/load.py +10 -11
  67. csv_detective/validate.py +9 -4
  68. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/METADATA +6 -5
  69. csv_detective-0.9.3.dev2319.dist-info/RECORD +102 -0
  70. tests/test_fields.py +39 -364
  71. tests/test_file.py +1 -1
  72. tests/test_labels.py +5 -3
  73. tests/test_structure.py +40 -36
  74. csv_detective/detect_fields/FR/__init__.py +0 -0
  75. csv_detective/detect_fields/FR/geo/__init__.py +0 -0
  76. csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -9
  77. csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -9
  78. csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -9
  79. csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -10
  80. csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -16
  81. csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -19
  82. csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
  83. csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -19
  84. csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
  85. csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -16
  86. csv_detective/detect_fields/FR/other/__init__.py +0 -0
  87. csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
  88. csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -9
  89. csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -9
  90. csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
  91. csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -11
  92. csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -17
  93. csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
  94. csv_detective/detect_fields/FR/temp/__init__.py +0 -0
  95. csv_detective/detect_fields/__init__.py +0 -112
  96. csv_detective/detect_fields/geo/__init__.py +0 -0
  97. csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
  98. csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
  99. csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
  100. csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -18
  101. csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
  102. csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -16
  103. csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
  104. csv_detective/detect_fields/geo/lonlat_wgs/__init__.py +0 -16
  105. csv_detective/detect_fields/other/__init__.py +0 -0
  106. csv_detective/detect_fields/other/email/__init__.py +0 -10
  107. csv_detective/detect_fields/other/money/__init__.py +0 -11
  108. csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
  109. csv_detective/detect_fields/other/percent/__init__.py +0 -9
  110. csv_detective/detect_fields/other/twitter/__init__.py +0 -8
  111. csv_detective/detect_fields/other/url/__init__.py +0 -14
  112. csv_detective/detect_fields/other/uuid/__init__.py +0 -10
  113. csv_detective/detect_fields/temp/__init__.py +0 -0
  114. csv_detective/detect_fields/temp/year/__init__.py +0 -10
  115. csv_detective/detect_labels/FR/__init__.py +0 -0
  116. csv_detective/detect_labels/FR/geo/__init__.py +0 -0
  117. csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -15
  118. csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -17
  119. csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -15
  120. csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -12
  121. csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -16
  122. csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -14
  123. csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -12
  124. csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -22
  125. csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -13
  126. csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -30
  127. csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -30
  128. csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -21
  129. csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -21
  130. csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -20
  131. csv_detective/detect_labels/FR/geo/region/__init__.py +0 -20
  132. csv_detective/detect_labels/FR/other/__init__.py +0 -0
  133. csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -8
  134. csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -13
  135. csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -8
  136. csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -13
  137. csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -9
  138. csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -15
  139. csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -8
  140. csv_detective/detect_labels/FR/other/siren/__init__.py +0 -17
  141. csv_detective/detect_labels/FR/other/siret/__init__.py +0 -16
  142. csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -20
  143. csv_detective/detect_labels/FR/other/uai/__init__.py +0 -25
  144. csv_detective/detect_labels/FR/temp/__init__.py +0 -0
  145. csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -16
  146. csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -8
  147. csv_detective/detect_labels/__init__.py +0 -94
  148. csv_detective/detect_labels/geo/__init__.py +0 -0
  149. csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -16
  150. csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -16
  151. csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -16
  152. csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -17
  153. csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -30
  154. csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -39
  155. csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -21
  156. csv_detective/detect_labels/geo/lonlat_wgs/__init__.py +0 -23
  157. csv_detective/detect_labels/other/__init__.py +0 -0
  158. csv_detective/detect_labels/other/booleen/__init__.py +0 -8
  159. csv_detective/detect_labels/other/email/__init__.py +0 -20
  160. csv_detective/detect_labels/other/float/__init__.py +0 -8
  161. csv_detective/detect_labels/other/int/__init__.py +0 -8
  162. csv_detective/detect_labels/other/money/__init__.py +0 -8
  163. csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -8
  164. csv_detective/detect_labels/other/twitter/__init__.py +0 -8
  165. csv_detective/detect_labels/other/url/__init__.py +0 -23
  166. csv_detective/detect_labels/other/uuid/__init__.py +0 -8
  167. csv_detective/detect_labels/temp/__init__.py +0 -0
  168. csv_detective/detect_labels/temp/date/__init__.py +0 -28
  169. csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -19
  170. csv_detective/detect_labels/temp/year/__init__.py +0 -19
  171. csv_detective/load_tests.py +0 -59
  172. csv_detective-0.9.3.dev2241.dist-info/RECORD +0 -166
  173. /csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
  174. /csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
  175. /csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
  176. /csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0
  177. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/WHEEL +0 -0
  178. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/entry_points.txt +0 -0
  179. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/licenses/LICENSE +0 -0
  180. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,28 @@
1
+ proportion = 1
2
+ tags = ["temp"]
3
+ labels = [
4
+ "year",
5
+ "annee",
6
+ "annee depot",
7
+ "an nais",
8
+ "exercice",
9
+ "data year",
10
+ "annee de publication",
11
+ "exercice comptable",
12
+ "annee de naissance",
13
+ "annee ouverture",
14
+ ]
15
+
16
+
17
+ def _is(val):
18
+ try:
19
+ val = int(val)
20
+ except ValueError:
21
+ return False
22
+ return (1800 <= val) and (val <= 2100)
23
+
24
+
25
+ _test_values = {
26
+ True: ["2015"],
27
+ False: ["20166", "123"],
28
+ }
@@ -4,12 +4,11 @@ from typing import Iterator
4
4
 
5
5
  import pandas as pd
6
6
 
7
+ from csv_detective.output.dataframe import cast_df_chunks
8
+ from csv_detective.output.profile import create_profile
9
+ from csv_detective.output.schema import generate_table_schema
7
10
  from csv_detective.utils import is_url
8
11
 
9
- from .dataframe import cast_df_chunks
10
- from .profile import create_profile
11
- from .schema import generate_table_schema
12
-
13
12
 
14
13
  def generate_output(
15
14
  table: pd.DataFrame,
@@ -5,9 +5,9 @@ from typing import Iterator
5
5
 
6
6
  import pandas as pd
7
7
 
8
- from csv_detective.detect_fields.other.booleen import bool_casting
9
- from csv_detective.detect_fields.other.float import float_casting
10
- from csv_detective.detect_fields.temp.date import date_casting
8
+ from csv_detective.formats.booleen import bool_casting
9
+ from csv_detective.formats.date import date_casting
10
+ from csv_detective.formats.float import float_casting
11
11
  from csv_detective.parsing.csv import CHUNK_SIZE
12
12
  from csv_detective.utils import display_logs_depending_process_time
13
13
 
@@ -1,12 +1,11 @@
1
1
  import logging
2
2
  from collections import defaultdict
3
3
  from time import time
4
- from typing import Optional
5
4
 
6
5
  import numpy as np
7
6
  import pandas as pd
8
7
 
9
- from csv_detective.detect_fields.other.float import float_casting
8
+ from csv_detective.formats.float import float_casting
10
9
  from csv_detective.utils import cast_prevent_nan, display_logs_depending_process_time
11
10
 
12
11
 
@@ -17,7 +16,7 @@ def create_profile(
17
16
  limited_output: bool = True,
18
17
  cast_json: bool = True,
19
18
  verbose: bool = False,
20
- _col_values: Optional[dict[str, pd.Series]] = None,
19
+ _col_values: dict[str, pd.Series] | None = None,
21
20
  ) -> dict:
22
21
  if verbose:
23
22
  start = time()
@@ -103,7 +103,7 @@ def get_validata_type(format: str) -> str:
103
103
  "datetime_aware": "datetime",
104
104
  "datetime_naive": "datetime",
105
105
  "datetime_rfc822": "datetime",
106
- "json_geojson": "geojson",
106
+ "geojson": "geojson",
107
107
  "latitude": "number",
108
108
  "latitude_l93": "number",
109
109
  "latitude_wgs": "number",
@@ -150,7 +150,7 @@ def get_example(format: str) -> str:
150
150
  "iso_country_code_alpha3": "FRA",
151
151
  "iso_country_code_numeric": 250,
152
152
  "jour_de_la_semaine": "lundi",
153
- "json_geojson": '{"type": "Point", "coordinates": [0, 0]}',
153
+ "geojson": '{"type": "Point", "coordinates": [0, 0]}',
154
154
  "latitude": 42.42,
155
155
  "latitude_l93": 6037008,
156
156
  "latitude_wgs": 42.42,
@@ -5,6 +5,7 @@ from typing import Callable
5
5
  import pandas as pd
6
6
  from more_itertools import peekable
7
7
 
8
+ from csv_detective.format import Format
8
9
  from csv_detective.parsing.csv import CHUNK_SIZE
9
10
  from csv_detective.utils import display_logs_depending_process_time
10
11
 
@@ -14,15 +15,13 @@ MAX_NUMBER_CATEGORICAL_VALUES = 25
14
15
 
15
16
  def test_col_val(
16
17
  serie: pd.Series,
17
- test_func: Callable,
18
- proportion: float = 0.9,
18
+ format: Format,
19
19
  skipna: bool = True,
20
20
  limited_output: bool = False,
21
21
  verbose: bool = False,
22
22
  ) -> float:
23
23
  """Tests values of the serie using the format's test function (format.func).
24
- - skipna : if True indicates that NaNs are not counted as False
25
- - proportion : indicates the proportion of values that have to pass the test
24
+ - skipna : if True indicates that NaNs are considered True
26
25
  for the serie to be detected as a certain format
27
26
  """
28
27
  if verbose:
@@ -34,28 +33,28 @@ def test_col_val(
34
33
 
35
34
  try:
36
35
  if skipna:
37
- serie = serie[serie.notnull()]
36
+ serie = serie.loc[serie.notnull()]
38
37
  ser_len = len(serie)
39
38
  if ser_len == 0:
40
39
  # being here means the whole column is NaN, so if skipna it's a pass
41
40
  return 1.0 if skipna else 0.0
42
41
  if not limited_output:
43
- result = apply_test_func(serie, test_func, ser_len).sum() / ser_len
44
- return result if result >= proportion else 0.0
42
+ result = apply_test_func(serie, format.func, ser_len).sum() / ser_len
43
+ return result if result >= format.proportion else 0.0
45
44
  else:
46
- if proportion == 1:
45
+ if format.proportion == 1:
47
46
  # early stops (1 then 5 rows) to not waste time if directly unsuccessful
48
47
  for _range in [
49
48
  min(1, ser_len),
50
49
  min(5, ser_len),
51
50
  ser_len,
52
51
  ]:
53
- if not all(apply_test_func(serie, test_func, _range)):
52
+ if not all(apply_test_func(serie, format.func, _range)):
54
53
  return 0.0
55
54
  return 1.0
56
55
  else:
57
- result = apply_test_func(serie, test_func, ser_len).sum() / ser_len
58
- return result if result >= proportion else 0.0
56
+ result = apply_test_func(serie, format.func, ser_len).sum() / ser_len
57
+ return result if result >= format.proportion else 0.0
59
58
  finally:
60
59
  if verbose and time() - start > 3:
61
60
  display_logs_depending_process_time(
@@ -64,42 +63,27 @@ def test_col_val(
64
63
  )
65
64
 
66
65
 
67
- def test_col_label(
68
- label: str, test_func: Callable, proportion: float = 1, limited_output: bool = False
69
- ):
70
- """Tests label (from header) using test_func.
71
- - proportion : indicates the minimum score to pass the test for the serie
72
- to be detected as a certain format
73
- """
74
- if not limited_output:
75
- return test_func(label)
76
- else:
77
- result = test_func(label)
78
- return result if result >= proportion else 0
79
-
80
-
81
66
  def test_col(
82
67
  table: pd.DataFrame,
83
- all_tests: dict[str, dict],
68
+ formats: dict[str, Format],
84
69
  limited_output: bool,
85
70
  skipna: bool = True,
86
71
  verbose: bool = False,
87
72
  ):
88
73
  if verbose:
89
74
  start = time()
90
- logging.info("Testing columns to get types")
75
+ logging.info("Testing columns to get formats")
91
76
  return_table = pd.DataFrame(columns=table.columns)
92
- for idx, (name, attributes) in enumerate(all_tests.items()):
77
+ for idx, (label, format) in enumerate(formats.items()):
93
78
  if verbose:
94
79
  start_type = time()
95
- logging.info(f"\t- Starting with type '{name}'")
80
+ logging.info(f"\t- Starting with format '{label}'")
96
81
  # improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
97
82
  # => the following needs to change, "apply" means all columns are tested for one type at once
98
- return_table.loc[name] = table.apply(
83
+ return_table.loc[label] = table.apply(
99
84
  lambda serie: test_col_val(
100
85
  serie,
101
- attributes["func"],
102
- attributes["prop"],
86
+ format,
103
87
  skipna=skipna,
104
88
  limited_output=limited_output,
105
89
  verbose=verbose,
@@ -107,7 +91,7 @@ def test_col(
107
91
  )
108
92
  if verbose:
109
93
  display_logs_depending_process_time(
110
- f'\t> Done with type "{name}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(all_tests)})',
94
+ f'\t> Done with type "{label}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(formats)})',
111
95
  time() - start_type,
112
96
  )
113
97
  if verbose:
@@ -118,23 +102,20 @@ def test_col(
118
102
 
119
103
 
120
104
  def test_label(
121
- columns: list[str], all_tests: dict[str, dict], limited_output: bool, verbose: bool = False
105
+ columns: list[str], formats: dict[str, Format], limited_output: bool, verbose: bool = False
122
106
  ):
123
107
  if verbose:
124
108
  start = time()
125
109
  logging.info("Testing labels to get types")
126
110
 
127
111
  return_table = pd.DataFrame(columns=columns)
128
- for idx, (key, value) in enumerate(all_tests.items()):
112
+ for idx, (label, format) in enumerate(formats.items()):
129
113
  if verbose:
130
114
  start_type = time()
131
- return_table.loc[key] = [
132
- test_col_label(col_name, value["func"], value["prop"], limited_output=limited_output)
133
- for col_name in columns
134
- ]
115
+ return_table.loc[label] = [format.is_valid_label(col_name) for col_name in columns]
135
116
  if verbose:
136
117
  display_logs_depending_process_time(
137
- f'\t- Done with type "{key}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(all_tests)})',
118
+ f'\t- Done with type "{label}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(formats)})',
138
119
  time() - start_type,
139
120
  )
140
121
  if verbose:
@@ -148,23 +129,28 @@ def test_col_chunks(
148
129
  table: pd.DataFrame,
149
130
  file_path: str,
150
131
  analysis: dict,
151
- all_tests: list,
132
+ formats: dict[str, Format],
152
133
  limited_output: bool,
153
134
  skipna: bool = True,
154
135
  verbose: bool = False,
155
136
  ) -> tuple[pd.DataFrame, dict, dict[str, pd.Series]]:
156
137
  def build_remaining_tests_per_col(return_table: pd.DataFrame) -> dict[str, list[str]]:
138
+ # returns a dict with the table's columns as keys and the list of remaining format labels to apply
157
139
  return {
158
- col: [test for test in return_table.index if return_table.loc[test, col] > 0]
140
+ col: [
141
+ fmt_label
142
+ for fmt_label in return_table.index
143
+ if return_table.loc[fmt_label, col] > 0
144
+ ]
159
145
  for col in return_table.columns
160
146
  }
161
147
 
162
148
  if verbose:
163
149
  start = time()
164
- logging.info("Testing columns to get types on chunks")
150
+ logging.info("Testing columns to get formats on chunks")
165
151
 
166
152
  # analysing the sample to get a first guess
167
- return_table = test_col(table, all_tests, limited_output, skipna=skipna, verbose=verbose)
153
+ return_table = test_col(table, formats, limited_output, skipna=skipna, verbose=verbose)
168
154
  remaining_tests_per_col = build_remaining_tests_per_col(return_table)
169
155
 
170
156
  # hashing rows to get nb_duplicates
@@ -217,23 +203,22 @@ def test_col_chunks(
217
203
  if not any(remaining_tests for remaining_tests in remaining_tests_per_col.values()):
218
204
  # no more potential tests to do on any column, early stop
219
205
  break
220
- for col, tests in remaining_tests_per_col.items():
206
+ for col, fmt_labels in remaining_tests_per_col.items():
221
207
  # testing each column with the tests that are still competing
222
208
  # after previous batches' analyses
223
- for test in tests:
209
+ for label in fmt_labels:
224
210
  batch_col_test = test_col_val(
225
211
  batch[col],
226
- all_tests[test]["func"],
227
- all_tests[test]["prop"],
212
+ formats[label],
228
213
  limited_output=limited_output,
229
214
  skipna=skipna,
230
215
  )
231
- return_table.loc[test, col] = (
216
+ return_table.loc[label, col] = (
232
217
  # if this batch's column tested 0 then test fails overall
233
218
  0
234
219
  if batch_col_test == 0
235
220
  # otherwise updating the score with weighted average
236
- else ((return_table.loc[test, col] * idx + batch_col_test) / (idx + 1))
221
+ else ((return_table.loc[label, col] * idx + batch_col_test) / (idx + 1))
237
222
  )
238
223
  remaining_tests_per_col = build_remaining_tests_per_col(return_table)
239
224
  batch, batch_number = [], batch_number + 1
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from time import time
3
- from typing import Optional, TextIO
3
+ from typing import TextIO
4
4
 
5
5
  import pandas as pd
6
6
 
@@ -18,7 +18,7 @@ def parse_csv(
18
18
  skiprows: int,
19
19
  random_state: int = 42,
20
20
  verbose: bool = False,
21
- ) -> tuple[pd.DataFrame, Optional[int], Optional[int]]:
21
+ ) -> tuple[pd.DataFrame, int | None, int | None]:
22
22
  if verbose:
23
23
  start = time()
24
24
  logging.info("Parsing table")
@@ -12,14 +12,13 @@ from csv_detective.detection.engine import (
12
12
  )
13
13
  from csv_detective.detection.headers import detect_headers
14
14
  from csv_detective.detection.separator import detect_separator
15
- from csv_detective.utils import is_url
16
-
17
- from .compression import unzip
18
- from .csv import parse_csv
19
- from .excel import (
15
+ from csv_detective.parsing.compression import unzip
16
+ from csv_detective.parsing.csv import parse_csv
17
+ from csv_detective.parsing.excel import (
20
18
  XLS_LIKE_EXT,
21
19
  parse_excel,
22
20
  )
21
+ from csv_detective.utils import is_url
23
22
 
24
23
 
25
24
  def load_file(
@@ -47,6 +46,8 @@ def load_file(
47
46
  if table.empty:
48
47
  raise ValueError("Table seems to be empty")
49
48
  header = table.columns.to_list()
49
+ if any(col.startswith("Unnamed") for col in header):
50
+ raise ValueError("Could not retrieve headers")
50
51
  analysis = {
51
52
  "engine": engine,
52
53
  "sheet_name": sheet_name,
@@ -99,12 +100,10 @@ def load_file(
99
100
  }
100
101
  if engine is not None:
101
102
  analysis["compression"] = engine
102
- analysis.update(
103
- {
104
- "header_row_idx": header_row_idx,
105
- "header": header,
106
- }
107
- )
103
+ analysis |= {
104
+ "header_row_idx": header_row_idx,
105
+ "header": header,
106
+ }
108
107
  if total_lines is not None:
109
108
  analysis["total_lines"] = total_lines
110
109
  if nb_duplicates is not None:
csv_detective/validate.py CHANGED
@@ -2,13 +2,13 @@ import logging
2
2
 
3
3
  import pandas as pd
4
4
 
5
- from csv_detective.load_tests import return_all_tests
5
+ from csv_detective.format import FormatsManager
6
6
  from csv_detective.parsing.columns import MAX_NUMBER_CATEGORICAL_VALUES, test_col_val
7
7
 
8
8
  VALIDATION_CHUNK_SIZE = int(1e5)
9
9
  logging.basicConfig(level=logging.INFO)
10
10
 
11
- tests = return_all_tests("ALL", "detect_fields")
11
+ formats = FormatsManager().formats
12
12
 
13
13
 
14
14
  def validate(
@@ -19,6 +19,12 @@ def validate(
19
19
  ) -> tuple[bool, pd.DataFrame | None, dict | None, dict[str, pd.Series] | None]:
20
20
  """
21
21
  Verify if the given file has the same fields and types as in the given analysis.
22
+
23
+ Args:
24
+ file_path: the path of the file to validate
25
+ previous_analysis: the previous analysis to validate against (expected in the same structure as the output of the routine)
26
+ verbose: whether the code displays the steps it's going through
27
+ skipna: whether to ignore NaN values in the checks
22
28
  """
23
29
  try:
24
30
  if previous_analysis.get("separator"):
@@ -101,8 +107,7 @@ def validate(
101
107
  continue
102
108
  test_result: float = test_col_val(
103
109
  serie=chunk[col_name],
104
- test_func=tests[args["format"]]["func"],
105
- proportion=tests[args["format"]]["prop"],
110
+ format=formats[args["format"]],
106
111
  skipna=skipna,
107
112
  )
108
113
  if not bool(test_result):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv-detective
3
- Version: 0.9.3.dev2241
3
+ Version: 0.9.3.dev2319
4
4
  Summary: Detect tabular files column content
5
5
  Author-email: Etalab <opendatateam@data.gouv.fr>
6
6
  License: MIT
@@ -33,7 +33,7 @@ Dynamic: license-file
33
33
 
34
34
  This is a package to **automatically detect column content in tabular files**. The script reads either the whole file or the first few rows and performs various checks (regex, casting, comparison with official lists...) to see for each column if it matches with various content types.
35
35
 
36
- Currently supported file types: csv, xls, xlsx, ods.
36
+ Currently supported file types: csv(.gz), xls, xlsx, ods.
37
37
 
38
38
  You can also directly feed the URL of a remote file (from data.gouv.fr for instance).
39
39
 
@@ -65,7 +65,8 @@ inspection_results = routine(
65
65
  num_rows=-1, # Value -1 will analyze all lines of your file, you can change with the number of lines you wish to analyze
66
66
  save_results=False, # Default False. If True, it will save result output into the same directory as the analyzed file, using the same name as your file and .json extension
67
67
  output_profile=True, # Default False. If True, returned dict will contain a property "profile" indicating profile (min, max, mean, tops...) of every column of your csv
68
- output_schema=True, # Default False. If True, returned dict will contain a property "schema" containing basic [tableschema](https://specs.frictionlessdata.io/table-schema/) of your file. This can be use to validate structure of other csv which should match same structure.
68
+ output_schema=True, # Default False. If True, returned dict will contain a property "schema" containing basic [tableschema](https://specs.frictionlessdata.io/table-schema/) of your file. This can be used to validate the structure of other csv files which should match the same structure.
69
+ tags=["fr"], # Default None. If set as a list of strings, only performs checks related to the specified tags (you can see the available tags with FormatsManager().available_tags())
69
70
  )
70
71
  ```
71
72
 
@@ -73,7 +74,7 @@ inspection_results = routine(
73
74
 
74
75
  ### Output
75
76
 
76
- The program creates a `Python` dictionnary with the following information :
77
+ The program creates a `python` dictionary with the following information :
77
78
 
78
79
  ```
79
80
  {
@@ -216,7 +217,7 @@ Only the format with highest score is present in the output.
216
217
  ## Improvement suggestions
217
218
 
218
219
  - Smarter refactors
219
- - Improve performances
220
+ - Performance improvements
220
221
  - Test other ways to load and process data (`pandas` alternatives)
221
222
  - Add more and more detection modules...
222
223
 
@@ -0,0 +1,102 @@
1
+ csv_detective/__init__.py,sha256=zlYElTOp_I2_VG7ZdOTuAu0wuCXSc0cr3sH6gtk2bcg,152
2
+ csv_detective/cli.py,sha256=mu5anmBmaDk52_uZGiA4T37wYZCuV43gZAepjs1Cqzc,1389
3
+ csv_detective/explore_csv.py,sha256=-LCHr7vyT0Q0oLtXeOO8pEevJ6-8Ib9JP3D7nVgZM8o,7090
4
+ csv_detective/format.py,sha256=XX_cSTQc0jlsQq3GUqHi7Cz36AiRrpjrwPmeoOTLMvo,2396
5
+ csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
6
+ csv_detective/validate.py,sha256=XldlbGkUlPaIh0y4z9iaWlmmahwCrD1900s5Cxlq5wI,5430
7
+ csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
9
+ csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
10
+ csv_detective/detection/engine.py,sha256=wQeDKpp2DKF-HcS1R8H6GgQyaUgQme4szPtEHgAjBII,1552
11
+ csv_detective/detection/formats.py,sha256=uxmWz7J3btAwaOONIACxiL9vTZ8Iv7NdTSUqAOPQy0o,5381
12
+ csv_detective/detection/headers.py,sha256=95pTL524Sy5PGxyQ03ofFUaamvlmkxTJQe8u6HfzOkU,1051
13
+ csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
14
+ csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
15
+ csv_detective/detection/variables.py,sha256=-QtZOB96z3pWbqnZ-c1RU3yzoYqcO61A0JzeS6JbkxY,3576
16
+ csv_detective/formats/__init__.py,sha256=Egiy29kcG3Oz2eE2maYhD3wP29zOSOWyRlOpGD5LGvU,318
17
+ csv_detective/formats/adresse.py,sha256=jALDpEDAWyAcgqEfNVRg_W1r6XaYuJKD_jAaP2l-bxk,1943
18
+ csv_detective/formats/booleen.py,sha256=AnDDKShkSYpWO4POhwY2V7_C4yPWbmqBu8CJPgQ9Gwc,648
19
+ csv_detective/formats/code_commune_insee.py,sha256=MhwCPVAhwWH-MyaNAIVRNbqKfeNe3oiCpzEGfpHkpJY,504
20
+ csv_detective/formats/code_csp_insee.py,sha256=_JQ-YbnHMenNnwIg1xBmNVqgCa1tLD2hbPN1soODhDk,656
21
+ csv_detective/formats/code_departement.py,sha256=odwVbmktgjEhL-dSFHXuCRVwhkF8bL8G7VlpVTnMY2A,628
22
+ csv_detective/formats/code_fantoir.py,sha256=nFVFYJEP2HHE2TyhR_dhGdPCMLfCROBO_B8wxwQn7T8,366
23
+ csv_detective/formats/code_import.py,sha256=N5NVvnHkRwC7ARHoM77R-2cYSeyNmPoRIn6JL3Fbnjs,346
24
+ csv_detective/formats/code_postal.py,sha256=C6XMkiVTxhMFvfyvJmGp3iwvh722EzMwD_UdqQU4aR0,427
25
+ csv_detective/formats/code_region.py,sha256=VFKh1rGYVYTNWBJZ2_m0xS4rhJlrI_Gr8q8RXuZCr-w,366
26
+ csv_detective/formats/code_rna.py,sha256=WExlQtlAUfOFT4N3MKsMBhZVxTdNzgexFjmXhZdRM1w,512
27
+ csv_detective/formats/code_waldec.py,sha256=kJEJfikbhMfVwtA8hBpup0tpeSFoY_rWrEdXQxgNwhg,297
28
+ csv_detective/formats/commune.py,sha256=oVpwINGqpwMOT43KkasozipJ9hBeoQ5FrKV_wIeVJGE,532
29
+ csv_detective/formats/csp_insee.py,sha256=HE6NK6Sw91mLFeAAKwWUXZZfXX6fiA0zK4RI4YdkUFY,656
30
+ csv_detective/formats/date.py,sha256=X4ohXaFO8cXPJktUSumc3bfdlbDIWEYTG8S9ugVRcsE,2730
31
+ csv_detective/formats/date_fr.py,sha256=3hTw5RommrhcgECFRSt9KgyB9zyi1j4W3UygEHmRgoE,502
32
+ csv_detective/formats/datetime_aware.py,sha256=-1ZBix6vYlYXTvhXrijP-98AN7iPB0x_DbbwU1QjMCI,1470
33
+ csv_detective/formats/datetime_naive.py,sha256=nvA8qT1fb2RmpXN5_Cw9YZA6pC4BryX_B0V-E6O2UbU,1521
34
+ csv_detective/formats/datetime_rfc822.py,sha256=l-SLb34hSuHxC2JQ-9SD-nG38JqzoozwUZiGtoybb0A,601
35
+ csv_detective/formats/departement.py,sha256=UP9UF23BFq_-mIS8N10K5XkoCXwPmDeSoa_7lCAkI4w,768
36
+ csv_detective/formats/email.py,sha256=Qen2EBDYY5TtWXwxrrTGWRrbIybz0ySlVpl4ZRk8pzA,517
37
+ csv_detective/formats/float.py,sha256=tWs_tW64OuacNQENu3uk5GOEVQMQls2iiteFOacQRAQ,832
38
+ csv_detective/formats/geojson.py,sha256=udbBxCBRmb0o6TD8z5ryemfqdinBz6njNJU0XcbfMig,757
39
+ csv_detective/formats/insee_ape700.py,sha256=cLs3Eersqm4wX6oqsqp0Vb3WGPJb2xY5Za_vh0uLgKc,780
40
+ csv_detective/formats/insee_canton.py,sha256=Q5jczsOmh1wPP2KtDkcmqZ7Hlv50Zz9YvPIbxy46qs0,531
41
+ csv_detective/formats/int.py,sha256=ZBUOn50luMtlNKWPyOaMIkY3J4f4hA0MqwcoFtksozU,482
42
+ csv_detective/formats/iso_country_code_alpha2.py,sha256=vIep_j0xuqlXKyuvk8c8GaJC73HuJqKfQ4QzQKHsPc0,613
43
+ csv_detective/formats/iso_country_code_alpha3.py,sha256=yOmm91O8ot6KoUBfss5cqykDfeeMNCwafDAvPNvbufA,668
44
+ csv_detective/formats/iso_country_code_numeric.py,sha256=989ypOmjIrNTV9vFnrBlbpRWQ9whd3Rv9gNasdF_O4g,685
45
+ csv_detective/formats/jour_de_la_semaine.py,sha256=c5QBw9eZfwRs_jL_Ckm95UH-TxlExdFmfZNYW7-_iZI,606
46
+ csv_detective/formats/json.py,sha256=E-s7IHW0q5WgAJVK0I-5Rv7W_RdofROB5wnIXbNegZQ,446
47
+ csv_detective/formats/latitude_l93.py,sha256=GteGpxAht-jeOBLr_deCuEXA_LliVYIAmyr_7jFAWgI,986
48
+ csv_detective/formats/latitude_wgs.py,sha256=HPcFlLzJNqynLugDQ07vO04rOCNBuAabVJEP8FQ89Q0,780
49
+ csv_detective/formats/latitude_wgs_fr_metropole.py,sha256=ruGzQLJPiMV2AlnsBneQIhMzstseddzWA0bDg5gfTG4,791
50
+ csv_detective/formats/latlon_wgs.py,sha256=CbNi4Y-ZgBfNyYi54xwcZGLpEusiLAWVpFP1YgHtI1M,1224
51
+ csv_detective/formats/longitude_l93.py,sha256=vJE4k_DyQOjAruqu_Q0E2sJKZB4mXGGN6bS9WCelsbs,768
52
+ csv_detective/formats/longitude_wgs.py,sha256=DUZCUxJQl53HHVQbXlz_lWXoAZhy3MvJWcPNdiK5cCM,552
53
+ csv_detective/formats/longitude_wgs_fr_metropole.py,sha256=wPlJP06K0BVWfrx1wwEAKK93AKIqvsuw705gKAlWAfQ,550
54
+ csv_detective/formats/lonlat_wgs.py,sha256=BgtTl2ReI0hSQB-7mcR4TDxx-QzvA1B9fiZWxTb5xPI,1005
55
+ csv_detective/formats/mois_de_lannee.py,sha256=4_mmdr9S83utVCgPaK_epkeBm2mhwdUWQEoB_Fhdh2o,759
56
+ csv_detective/formats/money.py,sha256=HpjrmfUmbG8sXF557XbYzQ7TLtpNVRgpC991gGokO8I,414
57
+ csv_detective/formats/mongo_object_id.py,sha256=XsiP4iMxfBBIeuL-4g5bm3jgS6yUMJC2X5CmrEJ40oI,296
58
+ csv_detective/formats/pays.py,sha256=FRvoQwIWiKbm0RC62Sus1X0Y_yJ-cfvdB5RYhkY-4NY,693
59
+ csv_detective/formats/percent.py,sha256=s6eQBMwJr2uyTZMUCK1_ifA0c4Rt2iEe9_E_hKKU_mk,308
60
+ csv_detective/formats/region.py,sha256=CkN7JTsZB1X3bH5xohbtMCxL5BX9MSpith36_1mHMd4,1483
61
+ csv_detective/formats/sexe.py,sha256=yioD4W6EkgUgo74rxn6KLZtN_0XYXtmA4mqVyI7e1mU,387
62
+ csv_detective/formats/siren.py,sha256=ieLe50vdSnkXadcUI8VXnnId9GFGHyIBWVTP6bJtyMo,758
63
+ csv_detective/formats/siret.py,sha256=ehkZgOH-HggN6IgxF4G0DMut_6giZ3gc4g9wMdwZFHQ,997
64
+ csv_detective/formats/tel_fr.py,sha256=yKCqIlqKO2yKucCoCjYfSjqNKfTjqFcmNXxg6THG0WE,624
65
+ csv_detective/formats/uai.py,sha256=uT5gjdTmoFH9QPZdTFkJgiyuKLW0B6KmT6yqHQeaeOU,711
66
+ csv_detective/formats/url.py,sha256=GYE9j_i4kpEQueBXa1Fla0wk8_sc0n230GL3KaIRvwY,932
67
+ csv_detective/formats/username.py,sha256=y38OggfWpEQsGi0JnD9QRM30musa29lO6nz-qybR24U,249
68
+ csv_detective/formats/uuid.py,sha256=ekMEFfzQtz0cLudzmu3AoCM0Yf5pu23qAcFNFgHWJ1A,346
69
+ csv_detective/formats/year.py,sha256=pkAfYPKZdy0g1ZoHGgJNpgTS5y5weGEKXCVMGaxIX8k,472
70
+ csv_detective/formats/data/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
71
+ csv_detective/formats/data/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
72
+ csv_detective/formats/data/iso_country_code_alpha2.txt,sha256=YyPlDqCdz65ecf4Wes_r0P4rDSJG35niXtjc4MmctXM,1740
73
+ csv_detective/formats/data/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
74
+ csv_detective/formats/data/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
75
+ csv_detective/output/__init__.py,sha256=ALSq_tgX7rGyh--7rmbKz8wHkmResN0h7mNujndow3w,2103
76
+ csv_detective/output/dataframe.py,sha256=TyBc2ObaVUns_ydJWOMKmCYvuj7ddxag0QN3z37g3GE,3219
77
+ csv_detective/output/example.py,sha256=8LWheSBYCeDFfarbnmzBrdCbTd8Alh1U4pfXMKfabOw,8630
78
+ csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf44,4876
79
+ csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
80
+ csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
81
+ csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
+ csv_detective/parsing/columns.py,sha256=CqtZRZYMYDNMopxnqs4eZLSABi-ms61wqv5M9vWJ7iU,9343
83
+ csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
84
+ csv_detective/parsing/csv.py,sha256=0T0gpaXzwJo-sq41IoLQD704GiMUYeDVVASVbat-zWg,1726
85
+ csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
86
+ csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
87
+ csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
88
+ csv_detective-0.9.3.dev2319.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
89
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
+ tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
91
+ tests/test_fields.py,sha256=EWHIKwRSdIh74bBSoozYmZBETf7V03JMWpglyxA0ci0,5616
92
+ tests/test_file.py,sha256=MxJOWwhRG2Xm1_m3C9x8CS9FepjUebET-6EsMi3DvmY,13125
93
+ tests/test_labels.py,sha256=kDPerWC3_J3l1p5I3-MHwz7BmhcuxZAws_wSgHCHUuI,536
94
+ tests/test_structure.py,sha256=XDbviuuvk-0Mu9Y9PI6He2e5hry2dXVJ6yBVwEqF_2o,1043
95
+ tests/test_validation.py,sha256=9djBT-PDhu_563OFgWyE20o-wPEWEIQGXp6Pjh0_MQM,3463
96
+ venv/bin/activate_this.py,sha256=wS7qPipy8R-dS_0ICD8PqqUQ8F-PrtcpiJw2DUPngYM,1287
97
+ venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
98
+ csv_detective-0.9.3.dev2319.dist-info/METADATA,sha256=2io1FfiaxuMCfKwwBeU6-y6N3jfM0hAkAPZus1fpoRg,11038
99
+ csv_detective-0.9.3.dev2319.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
100
+ csv_detective-0.9.3.dev2319.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
101
+ csv_detective-0.9.3.dev2319.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
102
+ csv_detective-0.9.3.dev2319.dist-info/RECORD,,