PyPI - csv-detective - Versions diffs - 0.9.3.dev2232__py3-none-any.whl → 0.9.3.dev2241__py3-none-any.whl - Mend

csv-detective 0.9.3.dev2232__py3-none-any.whl → 0.9.3.dev2241__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

csv_detective/output/profile.py CHANGED Viewed

@@ -30,6 +30,10 @@ def create_profile(
             k: v[0] if v else {"python_type": "string", "format": "string", "score": 1.0}
             for k, v in columns.items()
         }
+    # value_counts().reset_index() tries to insert a "count" column, and fails if it's already here
+    _count_col = "count"
+    while _count_col in table.columns:
+        _count_col = "_" + _count_col
     profile = defaultdict(dict)
     for c in table.columns:
         # for numerical formats we want min, max, mean, std
@@ -79,14 +83,14 @@ def create_profile(
         # for all formats we want most frequent values, nb unique values and nb missing values
         tops_bruts = (
             (table[c].value_counts() if _col_values is None else _col_values[c].sort_values())
-            .reset_index()
+            .reset_index(name=_count_col)
             .iloc[:10]
             .to_dict(orient="records")
         )
         profile[c].update(
             tops=[
                 {
-                    "count": tb["count"],
+                    "count": tb[_count_col],
                     "value": tb[c],
                 }
                 for tb in tops_bruts

{csv_detective-0.9.3.dev2232.dist-info → csv_detective-0.9.3.dev2241.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: csv-detective
-Version: 0.9.3.dev2232
+Version: 0.9.3.dev2241
 Summary: Detect tabular files column content
 Author-email: Etalab <opendatateam@data.gouv.fr>
 License: MIT

{csv_detective-0.9.3.dev2232.dist-info → csv_detective-0.9.3.dev2241.dist-info}/RECORD RENAMED Viewed

@@ -139,7 +139,7 @@ csv_detective/detection/variables.py,sha256=-QtZOB96z3pWbqnZ-c1RU3yzoYqcO61A0Jze
 csv_detective/output/__init__.py,sha256=B0RRaXEUAEduLFOoHll4Hl6x35b55Kwko-tQv5WmPt0,2045
 csv_detective/output/dataframe.py,sha256=J_617q8j1_INQOYl668IJt8M0Mi5zWYWAwtzdV4sJSo,3254
 csv_detective/output/example.py,sha256=8LWheSBYCeDFfarbnmzBrdCbTd8Alh1U4pfXMKfabOw,8630
-csv_detective/output/profile.py,sha256=oWIuHchiZ72VzGLB9q3mW-hhWq1VxiU1Z09VWjAU-JM,4696
+csv_detective/output/profile.py,sha256=ZGKMSeVfmQerAfVhViWXVU9j4jbCrv5K484SQNep7Xw,4920
 csv_detective/output/schema.py,sha256=vXPlEw44zRR4GcYd-PQ_R_qXeCaefEDxW2XmprdNP_c,10453
 csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
 csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -149,18 +149,18 @@ csv_detective/parsing/csv.py,sha256=BJ_fqoCCCCSJ61uHyiEpDmXlBdrqWLY-UKtKwkYw65c,
 csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
 csv_detective/parsing/load.py,sha256=Ks1S92H_GErvd2Uy0_EuShMzZSkiuWdTmVQFJ_XX5lg,4167
 csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
-csv_detective-0.9.3.dev2232.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
+csv_detective-0.9.3.dev2241.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
 tests/test_fields.py,sha256=QoMsVR-ZhH5F9DFqYDvzP6vQCZcoalEi8JBb_fxWR44,13665
-tests/test_file.py,sha256=bYP-NzPoGEXPwNZLD1EjJlviT9a_27IY6cb0shdiR4U,12329
+tests/test_file.py,sha256=EKFW08W96VA5nVwNPvN1v7zXDL0qEEuGWnUqfJJdMh4,13130
 tests/test_labels.py,sha256=Y0XlOpztCyV65pk7iAS_nMMfdysoBujlBmz10vHul9A,469
 tests/test_structure.py,sha256=KGpw45weVK3iEWAg3OVHHEbj7RYALFicnZ59z7rCFuU,1450
 tests/test_validation.py,sha256=9djBT-PDhu_563OFgWyE20o-wPEWEIQGXp6Pjh0_MQM,3463
 venv/bin/activate_this.py,sha256=wS7qPipy8R-dS_0ICD8PqqUQ8F-PrtcpiJw2DUPngYM,1287
 venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
-csv_detective-0.9.3.dev2232.dist-info/METADATA,sha256=q8o2SRFri-iFmUgOp3tL5jGlIsuXB-TDyUj7BOaCPhg,10845
-csv_detective-0.9.3.dev2232.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-csv_detective-0.9.3.dev2232.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
-csv_detective-0.9.3.dev2232.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
-csv_detective-0.9.3.dev2232.dist-info/RECORD,,
+csv_detective-0.9.3.dev2241.dist-info/METADATA,sha256=Cy0R4v1C7Lg-KRgD1_pBP4uO_huMU0158VorSXw8b2w,10845
+csv_detective-0.9.3.dev2241.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+csv_detective-0.9.3.dev2241.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+csv_detective-0.9.3.dev2241.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
+csv_detective-0.9.3.dev2241.dist-info/RECORD,,

tests/test_file.py CHANGED Viewed

@@ -370,22 +370,44 @@ def test_almost_uniform_column(mocked_responses):
 def test_full_nan_column(mocked_responses):
     # we want a file that needs sampling
-    expected_content = "only_nan,second_col\n" + ",1\n" * (CHUNK_SIZE + 1)
+    col_name = "only_nan"
+    expected_content = f"{col_name},second_col\n" + ",1\n" * (CHUNK_SIZE + 1)
     mocked_responses.get(
         "http://example.com/test.csv",
         body=expected_content,
         status=200,
     )
     with patch("urllib.request.urlopen") as mock_urlopen:
-        # Create a mock HTTP response object
         mock_response = MagicMock()
         mock_response.read.return_value = expected_content.encode("utf-8")
         mock_response.__enter__.return_value = mock_response
         mock_urlopen.return_value = mock_response
-        # just testing it doesn't fail
-        routine(
+        # only NaNs should return "string"
+        analysis = routine(
             file_path="http://example.com/test.csv",
             num_rows=-1,
             output_profile=False,
             save_results=False,
         )
+        assert analysis["columns"][col_name]["format"] == "string"
+def test_count_column(mocked_responses):
+    expected_content = "count,_count\n" + "a,1\n" * 100
+    mocked_responses.get(
+        "http://example.com/test.csv",
+        body=expected_content,
+        status=200,
+    )
+    with patch("urllib.request.urlopen") as mock_urlopen:
+        mock_response = MagicMock()
+        mock_response.read.return_value = expected_content.encode("utf-8")
+        mock_response.__enter__.return_value = mock_response
+        mock_urlopen.return_value = mock_response
+        # only testing it doesn't fail with output_profile=True
+        routine(
+            file_path="http://example.com/test.csv",
+            num_rows=-1,
+            output_profile=True,
+            save_results=False,
+        )

{csv_detective-0.9.3.dev2232.dist-info → csv_detective-0.9.3.dev2241.dist-info}/WHEEL RENAMED Viewed

File without changes

{csv_detective-0.9.3.dev2232.dist-info → csv_detective-0.9.3.dev2241.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{csv_detective-0.9.3.dev2232.dist-info → csv_detective-0.9.3.dev2241.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{csv_detective-0.9.3.dev2232.dist-info → csv_detective-0.9.3.dev2241.dist-info}/top_level.txt RENAMED Viewed

File without changes

csv-detective 0.9.3.dev2232__py3-none-any.whl → 0.9.3.dev2241__py3-none-any.whl

csv-detective 0.9.3.dev2232__py3-none-any.whl → 0.9.3.dev2241__py3-none-any.whl