PyPI - h4md - Versions diffs - 0.1.0__tar.gz - Mend

h4md 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

h4md-0.1.0/LICENSE +28 -0
h4md-0.1.0/MANIFEST.in +7 -0
h4md-0.1.0/PKG-INFO +62 -0
h4md-0.1.0/README.md +34 -0
h4md-0.1.0/h4md/__init__.py +3 -0
h4md-0.1.0/h4md/h4md.py +239 -0
h4md-0.1.0/h4md.egg-info/PKG-INFO +62 -0
h4md-0.1.0/h4md.egg-info/SOURCES.txt +14 -0
h4md-0.1.0/h4md.egg-info/dependency_links.txt +1 -0
h4md-0.1.0/h4md.egg-info/entry_points.txt +2 -0
h4md-0.1.0/h4md.egg-info/requires.txt +2 -0
h4md-0.1.0/h4md.egg-info/top_level.txt +1 -0
h4md-0.1.0/requirements.txt +5 -0
h4md-0.1.0/setup.cfg +4 -0
h4md-0.1.0/setup.py +41 -0
h4md-0.1.0/tests/test_h4md.py +112 -0

h4md-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,28 @@
+BSD 3-Clause License
+Copyright (c) 2025, H. Joe Lee
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

h4md-0.1.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,7 @@
+include README.md
+include LICENSE
+include requirements.txt
+recursive-include h4md *.py
+global-exclude *.pyc
+global-exclude __pycache__
+global-exclude .pytest_cache

h4md-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,62 @@
+Metadata-Version: 2.1
+Name: h4md
+Version: 0.1.0
+Summary: A command-line tool to convert HDF4 datasets to markdown
+Home-page: https://github.com/iowarp/h4md
+Author: IOWarp User
+Author-email: user@iowarp.org
+Keywords: hdf4,markdown,conversion,hdf,data
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: Topic :: Utilities
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pyhdf>=0.10.5
+Requires-Dist: click>=8.1.0
+# h4md (HDF4 to Markdown)
+A command-line tool to convert HDF4 datasets and attributes to markdown format.
+## Installation
+You can install h4md directly from the repository:
+```bash
+pip install .
+## Usage
+After installation, you can use the `h4md` command directly:
+```bash
+h4md input.hdf output.md
+```
+If you don't specify an output file, it will use the input filename with a `.md` extension:
+```bash
+h4md input.hdf
+# Creates input.md
+```
+## Output Format
+The tool generates markdown with the following structure:
+- File name as main heading
+- Global attributes section
+- Datasets section with each dataset containing:
+  - Shape information
+  - Data type
+  - Dataset-specific attributes

h4md-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,34 @@
+# h4md (HDF4 to Markdown)
+A command-line tool to convert HDF4 datasets and attributes to markdown format.
+## Installation
+You can install h4md directly from the repository:
+```bash
+pip install .
+## Usage
+After installation, you can use the `h4md` command directly:
+```bash
+h4md input.hdf output.md
+```
+If you don't specify an output file, it will use the input filename with a `.md` extension:
+```bash
+h4md input.hdf
+# Creates input.md
+```
+## Output Format
+The tool generates markdown with the following structure:
+- File name as main heading
+- Global attributes section
+- Datasets section with each dataset containing:
+  - Shape information
+  - Data type
+  - Dataset-specific attributes

h4md-0.1.0/h4md/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .h4md import main
+__version__ = "0.1.0"

h4md-0.1.0/h4md/h4md.py ADDED Viewed

@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+"""
+h4md - Convert HDF4 datasets and attributes to markdown
+"""
+import click
+from pyhdf.SD import SD, SDC
+import os
+import sys
+def format_dataset(dataset, dataset_name=None):
+    """Format a dataset's information as markdown."""
+    try:
+        # Get basic dataset information
+        info = dataset.info()
+        # Use provided dataset_name or extract from info tuple when name() method fails
+        if dataset_name is not None:
+            name = dataset_name
+        else:
+            try:
+                name = dataset.name()
+            except (AttributeError, Exception):
+                # Extract name from info tuple: (name, rank, shape, data_type, n_attrs)
+                name = info[0] if info and len(info) > 0 else "Unknown"
+        shape = info[2]
+        data_type = info[3]
+        # Build markdown content
+        md = f"### Dataset: {name}\n\n"
+        md += f"- **Shape**: {shape}\n"
+        md += f"- **Type**: {data_type}\n"
+        # Get attributes if any
+        try:
+            # This returns a dictionary of attributes
+            attrs_dict = dataset.attributes()
+            if attrs_dict:
+                md += "\n#### Attributes:\n\n"
+                # The attributes dict contains values directly
+                for attr_name, attr_value in attrs_dict.items():
+                    # Handle different attribute value types
+                    if isinstance(attr_value, bytes):
+                        # Convert bytes to string safely, replacing null bytes
+                        try:
+                            attr_str = attr_value.decode('utf-8', errors='replace').replace('\x00', '')
+                        except:
+                            attr_str = f"<binary data length={len(attr_value)}>"
+                    elif isinstance(attr_value, (list, tuple)):
+                        # Handle lists/tuples
+                        attr_str = str(attr_value)
+                    else:
+                        # Handle other types (strings, numbers)
+                        attr_str = str(attr_value)
+                    # Truncate very long attributes to avoid huge markdown files
+                    if len(attr_str) > 50:
+                        attr_str = attr_str[:50] + "... (truncated, length=" + str(len(attr_str)) + ")"
+                    # Clean up any remaining control characters
+                    attr_str = ''.join(char for char in attr_str if ord(char) >= 32 or char in '\n\r\t')
+                    md += f"- **{attr_name}**: {attr_str}\n"
+        except Exception as attrs_err:
+            # In case of error, continue with other parts of the dataset
+            md += "\n*Error reading attributes*\n"
+        return md
+    except Exception as e:
+        # If there's an error with the dataset, return a placeholder
+        return f"### Dataset: {dataset.name() if hasattr(dataset, 'name') else 'Unknown'}\n\n*Error formatting dataset*\n"
+def hdf4_to_markdown(file_path):
+    """Convert HDF4 file content to markdown format."""
+    hdf = None
+    try:
+        # Check if file exists and is readable
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"File not found: {file_path}")
+        # Open the HDF4 file
+        hdf = SD(file_path, SDC.READ)
+        # Get filename for the header
+        filename = os.path.basename(file_path)
+        md = f"# HDF4 File: {filename}\n\n"
+        # Process global attributes
+        try:
+            # This returns a dictionary of attributes
+            attrs_dict = hdf.attributes()
+            if attrs_dict:
+                md += "## Global Attributes\n\n"
+                # Use the attributes dictionary directly
+                for attr_name, attr_value in attrs_dict.items():
+                    # Handle different attribute value types
+                    if isinstance(attr_value, bytes):
+                        # Convert bytes to string safely, replacing null bytes
+                        try:
+                            attr_str = attr_value.decode('utf-8', errors='replace').replace('\x00', '')
+                        except:
+                            attr_str = f"<binary data length={len(attr_value)}>"
+                    elif isinstance(attr_value, (list, tuple)):
+                        # Handle lists/tuples
+                        attr_str = str(attr_value)
+                    else:
+                        # Handle other types (strings, numbers)
+                        attr_str = str(attr_value)
+                    # Truncate very long attributes to avoid huge markdown files
+                    if len(attr_str) > 50:
+                        attr_str = attr_str[:50] + "... (truncated, length=" + str(len(attr_str)) + ")"
+                    # Clean up any remaining control characters
+                    attr_str = ''.join(char for char in attr_str if ord(char) >= 32 or char in '\n\r\t')
+                    md += f"- **{attr_name}**: {attr_str}\n"
+                md += "\n"
+        except Exception:
+            # In case of error, continue with datasets
+            pass
+        # Process datasets
+        try:
+            # Get all dataset names
+            datasets_dict = hdf.datasets()
+            dataset_names = list(datasets_dict.keys())
+            if dataset_names:
+                md += "## Datasets\n\n"
+                for ds_name in dataset_names:
+                    try:
+                        # Select and process each dataset
+                        dataset = hdf.select(ds_name)
+                        md += format_dataset(dataset, ds_name)
+                        md += "\n"
+                    except Exception:
+                        # If there's an error with a dataset, continue with others
+                        md += f"### Dataset: {ds_name}\n\n*Error processing dataset*\n\n"
+        except Exception:
+            # If there's an error getting datasets, note it
+            md += "*Error reading datasets*\n"
+        return md
+    except Exception as e:
+        # Handle any other errors
+        error_msg = str(e)
+        if "File is supported, must be either hdf, cdf, netcdf" in error_msg:
+            raise click.ClickException(f"File '{os.path.basename(file_path)}' is not a valid HDF4 file. It may be a different format (NetCDF, HDF5, etc.).")
+        elif "No such file or directory" in error_msg:
+            raise click.ClickException(f"File not found: {file_path}")
+        else:
+            raise click.ClickException(f"Error processing HDF4 file: {e}")
+    finally:
+        # Always close the file
+        if hdf is not None:
+            try:
+                hdf.end()
+            except Exception:
+                # If we can't close it cleanly, that's ok
+                pass
+@click.command()
+@click.argument('input_file', type=click.Path(exists=True))
+@click.argument('output_file', type=click.Path(), required=False)
+def main(input_file, output_file=None):
+    """
+    Convert HDF4 file to markdown format.
+    INPUT_FILE: Path to the input HDF4 file
+    OUTPUT_FILE: Optional path for the output markdown file (defaults to input_file with .md extension)
+    """
+    if output_file is None:
+        output_file = os.path.splitext(input_file)[0] + '.md'
+    try:
+        markdown_content = hdf4_to_markdown(input_file)
+        with open(output_file, 'w', encoding='utf-8') as f:
+            f.write(markdown_content)
+        click.echo(f"Successfully converted {input_file} to {output_file}")
+    except Exception as e:
+        raise click.ClickException(str(e))
+def test_function():
+    """Test function to debug the hdf4_to_markdown function"""
+    import tempfile
+    from pyhdf.SD import SD, SDC
+    import numpy as np
+    # Create a temporary HDF4 file
+    with tempfile.NamedTemporaryFile(suffix='.hdf', delete=False) as tmp:
+        file_path = tmp.name
+    print(f"Creating test HDF4 file at: {file_path}")
+    try:
+        hdf = SD(file_path, SDC.WRITE | SDC.CREATE)
+        # Add global attributes
+        hdf.setattr('title', 'Test Dataset')
+        hdf.setattr('description', 'Sample HDF4 file for testing')
+        # Create a sample dataset
+        data = np.arange(6).reshape(2, 3)
+        sds = hdf.create('sample_data', SDC.FLOAT32, (2, 3))
+        sds.data[:] = data
+        sds.setattr('units', 'meters')
+        hdf.end()
+        # Now convert to markdown
+        print("\nTesting hdf4_to_markdown function:")
+        markdown = hdf4_to_markdown(file_path)
+        print("\nGenerated Markdown:")
+        print(markdown)
+        return True
+    except Exception as e:
+        print(f"\nTest FAILED with error: {type(e).__name__}: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+    finally:
+        import os
+        if os.path.exists(file_path):
+            try:
+                os.remove(file_path)
+                print(f"Removed temporary file: {file_path}")
+            except:
+                print(f"Failed to remove temporary file: {file_path}")
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+        main()
+    else:
+        print("Running test function...")
+        success = test_function()
+        print(f"Test {'succeeded' if success else 'failed'}")

h4md-0.1.0/h4md.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,62 @@
+Metadata-Version: 2.1
+Name: h4md
+Version: 0.1.0
+Summary: A command-line tool to convert HDF4 datasets to markdown
+Home-page: https://github.com/iowarp/h4md
+Author: IOWarp User
+Author-email: user@iowarp.org
+Keywords: hdf4,markdown,conversion,hdf,data
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: Topic :: Utilities
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pyhdf>=0.10.5
+Requires-Dist: click>=8.1.0
+# h4md (HDF4 to Markdown)
+A command-line tool to convert HDF4 datasets and attributes to markdown format.
+## Installation
+You can install h4md directly from the repository:
+```bash
+pip install .
+## Usage
+After installation, you can use the `h4md` command directly:
+```bash
+h4md input.hdf output.md
+```
+If you don't specify an output file, it will use the input filename with a `.md` extension:
+```bash
+h4md input.hdf
+# Creates input.md
+```
+## Output Format
+The tool generates markdown with the following structure:
+- File name as main heading
+- Global attributes section
+- Datasets section with each dataset containing:
+  - Shape information
+  - Data type
+  - Dataset-specific attributes

h4md-0.1.0/h4md.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,14 @@
+LICENSE
+MANIFEST.in
+README.md
+requirements.txt
+setup.py
+h4md/__init__.py
+h4md/h4md.py
+h4md.egg-info/PKG-INFO
+h4md.egg-info/SOURCES.txt
+h4md.egg-info/dependency_links.txt
+h4md.egg-info/entry_points.txt
+h4md.egg-info/requires.txt
+h4md.egg-info/top_level.txt
+tests/test_h4md.py

h4md-0.1.0/h4md.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

h4md-0.1.0/h4md.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ h4md = h4md:main

h4md-0.1.0/h4md.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ pyhdf>=0.10.5
2	+ click>=8.1.0

h4md-0.1.0/h4md.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ h4md

h4md-0.1.0/requirements.txt ADDED Viewed

@@ -0,0 +1,5 @@
+pyhdf>=0.10.5
+click>=8.1.0
+setuptools>=42.0.0
+pytest>=7.0.0
+numpy>=1.20.0

h4md-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

h4md-0.1.0/setup.py ADDED Viewed

@@ -0,0 +1,41 @@
+from setuptools import setup, find_packages
+setup(
+    name="h4md",
+    version="0.1.0",
+    packages=find_packages(),
+    py_modules=["h4md"],
+    install_requires=[
+        "pyhdf>=0.10.5",
+        "click>=8.1.0",
+    ],
+    entry_points={
+        "console_scripts": [
+            "h4md=h4md:main",
+        ],
+    },
+    author="IOWarp User",
+    author_email="user@iowarp.org",
+    url="https://github.com/iowarp/h4md",
+    description="A command-line tool to convert HDF4 datasets to markdown",
+    long_description=open("README.md").read(),
+    long_description_content_type="text/markdown",
+    keywords="hdf4, markdown, conversion, hdf, data",
+    python_requires=">=3.6",
+    classifiers=[
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Topic :: Scientific/Engineering :: Information Analysis",
+        "Topic :: Utilities",
+    ],
+)

h4md-0.1.0/tests/test_h4md.py ADDED Viewed

@@ -0,0 +1,112 @@
+import os
+import pytest
+from pyhdf.SD import SD, SDC
+import numpy as np
+from h4md.h4md import hdf4_to_markdown
+@pytest.fixture
+def sample_hdf_file(tmp_path):
+    """Create a sample HDF4 file for testing."""
+    file_path = tmp_path / "test.hdf"
+    file_path_str = str(file_path)
+    # Create the HDF4 file
+    hdf = SD(file_path_str, SDC.WRITE | SDC.CREATE)
+    # Add global attributes
+    hdf.setattr('title', 'Test Dataset')
+    hdf.setattr('description', 'Sample HDF4 file for testing')
+    # Create a sample dataset
+    data = np.arange(6).reshape(2, 3)
+    sds = hdf.create('sample_data', SDC.FLOAT32, (2, 3))
+    sds.data[:] = data
+    sds.setattr('units', 'meters')
+    # Close file
+    hdf.end()
+    # Verify the file exists before returning
+    assert file_path.exists(), f"HDF4 file was not created: {file_path_str}"
+    return file_path
+def test_hdf4_to_markdown(sample_hdf_file, tmp_path):
+    """Test conversion of HDF4 to markdown."""
+    import traceback
+    print("\n-------------- Starting test_hdf4_to_markdown ---------------")
+    output_file = tmp_path / "output.md"
+    print(f"Sample HDF file path: {sample_hdf_file}")
+    print(f"Output file path: {output_file}")
+    try:
+        # Verify file exists
+        import os
+        file_path_str = str(sample_hdf_file)
+        print(f"File exists check: {os.path.exists(file_path_str)}")
+        # Try to manually open the HDF file to check if it's valid
+        print("Checking if HDF file is readable:")
+        try:
+            from pyhdf.SD import SD, SDC
+            hdf = SD(file_path_str, SDC.READ)
+            print(f"  - HDF file opened successfully")
+            print(f"  - Datasets: {list(hdf.datasets().keys())}")
+            attr_names = list(hdf.attributes().keys())
+            print(f"  - Attributes: {attr_names}")
+            # Try reading an attribute
+            if attr_names:
+                for name in attr_names:
+                    print(f"  - Reading attribute '{name}':")
+                    value = hdf.attr(name).get()
+                    print(f"    Value: {value}")
+            hdf.end()
+            print("Successfully closed test HDF file")
+        except Exception as e:
+            print(f"Error reading test HDF file: {type(e).__name__}: {e}")
+            traceback.print_exc()
+        # Convert HDF4 to markdown
+        print("\nCalling hdf4_to_markdown now...")
+        markdown_content = hdf4_to_markdown(file_path_str)
+        print("hdf4_to_markdown returned successfully")
+        # Write markdown to file
+        with open(output_file, 'w') as f:
+            f.write(markdown_content)
+        print(f"Wrote markdown to {output_file}")
+        # Read and verify the markdown content
+        with open(output_file) as f:
+            content = f.read()
+        print(f"\nGenerated markdown content:\n{content[:200]}...")
+        # Check for expected content
+        print("\nVerifying markdown content...")
+        assert "# HDF4 File: test.hdf" in content, "Missing file name in header"
+        assert "Test Dataset" in content, "Missing 'Test Dataset' in content"
+        assert "Sample HDF4 file for testing" in content, "Missing description"
+        assert "sample_data" in content, "Missing dataset name"
+        assert "meters" in content, "Missing units attribute"
+        assert "(2, 3)" in content, "Missing shape information"
+        print("All assertions passed successfully!")
+    except Exception as e:
+        print(f"\nEXCEPTION IN TEST: {type(e).__name__}: {e}")
+        traceback.print_exc()
+        raise
+def test_nonexistent_file():
+    """Test handling of non-existent file."""
+    with pytest.raises(Exception):
+        hdf4_to_markdown("nonexistent.hdf")
+def test_invalid_hdf4_file(tmp_path):
+    """Test handling of invalid HDF4 file."""
+    invalid_file = tmp_path / "invalid.hdf"
+    with open(invalid_file, 'w') as f:
+        f.write("Not an HDF4 file")
+    with pytest.raises(Exception):
+        hdf4_to_markdown(str(invalid_file))