PyPI - file-identifier - Versions diffs - 0.1.0__tar.gz - Mend

file-identifier 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

file_identifier-0.1.0/LICENSE +6 -0
file_identifier-0.1.0/PKG-INFO +12 -0
file_identifier-0.1.0/README +49 -0
file_identifier-0.1.0/file_identifier/__init__.py +9 -0
file_identifier-0.1.0/file_identifier/cli.py +41 -0
file_identifier-0.1.0/file_identifier/core.py +144 -0
file_identifier-0.1.0/file_identifier.egg-info/PKG-INFO +12 -0
file_identifier-0.1.0/file_identifier.egg-info/SOURCES.txt +11 -0
file_identifier-0.1.0/file_identifier.egg-info/dependency_links.txt +1 -0
file_identifier-0.1.0/file_identifier.egg-info/entry_points.txt +2 -0
file_identifier-0.1.0/file_identifier.egg-info/top_level.txt +1 -0
file_identifier-0.1.0/pyproject.toml +23 -0
file_identifier-0.1.0/setup.cfg +4 -0

file_identifier-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,6 @@
+MIT License
+Copyright (c) 2026 Priyanshi Dwivedi
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software...

file_identifier-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,12 @@
+Metadata-Version: 2.4
+Name: file-identifier
+Version: 0.1.0
+Summary: File Type Identifier using Magic Numbers
+Author-email: Author <author@example.com>
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Dynamic: license-file

file_identifier-0.1.0/README ADDED Viewed

@@ -0,0 +1,49 @@
+# 🔍 File Identifier
+Identify file types using **magic numbers (file signatures)** instead of file extensions.
+Works even if files have incorrect or fake extensions (e.g., `.pdf.exe`).
+---
+## 🚀 Installation
+```bash
+pip install file-identifier
+```
+---
+## 🛠 Usage
+```bash
+file-identifier <file_path>
+```
+### Example
+```bash
+file-identifier "C:\Users\YourName\Downloads\file.zip"
+```
+---
+## 📊 Output
+* File type
+* Likely extension
+* File size
+* Confidence level
+* Hex header
+---
+## 🧠 How it works
+Reads the first bytes of a file and matches them with known **magic signatures**.
+---
+## 📜 License
+MIT

file_identifier-0.1.0/file_identifier/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""
+File Type Identifier using Magic Numbers
+Reads file headers (magic bytes) to identify file types - no extensions needed.
+"""
+from .core import identify_file, identify_many, read_header, print_report
+# Package version string.
+__version__ = "0.1.0"
+# Public API of the package: exactly the four names imported from .core.
+__all__ = ["identify_file", "identify_many", "read_header", "print_report"]

file_identifier-0.1.0/file_identifier/cli.py ADDED Viewed

@@ -0,0 +1,41 @@
+import sys
+import os
+import tempfile
+import argparse
+from .core import identify_file, print_report
+def main():
+    parser = argparse.ArgumentParser(description="File Type Identifier using Magic Numbers.")
+    parser.add_argument("files", nargs="*", help="List of files to identify")
+    args = parser.parse_args()
+    # If files are provided via CLI arguments
+    if args.files:
+        for fp in args.files:
+            print_report(identify_file(fp))
+    else:
+        # Default behavior with no arguments: Self-demo
+        print("\n🔍 File Type Identifier — Demo Mode")
+        print("Creating sample files with known magic bytes...\n")
+        samples = {
+            "sample.jpg":  b'\xff\xd8\xff\xe0\x00\x10JFIF',
+            "sample.png":  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR',
+            "sample.pdf":  b'%PDF-1.4\n%\xe2\xe3\xcf\xd3',
+            "sample.gz":   b'\x1f\x8b\x08\x00\x00\x00\x00\x00',
+            "sample.exe":  b'MZ\x90\x00\x03\x00\x00\x00',
+            "sample.zip":  b'PK\x03\x04\x14\x00\x00\x00',
+            "sample.mp3":  b'ID3\x03\x00\x00\x00\x00',
+            "sample.txt":  b'Hello, world!\nThis is plain text.\n',
+            "sample.bin":  bytes(range(16)),
+        }
+        with tempfile.TemporaryDirectory() as tmpdir:
+            for name, data in samples.items():
+                fp = os.path.join(tmpdir, name)
+                with open(fp, 'wb') as f:
+                    f.write(data)
+                print_report(identify_file(fp))
+# Run the CLI when this module is executed as the main module
+# (e.g. `python -m file_identifier.cli`; the relative import of .core needs package context).
+if __name__ == "__main__":
+    main()

file_identifier-0.1.0/file_identifier/core.py ADDED Viewed

@@ -0,0 +1,144 @@
+"""
+File Type Identifier using Magic Numbers
+Reads file headers (magic bytes) to identify file types - no extensions needed.
+"""
+import struct
+from pathlib import Path
+# Table of known file signatures. Each entry is a tuple of
+#   (leading bytes to match, human-readable type name, likely extension(s)).
+# identify_file() compares each signature against the start of the header in
+# list order and returns on the first match, so the order of entries matters.
+# Signatures shorter than 4 bytes are reported with "medium" confidence there,
+# longer ones with "high".
+MAGIC_SIGNATURES = [
+    # Images
+    (b'\xff\xd8\xff',                       "JPEG Image",           ".jpg"),
+    (b'\x89PNG\r\n\x1a\n',                  "PNG Image",            ".png"),
+    (b'GIF87a',                             "GIF Image (87a)",      ".gif"),
+    (b'GIF89a',                             "GIF Image (89a)",      ".gif"),
+    (b'BM',                                 "BMP Image",            ".bmp"),
+    (b'RIFF',                               "RIFF (WAV/AVI/WebP)",  ".riff"),
+    (b'\x00\x00\x01\x00',                   "ICO Icon",             ".ico"),
+    (b'\x49\x49\x2a\x00',                   "TIFF (little-endian)", ".tif"),
+    (b'\x4d\x4d\x00\x2a',                   "TIFF (big-endian)",    ".tif"),
+    # Documents
+    (b'%PDF',                               "PDF Document",         ".pdf"),
+    (b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1',  "MS Office (old)",      ".doc/.xls/.ppt"),
+    (b'PK\x03\x04',                         "ZIP / Office Open XML",".zip/.docx/.xlsx"),
+    # Audio / Video
+    (b'ID3',                                "MP3 Audio (ID3 tag)",  ".mp3"),
+    (b'\xff\xfb',                           "MP3 Audio",            ".mp3"),
+    (b'\xff\xf3',                           "MP3 Audio",            ".mp3"),
+    (b'fLaC',                               "FLAC Audio",           ".flac"),
+    (b'OggS',                               "OGG Container",        ".ogg"),
+    (b'\x1aE\xdf\xa3',                      "Matroska/WebM",        ".mkv/.webm"),
+    # NOTE(review): both MP4 entries hard-code the first four bytes (0x18 / 0x20);
+    # files whose header starts with a different value will not match - confirm intended.
+    (b'\x00\x00\x00\x18ftypmp4',            "MP4 Video",            ".mp4"),
+    (b'\x00\x00\x00\x20ftyp',              "MP4 Video",            ".mp4"),
+    # Archives
+    (b'\x1f\x8b',                           "GZIP Archive",         ".gz"),
+    (b'BZh',                                "BZIP2 Archive",        ".bz2"),
+    (b'\xfd7zXZ\x00',                       "XZ Archive",           ".xz"),
+    (b'7z\xbc\xaf\x27\x1c',                "7-Zip Archive",        ".7z"),
+    (b'Rar!\x1a\x07\x00',                  "RAR Archive (v4)",     ".rar"),
+    (b'Rar!\x1a\x07\x01\x00',             "RAR Archive (v5)",     ".rar"),
+    # Executables / Binary
+    (b'MZ',                                 "Windows Executable",   ".exe/.dll"),
+    (b'\x7fELF',                            "ELF Executable (Linux)",".elf"),
+    (b'\xca\xfe\xba\xbe',                   "Java Class / Mach-O",  ".class"),
+    (b'\xfe\xed\xfa\xce',                   "Mach-O 32-bit",        ".macho"),
+    (b'\xfe\xed\xfa\xcf',                   "Mach-O 64-bit",        ".macho"),
+    # Text / Code (heuristic, checked last)
+    (b'#!/',                                "Shell Script",         ".sh"),
+    (b'<?xml',                              "XML Document",         ".xml"),
+    (b'<?php',                              "PHP Script",           ".php"),
+    (b'<html',                              "HTML Document",        ".html"),
+    (b'<HTML',                              "HTML Document",        ".html"),
+    (b'{\n',                                "JSON (likely)",        ".json"),
+    (b'{\r\n',                              "JSON (likely)",        ".json"),
+]
+# The longest signature above is 11 bytes, so a 32-byte header covers every entry.
+MAX_HEADER = 32  # bytes to read from the file
+def read_header(filepath: str) -> bytes:
+    """Read the first MAX_HEADER bytes of a file."""
+    with open(filepath, 'rb') as f:
+        return f.read(MAX_HEADER)
+def identify_file(filepath: str) -> dict:
+    """
+    Identify a file's type by reading its magic bytes.
+    Returns a dict with:
+        path        - original filepath
+        hex_header  - hex dump of the first bytes
+        file_type   - human-readable type name
+        extension   - likely extension(s)
+        matched_sig - the raw signature that matched (hex)
+        confidence  - 'high' or 'low'
+    """
+    path = Path(filepath)
+    result = {
+        "path":        str(path),
+        "size_bytes":  path.stat().st_size if path.exists() else 0,
+        "file_type":   "Unknown",
+        "extension":   "?",
+        "hex_header":  "",
+        "matched_sig": "",
+        "confidence":  "low",
+    }
+    if not path.exists():
+        result["file_type"] = "File not found"
+        return result
+    if path.stat().st_size == 0:
+        result["file_type"] = "Empty file"
+        return result
+    header = read_header(filepath)
+    result["hex_header"] = " ".join(f"{b:02X}" for b in header)
+    for sig, name, ext in MAGIC_SIGNATURES:
+        if header[:len(sig)] == sig:
+            result["file_type"]   = name
+            result["extension"]   = ext
+            result["matched_sig"] = sig.hex(" ")
+            result["confidence"]  = "high" if len(sig) >= 4 else "medium"
+            return result
+    # Fallback: try to detect plain text (reject if null bytes present)
+    try:
+        decoded = header.decode('utf-8')
+        if '\x00' in decoded:
+            raise UnicodeDecodeError('utf-8', b'', 0, 1, 'null byte')
+        result["file_type"]  = "Plain Text (UTF-8)"
+        result["extension"]  = ".txt"
+        result["confidence"] = "medium"
+    except UnicodeDecodeError:
+        result["file_type"]  = "Unknown Binary"
+        result["extension"]  = ".bin"
+        result["confidence"] = "low"
+    return result
+def identify_many(filepaths: list[str]) -> list[dict]:
+    """Identify multiple files at once."""
+    return [identify_file(fp) for fp in filepaths]
+def print_report(result: dict) -> None:
+    """Pretty-print a single file identification result."""
+    print(f"\n{'='*55}")
+    print(f"  File    : {result['path']}")
+    print(f"  Type    : {result['file_type']}")
+    print(f"  Ext     : {result['extension']}")
+    print(f"  Size    : {result['size_bytes']:,} bytes")
+    print(f"  Confidence : {result['confidence']}")
+    print(f"  Header  : {result['hex_header']}")
+    if result['matched_sig']:
+        print(f"  Matched : {result['matched_sig']}")
+    print(f"{'='*55}")

file_identifier-0.1.0/file_identifier.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,12 @@
+Metadata-Version: 2.4
+Name: file-identifier
+Version: 0.1.0
+Summary: File Type Identifier using Magic Numbers
+Author-email: Author <author@example.com>
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Dynamic: license-file

file_identifier-0.1.0/file_identifier.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,11 @@
+LICENSE
+README
+pyproject.toml
+file_identifier/__init__.py
+file_identifier/cli.py
+file_identifier/core.py
+file_identifier.egg-info/PKG-INFO
+file_identifier.egg-info/SOURCES.txt
+file_identifier.egg-info/dependency_links.txt
+file_identifier.egg-info/entry_points.txt
+file_identifier.egg-info/top_level.txt

file_identifier-0.1.0/file_identifier.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

file_identifier-0.1.0/file_identifier.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ file-identifier = file_identifier.cli:main

file_identifier-0.1.0/file_identifier.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ file_identifier

file_identifier-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,23 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "file-identifier"
+version = "0.1.0"
+description = "File Type Identifier using Magic Numbers"
+readme = "README.md"
+requires-python = ">=3.8"
+authors = [
+  { name="Author", email="author@example.com" }
+]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+[project.scripts]
+file-identifier = "file_identifier.cli:main"

file_identifier-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0