PyPI - linkedin2md - Versions diffs - 0.2.2__tar.gz → 0.3.0__tar.gz - Mend

linkedin2md 0.2.2 (tar.gz) → 0.3.0 (tar.gz)

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (61) hide show

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.3.0] - 2025-01-20
+### Added
+- Extensible multilingual system: `BilingualText` → `MultilingualText` supporting N languages
+- `LanguageDetector.supported_languages` property for detector introspection
+- Proper logging module integration (replaces print statements)
+- Fallback chain support in `_get_text()` for flexible language resolution
+### Changed
+- Version now single-sourced from `pyproject.toml` via `importlib.metadata`
+- CLI errors now use structured logging to stderr
+- `MultilingualText` uses `**kwargs` for language flexibility while maintaining backward compatibility
+### Fixed
+- Version mismatch between `__init__.py` and `pyproject.toml`
 ## [0.2.0] - 2025-01-20
 ### Added
@@ -61,7 +77,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - SOLID architecture for extensibility
 - Security features (path traversal protection, URL sanitization, file size limits)
-[Unreleased]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.2.0...HEAD
+[Unreleased]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.3.0...HEAD
+[0.3.0]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.2.0...v0.3.0
 [0.2.0]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.1.3...v0.2.0
 [0.1.3]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.1.2...v0.1.3
 [0.1.2]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.1.1...v0.1.2

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: linkedin2md
-Version: 0.2.2
+Version: 0.3.0
 Summary: Convert LinkedIn data exports to Markdown
 Project-URL: Homepage, https://github.com/juanmanueldaza/linkedin2md
 Project-URL: Repository, https://github.com/juanmanueldaza/linkedin2md
@@ -22,6 +22,7 @@ Classifier: Topic :: Text Processing :: Markup :: Markdown
 Classifier: Topic :: Utilities
 Requires-Python: >=3.13
 Provides-Extra: dev
+Requires-Dist: pyright>=1.1.408; extra == 'dev'
 Requires-Dist: pytest>=9.0; extra == 'dev'
 Requires-Dist: ruff>=0.9; extra == 'dev'
 Description-Content-Type: text/markdown

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "linkedin2md"
-version = "0.2.2"
+version = "0.3.0"
 description = "Convert LinkedIn data exports to Markdown"
 readme = "README.md"
 license = { text = "GPL-2.0" }
@@ -26,7 +26,7 @@ dependencies = []
 linkedin2md = "linkedin2md.cli:main"
 [project.optional-dependencies]
-dev = ["pytest>=9.0", "ruff>=0.9"]
+dev = ["pytest>=9.0", "ruff>=0.9", "pyright>=1.1.408"]
 [project.urls]
 Homepage = "https://github.com/juanmanueldaza/linkedin2md"
@@ -49,8 +49,3 @@ select = ["E", "W", "F", "I", "B", "UP"]
 [tool.pytest.ini_options]
 testpaths = ["tests"]
-[dependency-groups]
-dev = [
-    "pyright>=1.1.408",
-]

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/__init__.py RENAMED Viewed

@@ -8,18 +8,30 @@ SOLID-compliant architecture:
 - D: Converter depends on abstractions, not concretions
 """
-__version__ = "0.1.0"
+import logging
+from importlib.metadata import PackageNotFoundError, version
+try:
+    __version__ = version("linkedin2md")
+except PackageNotFoundError:
+    __version__ = "0.0.0"  # Development fallback
+# Configure package logger (NullHandler = library best practice)
+logging.getLogger(__name__).addHandler(logging.NullHandler())
 # Main public API
-from linkedin2md.converter import LinkedInToMarkdownConverter, create_converter
+from linkedin2md.converter import (  # noqa: E402
+    LinkedInToMarkdownConverter,
+    create_converter,
+)
 # Backward compatibility - import old API
 # (These are deprecated but kept for compatibility)
-from linkedin2md.formatter import MarkdownFormatter
-from linkedin2md.parser import LinkedInExportParser
+from linkedin2md.formatter import MarkdownFormatter  # noqa: E402
+from linkedin2md.parser import LinkedInExportParser  # noqa: E402
 # Protocols for type hints and custom implementations
-from linkedin2md.protocols import (
+from linkedin2md.protocols import (  # noqa: E402
     BilingualText,
     DataExtractor,
     FormatterRegistry,
@@ -31,7 +43,7 @@ from linkedin2md.protocols import (
 )
 # Registries for extension
-from linkedin2md.registry import (
+from linkedin2md.registry import (  # noqa: E402
     get_formatter_registry,
     get_parser_registry,
     register_formatter,

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/cli.py RENAMED Viewed

@@ -4,34 +4,44 @@ Dependency Inversion: Uses factory function, doesn't create dependencies directl
 """
 import argparse
+import logging
 import sys
 from pathlib import Path
 from linkedin2md.converter import create_converter
+logger = logging.getLogger(__name__)
 # Maximum allowed file size in megabytes (500 MB)
 MAX_FILE_SIZE_MB = 500
 def main() -> int:
     """Main entry point."""
+    # Configure logging for CLI use
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(levelname)s: %(message)s",
+        stream=sys.stderr,
+    )
     args = _parse_args(sys.argv[1:])
     if not args.source.exists():
-        print(f"Error: File not found: {args.source}", file=sys.stderr)
+        logger.error("File not found: %s", args.source)
         return 1
     if not args.source.suffix.lower() == ".zip":
-        print(f"Error: Expected .zip file, got {args.source.suffix}", file=sys.stderr)
+        logger.error("Expected .zip file, got %s", args.source.suffix)
         return 1
     # Check file size to prevent resource exhaustion
     file_size_mb = args.source.stat().st_size / (1024 * 1024)
     if file_size_mb > MAX_FILE_SIZE_MB:
-        print(
-            f"Error: File too large ({file_size_mb:.1f} MB). "
-            f"Maximum allowed is {MAX_FILE_SIZE_MB} MB",
-            file=sys.stderr,
+        logger.error(
+            "File too large (%.1f MB). Maximum allowed is %d MB",
+            file_size_mb,
+            MAX_FILE_SIZE_MB,
         )
         return 1
@@ -40,9 +50,10 @@ def main() -> int:
         converter = create_converter(args.source, args.output)
         files = converter.convert(lang=args.lang)
     except Exception as e:
-        print(f"Error: {e}", file=sys.stderr)
+        logger.error("%s", e)
         return 1
+    # Success messages go to stdout (user-facing output)
     print(f"Created {len(files)} files in {args.output}/")
     for f in files:
         print(f"  - {f.name}")

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/converter.py RENAMED Viewed

@@ -5,6 +5,7 @@ Implements the Dependency Inversion Principle:
 - All dependencies are injected, not created internally
 """
+import logging
 from pathlib import Path
 from linkedin2md.protocols import (
@@ -14,6 +15,8 @@ from linkedin2md.protocols import (
     ParserRegistry,
 )
+logger = logging.getLogger(__name__)
 class LinkedInToMarkdownConverter:
     """Main orchestrator for LinkedIn to Markdown conversion.
@@ -74,7 +77,7 @@ class LinkedInToMarkdownConverter:
                 parsed[parser.section_key] = result
             except Exception as e:
                 # Log but don't fail on individual section errors
-                print(f"Warning: Failed to parse {parser.section_key}: {e}")
+                logger.warning("Failed to parse %s: %s", parser.section_key, e)
         return parsed
@@ -105,7 +108,7 @@ class LinkedInToMarkdownConverter:
                     path = self._writer.write(formatter.section_key, content)
                     files.append(path)
             except Exception as e:
-                print(f"Warning: Failed to format {formatter.section_key}: {e}")
+                logger.warning("Failed to format %s: %s", formatter.section_key, e)
         return files

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/base.py RENAMED Viewed

@@ -6,7 +6,10 @@ Provides common formatting functionality that section formatters can use.
 from abc import ABC, abstractmethod
 from typing import Any
-from linkedin2md.protocols import BilingualText, SectionFormatter
+from linkedin2md.protocols import MultilingualText, SectionFormatter
+# Backward compatibility alias
+BilingualText = MultilingualText
 class BaseFormatter(ABC, SectionFormatter):
@@ -31,16 +34,36 @@ class BaseFormatter(ABC, SectionFormatter):
     # Shared Utilities
     # ========================================================================
-    def _get_text(self, bilingual: BilingualText | dict | str | None, lang: str) -> str:
-        """Extract text in preferred language with fallback."""
-        if bilingual is None:
+    def _get_text(
+        self,
+        multilingual: MultilingualText | dict | str | None,
+        lang: str,
+        fallback_chain: list[str] | None = None,
+    ) -> str:
+        """Extract text in preferred language with fallback chain.
+        Args:
+            multilingual: Text container (MultilingualText, dict, str, or None)
+            lang: Preferred language code
+            fallback_chain: Languages to try if preferred not found
+                (default: ["en", "es"])
+        Returns:
+            Text in requested or fallback language
+        """
+        if multilingual is None:
             return ""
-        if isinstance(bilingual, str):
-            return bilingual
-        if isinstance(bilingual, BilingualText):
-            return bilingual.get(lang)
+        if isinstance(multilingual, str):
+            return multilingual
+        if isinstance(multilingual, MultilingualText):
+            return multilingual.get(lang, fallback_chain=fallback_chain or ["en", "es"])
         # Dict fallback for compatibility
-        return bilingual.get(lang) or bilingual.get("en") or bilingual.get("es") or ""
+        if lang in multilingual and multilingual[lang]:
+            return multilingual[lang]
+        for fb in fallback_chain or ["en", "es"]:
+            if fb in multilingual and multilingual[fb]:
+                return multilingual[fb]
+        return ""
     def _escape_pipe(self, text: str) -> str:
         """Escape pipe characters for Markdown tables."""

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/language.py RENAMED Viewed

@@ -5,13 +5,17 @@ Single Responsibility: Detect language of text.
 import re
-from linkedin2md.protocols import BilingualText, LanguageDetector
+from linkedin2md.protocols import LanguageDetector, MultilingualText
+# Backward compatibility alias
+BilingualText = MultilingualText
 class SpanishEnglishDetector(LanguageDetector):
     """Detect Spanish vs English text.
     Single Responsibility: Only handles language detection.
+    Extensible: Implement LanguageDetector protocol for other languages.
     """
     # Spanish language detection patterns
@@ -24,9 +28,14 @@ class SpanishEnglishDetector(LanguageDetector):
         r"[áéíóúñ¿¡]",  # Spanish characters
     ]
-    def __init__(self):
+    def __init__(self) -> None:
         self._regex = re.compile("|".join(self.SPANISH_PATTERNS), re.IGNORECASE)
+    @property
+    def supported_languages(self) -> list[str]:
+        """Return list of detectable language codes."""
+        return ["en", "es"]
     def detect(self, text: str) -> str:
         """Detect if text is Spanish or English."""
         if not text:
@@ -40,37 +49,39 @@ class SpanishEnglishDetector(LanguageDetector):
         return "en"
-class BilingualTextFactory:
-    """Factory for creating BilingualText objects.
+class MultilingualTextFactory:
+    """Factory for creating MultilingualText objects.
-    Single Responsibility: Create bilingual text with language detection.
+    Single Responsibility: Create multilingual text with language detection.
     Dependency Inversion: Depends on LanguageDetector protocol.
     """
     def __init__(self, detector: LanguageDetector):
         self._detector = detector
-    def create(self, text: str, lang: str | None = None) -> BilingualText:
-        """Create BilingualText with text in detected/specified language."""
+    def create(self, text: str, lang: str | None = None) -> MultilingualText:
+        """Create MultilingualText with text in detected/specified language."""
         if not text:
-            return BilingualText()
+            return MultilingualText()
         detected = lang or self._detector.detect(text)
+        return MultilingualText(**{detected: text})
-        if detected == "es":
-            return BilingualText(es=text)
-        return BilingualText(en=text)
+    def merge(self, *texts: MultilingualText) -> MultilingualText:
+        """Merge multiple MultilingualText objects.
-    def merge(self, *texts: BilingualText) -> BilingualText:
-        """Merge multiple BilingualText objects."""
-        en = ""
-        es = ""
+        First non-empty value for each language wins.
+        """
+        merged: dict[str, str] = {}
         for t in texts:
-            if t.en and not en:
-                en = t.en
-            if t.es and not es:
-                es = t.es
-        return BilingualText(en=en, es=es)
+            for lang in t.languages:
+                if lang not in merged:
+                    merged[lang] = t.get(lang)
+        return MultilingualText(**merged)
+# Backward compatibility alias
+BilingualTextFactory = MultilingualTextFactory
 # Default instances

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/base.py RENAMED Viewed

@@ -7,11 +7,15 @@ Dependency Inversion: Depends on LanguageDetector protocol.
 from abc import ABC, abstractmethod
 from linkedin2md.language import (
-    BilingualTextFactory,
+    MultilingualTextFactory,
     get_default_detector,
     get_default_factory,
 )
-from linkedin2md.protocols import BilingualText, LanguageDetector, SectionParser
+from linkedin2md.protocols import LanguageDetector, MultilingualText, SectionParser
+# Backward compatibility alias
+BilingualText = MultilingualText
+BilingualTextFactory = MultilingualTextFactory
 # Month names for date formatting
 MONTHS = [
@@ -144,10 +148,10 @@ def merge_bilingual_entries(
     key_fields: list[str],
     bilingual_fields: list[str],
 ) -> list[dict]:
-    """Merge duplicate entries with bilingual content.
+    """Merge duplicate entries with multilingual content.
-    Groups entries by matching key fields and merges bilingual text from
-    English and Spanish versions into complete BilingualText objects.
+    Groups entries by matching key fields and merges multilingual text from
+    different language versions into complete MultilingualText objects.
     """
     if not entries:
         return []
@@ -185,23 +189,21 @@ def _merge_bilingual_group(group: list[dict], bilingual_fields: list[str]) -> di
     return merged
-def _merge_bilingual_field(group: list[dict], field: str) -> BilingualText:
-    """Merge a bilingual field from multiple entries."""
-    en = ""
-    es = ""
+def _merge_bilingual_field(group: list[dict], field: str) -> MultilingualText:
+    """Merge a multilingual field from multiple entries."""
+    merged: dict[str, str] = {}
     for entry in group:
         value = entry.get(field)
         if not value:
             continue
-        if isinstance(value, BilingualText):
-            if value.en:
-                en = value.en
-            if value.es:
-                es = value.es
+        if isinstance(value, MultilingualText):
+            for lang in value.languages:
+                if lang not in merged:
+                    merged[lang] = value.get(lang)
-    return BilingualText(en=en, es=es)
+    return MultilingualText(**merged)
 def _merge_achievements(group: list[dict]) -> list[dict]:
@@ -215,8 +217,7 @@ def _merge_achievements(group: list[dict]) -> list[dict]:
     merged_achievements = []
     for i in range(max_len):
-        en = ""
-        es = ""
+        merged_text: dict[str, str] = {}
         for achievements in achievement_lists:
             if i >= len(achievements):
@@ -225,12 +226,11 @@ def _merge_achievements(group: list[dict]) -> list[dict]:
             achievement = achievements[i]
             text = achievement.get("text")
-            if isinstance(text, BilingualText):
-                if text.en:
-                    en = text.en
-                if text.es:
-                    es = text.es
+            if isinstance(text, MultilingualText):
+                for lang in text.languages:
+                    if lang not in merged_text:
+                        merged_text[lang] = text.get(lang)
-        merged_achievements.append({"text": BilingualText(en=en, es=es)})
+        merged_achievements.append({"text": MultilingualText(**merged_text)})
     return merged_achievements

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/protocols.py RENAMED Viewed

@@ -15,26 +15,82 @@ from typing import Any, Protocol, runtime_checkable
 # ============================================================================
-class BilingualText:
-    """Immutable bilingual text container."""
+class MultilingualText:
+    """Immutable multilingual text container.
-    __slots__ = ("en", "es")
+    Supports any number of languages via keyword arguments.
+    Backward compatible with BilingualText API (en/es properties).
+    """
+    __slots__ = ("_texts",)
+    def __init__(self, **langs: str):
+        """Create with language codes as kwargs.
+        Examples:
+            MultilingualText(en="Hello", es="Hola")
+            MultilingualText(en="Hi", es="Hola", fr="Salut")
+        """
+        object.__setattr__(self, "_texts", dict(langs))
-    def __init__(self, en: str = "", es: str = ""):
-        object.__setattr__(self, "en", en)
-        object.__setattr__(self, "es", es)
+    def __setattr__(self, name: str, value: object) -> None:
+        raise AttributeError("MultilingualText is immutable")
-    def __setattr__(self, name, value):
-        raise AttributeError("BilingualText is immutable")
+    @property
+    def en(self) -> str:
+        """Backward compatibility: get English text."""
+        return self._texts.get("en", "")
+    @property
+    def es(self) -> str:
+        """Backward compatibility: get Spanish text."""
+        return self._texts.get("es", "")
+    def get(
+        self,
+        lang: str,
+        fallback_chain: list[str] | None = None,
+        default: str = "",
+    ) -> str:
+        """Get text in specified language with fallback chain.
+        Args:
+            lang: Primary language code to retrieve
+            fallback_chain: Languages to try if primary not found
+                (default: ["en", "es"])
+            default: Value if no language found
+        Returns:
+            Text in requested or fallback language, or default
+        """
+        if lang in self._texts and self._texts[lang]:
+            return self._texts[lang]
+        for fb in fallback_chain or ["en", "es"]:
+            if fb in self._texts and self._texts[fb]:
+                return self._texts[fb]
+        return default
-    def get(self, lang: str, default: str = "") -> str:
-        """Get text in specified language with fallback."""
-        if lang == "en":
-            return self.en or self.es or default
-        return self.es or self.en or default
+    @property
+    def languages(self) -> list[str]:
+        """Return list of language codes with content."""
+        return [lang for lang, text in self._texts.items() if text]
     def __repr__(self) -> str:
-        return f"BilingualText(en={self.en!r}, es={self.es!r})"
+        return f"MultilingualText({self._texts!r})"
+    def __eq__(self, other: object) -> bool:
+        if isinstance(other, MultilingualText):
+            return self._texts == other._texts
+        return False
+    def __hash__(self) -> int:
+        return hash(tuple(sorted(self._texts.items())))
+# Backward compatibility alias
+BilingualText = MultilingualText
 # ============================================================================
@@ -48,7 +104,13 @@ class LanguageDetector(Protocol):
     @abstractmethod
     def detect(self, text: str) -> str:
-        """Detect language of text. Returns 'en' or 'es'."""
+        """Detect language of text. Returns ISO 639-1 code (e.g., 'en', 'es')."""
+        ...
+    @property
+    @abstractmethod
+    def supported_languages(self) -> list[str]:
+        """Return list of language codes this detector can identify."""
         ...

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_cli.py RENAMED Viewed

@@ -62,16 +62,15 @@ class TestParseArgs:
 class TestMain:
     """Tests for main entry point."""
-    def test_file_not_found(self, capsys):
+    def test_file_not_found(self, caplog):
         """Test error when file doesn't exist."""
         with patch("sys.argv", ["linkedin2md", "nonexistent.zip"]):
             result = main()
         assert result == 1
-        captured = capsys.readouterr()
-        assert "File not found" in captured.err
+        assert "File not found" in caplog.text
-    def test_not_a_zip_file(self, capsys):
+    def test_not_a_zip_file(self, caplog):
         """Test error when file is not a ZIP."""
         with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
             f.write(b"not a zip")
@@ -82,12 +81,11 @@ class TestMain:
                 result = main()
             assert result == 1
-            captured = capsys.readouterr()
-            assert "Expected .zip file" in captured.err
+            assert "Expected .zip file" in caplog.text
         finally:
             Path(temp_path).unlink()
-    def test_file_too_large(self, capsys):
+    def test_file_too_large(self, caplog):
         """Test error when file exceeds size limit."""
         with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as f:
             temp_path = f.name
@@ -104,8 +102,7 @@ class TestMain:
                             result = main()
             assert result == 1
-            captured = capsys.readouterr()
-            assert "File too large" in captured.err
+            assert "File too large" in caplog.text
         finally:
             Path(temp_path).unlink()
@@ -151,7 +148,7 @@ class TestMain:
             assert result == 0
-    def test_invalid_zip_file(self, capsys):
+    def test_invalid_zip_file(self, caplog):
         """Test error when ZIP file is corrupted."""
         with tempfile.TemporaryDirectory() as tmpdir:
             zip_path = Path(tmpdir) / "corrupt.zip"
@@ -161,8 +158,7 @@ class TestMain:
                 result = main()
             assert result == 1
-            captured = capsys.readouterr()
-            assert "Error" in captured.err
+            assert "Invalid" in caplog.text or "corrupted" in caplog.text
 class TestMaxFileSize:

{linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_solid.py RENAMED Viewed

@@ -41,7 +41,8 @@ class TestBilingualText:
     def test_immutable(self):
         text = BilingualText(en="Hello")
         try:
-            text.en = "Changed"
+            # Use setattr to bypass static type checking while testing runtime behavior
+            setattr(text, "en", "Changed")  # noqa: B010
             raise AssertionError("Should have raised AttributeError")
         except AttributeError:
             pass