linkedin2md 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/CHANGELOG.md +18 -1
  2. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/PKG-INFO +2 -1
  3. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/pyproject.toml +2 -7
  4. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/__init__.py +18 -6
  5. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/cli.py +18 -7
  6. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/converter.py +5 -2
  7. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/base.py +32 -9
  8. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/language.py +31 -20
  9. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/base.py +23 -23
  10. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/protocols.py +77 -15
  11. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_cli.py +8 -12
  12. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_solid.py +2 -1
  13. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  14. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  15. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  16. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/workflows/ci.yml +0 -0
  17. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/workflows/publish.yml +0 -0
  18. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.gitignore +0 -0
  19. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/CODE_OF_CONDUCT.md +0 -0
  20. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/CONTRIBUTING.md +0 -0
  21. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/LICENSE +0 -0
  22. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/README.md +0 -0
  23. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/__main__.py +0 -0
  24. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/extractor.py +0 -0
  25. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatter.py +0 -0
  26. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/__init__.py +0 -0
  27. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/activity.py +0 -0
  28. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/advertising.py +0 -0
  29. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/content.py +0 -0
  30. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/identity.py +0 -0
  31. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/jobs.py +0 -0
  32. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/learning.py +0 -0
  33. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/network.py +0 -0
  34. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/payments.py +0 -0
  35. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/professional.py +0 -0
  36. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/profile.py +0 -0
  37. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/recommendations.py +0 -0
  38. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/services.py +0 -0
  39. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parser.py +0 -0
  40. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/__init__.py +0 -0
  41. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/activity.py +0 -0
  42. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/advertising.py +0 -0
  43. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/content.py +0 -0
  44. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/identity.py +0 -0
  45. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/jobs.py +0 -0
  46. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/learning.py +0 -0
  47. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/network.py +0 -0
  48. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/payments.py +0 -0
  49. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/professional.py +0 -0
  50. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/profile.py +0 -0
  51. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/recommendations.py +0 -0
  52. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/services.py +0 -0
  53. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/registry.py +0 -0
  54. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/writer.py +0 -0
  55. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/__init__.py +0 -0
  56. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_e2e.py +0 -0
  57. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_formatter.py +0 -0
  58. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_formatters.py +0 -0
  59. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_parser.py +0 -0
  60. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_parsers.py +0 -0
  61. {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_security.py +0 -0
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.3.0] - 2025-01-20
11
+
12
+ ### Added
13
+ - Extensible multilingual system: `BilingualText` → `MultilingualText` supporting N languages
14
+ - `LanguageDetector.supported_languages` property for detector introspection
15
+ - Proper logging module integration (replaces print statements)
16
+ - Fallback chain support in `_get_text()` for flexible language resolution
17
+
18
+ ### Changed
19
+ - Version now single-sourced from `pyproject.toml` via `importlib.metadata`
20
+ - CLI errors now use structured logging to stderr
21
+ - `MultilingualText` uses `**kwargs` for language flexibility while maintaining backward compatibility
22
+
23
+ ### Fixed
24
+ - Version mismatch between `__init__.py` and `pyproject.toml`
25
+
10
26
  ## [0.2.0] - 2025-01-20
11
27
 
12
28
  ### Added
@@ -61,7 +77,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
61
77
  - SOLID architecture for extensibility
62
78
  - Security features (path traversal protection, URL sanitization, file size limits)
63
79
 
64
- [Unreleased]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.2.0...HEAD
80
+ [Unreleased]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.3.0...HEAD
81
+ [0.3.0]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.2.0...v0.3.0
65
82
  [0.2.0]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.1.3...v0.2.0
66
83
  [0.1.3]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.1.2...v0.1.3
67
84
  [0.1.2]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.1.1...v0.1.2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: linkedin2md
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: Convert LinkedIn data exports to Markdown
5
5
  Project-URL: Homepage, https://github.com/juanmanueldaza/linkedin2md
6
6
  Project-URL: Repository, https://github.com/juanmanueldaza/linkedin2md
@@ -22,6 +22,7 @@ Classifier: Topic :: Text Processing :: Markup :: Markdown
22
22
  Classifier: Topic :: Utilities
23
23
  Requires-Python: >=3.13
24
24
  Provides-Extra: dev
25
+ Requires-Dist: pyright>=1.1.408; extra == 'dev'
25
26
  Requires-Dist: pytest>=9.0; extra == 'dev'
26
27
  Requires-Dist: ruff>=0.9; extra == 'dev'
27
28
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "linkedin2md"
3
- version = "0.2.2"
3
+ version = "0.3.0"
4
4
  description = "Convert LinkedIn data exports to Markdown"
5
5
  readme = "README.md"
6
6
  license = { text = "GPL-2.0" }
@@ -26,7 +26,7 @@ dependencies = []
26
26
  linkedin2md = "linkedin2md.cli:main"
27
27
 
28
28
  [project.optional-dependencies]
29
- dev = ["pytest>=9.0", "ruff>=0.9"]
29
+ dev = ["pytest>=9.0", "ruff>=0.9", "pyright>=1.1.408"]
30
30
 
31
31
  [project.urls]
32
32
  Homepage = "https://github.com/juanmanueldaza/linkedin2md"
@@ -49,8 +49,3 @@ select = ["E", "W", "F", "I", "B", "UP"]
49
49
 
50
50
  [tool.pytest.ini_options]
51
51
  testpaths = ["tests"]
52
-
53
- [dependency-groups]
54
- dev = [
55
- "pyright>=1.1.408",
56
- ]
@@ -8,18 +8,30 @@ SOLID-compliant architecture:
8
8
  - D: Converter depends on abstractions, not concretions
9
9
  """
10
10
 
11
- __version__ = "0.1.0"
11
+ import logging
12
+ from importlib.metadata import PackageNotFoundError, version
13
+
14
+ try:
15
+ __version__ = version("linkedin2md")
16
+ except PackageNotFoundError:
17
+ __version__ = "0.0.0" # Development fallback
18
+
19
+ # Configure package logger (NullHandler = library best practice)
20
+ logging.getLogger(__name__).addHandler(logging.NullHandler())
12
21
 
13
22
  # Main public API
14
- from linkedin2md.converter import LinkedInToMarkdownConverter, create_converter
23
+ from linkedin2md.converter import ( # noqa: E402
24
+ LinkedInToMarkdownConverter,
25
+ create_converter,
26
+ )
15
27
 
16
28
  # Backward compatibility - import old API
17
29
  # (These are deprecated but kept for compatibility)
18
- from linkedin2md.formatter import MarkdownFormatter
19
- from linkedin2md.parser import LinkedInExportParser
30
+ from linkedin2md.formatter import MarkdownFormatter # noqa: E402
31
+ from linkedin2md.parser import LinkedInExportParser # noqa: E402
20
32
 
21
33
  # Protocols for type hints and custom implementations
22
- from linkedin2md.protocols import (
34
+ from linkedin2md.protocols import ( # noqa: E402
23
35
  BilingualText,
24
36
  DataExtractor,
25
37
  FormatterRegistry,
@@ -31,7 +43,7 @@ from linkedin2md.protocols import (
31
43
  )
32
44
 
33
45
  # Registries for extension
34
- from linkedin2md.registry import (
46
+ from linkedin2md.registry import ( # noqa: E402
35
47
  get_formatter_registry,
36
48
  get_parser_registry,
37
49
  register_formatter,
@@ -4,34 +4,44 @@ Dependency Inversion: Uses factory function, doesn't create dependencies directl
4
4
  """
5
5
 
6
6
  import argparse
7
+ import logging
7
8
  import sys
8
9
  from pathlib import Path
9
10
 
10
11
  from linkedin2md.converter import create_converter
11
12
 
13
+ logger = logging.getLogger(__name__)
14
+
12
15
  # Maximum allowed file size in megabytes (500 MB)
13
16
  MAX_FILE_SIZE_MB = 500
14
17
 
15
18
 
16
19
  def main() -> int:
17
20
  """Main entry point."""
21
+ # Configure logging for CLI use
22
+ logging.basicConfig(
23
+ level=logging.INFO,
24
+ format="%(levelname)s: %(message)s",
25
+ stream=sys.stderr,
26
+ )
27
+
18
28
  args = _parse_args(sys.argv[1:])
19
29
 
20
30
  if not args.source.exists():
21
- print(f"Error: File not found: {args.source}", file=sys.stderr)
31
+ logger.error("File not found: %s", args.source)
22
32
  return 1
23
33
 
24
34
  if not args.source.suffix.lower() == ".zip":
25
- print(f"Error: Expected .zip file, got {args.source.suffix}", file=sys.stderr)
35
+ logger.error("Expected .zip file, got %s", args.source.suffix)
26
36
  return 1
27
37
 
28
38
  # Check file size to prevent resource exhaustion
29
39
  file_size_mb = args.source.stat().st_size / (1024 * 1024)
30
40
  if file_size_mb > MAX_FILE_SIZE_MB:
31
- print(
32
- f"Error: File too large ({file_size_mb:.1f} MB). "
33
- f"Maximum allowed is {MAX_FILE_SIZE_MB} MB",
34
- file=sys.stderr,
41
+ logger.error(
42
+ "File too large (%.1f MB). Maximum allowed is %d MB",
43
+ file_size_mb,
44
+ MAX_FILE_SIZE_MB,
35
45
  )
36
46
  return 1
37
47
 
@@ -40,9 +50,10 @@ def main() -> int:
40
50
  converter = create_converter(args.source, args.output)
41
51
  files = converter.convert(lang=args.lang)
42
52
  except Exception as e:
43
- print(f"Error: {e}", file=sys.stderr)
53
+ logger.error("%s", e)
44
54
  return 1
45
55
 
56
+ # Success messages go to stdout (user-facing output)
46
57
  print(f"Created {len(files)} files in {args.output}/")
47
58
  for f in files:
48
59
  print(f" - {f.name}")
@@ -5,6 +5,7 @@ Implements the Dependency Inversion Principle:
5
5
  - All dependencies are injected, not created internally
6
6
  """
7
7
 
8
+ import logging
8
9
  from pathlib import Path
9
10
 
10
11
  from linkedin2md.protocols import (
@@ -14,6 +15,8 @@ from linkedin2md.protocols import (
14
15
  ParserRegistry,
15
16
  )
16
17
 
18
+ logger = logging.getLogger(__name__)
19
+
17
20
 
18
21
  class LinkedInToMarkdownConverter:
19
22
  """Main orchestrator for LinkedIn to Markdown conversion.
@@ -74,7 +77,7 @@ class LinkedInToMarkdownConverter:
74
77
  parsed[parser.section_key] = result
75
78
  except Exception as e:
76
79
  # Log but don't fail on individual section errors
77
- print(f"Warning: Failed to parse {parser.section_key}: {e}")
80
+ logger.warning("Failed to parse %s: %s", parser.section_key, e)
78
81
 
79
82
  return parsed
80
83
 
@@ -105,7 +108,7 @@ class LinkedInToMarkdownConverter:
105
108
  path = self._writer.write(formatter.section_key, content)
106
109
  files.append(path)
107
110
  except Exception as e:
108
- print(f"Warning: Failed to format {formatter.section_key}: {e}")
111
+ logger.warning("Failed to format %s: %s", formatter.section_key, e)
109
112
 
110
113
  return files
111
114
 
@@ -6,7 +6,10 @@ Provides common formatting functionality that section formatters can use.
6
6
  from abc import ABC, abstractmethod
7
7
  from typing import Any
8
8
 
9
- from linkedin2md.protocols import BilingualText, SectionFormatter
9
+ from linkedin2md.protocols import MultilingualText, SectionFormatter
10
+
11
+ # Backward compatibility alias
12
+ BilingualText = MultilingualText
10
13
 
11
14
 
12
15
  class BaseFormatter(ABC, SectionFormatter):
@@ -31,16 +34,36 @@ class BaseFormatter(ABC, SectionFormatter):
31
34
  # Shared Utilities
32
35
  # ========================================================================
33
36
 
34
- def _get_text(self, bilingual: BilingualText | dict | str | None, lang: str) -> str:
35
- """Extract text in preferred language with fallback."""
36
- if bilingual is None:
37
+ def _get_text(
38
+ self,
39
+ multilingual: MultilingualText | dict | str | None,
40
+ lang: str,
41
+ fallback_chain: list[str] | None = None,
42
+ ) -> str:
43
+ """Extract text in preferred language with fallback chain.
44
+
45
+ Args:
46
+ multilingual: Text container (MultilingualText, dict, str, or None)
47
+ lang: Preferred language code
48
+ fallback_chain: Languages to try if preferred not found
49
+ (default: ["en", "es"])
50
+
51
+ Returns:
52
+ Text in requested or fallback language
53
+ """
54
+ if multilingual is None:
37
55
  return ""
38
- if isinstance(bilingual, str):
39
- return bilingual
40
- if isinstance(bilingual, BilingualText):
41
- return bilingual.get(lang)
56
+ if isinstance(multilingual, str):
57
+ return multilingual
58
+ if isinstance(multilingual, MultilingualText):
59
+ return multilingual.get(lang, fallback_chain=fallback_chain or ["en", "es"])
42
60
  # Dict fallback for compatibility
43
- return bilingual.get(lang) or bilingual.get("en") or bilingual.get("es") or ""
61
+ if lang in multilingual and multilingual[lang]:
62
+ return multilingual[lang]
63
+ for fb in fallback_chain or ["en", "es"]:
64
+ if fb in multilingual and multilingual[fb]:
65
+ return multilingual[fb]
66
+ return ""
44
67
 
45
68
  def _escape_pipe(self, text: str) -> str:
46
69
  """Escape pipe characters for Markdown tables."""
@@ -5,13 +5,17 @@ Single Responsibility: Detect language of text.
5
5
 
6
6
  import re
7
7
 
8
- from linkedin2md.protocols import BilingualText, LanguageDetector
8
+ from linkedin2md.protocols import LanguageDetector, MultilingualText
9
+
10
+ # Backward compatibility alias
11
+ BilingualText = MultilingualText
9
12
 
10
13
 
11
14
  class SpanishEnglishDetector(LanguageDetector):
12
15
  """Detect Spanish vs English text.
13
16
 
14
17
  Single Responsibility: Only handles language detection.
18
+ Extensible: Implement LanguageDetector protocol for other languages.
15
19
  """
16
20
 
17
21
  # Spanish language detection patterns
@@ -24,9 +28,14 @@ class SpanishEnglishDetector(LanguageDetector):
24
28
  r"[áéíóúñ¿¡]", # Spanish characters
25
29
  ]
26
30
 
27
- def __init__(self):
31
+ def __init__(self) -> None:
28
32
  self._regex = re.compile("|".join(self.SPANISH_PATTERNS), re.IGNORECASE)
29
33
 
34
+ @property
35
+ def supported_languages(self) -> list[str]:
36
+ """Return list of detectable language codes."""
37
+ return ["en", "es"]
38
+
30
39
  def detect(self, text: str) -> str:
31
40
  """Detect if text is Spanish or English."""
32
41
  if not text:
@@ -40,37 +49,39 @@ class SpanishEnglishDetector(LanguageDetector):
40
49
  return "en"
41
50
 
42
51
 
43
- class BilingualTextFactory:
44
- """Factory for creating BilingualText objects.
52
+ class MultilingualTextFactory:
53
+ """Factory for creating MultilingualText objects.
45
54
 
46
- Single Responsibility: Create bilingual text with language detection.
55
+ Single Responsibility: Create multilingual text with language detection.
47
56
  Dependency Inversion: Depends on LanguageDetector protocol.
48
57
  """
49
58
 
50
59
  def __init__(self, detector: LanguageDetector):
51
60
  self._detector = detector
52
61
 
53
- def create(self, text: str, lang: str | None = None) -> BilingualText:
54
- """Create BilingualText with text in detected/specified language."""
62
+ def create(self, text: str, lang: str | None = None) -> MultilingualText:
63
+ """Create MultilingualText with text in detected/specified language."""
55
64
  if not text:
56
- return BilingualText()
65
+ return MultilingualText()
57
66
 
58
67
  detected = lang or self._detector.detect(text)
68
+ return MultilingualText(**{detected: text})
59
69
 
60
- if detected == "es":
61
- return BilingualText(es=text)
62
- return BilingualText(en=text)
70
+ def merge(self, *texts: MultilingualText) -> MultilingualText:
71
+ """Merge multiple MultilingualText objects.
63
72
 
64
- def merge(self, *texts: BilingualText) -> BilingualText:
65
- """Merge multiple BilingualText objects."""
66
- en = ""
67
- es = ""
73
+ First non-empty value for each language wins.
74
+ """
75
+ merged: dict[str, str] = {}
68
76
  for t in texts:
69
- if t.en and not en:
70
- en = t.en
71
- if t.es and not es:
72
- es = t.es
73
- return BilingualText(en=en, es=es)
77
+ for lang in t.languages:
78
+ if lang not in merged:
79
+ merged[lang] = t.get(lang)
80
+ return MultilingualText(**merged)
81
+
82
+
83
+ # Backward compatibility alias
84
+ BilingualTextFactory = MultilingualTextFactory
74
85
 
75
86
 
76
87
  # Default instances
@@ -7,11 +7,15 @@ Dependency Inversion: Depends on LanguageDetector protocol.
7
7
  from abc import ABC, abstractmethod
8
8
 
9
9
  from linkedin2md.language import (
10
- BilingualTextFactory,
10
+ MultilingualTextFactory,
11
11
  get_default_detector,
12
12
  get_default_factory,
13
13
  )
14
- from linkedin2md.protocols import BilingualText, LanguageDetector, SectionParser
14
+ from linkedin2md.protocols import LanguageDetector, MultilingualText, SectionParser
15
+
16
+ # Backward compatibility alias
17
+ BilingualText = MultilingualText
18
+ BilingualTextFactory = MultilingualTextFactory
15
19
 
16
20
  # Month names for date formatting
17
21
  MONTHS = [
@@ -144,10 +148,10 @@ def merge_bilingual_entries(
144
148
  key_fields: list[str],
145
149
  bilingual_fields: list[str],
146
150
  ) -> list[dict]:
147
- """Merge duplicate entries with bilingual content.
151
+ """Merge duplicate entries with multilingual content.
148
152
 
149
- Groups entries by matching key fields and merges bilingual text from
150
- English and Spanish versions into complete BilingualText objects.
153
+ Groups entries by matching key fields and merges multilingual text from
154
+ different language versions into complete MultilingualText objects.
151
155
  """
152
156
  if not entries:
153
157
  return []
@@ -185,23 +189,21 @@ def _merge_bilingual_group(group: list[dict], bilingual_fields: list[str]) -> di
185
189
  return merged
186
190
 
187
191
 
188
- def _merge_bilingual_field(group: list[dict], field: str) -> BilingualText:
189
- """Merge a bilingual field from multiple entries."""
190
- en = ""
191
- es = ""
192
+ def _merge_bilingual_field(group: list[dict], field: str) -> MultilingualText:
193
+ """Merge a multilingual field from multiple entries."""
194
+ merged: dict[str, str] = {}
192
195
 
193
196
  for entry in group:
194
197
  value = entry.get(field)
195
198
  if not value:
196
199
  continue
197
200
 
198
- if isinstance(value, BilingualText):
199
- if value.en:
200
- en = value.en
201
- if value.es:
202
- es = value.es
201
+ if isinstance(value, MultilingualText):
202
+ for lang in value.languages:
203
+ if lang not in merged:
204
+ merged[lang] = value.get(lang)
203
205
 
204
- return BilingualText(en=en, es=es)
206
+ return MultilingualText(**merged)
205
207
 
206
208
 
207
209
  def _merge_achievements(group: list[dict]) -> list[dict]:
@@ -215,8 +217,7 @@ def _merge_achievements(group: list[dict]) -> list[dict]:
215
217
  merged_achievements = []
216
218
 
217
219
  for i in range(max_len):
218
- en = ""
219
- es = ""
220
+ merged_text: dict[str, str] = {}
220
221
 
221
222
  for achievements in achievement_lists:
222
223
  if i >= len(achievements):
@@ -225,12 +226,11 @@ def _merge_achievements(group: list[dict]) -> list[dict]:
225
226
  achievement = achievements[i]
226
227
  text = achievement.get("text")
227
228
 
228
- if isinstance(text, BilingualText):
229
- if text.en:
230
- en = text.en
231
- if text.es:
232
- es = text.es
229
+ if isinstance(text, MultilingualText):
230
+ for lang in text.languages:
231
+ if lang not in merged_text:
232
+ merged_text[lang] = text.get(lang)
233
233
 
234
- merged_achievements.append({"text": BilingualText(en=en, es=es)})
234
+ merged_achievements.append({"text": MultilingualText(**merged_text)})
235
235
 
236
236
  return merged_achievements
@@ -15,26 +15,82 @@ from typing import Any, Protocol, runtime_checkable
15
15
  # ============================================================================
16
16
 
17
17
 
18
- class BilingualText:
19
- """Immutable bilingual text container."""
18
+ class MultilingualText:
19
+ """Immutable multilingual text container.
20
20
 
21
- __slots__ = ("en", "es")
21
+ Supports any number of languages via keyword arguments.
22
+ Backward compatible with BilingualText API (en/es properties).
23
+ """
24
+
25
+ __slots__ = ("_texts",)
26
+
27
+ def __init__(self, **langs: str):
28
+ """Create with language codes as kwargs.
29
+
30
+ Examples:
31
+ MultilingualText(en="Hello", es="Hola")
32
+ MultilingualText(en="Hi", es="Hola", fr="Salut")
33
+ """
34
+ object.__setattr__(self, "_texts", dict(langs))
22
35
 
23
- def __init__(self, en: str = "", es: str = ""):
24
- object.__setattr__(self, "en", en)
25
- object.__setattr__(self, "es", es)
36
+ def __setattr__(self, name: str, value: object) -> None:
37
+ raise AttributeError("MultilingualText is immutable")
26
38
 
27
- def __setattr__(self, name, value):
28
- raise AttributeError("BilingualText is immutable")
39
+ @property
40
+ def en(self) -> str:
41
+ """Backward compatibility: get English text."""
42
+ return self._texts.get("en", "")
43
+
44
+ @property
45
+ def es(self) -> str:
46
+ """Backward compatibility: get Spanish text."""
47
+ return self._texts.get("es", "")
48
+
49
+ def get(
50
+ self,
51
+ lang: str,
52
+ fallback_chain: list[str] | None = None,
53
+ default: str = "",
54
+ ) -> str:
55
+ """Get text in specified language with fallback chain.
56
+
57
+ Args:
58
+ lang: Primary language code to retrieve
59
+ fallback_chain: Languages to try if primary not found
60
+ (default: ["en", "es"])
61
+ default: Value if no language found
62
+
63
+ Returns:
64
+ Text in requested or fallback language, or default
65
+ """
66
+ if lang in self._texts and self._texts[lang]:
67
+ return self._texts[lang]
68
+
69
+ for fb in fallback_chain or ["en", "es"]:
70
+ if fb in self._texts and self._texts[fb]:
71
+ return self._texts[fb]
72
+
73
+ return default
29
74
 
30
- def get(self, lang: str, default: str = "") -> str:
31
- """Get text in specified language with fallback."""
32
- if lang == "en":
33
- return self.en or self.es or default
34
- return self.es or self.en or default
75
+ @property
76
+ def languages(self) -> list[str]:
77
+ """Return list of language codes with content."""
78
+ return [lang for lang, text in self._texts.items() if text]
35
79
 
36
80
  def __repr__(self) -> str:
37
- return f"BilingualText(en={self.en!r}, es={self.es!r})"
81
+ return f"MultilingualText({self._texts!r})"
82
+
83
+ def __eq__(self, other: object) -> bool:
84
+ if isinstance(other, MultilingualText):
85
+ return self._texts == other._texts
86
+ return False
87
+
88
+ def __hash__(self) -> int:
89
+ return hash(tuple(sorted(self._texts.items())))
90
+
91
+
92
+ # Backward compatibility alias
93
+ BilingualText = MultilingualText
38
94
 
39
95
 
40
96
  # ============================================================================
@@ -48,7 +104,13 @@ class LanguageDetector(Protocol):
48
104
 
49
105
  @abstractmethod
50
106
  def detect(self, text: str) -> str:
51
- """Detect language of text. Returns 'en' or 'es'."""
107
+ """Detect language of text. Returns ISO 639-1 code (e.g., 'en', 'es')."""
108
+ ...
109
+
110
+ @property
111
+ @abstractmethod
112
+ def supported_languages(self) -> list[str]:
113
+ """Return list of language codes this detector can identify."""
52
114
  ...
53
115
 
54
116
 
@@ -62,16 +62,15 @@ class TestParseArgs:
62
62
  class TestMain:
63
63
  """Tests for main entry point."""
64
64
 
65
- def test_file_not_found(self, capsys):
65
+ def test_file_not_found(self, caplog):
66
66
  """Test error when file doesn't exist."""
67
67
  with patch("sys.argv", ["linkedin2md", "nonexistent.zip"]):
68
68
  result = main()
69
69
 
70
70
  assert result == 1
71
- captured = capsys.readouterr()
72
- assert "File not found" in captured.err
71
+ assert "File not found" in caplog.text
73
72
 
74
- def test_not_a_zip_file(self, capsys):
73
+ def test_not_a_zip_file(self, caplog):
75
74
  """Test error when file is not a ZIP."""
76
75
  with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
77
76
  f.write(b"not a zip")
@@ -82,12 +81,11 @@ class TestMain:
82
81
  result = main()
83
82
 
84
83
  assert result == 1
85
- captured = capsys.readouterr()
86
- assert "Expected .zip file" in captured.err
84
+ assert "Expected .zip file" in caplog.text
87
85
  finally:
88
86
  Path(temp_path).unlink()
89
87
 
90
- def test_file_too_large(self, capsys):
88
+ def test_file_too_large(self, caplog):
91
89
  """Test error when file exceeds size limit."""
92
90
  with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as f:
93
91
  temp_path = f.name
@@ -104,8 +102,7 @@ class TestMain:
104
102
  result = main()
105
103
 
106
104
  assert result == 1
107
- captured = capsys.readouterr()
108
- assert "File too large" in captured.err
105
+ assert "File too large" in caplog.text
109
106
  finally:
110
107
  Path(temp_path).unlink()
111
108
 
@@ -151,7 +148,7 @@ class TestMain:
151
148
 
152
149
  assert result == 0
153
150
 
154
- def test_invalid_zip_file(self, capsys):
151
+ def test_invalid_zip_file(self, caplog):
155
152
  """Test error when ZIP file is corrupted."""
156
153
  with tempfile.TemporaryDirectory() as tmpdir:
157
154
  zip_path = Path(tmpdir) / "corrupt.zip"
@@ -161,8 +158,7 @@ class TestMain:
161
158
  result = main()
162
159
 
163
160
  assert result == 1
164
- captured = capsys.readouterr()
165
- assert "Error" in captured.err
161
+ assert "Invalid" in caplog.text or "corrupted" in caplog.text
166
162
 
167
163
 
168
164
  class TestMaxFileSize:
@@ -41,7 +41,8 @@ class TestBilingualText:
41
41
  def test_immutable(self):
42
42
  text = BilingualText(en="Hello")
43
43
  try:
44
- text.en = "Changed"
44
+ # Use setattr to bypass static type checking while testing runtime behavior
45
+ setattr(text, "en", "Changed") # noqa: B010
45
46
  raise AssertionError("Should have raised AttributeError")
46
47
  except AttributeError:
47
48
  pass
File without changes
File without changes
File without changes
File without changes