linkedin2md 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/CHANGELOG.md +18 -1
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/PKG-INFO +2 -1
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/pyproject.toml +2 -7
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/__init__.py +18 -6
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/cli.py +18 -7
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/converter.py +5 -2
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/base.py +32 -9
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/language.py +31 -20
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/base.py +23 -23
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/protocols.py +77 -15
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_cli.py +8 -12
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_solid.py +2 -1
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/workflows/ci.yml +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.github/workflows/publish.yml +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/.gitignore +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/CODE_OF_CONDUCT.md +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/CONTRIBUTING.md +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/LICENSE +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/README.md +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/__main__.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/extractor.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatter.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/__init__.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/activity.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/advertising.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/content.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/identity.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/jobs.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/learning.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/network.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/payments.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/professional.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/profile.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/recommendations.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/formatters/services.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parser.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/__init__.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/activity.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/advertising.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/content.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/identity.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/jobs.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/learning.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/network.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/payments.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/professional.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/profile.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/recommendations.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/parsers/services.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/registry.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/src/linkedin2md/writer.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/__init__.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_e2e.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_formatter.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_formatters.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_parser.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_parsers.py +0 -0
- {linkedin2md-0.2.2 → linkedin2md-0.3.0}/tests/test_security.py +0 -0
|
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.3.0] - 2025-01-20
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- Extensible multilingual system: `BilingualText` → `MultilingualText` supporting N languages
|
|
14
|
+
- `LanguageDetector.supported_languages` property for detector introspection
|
|
15
|
+
- Proper logging module integration (replaces print statements)
|
|
16
|
+
- Fallback chain support in `_get_text()` for flexible language resolution
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
- Version now single-sourced from `pyproject.toml` via `importlib.metadata`
|
|
20
|
+
- CLI errors now use structured logging to stderr
|
|
21
|
+
- `MultilingualText` uses `**kwargs` for language flexibility while maintaining backward compatibility
|
|
22
|
+
|
|
23
|
+
### Fixed
|
|
24
|
+
- Version mismatch between `__init__.py` and `pyproject.toml`
|
|
25
|
+
|
|
10
26
|
## [0.2.0] - 2025-01-20
|
|
11
27
|
|
|
12
28
|
### Added
|
|
@@ -61,7 +77,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
61
77
|
- SOLID architecture for extensibility
|
|
62
78
|
- Security features (path traversal protection, URL sanitization, file size limits)
|
|
63
79
|
|
|
64
|
-
[Unreleased]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.
|
|
80
|
+
[Unreleased]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.3.0...HEAD
|
|
81
|
+
[0.3.0]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.2.0...v0.3.0
|
|
65
82
|
[0.2.0]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.1.3...v0.2.0
|
|
66
83
|
[0.1.3]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.1.2...v0.1.3
|
|
67
84
|
[0.1.2]: https://github.com/juanmanueldaza/linkedin2md/compare/v0.1.1...v0.1.2
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: linkedin2md
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Convert LinkedIn data exports to Markdown
|
|
5
5
|
Project-URL: Homepage, https://github.com/juanmanueldaza/linkedin2md
|
|
6
6
|
Project-URL: Repository, https://github.com/juanmanueldaza/linkedin2md
|
|
@@ -22,6 +22,7 @@ Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
|
22
22
|
Classifier: Topic :: Utilities
|
|
23
23
|
Requires-Python: >=3.13
|
|
24
24
|
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pyright>=1.1.408; extra == 'dev'
|
|
25
26
|
Requires-Dist: pytest>=9.0; extra == 'dev'
|
|
26
27
|
Requires-Dist: ruff>=0.9; extra == 'dev'
|
|
27
28
|
Description-Content-Type: text/markdown
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "linkedin2md"
|
|
3
|
-
version = "0.2.2"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
description = "Convert LinkedIn data exports to Markdown"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { text = "GPL-2.0" }
|
|
@@ -26,7 +26,7 @@ dependencies = []
|
|
|
26
26
|
linkedin2md = "linkedin2md.cli:main"
|
|
27
27
|
|
|
28
28
|
[project.optional-dependencies]
|
|
29
|
-
dev = ["pytest>=9.0", "ruff>=0.9"]
|
|
29
|
+
dev = ["pytest>=9.0", "ruff>=0.9", "pyright>=1.1.408"]
|
|
30
30
|
|
|
31
31
|
[project.urls]
|
|
32
32
|
Homepage = "https://github.com/juanmanueldaza/linkedin2md"
|
|
@@ -49,8 +49,3 @@ select = ["E", "W", "F", "I", "B", "UP"]
|
|
|
49
49
|
|
|
50
50
|
[tool.pytest.ini_options]
|
|
51
51
|
testpaths = ["tests"]
|
|
52
|
-
|
|
53
|
-
[dependency-groups]
|
|
54
|
-
dev = [
|
|
55
|
-
"pyright>=1.1.408",
|
|
56
|
-
]
|
|
@@ -8,18 +8,30 @@ SOLID-compliant architecture:
|
|
|
8
8
|
- D: Converter depends on abstractions, not concretions
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
import logging
|
|
12
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
__version__ = version("linkedin2md")
|
|
16
|
+
except PackageNotFoundError:
|
|
17
|
+
__version__ = "0.0.0" # Development fallback
|
|
18
|
+
|
|
19
|
+
# Configure package logger (NullHandler = library best practice)
|
|
20
|
+
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
|
12
21
|
|
|
13
22
|
# Main public API
|
|
14
|
-
from linkedin2md.converter import
|
|
23
|
+
from linkedin2md.converter import ( # noqa: E402
|
|
24
|
+
LinkedInToMarkdownConverter,
|
|
25
|
+
create_converter,
|
|
26
|
+
)
|
|
15
27
|
|
|
16
28
|
# Backward compatibility - import old API
|
|
17
29
|
# (These are deprecated but kept for compatibility)
|
|
18
|
-
from linkedin2md.formatter import MarkdownFormatter
|
|
19
|
-
from linkedin2md.parser import LinkedInExportParser
|
|
30
|
+
from linkedin2md.formatter import MarkdownFormatter # noqa: E402
|
|
31
|
+
from linkedin2md.parser import LinkedInExportParser # noqa: E402
|
|
20
32
|
|
|
21
33
|
# Protocols for type hints and custom implementations
|
|
22
|
-
from linkedin2md.protocols import (
|
|
34
|
+
from linkedin2md.protocols import ( # noqa: E402
|
|
23
35
|
BilingualText,
|
|
24
36
|
DataExtractor,
|
|
25
37
|
FormatterRegistry,
|
|
@@ -31,7 +43,7 @@ from linkedin2md.protocols import (
|
|
|
31
43
|
)
|
|
32
44
|
|
|
33
45
|
# Registries for extension
|
|
34
|
-
from linkedin2md.registry import (
|
|
46
|
+
from linkedin2md.registry import ( # noqa: E402
|
|
35
47
|
get_formatter_registry,
|
|
36
48
|
get_parser_registry,
|
|
37
49
|
register_formatter,
|
|
@@ -4,34 +4,44 @@ Dependency Inversion: Uses factory function, doesn't create dependencies directl
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import argparse
|
|
7
|
+
import logging
|
|
7
8
|
import sys
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
|
|
10
11
|
from linkedin2md.converter import create_converter
|
|
11
12
|
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
12
15
|
# Maximum allowed file size in megabytes (500 MB)
|
|
13
16
|
MAX_FILE_SIZE_MB = 500
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
def main() -> int:
|
|
17
20
|
"""Main entry point."""
|
|
21
|
+
# Configure logging for CLI use
|
|
22
|
+
logging.basicConfig(
|
|
23
|
+
level=logging.INFO,
|
|
24
|
+
format="%(levelname)s: %(message)s",
|
|
25
|
+
stream=sys.stderr,
|
|
26
|
+
)
|
|
27
|
+
|
|
18
28
|
args = _parse_args(sys.argv[1:])
|
|
19
29
|
|
|
20
30
|
if not args.source.exists():
|
|
21
|
-
|
|
31
|
+
logger.error("File not found: %s", args.source)
|
|
22
32
|
return 1
|
|
23
33
|
|
|
24
34
|
if not args.source.suffix.lower() == ".zip":
|
|
25
|
-
|
|
35
|
+
logger.error("Expected .zip file, got %s", args.source.suffix)
|
|
26
36
|
return 1
|
|
27
37
|
|
|
28
38
|
# Check file size to prevent resource exhaustion
|
|
29
39
|
file_size_mb = args.source.stat().st_size / (1024 * 1024)
|
|
30
40
|
if file_size_mb > MAX_FILE_SIZE_MB:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
41
|
+
logger.error(
|
|
42
|
+
"File too large (%.1f MB). Maximum allowed is %d MB",
|
|
43
|
+
file_size_mb,
|
|
44
|
+
MAX_FILE_SIZE_MB,
|
|
35
45
|
)
|
|
36
46
|
return 1
|
|
37
47
|
|
|
@@ -40,9 +50,10 @@ def main() -> int:
|
|
|
40
50
|
converter = create_converter(args.source, args.output)
|
|
41
51
|
files = converter.convert(lang=args.lang)
|
|
42
52
|
except Exception as e:
|
|
43
|
-
|
|
53
|
+
logger.error("%s", e)
|
|
44
54
|
return 1
|
|
45
55
|
|
|
56
|
+
# Success messages go to stdout (user-facing output)
|
|
46
57
|
print(f"Created {len(files)} files in {args.output}/")
|
|
47
58
|
for f in files:
|
|
48
59
|
print(f" - {f.name}")
|
|
@@ -5,6 +5,7 @@ Implements the Dependency Inversion Principle:
|
|
|
5
5
|
- All dependencies are injected, not created internally
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
import logging
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
|
|
10
11
|
from linkedin2md.protocols import (
|
|
@@ -14,6 +15,8 @@ from linkedin2md.protocols import (
|
|
|
14
15
|
ParserRegistry,
|
|
15
16
|
)
|
|
16
17
|
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
17
20
|
|
|
18
21
|
class LinkedInToMarkdownConverter:
|
|
19
22
|
"""Main orchestrator for LinkedIn to Markdown conversion.
|
|
@@ -74,7 +77,7 @@ class LinkedInToMarkdownConverter:
|
|
|
74
77
|
parsed[parser.section_key] = result
|
|
75
78
|
except Exception as e:
|
|
76
79
|
# Log but don't fail on individual section errors
|
|
77
|
-
|
|
80
|
+
logger.warning("Failed to parse %s: %s", parser.section_key, e)
|
|
78
81
|
|
|
79
82
|
return parsed
|
|
80
83
|
|
|
@@ -105,7 +108,7 @@ class LinkedInToMarkdownConverter:
|
|
|
105
108
|
path = self._writer.write(formatter.section_key, content)
|
|
106
109
|
files.append(path)
|
|
107
110
|
except Exception as e:
|
|
108
|
-
|
|
111
|
+
logger.warning("Failed to format %s: %s", formatter.section_key, e)
|
|
109
112
|
|
|
110
113
|
return files
|
|
111
114
|
|
|
@@ -6,7 +6,10 @@ Provides common formatting functionality that section formatters can use.
|
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from linkedin2md.protocols import
|
|
9
|
+
from linkedin2md.protocols import MultilingualText, SectionFormatter
|
|
10
|
+
|
|
11
|
+
# Backward compatibility alias
|
|
12
|
+
BilingualText = MultilingualText
|
|
10
13
|
|
|
11
14
|
|
|
12
15
|
class BaseFormatter(ABC, SectionFormatter):
|
|
@@ -31,16 +34,36 @@ class BaseFormatter(ABC, SectionFormatter):
|
|
|
31
34
|
# Shared Utilities
|
|
32
35
|
# ========================================================================
|
|
33
36
|
|
|
34
|
-
def _get_text(
|
|
35
|
-
|
|
36
|
-
|
|
37
|
+
def _get_text(
|
|
38
|
+
self,
|
|
39
|
+
multilingual: MultilingualText | dict | str | None,
|
|
40
|
+
lang: str,
|
|
41
|
+
fallback_chain: list[str] | None = None,
|
|
42
|
+
) -> str:
|
|
43
|
+
"""Extract text in preferred language with fallback chain.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
multilingual: Text container (MultilingualText, dict, str, or None)
|
|
47
|
+
lang: Preferred language code
|
|
48
|
+
fallback_chain: Languages to try if preferred not found
|
|
49
|
+
(default: ["en", "es"])
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
Text in requested or fallback language
|
|
53
|
+
"""
|
|
54
|
+
if multilingual is None:
|
|
37
55
|
return ""
|
|
38
|
-
if isinstance(
|
|
39
|
-
return
|
|
40
|
-
if isinstance(
|
|
41
|
-
return
|
|
56
|
+
if isinstance(multilingual, str):
|
|
57
|
+
return multilingual
|
|
58
|
+
if isinstance(multilingual, MultilingualText):
|
|
59
|
+
return multilingual.get(lang, fallback_chain=fallback_chain or ["en", "es"])
|
|
42
60
|
# Dict fallback for compatibility
|
|
43
|
-
|
|
61
|
+
if lang in multilingual and multilingual[lang]:
|
|
62
|
+
return multilingual[lang]
|
|
63
|
+
for fb in fallback_chain or ["en", "es"]:
|
|
64
|
+
if fb in multilingual and multilingual[fb]:
|
|
65
|
+
return multilingual[fb]
|
|
66
|
+
return ""
|
|
44
67
|
|
|
45
68
|
def _escape_pipe(self, text: str) -> str:
|
|
46
69
|
"""Escape pipe characters for Markdown tables."""
|
|
@@ -5,13 +5,17 @@ Single Responsibility: Detect language of text.
|
|
|
5
5
|
|
|
6
6
|
import re
|
|
7
7
|
|
|
8
|
-
from linkedin2md.protocols import
|
|
8
|
+
from linkedin2md.protocols import LanguageDetector, MultilingualText
|
|
9
|
+
|
|
10
|
+
# Backward compatibility alias
|
|
11
|
+
BilingualText = MultilingualText
|
|
9
12
|
|
|
10
13
|
|
|
11
14
|
class SpanishEnglishDetector(LanguageDetector):
|
|
12
15
|
"""Detect Spanish vs English text.
|
|
13
16
|
|
|
14
17
|
Single Responsibility: Only handles language detection.
|
|
18
|
+
Extensible: Implement LanguageDetector protocol for other languages.
|
|
15
19
|
"""
|
|
16
20
|
|
|
17
21
|
# Spanish language detection patterns
|
|
@@ -24,9 +28,14 @@ class SpanishEnglishDetector(LanguageDetector):
|
|
|
24
28
|
r"[áéíóúñ¿¡]", # Spanish characters
|
|
25
29
|
]
|
|
26
30
|
|
|
27
|
-
def __init__(self):
|
|
31
|
+
def __init__(self) -> None:
|
|
28
32
|
self._regex = re.compile("|".join(self.SPANISH_PATTERNS), re.IGNORECASE)
|
|
29
33
|
|
|
34
|
+
@property
|
|
35
|
+
def supported_languages(self) -> list[str]:
|
|
36
|
+
"""Return list of detectable language codes."""
|
|
37
|
+
return ["en", "es"]
|
|
38
|
+
|
|
30
39
|
def detect(self, text: str) -> str:
|
|
31
40
|
"""Detect if text is Spanish or English."""
|
|
32
41
|
if not text:
|
|
@@ -40,37 +49,39 @@ class SpanishEnglishDetector(LanguageDetector):
|
|
|
40
49
|
return "en"
|
|
41
50
|
|
|
42
51
|
|
|
43
|
-
class
|
|
44
|
-
"""Factory for creating
|
|
52
|
+
class MultilingualTextFactory:
|
|
53
|
+
"""Factory for creating MultilingualText objects.
|
|
45
54
|
|
|
46
|
-
Single Responsibility: Create
|
|
55
|
+
Single Responsibility: Create multilingual text with language detection.
|
|
47
56
|
Dependency Inversion: Depends on LanguageDetector protocol.
|
|
48
57
|
"""
|
|
49
58
|
|
|
50
59
|
def __init__(self, detector: LanguageDetector):
|
|
51
60
|
self._detector = detector
|
|
52
61
|
|
|
53
|
-
def create(self, text: str, lang: str | None = None) ->
|
|
54
|
-
"""Create
|
|
62
|
+
def create(self, text: str, lang: str | None = None) -> MultilingualText:
|
|
63
|
+
"""Create MultilingualText with text in detected/specified language."""
|
|
55
64
|
if not text:
|
|
56
|
-
return
|
|
65
|
+
return MultilingualText()
|
|
57
66
|
|
|
58
67
|
detected = lang or self._detector.detect(text)
|
|
68
|
+
return MultilingualText(**{detected: text})
|
|
59
69
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
return BilingualText(en=text)
|
|
70
|
+
def merge(self, *texts: MultilingualText) -> MultilingualText:
|
|
71
|
+
"""Merge multiple MultilingualText objects.
|
|
63
72
|
|
|
64
|
-
|
|
65
|
-
"""
|
|
66
|
-
|
|
67
|
-
es = ""
|
|
73
|
+
First non-empty value for each language wins.
|
|
74
|
+
"""
|
|
75
|
+
merged: dict[str, str] = {}
|
|
68
76
|
for t in texts:
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
77
|
+
for lang in t.languages:
|
|
78
|
+
if lang not in merged:
|
|
79
|
+
merged[lang] = t.get(lang)
|
|
80
|
+
return MultilingualText(**merged)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Backward compatibility alias
|
|
84
|
+
BilingualTextFactory = MultilingualTextFactory
|
|
74
85
|
|
|
75
86
|
|
|
76
87
|
# Default instances
|
|
@@ -7,11 +7,15 @@ Dependency Inversion: Depends on LanguageDetector protocol.
|
|
|
7
7
|
from abc import ABC, abstractmethod
|
|
8
8
|
|
|
9
9
|
from linkedin2md.language import (
|
|
10
|
-
|
|
10
|
+
MultilingualTextFactory,
|
|
11
11
|
get_default_detector,
|
|
12
12
|
get_default_factory,
|
|
13
13
|
)
|
|
14
|
-
from linkedin2md.protocols import
|
|
14
|
+
from linkedin2md.protocols import LanguageDetector, MultilingualText, SectionParser
|
|
15
|
+
|
|
16
|
+
# Backward compatibility alias
|
|
17
|
+
BilingualText = MultilingualText
|
|
18
|
+
BilingualTextFactory = MultilingualTextFactory
|
|
15
19
|
|
|
16
20
|
# Month names for date formatting
|
|
17
21
|
MONTHS = [
|
|
@@ -144,10 +148,10 @@ def merge_bilingual_entries(
|
|
|
144
148
|
key_fields: list[str],
|
|
145
149
|
bilingual_fields: list[str],
|
|
146
150
|
) -> list[dict]:
|
|
147
|
-
"""Merge duplicate entries with
|
|
151
|
+
"""Merge duplicate entries with multilingual content.
|
|
148
152
|
|
|
149
|
-
Groups entries by matching key fields and merges
|
|
150
|
-
|
|
153
|
+
Groups entries by matching key fields and merges multilingual text from
|
|
154
|
+
different language versions into complete MultilingualText objects.
|
|
151
155
|
"""
|
|
152
156
|
if not entries:
|
|
153
157
|
return []
|
|
@@ -185,23 +189,21 @@ def _merge_bilingual_group(group: list[dict], bilingual_fields: list[str]) -> di
|
|
|
185
189
|
return merged
|
|
186
190
|
|
|
187
191
|
|
|
188
|
-
def _merge_bilingual_field(group: list[dict], field: str) ->
|
|
189
|
-
"""Merge a
|
|
190
|
-
|
|
191
|
-
es = ""
|
|
192
|
+
def _merge_bilingual_field(group: list[dict], field: str) -> MultilingualText:
|
|
193
|
+
"""Merge a multilingual field from multiple entries."""
|
|
194
|
+
merged: dict[str, str] = {}
|
|
192
195
|
|
|
193
196
|
for entry in group:
|
|
194
197
|
value = entry.get(field)
|
|
195
198
|
if not value:
|
|
196
199
|
continue
|
|
197
200
|
|
|
198
|
-
if isinstance(value,
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
es = value.es
|
|
201
|
+
if isinstance(value, MultilingualText):
|
|
202
|
+
for lang in value.languages:
|
|
203
|
+
if lang not in merged:
|
|
204
|
+
merged[lang] = value.get(lang)
|
|
203
205
|
|
|
204
|
-
return
|
|
206
|
+
return MultilingualText(**merged)
|
|
205
207
|
|
|
206
208
|
|
|
207
209
|
def _merge_achievements(group: list[dict]) -> list[dict]:
|
|
@@ -215,8 +217,7 @@ def _merge_achievements(group: list[dict]) -> list[dict]:
|
|
|
215
217
|
merged_achievements = []
|
|
216
218
|
|
|
217
219
|
for i in range(max_len):
|
|
218
|
-
|
|
219
|
-
es = ""
|
|
220
|
+
merged_text: dict[str, str] = {}
|
|
220
221
|
|
|
221
222
|
for achievements in achievement_lists:
|
|
222
223
|
if i >= len(achievements):
|
|
@@ -225,12 +226,11 @@ def _merge_achievements(group: list[dict]) -> list[dict]:
|
|
|
225
226
|
achievement = achievements[i]
|
|
226
227
|
text = achievement.get("text")
|
|
227
228
|
|
|
228
|
-
if isinstance(text,
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
es = text.es
|
|
229
|
+
if isinstance(text, MultilingualText):
|
|
230
|
+
for lang in text.languages:
|
|
231
|
+
if lang not in merged_text:
|
|
232
|
+
merged_text[lang] = text.get(lang)
|
|
233
233
|
|
|
234
|
-
merged_achievements.append({"text":
|
|
234
|
+
merged_achievements.append({"text": MultilingualText(**merged_text)})
|
|
235
235
|
|
|
236
236
|
return merged_achievements
|
|
@@ -15,26 +15,82 @@ from typing import Any, Protocol, runtime_checkable
|
|
|
15
15
|
# ============================================================================
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
class
|
|
19
|
-
"""Immutable
|
|
18
|
+
class MultilingualText:
|
|
19
|
+
"""Immutable multilingual text container.
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
Supports any number of languages via keyword arguments.
|
|
22
|
+
Backward compatible with BilingualText API (en/es properties).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
__slots__ = ("_texts",)
|
|
26
|
+
|
|
27
|
+
def __init__(self, **langs: str):
|
|
28
|
+
"""Create with language codes as kwargs.
|
|
29
|
+
|
|
30
|
+
Examples:
|
|
31
|
+
MultilingualText(en="Hello", es="Hola")
|
|
32
|
+
MultilingualText(en="Hi", es="Hola", fr="Salut")
|
|
33
|
+
"""
|
|
34
|
+
object.__setattr__(self, "_texts", dict(langs))
|
|
22
35
|
|
|
23
|
-
def
|
|
24
|
-
|
|
25
|
-
object.__setattr__(self, "es", es)
|
|
36
|
+
def __setattr__(self, name: str, value: object) -> None:
|
|
37
|
+
raise AttributeError("MultilingualText is immutable")
|
|
26
38
|
|
|
27
|
-
|
|
28
|
-
|
|
39
|
+
@property
|
|
40
|
+
def en(self) -> str:
|
|
41
|
+
"""Backward compatibility: get English text."""
|
|
42
|
+
return self._texts.get("en", "")
|
|
43
|
+
|
|
44
|
+
@property
|
|
45
|
+
def es(self) -> str:
|
|
46
|
+
"""Backward compatibility: get Spanish text."""
|
|
47
|
+
return self._texts.get("es", "")
|
|
48
|
+
|
|
49
|
+
def get(
|
|
50
|
+
self,
|
|
51
|
+
lang: str,
|
|
52
|
+
fallback_chain: list[str] | None = None,
|
|
53
|
+
default: str = "",
|
|
54
|
+
) -> str:
|
|
55
|
+
"""Get text in specified language with fallback chain.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
lang: Primary language code to retrieve
|
|
59
|
+
fallback_chain: Languages to try if primary not found
|
|
60
|
+
(default: ["en", "es"])
|
|
61
|
+
default: Value if no language found
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Text in requested or fallback language, or default
|
|
65
|
+
"""
|
|
66
|
+
if lang in self._texts and self._texts[lang]:
|
|
67
|
+
return self._texts[lang]
|
|
68
|
+
|
|
69
|
+
for fb in fallback_chain or ["en", "es"]:
|
|
70
|
+
if fb in self._texts and self._texts[fb]:
|
|
71
|
+
return self._texts[fb]
|
|
72
|
+
|
|
73
|
+
return default
|
|
29
74
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
return self.es or self.en or default
|
|
75
|
+
@property
|
|
76
|
+
def languages(self) -> list[str]:
|
|
77
|
+
"""Return list of language codes with content."""
|
|
78
|
+
return [lang for lang, text in self._texts.items() if text]
|
|
35
79
|
|
|
36
80
|
def __repr__(self) -> str:
|
|
37
|
-
return f"
|
|
81
|
+
return f"MultilingualText({self._texts!r})"
|
|
82
|
+
|
|
83
|
+
def __eq__(self, other: object) -> bool:
|
|
84
|
+
if isinstance(other, MultilingualText):
|
|
85
|
+
return self._texts == other._texts
|
|
86
|
+
return False
|
|
87
|
+
|
|
88
|
+
def __hash__(self) -> int:
|
|
89
|
+
return hash(tuple(sorted(self._texts.items())))
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Backward compatibility alias
|
|
93
|
+
BilingualText = MultilingualText
|
|
38
94
|
|
|
39
95
|
|
|
40
96
|
# ============================================================================
|
|
@@ -48,7 +104,13 @@ class LanguageDetector(Protocol):
|
|
|
48
104
|
|
|
49
105
|
@abstractmethod
|
|
50
106
|
def detect(self, text: str) -> str:
|
|
51
|
-
"""Detect language of text. Returns 'en'
|
|
107
|
+
"""Detect language of text. Returns ISO 639-1 code (e.g., 'en', 'es')."""
|
|
108
|
+
...
|
|
109
|
+
|
|
110
|
+
@property
|
|
111
|
+
@abstractmethod
|
|
112
|
+
def supported_languages(self) -> list[str]:
|
|
113
|
+
"""Return list of language codes this detector can identify."""
|
|
52
114
|
...
|
|
53
115
|
|
|
54
116
|
|
|
@@ -62,16 +62,15 @@ class TestParseArgs:
|
|
|
62
62
|
class TestMain:
|
|
63
63
|
"""Tests for main entry point."""
|
|
64
64
|
|
|
65
|
-
def test_file_not_found(self,
|
|
65
|
+
def test_file_not_found(self, caplog):
|
|
66
66
|
"""Test error when file doesn't exist."""
|
|
67
67
|
with patch("sys.argv", ["linkedin2md", "nonexistent.zip"]):
|
|
68
68
|
result = main()
|
|
69
69
|
|
|
70
70
|
assert result == 1
|
|
71
|
-
|
|
72
|
-
assert "File not found" in captured.err
|
|
71
|
+
assert "File not found" in caplog.text
|
|
73
72
|
|
|
74
|
-
def test_not_a_zip_file(self,
|
|
73
|
+
def test_not_a_zip_file(self, caplog):
|
|
75
74
|
"""Test error when file is not a ZIP."""
|
|
76
75
|
with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
|
|
77
76
|
f.write(b"not a zip")
|
|
@@ -82,12 +81,11 @@ class TestMain:
|
|
|
82
81
|
result = main()
|
|
83
82
|
|
|
84
83
|
assert result == 1
|
|
85
|
-
|
|
86
|
-
assert "Expected .zip file" in captured.err
|
|
84
|
+
assert "Expected .zip file" in caplog.text
|
|
87
85
|
finally:
|
|
88
86
|
Path(temp_path).unlink()
|
|
89
87
|
|
|
90
|
-
def test_file_too_large(self,
|
|
88
|
+
def test_file_too_large(self, caplog):
|
|
91
89
|
"""Test error when file exceeds size limit."""
|
|
92
90
|
with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as f:
|
|
93
91
|
temp_path = f.name
|
|
@@ -104,8 +102,7 @@ class TestMain:
|
|
|
104
102
|
result = main()
|
|
105
103
|
|
|
106
104
|
assert result == 1
|
|
107
|
-
|
|
108
|
-
assert "File too large" in captured.err
|
|
105
|
+
assert "File too large" in caplog.text
|
|
109
106
|
finally:
|
|
110
107
|
Path(temp_path).unlink()
|
|
111
108
|
|
|
@@ -151,7 +148,7 @@ class TestMain:
|
|
|
151
148
|
|
|
152
149
|
assert result == 0
|
|
153
150
|
|
|
154
|
-
def test_invalid_zip_file(self,
|
|
151
|
+
def test_invalid_zip_file(self, caplog):
|
|
155
152
|
"""Test error when ZIP file is corrupted."""
|
|
156
153
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
157
154
|
zip_path = Path(tmpdir) / "corrupt.zip"
|
|
@@ -161,8 +158,7 @@ class TestMain:
|
|
|
161
158
|
result = main()
|
|
162
159
|
|
|
163
160
|
assert result == 1
|
|
164
|
-
|
|
165
|
-
assert "Error" in captured.err
|
|
161
|
+
assert "Invalid" in caplog.text or "corrupted" in caplog.text
|
|
166
162
|
|
|
167
163
|
|
|
168
164
|
class TestMaxFileSize:
|
|
@@ -41,7 +41,8 @@ class TestBilingualText:
|
|
|
41
41
|
def test_immutable(self):
|
|
42
42
|
text = BilingualText(en="Hello")
|
|
43
43
|
try:
|
|
44
|
-
|
|
44
|
+
# Use setattr to bypass static type checking while testing runtime behavior
|
|
45
|
+
setattr(text, "en", "Changed") # noqa: B010
|
|
45
46
|
raise AssertionError("Should have raised AttributeError")
|
|
46
47
|
except AttributeError:
|
|
47
48
|
pass
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|