linkedin2md 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. linkedin2md/__init__.py +64 -0
  2. linkedin2md/__main__.py +6 -0
  3. linkedin2md/cli.py +81 -0
  4. linkedin2md/converter.py +136 -0
  5. linkedin2md/extractor.py +80 -0
  6. linkedin2md/formatter.py +1073 -0
  7. linkedin2md/formatters/__init__.py +133 -0
  8. linkedin2md/formatters/activity.py +85 -0
  9. linkedin2md/formatters/advertising.py +109 -0
  10. linkedin2md/formatters/base.py +71 -0
  11. linkedin2md/formatters/content.py +265 -0
  12. linkedin2md/formatters/identity.py +70 -0
  13. linkedin2md/formatters/jobs.py +173 -0
  14. linkedin2md/formatters/learning.py +57 -0
  15. linkedin2md/formatters/network.py +133 -0
  16. linkedin2md/formatters/payments.py +34 -0
  17. linkedin2md/formatters/professional.py +225 -0
  18. linkedin2md/formatters/profile.py +53 -0
  19. linkedin2md/formatters/recommendations.py +139 -0
  20. linkedin2md/formatters/services.py +74 -0
  21. linkedin2md/language.py +88 -0
  22. linkedin2md/parser.py +1504 -0
  23. linkedin2md/parsers/__init__.py +147 -0
  24. linkedin2md/parsers/activity.py +92 -0
  25. linkedin2md/parsers/advertising.py +111 -0
  26. linkedin2md/parsers/base.py +236 -0
  27. linkedin2md/parsers/content.py +269 -0
  28. linkedin2md/parsers/identity.py +62 -0
  29. linkedin2md/parsers/jobs.py +195 -0
  30. linkedin2md/parsers/learning.py +71 -0
  31. linkedin2md/parsers/network.py +162 -0
  32. linkedin2md/parsers/payments.py +35 -0
  33. linkedin2md/parsers/professional.py +269 -0
  34. linkedin2md/parsers/profile.py +149 -0
  35. linkedin2md/parsers/recommendations.py +133 -0
  36. linkedin2md/parsers/services.py +66 -0
  37. linkedin2md/protocols.py +185 -0
  38. linkedin2md/registry.py +76 -0
  39. linkedin2md/writer.py +54 -0
  40. linkedin2md-0.1.0.dist-info/METADATA +155 -0
  41. linkedin2md-0.1.0.dist-info/RECORD +44 -0
  42. linkedin2md-0.1.0.dist-info/WHEEL +4 -0
  43. linkedin2md-0.1.0.dist-info/entry_points.txt +2 -0
  44. linkedin2md-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,64 @@
1
+ """linkedin2md - Convert LinkedIn data exports to Markdown.
2
+
3
+ SOLID-compliant architecture:
4
+ - S: Each parser/formatter handles one section
5
+ - O: New sections added via registry decorators
6
+ - L: All parsers/formatters are substitutable via protocols
7
+ - I: Focused protocols for each concern
8
+ - D: Converter depends on abstractions, not concretions
9
+ """
10
+
11
+ __version__ = "0.1.0"
12
+
13
+ # Main public API
14
+ from linkedin2md.converter import LinkedInToMarkdownConverter, create_converter
15
+
16
+ # Backward compatibility - import old API
17
+ # (These are deprecated but kept for compatibility)
18
+ from linkedin2md.formatter import MarkdownFormatter
19
+ from linkedin2md.parser import LinkedInExportParser
20
+
21
+ # Protocols for type hints and custom implementations
22
+ from linkedin2md.protocols import (
23
+ BilingualText,
24
+ DataExtractor,
25
+ FormatterRegistry,
26
+ LanguageDetector,
27
+ OutputWriter,
28
+ ParserRegistry,
29
+ SectionFormatter,
30
+ SectionParser,
31
+ )
32
+
33
+ # Registries for extension
34
+ from linkedin2md.registry import (
35
+ get_formatter_registry,
36
+ get_parser_registry,
37
+ register_formatter,
38
+ register_parser,
39
+ )
40
+
41
+ __all__ = [
42
+ # Version
43
+ "__version__",
44
+ # Main API
45
+ "LinkedInToMarkdownConverter",
46
+ "create_converter",
47
+ # Protocols
48
+ "BilingualText",
49
+ "DataExtractor",
50
+ "FormatterRegistry",
51
+ "LanguageDetector",
52
+ "OutputWriter",
53
+ "ParserRegistry",
54
+ "SectionFormatter",
55
+ "SectionParser",
56
+ # Registry
57
+ "get_formatter_registry",
58
+ "get_parser_registry",
59
+ "register_formatter",
60
+ "register_parser",
61
+ # Backward compatibility (deprecated)
62
+ "LinkedInExportParser",
63
+ "MarkdownFormatter",
64
+ ]
@@ -0,0 +1,6 @@
1
+ """Allow running as python -m linkedin2md."""
2
+
3
+ from linkedin2md.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ raise SystemExit(main())
linkedin2md/cli.py ADDED
@@ -0,0 +1,81 @@
1
+ """CLI for linkedin2md.
2
+
3
+ Dependency Inversion: Uses factory function, doesn't create dependencies directly.
4
+ """
5
+
6
+ import argparse
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from linkedin2md.converter import create_converter
11
+
12
+ # Maximum allowed file size in megabytes (500 MB)
13
+ MAX_FILE_SIZE_MB = 500
14
+
15
+
16
def main(argv: list[str] | None = None) -> int:
    """Run the linkedin2md command-line interface.

    Validates the source path (existence, ``.zip`` suffix, regular file,
    size limit) before handing it to the converter. Every failure is
    reported on stderr.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) means ``sys.argv[1:]``, so zero-argument callers
            behave exactly as before; passing a list lets other code drive
            the CLI without patching ``sys.argv``.

    Returns:
        ``0`` when the conversion succeeded, ``1`` on any validation or
        conversion error.

    Raises:
        SystemExit: Raised by ``argparse`` for invalid arguments or
            ``--help``.
    """
    args = _parse_args(sys.argv[1:] if argv is None else argv)
    source = args.source

    if not source.exists():
        print(f"Error: File not found: {source}", file=sys.stderr)
        return 1

    if source.suffix.lower() != ".zip":
        print(f"Error: Expected .zip file, got {source.suffix}", file=sys.stderr)
        return 1

    # A directory called "something.zip" passes both checks above; reject it
    # here with a clear message instead of an opaque OS error further down.
    if not source.is_file():
        print(f"Error: Not a file: {source}", file=sys.stderr)
        return 1

    # Check file size to prevent resource exhaustion
    file_size_mb = source.stat().st_size / (1024 * 1024)
    if file_size_mb > MAX_FILE_SIZE_MB:
        print(
            f"Error: File too large ({file_size_mb:.1f} MB). "
            f"Maximum allowed is {MAX_FILE_SIZE_MB} MB",
            file=sys.stderr,
        )
        return 1

    try:
        # Use factory to create converter with all dependencies
        converter = create_converter(source, args.output)
        files = converter.convert(lang=args.lang)
    except Exception as e:
        # Top-level boundary: turn any conversion failure into exit code 1.
        print(f"Error: {e}", file=sys.stderr)
        return 1

    print(f"Created {len(files)} files in {args.output}/")
    for f in files:
        print(f"  - {f.name}")

    return 0
51
+
52
+
53
+ def _parse_args(argv: list[str]) -> argparse.Namespace:
54
+ """Parse command line arguments."""
55
+ parser = argparse.ArgumentParser(
56
+ prog="linkedin2md",
57
+ description="Convert LinkedIn data exports to Markdown",
58
+ )
59
+ parser.add_argument(
60
+ "source",
61
+ type=Path,
62
+ help="LinkedIn ZIP export file",
63
+ )
64
+ parser.add_argument(
65
+ "-o",
66
+ "--output",
67
+ type=Path,
68
+ default=Path("linkedin_export"),
69
+ help="Output directory (default: linkedin_export)",
70
+ )
71
+ parser.add_argument(
72
+ "--lang",
73
+ choices=["en", "es"],
74
+ default="en",
75
+ help="Output language (default: en)",
76
+ )
77
+ return parser.parse_args(argv)
78
+
79
+
80
+ if __name__ == "__main__":
81
+ sys.exit(main())
@@ -0,0 +1,136 @@
1
+ """Main converter orchestrator with dependency injection.
2
+
3
+ Implements the Dependency Inversion Principle:
4
+ - Depends on abstractions (protocols), not concretions
5
+ - All dependencies are injected, not created internally
6
+ """
7
+
8
+ from pathlib import Path
9
+
10
+ from linkedin2md.protocols import (
11
+ DataExtractor,
12
+ FormatterRegistry,
13
+ OutputWriter,
14
+ ParserRegistry,
15
+ )
16
+
17
+
18
class LinkedInToMarkdownConverter:
    """Orchestrate the conversion of a LinkedIn export into Markdown files.

    SOLID Principles:
    - Single Responsibility: Only orchestrates the conversion process
    - Open/Closed: New parsers/formatters added via registries
    - Dependency Inversion: Depends on protocols, not implementations

    All dependencies are injected via the constructor. A failure while
    parsing, formatting or writing one section is reported as a warning on
    stderr and does not stop the remaining sections; a failure of the
    extractor itself propagates to the caller.
    """

    def __init__(
        self,
        extractor: DataExtractor,
        parser_registry: ParserRegistry,
        formatter_registry: FormatterRegistry,
        writer: OutputWriter,
    ):
        """Initialize with injected dependencies.

        Args:
            extractor: Extracts raw data from source
            parser_registry: Registry of section parsers
            formatter_registry: Registry of section formatters
            writer: Writes formatted output
        """
        self._extractor = extractor
        self._parsers = parser_registry
        self._formatters = formatter_registry
        self._writer = writer

    def convert(self, lang: str = "en") -> list[Path]:
        """Convert LinkedIn export to Markdown files.

        Args:
            lang: Output language ('en' or 'es')

        Returns:
            List of paths to created files
        """
        # Step 1: Extract raw CSV data
        raw_data = self._extractor.extract()

        # Step 2: Parse all sections
        parsed_data = self._parse_all(raw_data)

        # Step 3: Format and write all sections
        return self._format_and_write_all(parsed_data, lang)

    @staticmethod
    def _warn(message: str) -> None:
        """Report a non-fatal problem on stderr.

        Warnings go to stderr so they never mix with regular output
        written to stdout.
        """
        import sys

        print(f"Warning: {message}", file=sys.stderr)

    def _parse_all(self, raw_data: dict[str, list[dict]]) -> dict[str, object]:
        """Parse all sections using registered parsers.

        Args:
            raw_data: Raw rows as returned by the extractor.

        Returns:
            Mapping of section key to that parser's result. Sections whose
            parser raised are left out.
        """
        parsed: dict[str, object] = {}

        for parser in self._parsers.get_all():
            try:
                parsed[parser.section_key] = parser.parse(raw_data)
            except Exception as e:
                # Best effort: warn, but don't fail on individual sections.
                self._warn(f"Failed to parse {parser.section_key}: {e}")

        return parsed

    def _format_and_write_all(self, data: dict[str, object], lang: str) -> list[Path]:
        """Format and write all sections.

        The profile formatter is handled first and receives the complete
        ``data`` mapping; every other formatter receives only the entry
        stored under its own section key and is skipped when that entry is
        missing or empty.

        Args:
            data: Parsed data keyed by section.
            lang: Output language passed through to the formatters.

        Returns:
            Paths returned by the writer, in the order they were written.
        """
        # (output key, formatter, payload) for each section to render.
        jobs = []

        # Special handling for profile (needs full data)
        profile_formatter = self._formatters.get("profile")
        if profile_formatter:
            jobs.append(("profile", profile_formatter, data))

        for formatter in self._formatters.get_all():
            key = formatter.section_key
            if key == "profile":
                continue  # Already queued above

            section_data = data.get(key)
            if section_data:
                jobs.append((key, formatter, section_data))

        files: list[Path] = []
        for key, formatter, payload in jobs:
            # Every section, including the profile, is best effort: one
            # failure must not discard the output of all the others.
            try:
                content = formatter.format(payload, lang)
                if content and content.strip():
                    files.append(self._writer.write(key, content))
            except Exception as e:
                self._warn(f"Failed to format {key}: {e}")

        return files
111
+
112
+
113
def create_converter(
    source: Path,
    output_dir: Path,
) -> LinkedInToMarkdownConverter:
    """Build a converter wired with the package's default collaborators.

    Convenience factory: callers that need custom dependencies (for
    example in tests) can still instantiate
    ``LinkedInToMarkdownConverter`` directly.

    Args:
        source: Path handed to ``ZipDataExtractor``.
        output_dir: Directory handed to ``MarkdownFileWriter``.

    Returns:
        A converter using the shared parser and formatter registries.
    """
    # Imported here, not at module level, to trigger registration of
    # parsers and formatters before the registries are read.
    from linkedin2md import formatters, parsers  # noqa: F401
    from linkedin2md.extractor import ZipDataExtractor
    from linkedin2md.registry import get_formatter_registry, get_parser_registry
    from linkedin2md.writer import MarkdownFileWriter

    zip_extractor = ZipDataExtractor(source)
    section_parsers = get_parser_registry()
    section_formatters = get_formatter_registry()
    markdown_writer = MarkdownFileWriter(output_dir)

    return LinkedInToMarkdownConverter(
        extractor=zip_extractor,
        parser_registry=section_parsers,
        formatter_registry=section_formatters,
        writer=markdown_writer,
    )
@@ -0,0 +1,80 @@
1
+ """Data extraction implementations.
2
+
3
+ Single Responsibility: Extract raw data from sources.
4
+ """
5
+
6
+ import csv
7
+ import io
8
+ import zipfile
9
+ from pathlib import Path
10
+
11
+ from linkedin2md.protocols import DataExtractor
12
+
13
+
14
class ZipDataExtractor(DataExtractor):
    """Extract CSV data from LinkedIn ZIP export.

    Single Responsibility: Only handles ZIP I/O and CSV parsing.
    Does NOT transform or interpret the data.
    """

    def __init__(self, zip_path: Path | str):
        """Remember the archive location; the file is not opened yet.

        Args:
            zip_path: Location of the ZIP archive.
        """
        self.zip_path = Path(zip_path)

    def extract(self) -> dict[str, list[dict]]:
        """Extract all CSVs from ZIP into raw dict format.

        Each archive member whose name ends in ``.csv`` is decoded as UTF-8
        (a leading byte-order mark is dropped) and read with
        ``csv.DictReader``. Rows are stored under the member's file stem,
        lower-cased with spaces replaced by underscores. Members in
        different folders that share a stem overwrite one another; the last
        one listed in the archive wins.

        Returns:
            Mapping of normalized file stem to that file's rows.

        Raises:
            ValueError: If the ZIP file is invalid or corrupted. A member
                that is not valid UTF-8 raises ``UnicodeDecodeError``,
                which is itself a ``ValueError`` subclass.
        """
        data: dict[str, list[dict]] = {}

        try:
            with zipfile.ZipFile(self.zip_path, "r") as zf:
                for name in zf.namelist():
                    if not name.endswith(".csv"):
                        continue

                    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a
                    # leading BOM. A BOM left in place would hide the
                    # "Notes" preamble from _skip_header_notes and end up
                    # glued to the first column name.
                    content = zf.read(name).decode("utf-8-sig")
                    content = self._skip_header_notes(content)
                    reader = csv.DictReader(io.StringIO(content))
                    key = Path(name).stem.lower().replace(" ", "_")
                    data[key] = list(reader)
        except zipfile.BadZipFile as err:
            raise ValueError(f"Invalid or corrupted ZIP file: {self.zip_path}") from err

        return data

    def _skip_header_notes(self, content: str) -> str:
        """Skip header notes in LinkedIn CSVs.

        Some files like Connections.csv start with:
            Notes:
            "When exporting your connection data..."

            First Name,Last Name,URL,...

        When the first line starts with ``Notes``, everything before the
        first non-blank line that contains a comma and does not start with
        a double quote is dropped; that line is taken to be the CSV header.

        Args:
            content: Full decoded text of one CSV file.

        Returns:
            The text starting at the detected header row, or ``content``
            unchanged when there is no notes preamble or no header row is
            found.
        """
        lines = content.split("\n")

        if not lines[0].strip().startswith("Notes"):
            return content

        for index, line in enumerate(lines):
            stripped = line.strip()
            if not stripped:
                continue
            if "," in stripped and not stripped.startswith('"'):
                return "\n".join(lines[index:])

        return content
67
+
68
+
69
class DictDataExtractor(DataExtractor):
    """Serve an already-loaded mapping through the extractor interface.

    Intended for tests: no file or archive access takes place.

    Single Responsibility: Wraps existing data in extractor interface.
    """

    def __init__(self, data: dict[str, list[dict]]):
        """Keep a reference to ``data``; the mapping is not copied.

        Args:
            data: Rows keyed by section name, in the shape ``extract``
                returns.
        """
        self._data = data

    def extract(self) -> dict[str, list[dict]]:
        """Hand back the mapping supplied at construction time.

        Returns:
            The very same object that was passed to the constructor.
        """
        preloaded = self._data
        return preloaded