bear-export 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
1
+ Metadata-Version: 2.4
2
+ Name: bear-export
3
+ Version: 0.1.0
4
+ Summary: Add your description here
5
+ Requires-Python: >=3.13
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: pandas>=2.0.0
8
+ Requires-Dist: click>=8.0.0
9
+ Requires-Dist: pyyaml>=6.0
10
+ Requires-Dist: python-dateutil>=2.8.0
11
+
12
+ # Bear Export CLI Tool
13
+
14
+ A command-line tool to export Bear blog CSV data to organized markdown files.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install -r requirements.txt
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```bash
25
+ python main.py [OPTIONS]
26
+ ```
27
+
28
+ ### Options
29
+
30
+ - `-c, --csv-file TEXT`: Path to Bear blog CSV export file (default: post_export.csv)
31
+ - `-o, --output-dir TEXT`: Output directory name (default: Blog)
32
+ - `--include-drafts`: Include unpublished draft posts
33
+ - `--organize-by [date|tags|none]`: Organize files by date, tags, or no organization (default: none)
34
+ - `--front-matter [yaml|toml|none]`: Front matter format for static site generators (default: yaml)
35
+
36
+ ### Examples
37
+
38
+ Basic export (published posts only, flat structure):
39
+ ```bash
40
+ python main.py
41
+ ```
42
+
43
+ Include drafts and organize by date:
44
+ ```bash
45
+ python main.py --include-drafts --organize-by date
46
+ ```
47
+
48
+ Organize by tags with TOML front matter:
49
+ ```bash
50
+ python main.py --organize-by tags --front-matter toml
51
+ ```
52
+
53
+ Custom CSV file and output directory:
54
+ ```bash
55
+ python main.py --csv-file my_export.csv --output-dir MyBlog
56
+ ```
57
+
58
+ ## Output Structure
59
+
60
+ ### Flat Organization (default)
61
+ ```
62
+ Blog/
63
+ ├── post1.md
64
+ ├── post2.md
65
+ ├── post3.md
66
+ └── README.md
67
+ ```
68
+
69
+ ### Date Organization
70
+ ```
71
+ Blog/
72
+ ├── 2023/
73
+ │ ├── 01/
74
+ │ │ ├── january-post.md
75
+ │ │ └── another-january-post.md
76
+ │ └── 12/
77
+ │ └── december-post.md
78
+ ├── 2024/
79
+ │ └── 03/
80
+ │ └── march-post.md
81
+ └── README.md
82
+ ```
83
+
84
+ ### Tags Organization
85
+ ```
86
+ Blog/
87
+ ├── personal/
88
+ │ ├── my-life.md
89
+ │ └── thoughts.md
90
+ ├── coding/
91
+ │ ├── python-tips.md
92
+ │ └── vim-guide.md
93
+ ├── uncategorized-post.md
94
+ └── README.md
95
+ ```
96
+
97
+ ## Front Matter
98
+
99
+ The tool generates front matter compatible with static site generators like Hugo, Jekyll, and others.
100
+
101
+ ### YAML Front Matter (default)
102
+ ```yaml
103
+ ---
104
+ title: My Blog Post
105
+ slug: my-blog-post
106
+ date: 2024-03-15T10:30:00
107
+ tags:
108
+ - personal
109
+ - coding
110
+ lang: en
111
+ published: true
112
+ type: post
113
+ uid: abc123
114
+ ---
115
+ ```
116
+
117
+ ### TOML Front Matter
118
+ ```toml
119
+ +++
120
+ title = 'My Blog Post'
121
+ slug = 'my-blog-post'
122
+ date = '2024-03-15T10:30:00'
123
+ tags = ['personal', 'coding']
124
+ lang = 'en'
125
+ published = true
126
+ type = 'post'
127
+ uid = 'abc123'
128
+ +++
129
+ ```
130
+
131
+ ## Features
132
+
133
+ - ✅ Parse Bear blog CSV exports with proper encoding
134
+ - ✅ Generate clean markdown files with front matter
135
+ - ✅ Support for YAML and TOML front matter formats
136
+ - ✅ Multiple organization strategies (flat, by date, by tags)
137
+ - ✅ Filter published/draft posts
138
+ - ✅ Generate index/README file with post listings
139
+ - ✅ Handle special characters and filenames safely
140
+ - ✅ Preserve post metadata (tags, dates, language, etc.)
141
+
142
+ ## Requirements
143
+
144
+ - Python 3.7+
145
+ - pandas
146
+ - click
147
+ - pyyaml
148
+ - python-dateutil
149
+
150
+ See `requirements.txt` for exact versions.
@@ -0,0 +1,139 @@
1
+ # Bear Export CLI Tool
2
+
3
+ A command-line tool to export Bear blog CSV data to organized markdown files.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install -r requirements.txt
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```bash
14
+ python main.py [OPTIONS]
15
+ ```
16
+
17
+ ### Options
18
+
19
+ - `-c, --csv-file TEXT`: Path to Bear blog CSV export file (default: post_export.csv)
20
+ - `-o, --output-dir TEXT`: Output directory name (default: Blog)
21
+ - `--include-drafts`: Include unpublished draft posts
22
+ - `--organize-by [date|tags|none]`: Organize files by date, tags, or no organization (default: none)
23
+ - `--front-matter [yaml|toml|none]`: Front matter format for static site generators (default: yaml)
24
+
25
+ ### Examples
26
+
27
+ Basic export (published posts only, flat structure):
28
+ ```bash
29
+ python main.py
30
+ ```
31
+
32
+ Include drafts and organize by date:
33
+ ```bash
34
+ python main.py --include-drafts --organize-by date
35
+ ```
36
+
37
+ Organize by tags with TOML front matter:
38
+ ```bash
39
+ python main.py --organize-by tags --front-matter toml
40
+ ```
41
+
42
+ Custom CSV file and output directory:
43
+ ```bash
44
+ python main.py --csv-file my_export.csv --output-dir MyBlog
45
+ ```
46
+
47
+ ## Output Structure
48
+
49
+ ### Flat Organization (default)
50
+ ```
51
+ Blog/
52
+ ├── post1.md
53
+ ├── post2.md
54
+ ├── post3.md
55
+ └── README.md
56
+ ```
57
+
58
+ ### Date Organization
59
+ ```
60
+ Blog/
61
+ ├── 2023/
62
+ │ ├── 01/
63
+ │ │ ├── january-post.md
64
+ │ │ └── another-january-post.md
65
+ │ └── 12/
66
+ │ └── december-post.md
67
+ ├── 2024/
68
+ │ └── 03/
69
+ │ └── march-post.md
70
+ └── README.md
71
+ ```
72
+
73
+ ### Tags Organization
74
+ ```
75
+ Blog/
76
+ ├── personal/
77
+ │ ├── my-life.md
78
+ │ └── thoughts.md
79
+ ├── coding/
80
+ │ ├── python-tips.md
81
+ │ └── vim-guide.md
82
+ ├── uncategorized-post.md
83
+ └── README.md
84
+ ```
85
+
86
+ ## Front Matter
87
+
88
+ The tool generates front matter compatible with static site generators like Hugo, Jekyll, and others.
89
+
90
+ ### YAML Front Matter (default)
91
+ ```yaml
92
+ ---
93
+ title: My Blog Post
94
+ slug: my-blog-post
95
+ date: 2024-03-15T10:30:00
96
+ tags:
97
+ - personal
98
+ - coding
99
+ lang: en
100
+ published: true
101
+ type: post
102
+ uid: abc123
103
+ ---
104
+ ```
105
+
106
+ ### TOML Front Matter
107
+ ```toml
108
+ +++
109
+ title = 'My Blog Post'
110
+ slug = 'my-blog-post'
111
+ date = '2024-03-15T10:30:00'
112
+ tags = ['personal', 'coding']
113
+ lang = 'en'
114
+ published = true
115
+ type = 'post'
116
+ uid = 'abc123'
117
+ +++
118
+ ```
119
+
120
+ ## Features
121
+
122
+ - ✅ Parse Bear blog CSV exports with proper encoding
123
+ - ✅ Generate clean markdown files with front matter
124
+ - ✅ Support for YAML and TOML front matter formats
125
+ - ✅ Multiple organization strategies (flat, by date, by tags)
126
+ - ✅ Filter published/draft posts
127
+ - ✅ Generate index/README file with post listings
128
+ - ✅ Handle special characters and filenames safely
129
+ - ✅ Preserve post metadata (tags, dates, language, etc.)
130
+
131
+ ## Requirements
132
+
133
+ - Python 3.13+
134
+ - pandas
135
+ - click
136
+ - pyyaml
137
+ - python-dateutil
138
+
139
+ See `pyproject.toml` for the minimum required versions.
@@ -0,0 +1,12 @@
1
+ [project]
2
+ name = "bear-export"
3
+ version = "0.1.0"
4
+ description = "A command-line tool to export Bear blog CSV data to organized markdown files"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = [
8
+ "pandas>=2.0.0",
9
+ "click>=8.0.0",
10
+ "pyyaml>=6.0",
11
+ "python-dateutil>=2.8.0",
12
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ # Bear Export Package
@@ -0,0 +1,150 @@
1
+ Metadata-Version: 2.4
2
+ Name: bear-export
3
+ Version: 0.1.0
4
+ Summary: Add your description here
5
+ Requires-Python: >=3.13
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: pandas>=2.0.0
8
+ Requires-Dist: click>=8.0.0
9
+ Requires-Dist: pyyaml>=6.0
10
+ Requires-Dist: python-dateutil>=2.8.0
11
+
12
+ # Bear Export CLI Tool
13
+
14
+ A command-line tool to export Bear blog CSV data to organized markdown files.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install -r requirements.txt
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```bash
25
+ python main.py [OPTIONS]
26
+ ```
27
+
28
+ ### Options
29
+
30
+ - `-c, --csv-file TEXT`: Path to Bear blog CSV export file (default: post_export.csv)
31
+ - `-o, --output-dir TEXT`: Output directory name (default: Blog)
32
+ - `--include-drafts`: Include unpublished draft posts
33
+ - `--organize-by [date|tags|none]`: Organize files by date, tags, or no organization (default: none)
34
+ - `--front-matter [yaml|toml|none]`: Front matter format for static site generators (default: yaml)
35
+
36
+ ### Examples
37
+
38
+ Basic export (published posts only, flat structure):
39
+ ```bash
40
+ python main.py
41
+ ```
42
+
43
+ Include drafts and organize by date:
44
+ ```bash
45
+ python main.py --include-drafts --organize-by date
46
+ ```
47
+
48
+ Organize by tags with TOML front matter:
49
+ ```bash
50
+ python main.py --organize-by tags --front-matter toml
51
+ ```
52
+
53
+ Custom CSV file and output directory:
54
+ ```bash
55
+ python main.py --csv-file my_export.csv --output-dir MyBlog
56
+ ```
57
+
58
+ ## Output Structure
59
+
60
+ ### Flat Organization (default)
61
+ ```
62
+ Blog/
63
+ ├── post1.md
64
+ ├── post2.md
65
+ ├── post3.md
66
+ └── README.md
67
+ ```
68
+
69
+ ### Date Organization
70
+ ```
71
+ Blog/
72
+ ├── 2023/
73
+ │ ├── 01/
74
+ │ │ ├── january-post.md
75
+ │ │ └── another-january-post.md
76
+ │ └── 12/
77
+ │ └── december-post.md
78
+ ├── 2024/
79
+ │ └── 03/
80
+ │ └── march-post.md
81
+ └── README.md
82
+ ```
83
+
84
+ ### Tags Organization
85
+ ```
86
+ Blog/
87
+ ├── personal/
88
+ │ ├── my-life.md
89
+ │ └── thoughts.md
90
+ ├── coding/
91
+ │ ├── python-tips.md
92
+ │ └── vim-guide.md
93
+ ├── uncategorized-post.md
94
+ └── README.md
95
+ ```
96
+
97
+ ## Front Matter
98
+
99
+ The tool generates front matter compatible with static site generators like Hugo, Jekyll, and others.
100
+
101
+ ### YAML Front Matter (default)
102
+ ```yaml
103
+ ---
104
+ title: My Blog Post
105
+ slug: my-blog-post
106
+ date: 2024-03-15T10:30:00
107
+ tags:
108
+ - personal
109
+ - coding
110
+ lang: en
111
+ published: true
112
+ type: post
113
+ uid: abc123
114
+ ---
115
+ ```
116
+
117
+ ### TOML Front Matter
118
+ ```toml
119
+ +++
120
+ title = 'My Blog Post'
121
+ slug = 'my-blog-post'
122
+ date = '2024-03-15T10:30:00'
123
+ tags = ['personal', 'coding']
124
+ lang = 'en'
125
+ published = true
126
+ type = 'post'
127
+ uid = 'abc123'
128
+ +++
129
+ ```
130
+
131
+ ## Features
132
+
133
+ - ✅ Parse Bear blog CSV exports with proper encoding
134
+ - ✅ Generate clean markdown files with front matter
135
+ - ✅ Support for YAML and TOML front matter formats
136
+ - ✅ Multiple organization strategies (flat, by date, by tags)
137
+ - ✅ Filter published/draft posts
138
+ - ✅ Generate index/README file with post listings
139
+ - ✅ Handle special characters and filenames safely
140
+ - ✅ Preserve post metadata (tags, dates, language, etc.)
141
+
142
+ ## Requirements
143
+
144
+ - Python 3.7+
145
+ - pandas
146
+ - click
147
+ - pyyaml
148
+ - python-dateutil
149
+
150
+ See `requirements.txt` for exact versions.
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/__init__.py
4
+ src/converter.py
5
+ src/organizer.py
6
+ src/parser.py
7
+ src/bear_export.egg-info/PKG-INFO
8
+ src/bear_export.egg-info/SOURCES.txt
9
+ src/bear_export.egg-info/dependency_links.txt
10
+ src/bear_export.egg-info/requires.txt
11
+ src/bear_export.egg-info/top_level.txt
@@ -0,0 +1,4 @@
1
+ pandas>=2.0.0
2
+ click>=8.0.0
3
+ pyyaml>=6.0
4
+ python-dateutil>=2.8.0
@@ -0,0 +1,4 @@
1
+ __init__
2
+ converter
3
+ organizer
4
+ parser
@@ -0,0 +1,300 @@
1
+ """
2
+ Markdown Converter Module
3
+
4
+ Converts Bear blog posts to markdown format with front matter.
5
+ """
6
+
7
+ import re
8
+ from datetime import datetime
9
+ from typing import List, Dict, Any, Optional
10
+ from urllib.parse import urlparse
11
+
12
+
13
class MarkdownConverter:
    """Converts Bear blog posts to markdown format.

    Posts are dictionaries keyed by the Bear CSV column names read below
    (``title``, ``slug``, ``published date``, ``all tags``, ``lang``, ...).
    """

    # Plain scalars that YAML parsers read as booleans/null/floats, not strings
    # (e.g. the language code "no" would otherwise load as ``False``).
    _YAML_RESERVED_WORDS = frozenset(
        {
            "true", "false", "yes", "no", "on", "off",
            "y", "n", "null", "~", ".inf", ".nan",
        }
    )
    # Any of these characters anywhere in a value forces quoting. This is a
    # deliberate over-approximation: quoting a safe value is harmless.
    _YAML_SPECIAL_CHARS = frozenset(":{}[],&*#?|-<>=!%@`\"' ")

    def __init__(self, front_matter_format: str = "yaml"):
        """
        Initialize converter with front matter format.

        Args:
            front_matter_format: 'yaml', 'toml', or 'none' (case-insensitive)
        """
        self.front_matter_format = front_matter_format.lower()

    def convert_posts(self, posts: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Convert a list of posts to markdown format.

        Args:
            posts: List of post dictionaries

        Returns:
            List of shallow copies of the posts with added 'markdown_content'
            and 'filename' fields. Filenames are unique within the returned
            list: a repeated name gets a ``-2``, ``-3``, ... suffix so that
            posts sharing a slug do not overwrite each other on disk.
        """
        converted_posts = []
        used_filenames = set()

        for post in posts:
            markdown_post = post.copy()
            markdown_post["markdown_content"] = self.convert_post(post)
            markdown_post["filename"] = self._unique_filename(
                self.generate_filename(post), used_filenames
            )
            converted_posts.append(markdown_post)

        return converted_posts

    @staticmethod
    def _unique_filename(filename: str, used: set) -> str:
        """Return *filename*, suffixed if needed so it is not already in *used*."""
        if filename.endswith(".md"):
            base, ext = filename[:-3], ".md"
        else:
            base, ext = filename, ""

        candidate = filename
        counter = 2
        while candidate in used:
            candidate = f"{base}-{counter}{ext}"
            counter += 1

        used.add(candidate)
        return candidate

    def convert_post(self, post: Dict[str, Any]) -> str:
        """
        Convert a single post to markdown format.

        Args:
            post: Post dictionary

        Returns:
            Cleaned markdown content, preceded by delimited front matter when
            the format is 'yaml' or 'toml'. Any other format yields the
            content alone.
        """
        content = self.clean_content(post.get("content", ""))

        # Only the two known formats have a delimiter; anything else
        # (including 'none') gets no front matter block at all.
        if self.front_matter_format not in ("yaml", "toml"):
            return content

        front_matter = self.generate_front_matter(post)
        delimiter = "---" if self.front_matter_format == "yaml" else "+++"
        return f"{delimiter}\n{front_matter}\n{delimiter}\n\n{content}"

    def generate_front_matter(self, post: Dict[str, Any]) -> str:
        """
        Generate front matter in the specified format.

        Args:
            post: Post dictionary

        Returns:
            Front matter string without delimiters; empty for unknown formats
        """
        if self.front_matter_format == "yaml":
            return self._generate_yaml_front_matter(post)
        if self.front_matter_format == "toml":
            return self._generate_toml_front_matter(post)
        return ""

    def _generate_yaml_front_matter(self, post: Dict[str, Any]) -> str:
        """Generate YAML front matter (one ``key: value`` entry per line)."""
        lines = []

        if post.get("title"):
            lines.append(f"title: {self._escape_yaml(post['title'])}")

        if post.get("slug"):
            lines.append(f"slug: {self._escape_yaml(post['slug'])}")

        # Date (use published date, fallback to first published at). Left
        # unquoted on purpose so ISO values stay YAML timestamps.
        date = post.get("published date") or post.get("first published at")
        if date:
            lines.append(f"date: {date}")

        tags = post.get("all tags", [])
        if tags:
            tags_yaml = "\n ".join([f"- {self._escape_yaml(tag)}" for tag in tags])
            lines.append(f"tags:\n {tags_yaml}")

        # lang and uid go through the escaper too: "no" or "123" must stay strings.
        if post.get("lang"):
            lines.append(f"lang: {self._escape_yaml(post['lang'])}")

        if post.get("meta description"):
            lines.append(f"description: {self._escape_yaml(post['meta description'])}")

        if post.get("meta image"):
            lines.append(f"image: {self._escape_yaml(post['meta image'])}")

        post_type = "page" if post.get("is page") else "post"
        lines.append(f"type: {post_type}")

        lines.append(f"published: {str(post.get('publish', False)).lower()}")

        if post.get("uid"):
            lines.append(f"uid: {self._escape_yaml(post['uid'])}")

        if post.get("class name"):
            lines.append(f"className: {self._escape_yaml(post['class name'])}")

        return "\n".join(lines)

    def _generate_toml_front_matter(self, post: Dict[str, Any]) -> str:
        """Generate TOML front matter (one ``key = value`` entry per line)."""
        lines = []

        if post.get("title"):
            lines.append(f"title = {self._toml_string(post['title'])}")

        if post.get("slug"):
            lines.append(f"slug = {self._toml_string(post['slug'])}")

        date = post.get("published date") or post.get("first published at")
        if date:
            lines.append(f"date = {self._toml_string(date)}")

        tags = post.get("all tags", [])
        if tags:
            tags_toml = ", ".join(self._toml_string(tag) for tag in tags)
            lines.append(f"tags = [{tags_toml}]")

        if post.get("lang"):
            lines.append(f"lang = {self._toml_string(post['lang'])}")

        if post.get("meta description"):
            lines.append(f"description = {self._toml_string(post['meta description'])}")

        if post.get("meta image"):
            lines.append(f"image = {self._toml_string(post['meta image'])}")

        post_type = "page" if post.get("is page") else "post"
        lines.append(f"type = '{post_type}'")

        lines.append(f"published = {str(post.get('publish', False)).lower()}")

        if post.get("uid"):
            lines.append(f"uid = {self._toml_string(post['uid'])}")

        if post.get("class name"):
            lines.append(f"className = {self._toml_string(post['class name'])}")

        return "\n".join(lines)

    def generate_filename(self, post: Dict[str, Any]) -> str:
        """
        Generate a safe filename for the post.

        Args:
            post: Post dictionary

        Returns:
            Filename with .md extension, derived from the first non-empty of
            slug, title, uid; 'untitled.md' when all three are empty.
        """
        # `or` chaining (rather than a .get default) also skips keys that are
        # present but empty.
        name = post.get("slug") or post.get("title") or post.get("uid") or "untitled"
        name = self._clean_filename(str(name))

        # The chosen value may consist solely of unsafe characters.
        if not name:
            uid = self._clean_filename(str(post.get("uid") or "")) or "unknown"
            name = f"post-{uid}"

        return f"{name}.md"

    def clean_content(self, content: str) -> str:
        """
        Clean and process post content.

        Args:
            content: Raw post content

        Returns:
            Content with normalized line endings, runs of 3+ newlines
            collapsed to one blank line, markdown fix-ups applied, and
            surrounding whitespace stripped
        """
        if not content:
            return ""

        content = content.replace("\r\n", "\n").replace("\r", "\n")
        content = re.sub(r"\n{3,}", "\n\n", content)
        content = self._fix_bear_markdown(content)
        return content.strip()

    def _fix_bear_markdown(self, content: str) -> str:
        """Normalize link targets and replace ``<br>`` tags with newlines."""
        # Trim whitespace around link/image targets: "[a]( url )" -> "[a](url)".
        # The target group is lazy so the trailing \s* can consume the padding.
        content = re.sub(r"\[([^\]]+)\]\(\s*([^\)]+?)\s*\)", r"[\1](\2)", content)

        # NOTE(review): this also rewrites <br> inside code blocks.
        content = re.sub(r"<br\s*/?>", "\n", content)

        return content

    def _clean_filename(self, name: str) -> str:
        """Reduce *name* to a lowercase, hyphen-separated, filesystem-safe stem."""
        # Drop characters that are invalid in filenames on common platforms.
        name = re.sub(r'[<>:"/\\|?*]', "", name)

        # Whitespace/underscore runs become a single hyphen.
        name = re.sub(r"[\s_]+", "-", name)
        name = re.sub(r"-+", "-", name)

        name = name.strip("-.").lower()

        # Limit length, then re-trim so truncation cannot leave a dangling
        # separator at the end.
        if len(name) > 50:
            name = name[:50].rstrip("-.")

        return name

    @staticmethod
    def _has_control_chars(text: str) -> bool:
        """Return True if *text* contains an ASCII control character."""
        return any(ord(ch) < 0x20 or ord(ch) == 0x7F for ch in text)

    def _yaml_needs_quotes(self, text: str) -> bool:
        """Return True if *text* is unsafe as a plain (unquoted) YAML scalar."""
        if any(ch in self._YAML_SPECIAL_CHARS for ch in text):
            return True
        if text.lower() in self._YAML_RESERVED_WORDS:
            return True
        # Anything Python can read as a number, YAML will likely read as one too.
        for parse in (float, lambda s: int(s, 0)):
            try:
                parse(text)
            except ValueError:
                continue
            return True
        return False

    def _escape_yaml(self, text: str) -> str:
        """
        Render *text* as a YAML scalar that loads back as the same string.

        Returns the text bare when that is safe, single-quoted (with ``'``
        doubled) when it contains indicator characters or would be read as a
        bool/null/number, and double-quoted with backslash escapes when it
        contains control characters such as newlines. Falsy input yields "".
        """
        if not text:
            return ""

        text = str(text)

        # Single-quoted scalars fold newlines into spaces, so control
        # characters require the double-quoted style.
        if self._has_control_chars(text):
            named = {"\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r", "\t": "\\t"}
            pieces = []
            for ch in text:
                if ch in named:
                    pieces.append(named[ch])
                elif ord(ch) < 0x20 or ord(ch) == 0x7F:
                    pieces.append(f"\\x{ord(ch):02x}")
                else:
                    pieces.append(ch)
            return '"' + "".join(pieces) + '"'

        if self._yaml_needs_quotes(text):
            return "'" + text.replace("'", "''") + "'"

        return text

    def _escape_toml(self, text: str) -> str:
        """
        Escape *text* for use inside a TOML basic (double-quoted) string.

        Backslashes, double quotes and control characters are escaped. The
        result is NOT valid inside a literal ``'...'`` string, which has no
        escape syntax; use ``_toml_string`` to get a fully quoted value.
        """
        if not text:
            return ""

        named = {
            "\\": "\\\\", '"': '\\"', "\b": "\\b", "\t": "\\t",
            "\n": "\\n", "\f": "\\f", "\r": "\\r",
        }
        pieces = []
        for ch in str(text):
            if ch in named:
                pieces.append(named[ch])
            elif ord(ch) < 0x20 or ord(ch) == 0x7F:
                pieces.append(f"\\u{ord(ch):04X}")
            else:
                pieces.append(ch)
        return "".join(pieces)

    def _toml_string(self, value: Any) -> str:
        """
        Render *value* as a complete, quoted TOML string.

        Uses a literal ``'...'`` string when the text has no single quote or
        control character, otherwise a basic ``"..."`` string with escapes.
        """
        text = "" if value is None else str(value)
        if "'" not in text and not self._has_control_chars(text):
            return f"'{text}'"
        return f'"{self._escape_toml(text)}"'
@@ -0,0 +1,225 @@
1
+ """
2
+ File Organizer Module
3
+
4
+ Organizes markdown files into directory structures.
5
+ """
6
+
7
+ import os
8
+ import shutil
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import List, Dict, Any
12
+
13
+
14
class FileOrganizer:
    """Organizes markdown files into directory structures."""

    def __init__(self, output_dir: str = "Blog", organization: str = "none"):
        """
        Initialize file organizer and create the output directory.

        Args:
            output_dir: Base output directory (may be a nested path)
            organization: How to organize files ('date', 'tags', 'none');
                any other value is treated like 'none'
        """
        self.output_dir = Path(output_dir)
        self.organization = organization.lower()
        # id(post) -> first path the post was written to; used to build
        # correct index links for posts placed in subdirectories.
        self._written_paths = {}

        # parents=True so a nested path such as "site/content/blog" works.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def organize_posts(self, posts: List[Dict[str, Any]]) -> None:
        """
        Organize posts into directory structure and write files.

        WARNING: every non-hidden entry already inside the output directory
        is deleted before writing.

        Args:
            posts: List of posts with 'markdown_content' and 'filename'
        """
        if self.output_dir.exists():
            self._clean_output_directory()

        self._written_paths = {}

        if self.organization == "date":
            self._organize_by_date(posts)
        elif self.organization == "tags":
            self._organize_by_tags(posts)
        else:
            self._organize_flat(posts)

        self._create_index(posts)

    def _clean_output_directory(self) -> None:
        """Delete every non-hidden entry in the output directory."""
        # NOTE(review): destructive by design. Pointing the output directory at
        # a directory holding other data (e.g. ".") wipes that data.
        for item in self.output_dir.iterdir():
            if item.name.startswith("."):
                continue
            # Symlinks are unlinked, never followed: shutil.rmtree refuses to
            # operate on a symlink to a directory.
            if item.is_symlink() or item.is_file():
                item.unlink()
            elif item.is_dir():
                shutil.rmtree(item)

    def _write_post(self, post: Dict[str, Any], directory: Path) -> None:
        """Write one post into *directory* and remember where it went."""
        filepath = directory / post.get("filename", "untitled.md")
        self._write_file(filepath, post.get("markdown_content", ""))
        # setdefault keeps the first location when a post is written to
        # several tag directories. __dict__ access tolerates instances
        # created without __init__.
        self.__dict__.setdefault("_written_paths", {}).setdefault(id(post), filepath)

    def _organize_flat(self, posts: List[Dict[str, Any]]) -> None:
        """Write all posts directly into the output directory."""
        for post in posts:
            self._write_post(post, self.output_dir)

    def _organize_by_date(self, posts: List[Dict[str, Any]]) -> None:
        """Write posts into ``YYYY/MM`` directories; undated posts go to the root."""
        for post in posts:
            parsed = self._parse_date(
                post.get("published date") or post.get("first published at")
            )
            if parsed is None:
                # Missing or unparseable date: fall back to the root.
                target_dir = self.output_dir
            else:
                target_dir = self.output_dir / str(parsed.year) / f"{parsed.month:02d}"
            self._write_post(post, target_dir)

    def _organize_by_tags(self, posts: List[Dict[str, Any]]) -> None:
        """Write posts into one directory per tag (a post may appear in several)."""
        tag_posts = {}
        uncategorized_posts = []

        for post in posts:
            tags = post.get("all tags", [])
            if not tags:
                uncategorized_posts.append(post)
                continue
            for tag in tags:
                tag_posts.setdefault(tag, []).append(post)

        for tag, tagged_posts in tag_posts.items():
            tag_dir = self.output_dir / self._clean_tag_name(tag)
            tag_dir.mkdir(exist_ok=True)
            for post in tagged_posts:
                self._write_post(post, tag_dir)

        # Posts without tags live in the root.
        for post in uncategorized_posts:
            self._write_post(post, self.output_dir)

    def _create_index(self, posts: List[Dict[str, Any]]) -> None:
        """Write README.md in the output directory, listing all posts."""
        index_content = self._generate_index_content(posts)
        index_path = self.output_dir / "README.md"
        self._write_file(index_path, index_content)

    def _index_link(self, post: Dict[str, Any]) -> str:
        """Return the link target for *post*, relative to the output directory."""
        filename = post.get("filename", "untitled.md")
        written = getattr(self, "_written_paths", {}).get(id(post))
        if written is None:
            # Post was not written by this organizer; best effort.
            return filename
        try:
            return written.relative_to(self.output_dir).as_posix()
        except ValueError:
            return filename

    def _generate_index_content(self, posts: List[Dict[str, Any]]) -> str:
        """
        Generate content for index file.

        Posts are listed newest first (undated posts last), each linking to
        the location it was written to, followed by total/published/draft
        counts.
        """
        lines = ["# Blog Posts\n"]

        sorted_posts = sorted(posts, key=self._get_post_date, reverse=True)

        for post in sorted_posts:
            # `or` so an empty title does not render as "[]".
            title = post.get("title") or "Untitled"
            date_str = post.get("published date") or post.get("first published at")
            tags = post.get("all tags", [])

            date_display = ""
            if date_str:
                parsed = self._parse_date(date_str)
                # Unparseable dates are shown verbatim.
                date_display = parsed.strftime("%Y-%m-%d") if parsed else date_str

            tags_display = ""
            if tags:
                tags_display = f" - `{'`, `'.join(tags)}`"

            line = f"- [{title}]({self._index_link(post)})"
            if date_display:
                line += f" ({date_display})"
            if tags_display:
                line += f" {tags_display}"

            lines.append(line)

        lines.append("\n---")
        lines.append(f"**Total posts:** {len(posts)}")

        published_count = sum(1 for p in posts if p.get("publish", False))
        draft_count = len(posts) - published_count

        lines.append(f"**Published:** {published_count}")
        lines.append(f"**Drafts:** {draft_count}")

        return "\n".join(lines)

    def _write_file(self, filepath: Path, content: str) -> None:
        """Write content to file as UTF-8, creating parent directories as needed."""
        filepath.parent.mkdir(parents=True, exist_ok=True)

        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)

    def _clean_tag_name(self, tag: str) -> str:
        """Clean tag name for directory use; returns 'untagged' if nothing is left."""
        import re

        tag = re.sub(r'[<>:"/\\|?*]', "", tag)
        tag = re.sub(r"[\s_]+", "-", tag)
        tag = re.sub(r"-+", "-", tag)
        tag = tag.strip("-.")

        tag = tag.lower()

        return tag or "untagged"

    @staticmethod
    def _parse_date(date_str: Any):
        """Parse an ISO-8601 string (a trailing 'Z' is accepted); None on failure."""
        if not isinstance(date_str, str) or not date_str:
            return None
        try:
            return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except ValueError:
            return None

    def _get_post_date(self, post: Dict[str, Any]) -> datetime:
        """
        Get a naive datetime for sorting posts.

        Timezone-aware dates are converted to UTC and stripped of tzinfo so
        they compare with naive dates and with the fallback; comparing aware
        and naive datetimes raises TypeError. Posts without a usable date
        sort as 1970-01-01.
        """
        parsed = self._parse_date(
            post.get("published date") or post.get("first published at")
        )
        if parsed is None:
            return datetime(1970, 1, 1)

        offset = parsed.utcoffset()
        if offset is not None:
            parsed = (parsed - offset).replace(tzinfo=None)
        return parsed
@@ -0,0 +1,191 @@
1
+ """
2
+ Bear CSV Parser Module
3
+
4
+ Handles parsing of Bear blog export CSV files with proper encoding and data cleaning.
5
+ """
6
+
7
+ import pandas as pd
8
+ import json
9
+ from datetime import datetime
10
+ from typing import List, Dict, Any
11
+
12
+
13
class BearCSVParser:
    """Parser for Bear blog CSV export files."""

    # Encodings tried in order. utf-8-sig also reads plain UTF-8, so the
    # fallback has to be a different codec to be of any use.
    _ENCODINGS = ("utf-8-sig", "cp1252")

    def __init__(self):
        # Columns that must all be present in the CSV header.
        self.expected_columns = [
            "uid",
            "title",
            "slug",
            "alias",
            "published date",
            "all tags",
            "publish",
            "make discoverable",
            "is page",
            "content",
            "canonical url",
            "meta description",
            "meta image",
            "lang",
            "class name",
            "first published at",
        ]

    def parse(self, csv_file: str) -> List[Dict[str, Any]]:
        """
        Parse Bear blog CSV file and return list of post dictionaries.

        Args:
            csv_file: Path to the CSV file

        Returns:
            List of post dictionaries with cleaned data

        Raises:
            ValueError: If the file cannot be read or decoded, or if expected
                columns are missing. The original exception is chained.
        """
        try:
            df = self._read_csv(csv_file)
            self._validate_columns(df)
            return [
                self._clean_post_data(record)
                for record in df.to_dict(orient="records")
            ]
        except UnicodeDecodeError as e:
            raise ValueError(
                f"Could not read CSV file with any encoding: {str(e)}"
            ) from e
        except Exception as e:
            raise ValueError(f"Error parsing CSV file: {str(e)}") from e

    def _read_csv(self, csv_file: str) -> pd.DataFrame:
        """Read the CSV as raw strings, trying each supported encoding in turn."""
        last_error = None
        for encoding in self._ENCODINGS:
            try:
                # dtype=str / keep_default_na=False disable pandas' type
                # inference: a uid like "007" stays "007" and a title like
                # "NA" is not turned into a missing value. Empty cells are "".
                return pd.read_csv(
                    csv_file,
                    encoding=encoding,
                    dtype=str,
                    keep_default_na=False,
                )
            except UnicodeDecodeError as e:
                last_error = e
        raise last_error

    def _validate_columns(self, df: pd.DataFrame) -> None:
        """Raise ValueError if any expected column is missing from the DataFrame."""
        missing_columns = set(self.expected_columns) - set(df.columns)
        if missing_columns:
            # Sorted so the message is deterministic.
            raise ValueError(
                f"Missing expected columns in CSV: {sorted(missing_columns)}"
            )

    def _clean_post_data(self, post_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Clean and normalize post data.

        Args:
            post_data: Raw post data from CSV

        Returns:
            Cleaned post data dictionary: string fields stripped, dates in
            ISO form where parseable, flags as bool, 'all tags' as a list of
            strings, and 'content' as an unstripped string
        """
        cleaned = {}

        string_fields = [
            "uid",
            "title",
            "slug",
            "alias",
            "canonical url",
            "meta description",
            "meta image",
            "lang",
            "class name",
        ]
        for field in string_fields:
            cleaned[field] = self._clean_string(post_data.get(field, ""))

        for field in ("published date", "first published at"):
            cleaned[field] = self._clean_date(post_data.get(field, ""))

        for field in ("publish", "make discoverable", "is page"):
            cleaned[field] = self._clean_boolean(post_data.get(field, False))

        cleaned["all tags"] = self._clean_tags(post_data.get("all tags", "[]"))
        cleaned["content"] = self._clean_content(post_data.get("content", ""))

        return cleaned

    def _clean_string(self, value: Any) -> str:
        """Return *value* as a stripped string; missing values become ""."""
        if value is None or pd.isna(value):
            return ""
        return str(value).strip()

    def _clean_date(self, value: Any) -> str:
        """
        Normalize a date value to a string.

        Strings matching one of the known formats are returned in ISO form;
        other non-empty strings are returned stripped but otherwise unchanged.
        Missing values yield "".
        """
        if value is None:
            return ""

        if isinstance(value, str):
            text = value.strip()
            if not text:
                return ""
            for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d"):
                try:
                    return datetime.strptime(text, fmt).isoformat()
                except ValueError:
                    continue
            # Unknown format (e.g. with timezone): pass through for the caller.
            return text

        if pd.isna(value):
            return ""
        if isinstance(value, datetime):
            return value.isoformat()
        return str(value)

    def _clean_boolean(self, value: Any) -> bool:
        """Interpret *value* as a bool; 'true', 'yes', '1' and 'on' count as True."""
        if value is None or pd.isna(value):
            return False

        if isinstance(value, bool):
            return value

        if isinstance(value, str):
            return value.strip().lower() in ["true", "yes", "1", "on"]

        return bool(value)

    def _clean_tags(self, value: Any) -> List[str]:
        """
        Normalize the tags field to a list of non-empty, stripped strings.

        Accepts a list, a JSON array string, a single JSON string, or, when
        the text is not valid JSON, a comma-separated list with optional
        brackets and quotes. Anything else yields [].
        """
        if isinstance(value, (list, tuple)):
            # Checked first: pd.isna() on a list returns an array, which
            # cannot be used as a truth value.
            candidates = value
        elif isinstance(value, str):
            text = value.strip()
            if not text:
                return []
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError:
                # Not JSON: treat as comma-separated rather than silently
                # dropping the tags.
                parsed = [
                    part.strip().strip("'\"")
                    for part in text.strip("[]").split(",")
                ]
            if isinstance(parsed, str):
                parsed = [parsed]
            candidates = parsed if isinstance(parsed, list) else []
        else:
            # None, NaN, or an unsupported type.
            return []

        return [
            tag.strip()
            for tag in candidates
            if isinstance(tag, str) and tag.strip()
        ]

    def _clean_content(self, value: Any) -> str:
        """Return *value* as a string without stripping; missing values become ""."""
        if value is None or pd.isna(value):
            return ""
        return str(value)