clipmd 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clipmd-0.1.0/.claude/settings.json +49 -0
- clipmd-0.1.0/.github/workflows/ci.yml +40 -0
- clipmd-0.1.0/.gitignore +99 -0
- clipmd-0.1.0/.pre-commit-config.yaml +16 -0
- clipmd-0.1.0/CHANGELOG.md +104 -0
- clipmd-0.1.0/CLAUDE.md +229 -0
- clipmd-0.1.0/LICENSE +21 -0
- clipmd-0.1.0/Makefile +38 -0
- clipmd-0.1.0/PKG-INFO +381 -0
- clipmd-0.1.0/README.md +333 -0
- clipmd-0.1.0/SPEC.md +1933 -0
- clipmd-0.1.0/TODO.md +7 -0
- clipmd-0.1.0/Todo-Later.md +800 -0
- clipmd-0.1.0/pyproject.toml +100 -0
- clipmd-0.1.0/src/clipmd/__init__.py +3 -0
- clipmd-0.1.0/src/clipmd/cli.py +144 -0
- clipmd-0.1.0/src/clipmd/commands/__init__.py +1 -0
- clipmd-0.1.0/src/clipmd/commands/duplicates.py +101 -0
- clipmd-0.1.0/src/clipmd/commands/extract.py +102 -0
- clipmd-0.1.0/src/clipmd/commands/fetch.py +173 -0
- clipmd-0.1.0/src/clipmd/commands/init.py +79 -0
- clipmd-0.1.0/src/clipmd/commands/move.py +98 -0
- clipmd-0.1.0/src/clipmd/commands/preprocess.py +74 -0
- clipmd-0.1.0/src/clipmd/commands/stats.py +72 -0
- clipmd-0.1.0/src/clipmd/commands/trash.py +88 -0
- clipmd-0.1.0/src/clipmd/commands/validate.py +71 -0
- clipmd-0.1.0/src/clipmd/config.py +383 -0
- clipmd-0.1.0/src/clipmd/context.py +40 -0
- clipmd-0.1.0/src/clipmd/core/__init__.py +1 -0
- clipmd-0.1.0/src/clipmd/core/cache.py +473 -0
- clipmd-0.1.0/src/clipmd/core/dates.py +279 -0
- clipmd-0.1.0/src/clipmd/core/discovery.py +110 -0
- clipmd-0.1.0/src/clipmd/core/duplicates.py +207 -0
- clipmd-0.1.0/src/clipmd/core/extractor.py +373 -0
- clipmd-0.1.0/src/clipmd/core/fetcher.py +559 -0
- clipmd-0.1.0/src/clipmd/core/filepath_utils.py +41 -0
- clipmd-0.1.0/src/clipmd/core/formatters.py +116 -0
- clipmd-0.1.0/src/clipmd/core/frontmatter.py +386 -0
- clipmd-0.1.0/src/clipmd/core/hasher.py +36 -0
- clipmd-0.1.0/src/clipmd/core/initializer.py +269 -0
- clipmd-0.1.0/src/clipmd/core/mover.py +406 -0
- clipmd-0.1.0/src/clipmd/core/preprocessor.py +385 -0
- clipmd-0.1.0/src/clipmd/core/rss.py +70 -0
- clipmd-0.1.0/src/clipmd/core/sanitizer.py +196 -0
- clipmd-0.1.0/src/clipmd/core/stats.py +203 -0
- clipmd-0.1.0/src/clipmd/core/trash.py +167 -0
- clipmd-0.1.0/src/clipmd/core/url_utils.py +97 -0
- clipmd-0.1.0/src/clipmd/core/validator.py +309 -0
- clipmd-0.1.0/src/clipmd/exceptions.py +35 -0
- clipmd-0.1.0/tests/__init__.py +1 -0
- clipmd-0.1.0/tests/cli/__init__.py +1 -0
- clipmd-0.1.0/tests/cli/test_cli.py +102 -0
- clipmd-0.1.0/tests/cli/test_duplicates_cmd.py +185 -0
- clipmd-0.1.0/tests/cli/test_extract_cmd.py +372 -0
- clipmd-0.1.0/tests/cli/test_fetch_cmd.py +1192 -0
- clipmd-0.1.0/tests/cli/test_init_cmd.py +148 -0
- clipmd-0.1.0/tests/cli/test_move_cmd.py +392 -0
- clipmd-0.1.0/tests/cli/test_preprocess_cmd.py +344 -0
- clipmd-0.1.0/tests/cli/test_stats_cmd.py +165 -0
- clipmd-0.1.0/tests/cli/test_trash_cmd.py +218 -0
- clipmd-0.1.0/tests/cli/test_validate_cmd.py +561 -0
- clipmd-0.1.0/tests/conftest.py +80 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/.clipmd/cache.json +33 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/20240115-Duplicate-Article.md +13 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/20240115-Sample-Article.md +23 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/20240116-Article-With-Issues.md +17 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/20240117-No-Frontmatter-Date.md +17 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/20240118-Wikilink-Issue.md +14 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/AI-Tools/20240110-Claude-API-Guide.md +20 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/Science/20240112-Space-Discovery.md +23 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/config.yaml +56 -0
- clipmd-0.1.0/tests/fixtures/sample-vault/no-date-prefix-article.md +13 -0
- clipmd-0.1.0/tests/integration/__init__.py +1 -0
- clipmd-0.1.0/tests/unit/__init__.py +1 -0
- clipmd-0.1.0/tests/unit/test_cache.py +416 -0
- clipmd-0.1.0/tests/unit/test_config.py +386 -0
- clipmd-0.1.0/tests/unit/test_dates.py +216 -0
- clipmd-0.1.0/tests/unit/test_discovery.py +237 -0
- clipmd-0.1.0/tests/unit/test_frontmatter.py +254 -0
- clipmd-0.1.0/tests/unit/test_hasher.py +40 -0
- clipmd-0.1.0/tests/unit/test_sanitizer.py +141 -0
- clipmd-0.1.0/uv.lock +1012 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(file:*)",
|
|
5
|
+
"Bash(gh pr:*)",
|
|
6
|
+
"Bash(git add:*)",
|
|
7
|
+
"Bash(git branch:*)",
|
|
8
|
+
"Bash(git checkout:*)",
|
|
9
|
+
"Bash(git commit:*)",
|
|
10
|
+
"Bash(git diff:*)",
|
|
11
|
+
"Bash(git fetch:*)",
|
|
12
|
+
"Bash(git log:*)",
|
|
13
|
+
"Bash(git mv:*)",
|
|
14
|
+
"Bash(git pull:*)",
|
|
15
|
+
"Bash(git push:*)",
|
|
16
|
+
"Bash(git reset:*)",
|
|
17
|
+
"Bash(git rm:*)",
|
|
18
|
+
"Bash(git show:*)",
|
|
19
|
+
"Bash(git stash:*)",
|
|
20
|
+
"Bash(git status:*)",
|
|
21
|
+
"Bash(git switch:*)",
|
|
22
|
+
"Bash(ls:*)",
|
|
23
|
+
"Bash(make:*)",
|
|
24
|
+
"Bash(mkdir:*)",
|
|
25
|
+
"Bash(pre-commit:*)",
|
|
26
|
+
"Bash(pwd:*)",
|
|
27
|
+
"Bash(python3:*)",
|
|
28
|
+
"Bash(rm -rf *.egg-info:*)",
|
|
29
|
+
"Bash(rm -rf .pytest_cache:*)",
|
|
30
|
+
"Bash(rm -rf .ruff_cache:*)",
|
|
31
|
+
"Bash(rm -rf __pycache__:*)",
|
|
32
|
+
"Bash(rm -rf build:*)",
|
|
33
|
+
"Bash(rm -rf dist:*)",
|
|
34
|
+
"Bash(rm -rf tests/fixtures/temp:*)",
|
|
35
|
+
"Bash(stat:*)",
|
|
36
|
+
"Bash(uv:*)",
|
|
37
|
+
"Bash(wc:*)",
|
|
38
|
+
"Bash(which:*)"
|
|
39
|
+
],
|
|
40
|
+
"deny": [
|
|
41
|
+
"Bash(git push --force:*)",
|
|
42
|
+
"Bash(git push -f:*)",
|
|
43
|
+
"Bash(git reset --hard:*)",
|
|
44
|
+
"Bash(rm -rf /*:*)",
|
|
45
|
+
"Bash(rm -rf ~:*)",
|
|
46
|
+
"Bash(rm -rf .:*)"
|
|
47
|
+
]
|
|
48
|
+
}
|
|
49
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.13", "3.14"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v5
|
|
21
|
+
with:
|
|
22
|
+
version: "latest"
|
|
23
|
+
|
|
24
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
25
|
+
run: uv python install ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: uv sync --all-extras
|
|
29
|
+
|
|
30
|
+
- name: Run linter
|
|
31
|
+
run: uv run ruff check src/ tests/
|
|
32
|
+
|
|
33
|
+
- name: Run formatter check
|
|
34
|
+
run: uv run ruff format --check src/ tests/
|
|
35
|
+
|
|
36
|
+
- name: Run type checker
|
|
37
|
+
run: uv run ty check src/
|
|
38
|
+
|
|
39
|
+
- name: Run tests with coverage
|
|
40
|
+
run: uv run pytest --cov=clipmd --cov-report=term-missing --cov-fail-under=89
|
clipmd-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
*.manifest
|
|
31
|
+
*.spec
|
|
32
|
+
|
|
33
|
+
# Installer logs
|
|
34
|
+
pip-log.txt
|
|
35
|
+
pip-delete-this-directory.txt
|
|
36
|
+
|
|
37
|
+
# Unit test / coverage reports
|
|
38
|
+
htmlcov/
|
|
39
|
+
.tox/
|
|
40
|
+
.nox/
|
|
41
|
+
.coverage
|
|
42
|
+
.coverage.*
|
|
43
|
+
.cache
|
|
44
|
+
nosetests.xml
|
|
45
|
+
coverage.xml
|
|
46
|
+
*.cover
|
|
47
|
+
*.py,cover
|
|
48
|
+
.hypothesis/
|
|
49
|
+
.pytest_cache/
|
|
50
|
+
|
|
51
|
+
# Translations
|
|
52
|
+
*.mo
|
|
53
|
+
*.pot
|
|
54
|
+
|
|
55
|
+
# Environments
|
|
56
|
+
.env
|
|
57
|
+
.venv
|
|
58
|
+
env/
|
|
59
|
+
venv/
|
|
60
|
+
ENV/
|
|
61
|
+
env.bak/
|
|
62
|
+
venv.bak/
|
|
63
|
+
|
|
64
|
+
# Spyder project settings
|
|
65
|
+
.spyderproject
|
|
66
|
+
.spyproject
|
|
67
|
+
|
|
68
|
+
# Rope project settings
|
|
69
|
+
.ropeproject
|
|
70
|
+
|
|
71
|
+
# mkdocs documentation
|
|
72
|
+
/site
|
|
73
|
+
|
|
74
|
+
# mypy / type checkers
|
|
75
|
+
.mypy_cache/
|
|
76
|
+
.dmypy.json
|
|
77
|
+
dmypy.json
|
|
78
|
+
.ty/
|
|
79
|
+
|
|
80
|
+
# Ruff
|
|
81
|
+
.ruff_cache/
|
|
82
|
+
|
|
83
|
+
# IDE
|
|
84
|
+
.idea/
|
|
85
|
+
.vscode/
|
|
86
|
+
*.swp
|
|
87
|
+
*.swo
|
|
88
|
+
*~
|
|
89
|
+
|
|
90
|
+
# OS
|
|
91
|
+
.DS_Store
|
|
92
|
+
Thumbs.db
|
|
93
|
+
|
|
94
|
+
# Project specific
|
|
95
|
+
.clipmd/cache.json
|
|
96
|
+
*.log
|
|
97
|
+
|
|
98
|
+
# Test artifacts
|
|
99
|
+
tests/fixtures/temp/
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v6.0.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: trailing-whitespace
|
|
6
|
+
- id: end-of-file-fixer
|
|
7
|
+
- id: check-yaml
|
|
8
|
+
- id: check-toml
|
|
9
|
+
- id: check-added-large-files
|
|
10
|
+
|
|
11
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
12
|
+
rev: v0.14.13
|
|
13
|
+
hooks:
|
|
14
|
+
- id: ruff-check
|
|
15
|
+
args: [--fix]
|
|
16
|
+
- id: ruff-format
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2024-01-20
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
#### Core Commands
|
|
15
|
+
- `init` command for initializing clipmd in a directory with config scaffolding
|
|
16
|
+
- `validate` command for validating configuration and setup
|
|
17
|
+
- `fetch` command for fetching web content and converting to markdown with frontmatter
|
|
18
|
+
- RSS/Atom feed support with `--rss` flag and `--rss-limit` option
|
|
19
|
+
- Duplicate detection with `--check-duplicates` (enabled by default)
|
|
20
|
+
- Readability mode for extracting main content
|
|
21
|
+
- Dry run mode with `--dry-run` flag
|
|
22
|
+
- Support for reading URLs from file with `--file` flag
|
|
23
|
+
- Async fetching with configurable concurrency
|
|
24
|
+
- Meta-refresh redirect handling
|
|
25
|
+
- Automatic tracking parameter cleaning
|
|
26
|
+
- Never overwrites existing files (appends suffix `-2.md`, `-3.md`, etc.)
|
|
27
|
+
- `preprocess` command for cleaning and preparing articles
|
|
28
|
+
- URL cleaning (tracking parameters, redirect unwrapping)
|
|
29
|
+
- Filename sanitization with configurable replacements
|
|
30
|
+
- Date prefix addition (from frontmatter or content extraction)
|
|
31
|
+
- Frontmatter fixing (multi-line fields, wikilinks, YAML validation)
|
|
32
|
+
- Duplicate detection during preprocessing
|
|
33
|
+
- `extract` command for generating LLM-optimized metadata
|
|
34
|
+
- Multiple output formats: markdown, json, yaml
|
|
35
|
+
- Configurable description/content preview length
|
|
36
|
+
- Optional word count and language detection with `--include-stats`
|
|
37
|
+
- Folder list inclusion with `--folders`
|
|
38
|
+
- `move` command for executing categorization decisions
|
|
39
|
+
- Automatic folder creation
|
|
40
|
+
- System trash integration for TRASH category
|
|
41
|
+
- Cache updates after moves
|
|
42
|
+
- Dry run mode
|
|
43
|
+
- `trash` command for moving files to system trash
|
|
44
|
+
- Glob pattern support
|
|
45
|
+
- Cache updates marking files as removed
|
|
46
|
+
- `stats` command for folder statistics
|
|
47
|
+
- Configurable warning thresholds
|
|
48
|
+
- Multiple output formats: table, json, yaml
|
|
49
|
+
- Optional special folder inclusion
|
|
50
|
+
- `duplicates` command for finding duplicate articles
|
|
51
|
+
- Detection by URL (default)
|
|
52
|
+
- Detection by content hash
|
|
53
|
+
- Detection by filename similarity
|
|
54
|
+
- Multiple output formats: markdown, json
|
|
55
|
+
|
|
56
|
+
#### Core Features
|
|
57
|
+
- XDG-compliant configuration file search (project root, .clipmd/, ~/.config/clipmd/)
|
|
58
|
+
- Pydantic-based configuration validation
|
|
59
|
+
- Flexible frontmatter field mapping (supports multiple field name variants)
|
|
60
|
+
- Date parsing with multiple input formats
|
|
61
|
+
- Date extraction from article content using regex patterns
|
|
62
|
+
- URL cache for duplicate detection and history tracking
|
|
63
|
+
- Content hashing for duplicate detection
|
|
64
|
+
- Rich-formatted console output with progress bars and colored status
|
|
65
|
+
- Shell completion support (bash, zsh, fish)
|
|
66
|
+
- Global options: `--verbose`, `--quiet`, `--config`, `--no-color`
|
|
67
|
+
- Custom exception hierarchy with exit codes (0=success, 1=error, 2=partial success)
|
|
68
|
+
|
|
69
|
+
#### Development Infrastructure
|
|
70
|
+
- Python 3.13+ support
|
|
71
|
+
- UV package management
|
|
72
|
+
- Ruff linting and formatting
|
|
73
|
+
- Ty type checking
|
|
74
|
+
- Pre-commit hooks
|
|
75
|
+
- Pytest test suite with ≥89% coverage
|
|
76
|
+
- Comprehensive unit, CLI, and integration tests
|
|
77
|
+
- GitHub Actions CI/CD pipeline
|
|
78
|
+
- Makefile for common development tasks
|
|
79
|
+
|
|
80
|
+
#### Documentation
|
|
81
|
+
- Complete specification in SPEC.md
|
|
82
|
+
- Development guide with architecture patterns
|
|
83
|
+
- Claude Code integration examples
|
|
84
|
+
- LLM workflow examples
|
|
85
|
+
|
|
86
|
+
### Dependencies
|
|
87
|
+
- click>=8.1 (CLI framework)
|
|
88
|
+
- pyyaml>=6.0 (config file parsing)
|
|
89
|
+
- pydantic>=2.10 (config validation)
|
|
90
|
+
- send2trash>=1.8 (system trash integration)
|
|
91
|
+
- python-dateutil>=2.9 (date parsing)
|
|
92
|
+
- python-frontmatter>=1.1.0 (frontmatter parsing)
|
|
93
|
+
- rich>=13.9 (terminal formatting)
|
|
94
|
+
- httpx>=0.28 (async HTTP client)
|
|
95
|
+
- beautifulsoup4>=4.12 (HTML parsing)
|
|
96
|
+
- trafilatura>=2.0 (content extraction)
|
|
97
|
+
- markdownify>=0.14 (HTML to markdown conversion)
|
|
98
|
+
- feedparser>=6.0 (RSS/Atom feed parsing)
|
|
99
|
+
|
|
100
|
+
### Optional Dependencies
|
|
101
|
+
- langdetect>=1.0 (language detection for `--include-stats`)
|
|
102
|
+
|
|
103
|
+
[Unreleased]: https://github.com/jmlrt/clipmd/compare/v0.1.0...HEAD
|
|
104
|
+
[0.1.0]: https://github.com/jmlrt/clipmd/releases/tag/v0.1.0
|
clipmd-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
Project-specific guidance for the **clipmd** CLI tool.
|
|
4
|
+
|
|
5
|
+
For general Python development patterns, see the **python-development** skill in Claude Code.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# clipmd
|
|
10
|
+
|
|
11
|
+
CLI tool for saving, organizing, and managing markdown articles with YAML frontmatter.
|
|
12
|
+
|
|
13
|
+
**See `SPEC.md` for full specification (source of truth).**
|
|
14
|
+
|
|
15
|
+
## Quick Reference
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Development setup
|
|
19
|
+
make dev # Install all dependencies (including dev extras)
|
|
20
|
+
make install # Install dependencies without extras
|
|
21
|
+
|
|
22
|
+
# Quality checks (must pass before commit)
|
|
23
|
+
make check # Run lint, typecheck, tests with coverage
|
|
24
|
+
make lint # Run ruff linter
|
|
25
|
+
make format # Format code with ruff
|
|
26
|
+
make typecheck # Run ty type checker
|
|
27
|
+
|
|
28
|
+
# Testing
|
|
29
|
+
uv run pytest tests/unit # Run only unit tests
|
|
30
|
+
uv run pytest tests/cli # Run only CLI tests
|
|
31
|
+
uv run pytest tests/integration # Run only integration tests
|
|
32
|
+
uv run pytest tests/unit/test_config.py # Run specific test file
|
|
33
|
+
uv run pytest -k test_function_name # Run specific test by name
|
|
34
|
+
uv run pytest --cov # Run with coverage report
|
|
35
|
+
make test # Run all tests
|
|
36
|
+
make test-cov # Run with coverage (89% minimum)
|
|
37
|
+
|
|
38
|
+
# Running the CLI
|
|
39
|
+
uv run clipmd --help # Show help
|
|
40
|
+
uv run clipmd init # Initialize new vault
|
|
41
|
+
uv run clipmd --config ./test-config.yaml extract # Use specific config
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Implementation Approach
|
|
45
|
+
|
|
46
|
+
- Work on feature branch
|
|
47
|
+
- Phase-by-phase implementation (see spec for 9 phases)
|
|
48
|
+
- Atomic commits: each commit must pass `make check`
|
|
49
|
+
|
|
50
|
+
## Architecture Overview
|
|
51
|
+
|
|
52
|
+
**Separation of Concerns:**
|
|
53
|
+
- `cli.py` - Click CLI application entry point, global options, command registration
|
|
54
|
+
- `context.py` - Context object holding config, verbosity, vault path (passed via Click context)
|
|
55
|
+
- `config.py` - Pydantic-based configuration loading and validation (XDG-compliant paths)
|
|
56
|
+
- `commands/` - Thin CLI wrappers (50-150 lines): parse args → call core → display output
|
|
57
|
+
- `core/` - Pure business logic: no Click dependencies, returns dataclass results
|
|
58
|
+
- `exceptions.py` - Custom exception hierarchy with exit codes
|
|
59
|
+
|
|
60
|
+
**Key clipmd Decisions:**
|
|
61
|
+
- **Config as parameter**: Core functions take `Config` as a parameter, not from global context
|
|
62
|
+
- **Async fetching**: `core/fetcher.py` uses `httpx` async with semaphore for concurrent URL fetching (see [Async Fetching Architecture](#async-fetching-architecture) below)
|
|
63
|
+
- **Result dataclasses**: Core functions return typed dataclasses, commands format them for display
|
|
64
|
+
- **TYPE_CHECKING imports**: Core modules import `Config` under `TYPE_CHECKING` to avoid circular imports
|
|
65
|
+
|
|
66
|
+
## Key Paths
|
|
67
|
+
|
|
68
|
+
| Path | Purpose |
|
|
69
|
+
|------|---------|
|
|
70
|
+
| `SPEC.md` | Full specification (source of truth) |
|
|
71
|
+
| `CHANGELOG.md` | Project changelog (Keep a Changelog format) |
|
|
72
|
+
| `src/clipmd/cli.py` | CLI entry point, global options |
|
|
73
|
+
| `src/clipmd/context.py` | Context object (config, verbosity) |
|
|
74
|
+
| `src/clipmd/config.py` | Pydantic config models and loading |
|
|
75
|
+
| `src/clipmd/commands/` | CLI command modules (thin wrappers) |
|
|
76
|
+
| `src/clipmd/core/` | Business logic (pure functions) |
|
|
77
|
+
| `src/clipmd/exceptions.py` | Custom exceptions with exit codes |
|
|
78
|
+
| `tests/unit/` | Unit tests for core modules |
|
|
79
|
+
| `tests/cli/` | CLI command tests |
|
|
80
|
+
| `tests/integration/` | End-to-end workflow tests |
|
|
81
|
+
| `tests/fixtures/sample-vault/` | Test data and config |
|
|
82
|
+
|
|
83
|
+
## Configuration
|
|
84
|
+
|
|
85
|
+
**Config Location (XDG-compliant search order):**
|
|
86
|
+
1. `./config.yaml` (project root)
|
|
87
|
+
2. `./.clipmd/config.yaml` (project .clipmd directory)
|
|
88
|
+
3. `~/.config/clipmd/config.yaml` (user-wide config)
|
|
89
|
+
4. `--config PATH` flag, when given, overrides all of the above
|
|
90
|
+
|
|
91
|
+
**Validation:**
|
|
92
|
+
- Config uses Pydantic v2 models for validation (`config.py`)
|
|
93
|
+
- Invalid config raises `ConfigError` with helpful messages
|
|
94
|
+
- Missing config falls back to sensible defaults
|
|
95
|
+
- All paths in config are resolved relative to vault root
|
|
96
|
+
|
|
97
|
+
## Error Handling
|
|
98
|
+
|
|
99
|
+
**Exception Hierarchy** (see `exceptions.py`):
|
|
100
|
+
- `ClipmdError` - Base exception (exit code 1)
|
|
101
|
+
- `ConfigError` - Configuration errors
|
|
102
|
+
- `FetchError` - URL fetching errors
|
|
103
|
+
- `ParseError` - Frontmatter/content parsing errors
|
|
104
|
+
- `CacheError` - Cache read/write errors
|
|
105
|
+
- `ValidationError` - Input validation errors
|
|
106
|
+
- `PartialSuccessError` - Some operations succeeded, some failed (exit code 2)
|
|
107
|
+
|
|
108
|
+
**Exit Codes:**
|
|
109
|
+
- `0` - Success
|
|
110
|
+
- `1` - Error (operation failed)
|
|
111
|
+
- `2` - Partial success (some items succeeded, some failed)
|
|
112
|
+
|
|
113
|
+
**Pattern**: Core functions return Result dataclasses with `success: bool` and optional `error: str | None`. Commands check results and raise `SystemExit(1)` on failure, or print Rich-formatted errors and exit.
|
|
114
|
+
|
|
115
|
+
## Development Practices
|
|
116
|
+
|
|
117
|
+
### When Fixing Bugs in Similar Commands
|
|
118
|
+
|
|
119
|
+
**clipmd-specific practice**: When fixing a bug or inconsistency in one CLI command, proactively check ALL similar commands for the same issue before considering the task done. Do not wait for the user to ask twice.
|
|
120
|
+
|
|
121
|
+
### Library Selection Principle
|
|
122
|
+
|
|
123
|
+
Prefer well-maintained, actively-developed libraries from PyPI over custom implementations:
|
|
124
|
+
|
|
125
|
+
- **Example**: Adopted `python-frontmatter` (20M+ downloads/month) to replace custom regex parsing
|
|
126
|
+
- **Evaluation**: Does it handle our specific requirements? (normalization, truncation, etc.)
|
|
127
|
+
- **Trade-off**: `python-slugify` NOT used because it doesn't support NFD normalization needed for sanitizer
|
|
128
|
+
|
|
129
|
+
### Testing Strategy
|
|
130
|
+
|
|
131
|
+
- `tests/unit/` - Test core business logic (frontmatter, config, sanitizer, etc.)
|
|
132
|
+
- `tests/cli/` - Test CLI interfaces (argument parsing, output formatting)
|
|
133
|
+
- `tests/integration/` - Test complete workflows (fetch → preprocess → extract → move)
|
|
134
|
+
- Target coverage: ≥89%
|
|
135
|
+
|
|
136
|
+
### Architecture Reference
|
|
137
|
+
|
|
138
|
+
For generic architectural patterns, see the **python-development** skill in Claude Code.
|
|
139
|
+
|
|
140
|
+
All clipmd commands follow these patterns with core logic in `core/` modules and thin CLI wrappers (50-150 lines) in `commands/`.
|
|
141
|
+
|
|
142
|
+
## Git Workflow
|
|
143
|
+
|
|
144
|
+
When staging and committing changes, ensure ONLY changes from the current task are included. Review staged files against the current session scope before committing.
|
|
145
|
+
|
|
146
|
+
### Addressing PR Review Comments
|
|
147
|
+
|
|
148
|
+
When asked to address PR review comments (from Copilot, human reviewers, etc.), follow this workflow to avoid excessive GitHub API calls:
|
|
149
|
+
|
|
150
|
+
1. **Fetch all comments once**: Use `gh api` to retrieve all PR comments (including outdated/resolved ones) and save to a temporary file
|
|
151
|
+
```bash
|
|
152
|
+
gh api repos/:owner/:repo/pulls/{PR_NUMBER}/comments --paginate > pr-comments-review.json
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
2. **Create human-readable summary**: Extract relevant fields into a readable format
|
|
156
|
+
```bash
|
|
157
|
+
cat pr-comments-review.json | jq -r '.[] | select(.in_reply_to_id == null) | "---\nID: \(.id)\nFile: \(.path)\nLine: \(.line // .original_line // "N/A")\nUser: \(.user.login)\nCreated: \(.created_at)\n\n\(.body)\n"' > pr-comments-summary.txt
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
3. **Work from the file**: Review each comment systematically, checking current code state against the issues raised
|
|
161
|
+
|
|
162
|
+
4. **Track progress**: Create an analysis file to track which issues are fixed, no longer applicable (due to refactoring), or still need work
|
|
163
|
+
|
|
164
|
+
5. **Clean up**: Remove temporary files when all issues are addressed
|
|
165
|
+
```bash
|
|
166
|
+
rm pr-comments-review.json pr-comments-summary.txt pr-comments-analysis.md
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**Rationale**: This approach minimizes API calls, provides a stable reference while working, and creates a clear audit trail of what was addressed.
|
|
170
|
+
|
|
171
|
+
### Changelog Maintenance
|
|
172
|
+
|
|
173
|
+
**IMPORTANT**: Update `CHANGELOG.md` for all user-facing changes:
|
|
174
|
+
|
|
175
|
+
**When to update:**
|
|
176
|
+
- New features (commands, options, functionality)
|
|
177
|
+
- Bug fixes that affect behavior
|
|
178
|
+
- Breaking changes to CLI or config format
|
|
179
|
+
- Dependency updates (major versions)
|
|
180
|
+
- Deprecations or removals
|
|
181
|
+
|
|
182
|
+
**When NOT to update:**
|
|
183
|
+
- Internal refactoring (no behavior change)
|
|
184
|
+
- Test-only changes
|
|
185
|
+
- Documentation updates (unless major)
|
|
186
|
+
- Code formatting/linting
|
|
187
|
+
|
|
188
|
+
**How to update:**
|
|
189
|
+
1. Add entry under `[Unreleased]` section
|
|
190
|
+
2. Use Keep a Changelog categories: `Added`, `Changed`, `Deprecated`, `Removed`, `Fixed`, `Security`
|
|
191
|
+
3. Write user-facing descriptions (not technical implementation details)
|
|
192
|
+
4. Group related changes together
|
|
193
|
+
|
|
194
|
+
**Example:**
|
|
195
|
+
```markdown
|
|
196
|
+
## [Unreleased]
|
|
197
|
+
|
|
198
|
+
### Added
|
|
199
|
+
- `extract` command now supports `--include-tags` option for tag filtering
|
|
200
|
+
|
|
201
|
+
### Fixed
|
|
202
|
+
- `fetch` command no longer crashes on malformed HTML meta tags
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
**On release:**
|
|
206
|
+
- Move `[Unreleased]` entries to new version section with date
|
|
207
|
+
- Update version links at bottom of file
|
|
208
|
+
- Bump version in `pyproject.toml` following Semantic Versioning
|
|
209
|
+
|
|
210
|
+
## Async Fetching Architecture
|
|
211
|
+
|
|
212
|
+
The `core/fetcher.py` module uses async/await with `httpx` for concurrent URL fetching:
|
|
213
|
+
|
|
214
|
+
**Key Functions:**
|
|
215
|
+
- `fetch_url()` - Async function to fetch single URL with timeout and retries
|
|
216
|
+
- `fetch_urls()` - Async orchestrator using `asyncio.Semaphore` to limit concurrency
|
|
217
|
+
- `fetch_rss_feed()` - Async RSS/Atom feed parser
|
|
218
|
+
- `orchestrate_fetch()` - Main entry point coordinating all fetch operations
|
|
219
|
+
|
|
220
|
+
**Concurrency Control:**
|
|
221
|
+
- `max_concurrent` setting controls semaphore limit (default: 5)
|
|
222
|
+
- Uses `asyncio.gather()` for parallel execution
|
|
223
|
+
- Each fetch operation is independent (failures don't block others)
|
|
224
|
+
|
|
225
|
+
**Important Behaviors:**
|
|
226
|
+
1. Meta-refresh redirects are handled automatically
|
|
227
|
+
2. Tracking URL parameters are cleaned (utm_*, fbclid, etc.)
|
|
228
|
+
3. Never overwrites existing files (appends suffix like `-2.md`, `-3.md`)
|
|
229
|
+
4. Content extraction uses trafilatura for readability mode
|
clipmd-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Julien Mailleret
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
clipmd-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
.PHONY: install dev lint format typecheck test test-cov clean build publish check
|
|
2
|
+
|
|
3
|
+
# Development
|
|
4
|
+
install:
|
|
5
|
+
uv sync
|
|
6
|
+
|
|
7
|
+
dev:
|
|
8
|
+
uv sync --all-extras
|
|
9
|
+
|
|
10
|
+
# Quality
|
|
11
|
+
lint:
|
|
12
|
+
uv run ruff check src tests
|
|
13
|
+
|
|
14
|
+
format:
|
|
15
|
+
uv run ruff format src tests
|
|
16
|
+
|
|
17
|
+
typecheck:
|
|
18
|
+
uv run ty check src
|
|
19
|
+
|
|
20
|
+
# Testing
|
|
21
|
+
test:
|
|
22
|
+
uv run pytest
|
|
23
|
+
|
|
24
|
+
test-cov:
|
|
25
|
+
uv run pytest --cov=clipmd --cov-report=term-missing --cov-fail-under=89
|
|
26
|
+
|
|
27
|
+
# Build & Publish
|
|
28
|
+
clean:
|
|
29
|
+
rm -rf dist build *.egg-info
|
|
30
|
+
|
|
31
|
+
build: clean
|
|
32
|
+
uv build
|
|
33
|
+
|
|
34
|
+
publish: build
|
|
35
|
+
uv publish
|
|
36
|
+
|
|
37
|
+
# All checks: lint, typecheck, and tests with coverage (CI runs the same steps plus a format check)
|
|
38
|
+
check: lint typecheck test-cov
|