clipmd 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. clipmd-0.1.0/.claude/settings.json +49 -0
  2. clipmd-0.1.0/.github/workflows/ci.yml +40 -0
  3. clipmd-0.1.0/.gitignore +99 -0
  4. clipmd-0.1.0/.pre-commit-config.yaml +16 -0
  5. clipmd-0.1.0/CHANGELOG.md +104 -0
  6. clipmd-0.1.0/CLAUDE.md +229 -0
  7. clipmd-0.1.0/LICENSE +21 -0
  8. clipmd-0.1.0/Makefile +38 -0
  9. clipmd-0.1.0/PKG-INFO +381 -0
  10. clipmd-0.1.0/README.md +333 -0
  11. clipmd-0.1.0/SPEC.md +1933 -0
  12. clipmd-0.1.0/TODO.md +7 -0
  13. clipmd-0.1.0/Todo-Later.md +800 -0
  14. clipmd-0.1.0/pyproject.toml +100 -0
  15. clipmd-0.1.0/src/clipmd/__init__.py +3 -0
  16. clipmd-0.1.0/src/clipmd/cli.py +144 -0
  17. clipmd-0.1.0/src/clipmd/commands/__init__.py +1 -0
  18. clipmd-0.1.0/src/clipmd/commands/duplicates.py +101 -0
  19. clipmd-0.1.0/src/clipmd/commands/extract.py +102 -0
  20. clipmd-0.1.0/src/clipmd/commands/fetch.py +173 -0
  21. clipmd-0.1.0/src/clipmd/commands/init.py +79 -0
  22. clipmd-0.1.0/src/clipmd/commands/move.py +98 -0
  23. clipmd-0.1.0/src/clipmd/commands/preprocess.py +74 -0
  24. clipmd-0.1.0/src/clipmd/commands/stats.py +72 -0
  25. clipmd-0.1.0/src/clipmd/commands/trash.py +88 -0
  26. clipmd-0.1.0/src/clipmd/commands/validate.py +71 -0
  27. clipmd-0.1.0/src/clipmd/config.py +383 -0
  28. clipmd-0.1.0/src/clipmd/context.py +40 -0
  29. clipmd-0.1.0/src/clipmd/core/__init__.py +1 -0
  30. clipmd-0.1.0/src/clipmd/core/cache.py +473 -0
  31. clipmd-0.1.0/src/clipmd/core/dates.py +279 -0
  32. clipmd-0.1.0/src/clipmd/core/discovery.py +110 -0
  33. clipmd-0.1.0/src/clipmd/core/duplicates.py +207 -0
  34. clipmd-0.1.0/src/clipmd/core/extractor.py +373 -0
  35. clipmd-0.1.0/src/clipmd/core/fetcher.py +559 -0
  36. clipmd-0.1.0/src/clipmd/core/filepath_utils.py +41 -0
  37. clipmd-0.1.0/src/clipmd/core/formatters.py +116 -0
  38. clipmd-0.1.0/src/clipmd/core/frontmatter.py +386 -0
  39. clipmd-0.1.0/src/clipmd/core/hasher.py +36 -0
  40. clipmd-0.1.0/src/clipmd/core/initializer.py +269 -0
  41. clipmd-0.1.0/src/clipmd/core/mover.py +406 -0
  42. clipmd-0.1.0/src/clipmd/core/preprocessor.py +385 -0
  43. clipmd-0.1.0/src/clipmd/core/rss.py +70 -0
  44. clipmd-0.1.0/src/clipmd/core/sanitizer.py +196 -0
  45. clipmd-0.1.0/src/clipmd/core/stats.py +203 -0
  46. clipmd-0.1.0/src/clipmd/core/trash.py +167 -0
  47. clipmd-0.1.0/src/clipmd/core/url_utils.py +97 -0
  48. clipmd-0.1.0/src/clipmd/core/validator.py +309 -0
  49. clipmd-0.1.0/src/clipmd/exceptions.py +35 -0
  50. clipmd-0.1.0/tests/__init__.py +1 -0
  51. clipmd-0.1.0/tests/cli/__init__.py +1 -0
  52. clipmd-0.1.0/tests/cli/test_cli.py +102 -0
  53. clipmd-0.1.0/tests/cli/test_duplicates_cmd.py +185 -0
  54. clipmd-0.1.0/tests/cli/test_extract_cmd.py +372 -0
  55. clipmd-0.1.0/tests/cli/test_fetch_cmd.py +1192 -0
  56. clipmd-0.1.0/tests/cli/test_init_cmd.py +148 -0
  57. clipmd-0.1.0/tests/cli/test_move_cmd.py +392 -0
  58. clipmd-0.1.0/tests/cli/test_preprocess_cmd.py +344 -0
  59. clipmd-0.1.0/tests/cli/test_stats_cmd.py +165 -0
  60. clipmd-0.1.0/tests/cli/test_trash_cmd.py +218 -0
  61. clipmd-0.1.0/tests/cli/test_validate_cmd.py +561 -0
  62. clipmd-0.1.0/tests/conftest.py +80 -0
  63. clipmd-0.1.0/tests/fixtures/sample-vault/.clipmd/cache.json +33 -0
  64. clipmd-0.1.0/tests/fixtures/sample-vault/20240115-Duplicate-Article.md +13 -0
  65. clipmd-0.1.0/tests/fixtures/sample-vault/20240115-Sample-Article.md +23 -0
  66. clipmd-0.1.0/tests/fixtures/sample-vault/20240116-Article-With-Issues.md +17 -0
  67. clipmd-0.1.0/tests/fixtures/sample-vault/20240117-No-Frontmatter-Date.md +17 -0
  68. clipmd-0.1.0/tests/fixtures/sample-vault/20240118-Wikilink-Issue.md +14 -0
  69. clipmd-0.1.0/tests/fixtures/sample-vault/AI-Tools/20240110-Claude-API-Guide.md +20 -0
  70. clipmd-0.1.0/tests/fixtures/sample-vault/Science/20240112-Space-Discovery.md +23 -0
  71. clipmd-0.1.0/tests/fixtures/sample-vault/config.yaml +56 -0
  72. clipmd-0.1.0/tests/fixtures/sample-vault/no-date-prefix-article.md +13 -0
  73. clipmd-0.1.0/tests/integration/__init__.py +1 -0
  74. clipmd-0.1.0/tests/unit/__init__.py +1 -0
  75. clipmd-0.1.0/tests/unit/test_cache.py +416 -0
  76. clipmd-0.1.0/tests/unit/test_config.py +386 -0
  77. clipmd-0.1.0/tests/unit/test_dates.py +216 -0
  78. clipmd-0.1.0/tests/unit/test_discovery.py +237 -0
  79. clipmd-0.1.0/tests/unit/test_frontmatter.py +254 -0
  80. clipmd-0.1.0/tests/unit/test_hasher.py +40 -0
  81. clipmd-0.1.0/tests/unit/test_sanitizer.py +141 -0
  82. clipmd-0.1.0/uv.lock +1012 -0
@@ -0,0 +1,49 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(file:*)",
5
+ "Bash(gh pr:*)",
6
+ "Bash(git add:*)",
7
+ "Bash(git branch:*)",
8
+ "Bash(git checkout:*)",
9
+ "Bash(git commit:*)",
10
+ "Bash(git diff:*)",
11
+ "Bash(git fetch:*)",
12
+ "Bash(git log:*)",
13
+ "Bash(git mv:*)",
14
+ "Bash(git pull:*)",
15
+ "Bash(git push:*)",
16
+ "Bash(git reset:*)",
17
+ "Bash(git rm:*)",
18
+ "Bash(git show:*)",
19
+ "Bash(git stash:*)",
20
+ "Bash(git status:*)",
21
+ "Bash(git switch:*)",
22
+ "Bash(ls:*)",
23
+ "Bash(make:*)",
24
+ "Bash(mkdir:*)",
25
+ "Bash(pre-commit:*)",
26
+ "Bash(pwd:*)",
27
+ "Bash(python3:*)",
28
+ "Bash(rm -rf *.egg-info:*)",
29
+ "Bash(rm -rf .pytest_cache:*)",
30
+ "Bash(rm -rf .ruff_cache:*)",
31
+ "Bash(rm -rf __pycache__:*)",
32
+ "Bash(rm -rf build:*)",
33
+ "Bash(rm -rf dist:*)",
34
+ "Bash(rm -rf tests/fixtures/temp:*)",
35
+ "Bash(stat:*)",
36
+ "Bash(uv:*)",
37
+ "Bash(wc:*)",
38
+ "Bash(which:*)"
39
+ ],
40
+ "deny": [
41
+ "Bash(git push --force:*)",
42
+ "Bash(git push -f:*)",
43
+ "Bash(git reset --hard:*)",
44
+ "Bash(rm -rf /*:*)",
45
+ "Bash(rm -rf ~:*)",
46
+ "Bash(rm -rf .:*)"
47
+ ]
48
+ }
49
+ }
@@ -0,0 +1,40 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.13", "3.14"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v5
21
+ with:
22
+ version: "latest"
23
+
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ run: uv python install ${{ matrix.python-version }}
26
+
27
+ - name: Install dependencies
28
+ run: uv sync --all-extras
29
+
30
+ - name: Run linter
31
+ run: uv run ruff check src/ tests/
32
+
33
+ - name: Run formatter check
34
+ run: uv run ruff format --check src/ tests/
35
+
36
+ - name: Run type checker
37
+ run: uv run ty check src/
38
+
39
+ - name: Run tests with coverage
40
+ run: uv run pytest --cov=clipmd --cov-report=term-missing --cov-fail-under=89
@@ -0,0 +1,99 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ *.manifest
31
+ *.spec
32
+
33
+ # Installer logs
34
+ pip-log.txt
35
+ pip-delete-this-directory.txt
36
+
37
+ # Unit test / coverage reports
38
+ htmlcov/
39
+ .tox/
40
+ .nox/
41
+ .coverage
42
+ .coverage.*
43
+ .cache
44
+ nosetests.xml
45
+ coverage.xml
46
+ *.cover
47
+ *.py,cover
48
+ .hypothesis/
49
+ .pytest_cache/
50
+
51
+ # Translations
52
+ *.mo
53
+ *.pot
54
+
55
+ # Environments
56
+ .env
57
+ .venv
58
+ env/
59
+ venv/
60
+ ENV/
61
+ env.bak/
62
+ venv.bak/
63
+
64
+ # Spyder project settings
65
+ .spyderproject
66
+ .spyproject
67
+
68
+ # Rope project settings
69
+ .ropeproject
70
+
71
+ # mkdocs documentation
72
+ /site
73
+
74
+ # mypy / type checkers
75
+ .mypy_cache/
76
+ .dmypy.json
77
+ dmypy.json
78
+ .ty/
79
+
80
+ # Ruff
81
+ .ruff_cache/
82
+
83
+ # IDE
84
+ .idea/
85
+ .vscode/
86
+ *.swp
87
+ *.swo
88
+ *~
89
+
90
+ # OS
91
+ .DS_Store
92
+ Thumbs.db
93
+
94
+ # Project specific
95
+ .clipmd/cache.json
96
+ *.log
97
+
98
+ # Test artifacts
99
+ tests/fixtures/temp/
@@ -0,0 +1,16 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v6.0.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-toml
9
+ - id: check-added-large-files
10
+
11
+ - repo: https://github.com/astral-sh/ruff-pre-commit
12
+ rev: v0.14.13
13
+ hooks:
14
+ - id: ruff-check
15
+ args: [--fix]
16
+ - id: ruff-format
@@ -0,0 +1,104 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.0] - 2024-01-20
11
+
12
+ ### Added
13
+
14
+ #### Core Commands
15
+ - `init` command for initializing clipmd in a directory with config scaffolding
16
+ - `validate` command for validating configuration and setup
17
+ - `fetch` command for fetching web content and converting to markdown with frontmatter
18
+ - RSS/Atom feed support with `--rss` flag and `--rss-limit` option
19
+ - Duplicate detection with `--check-duplicates` (enabled by default)
20
+ - Readability mode for extracting main content
21
+ - Dry run mode with `--dry-run` flag
22
+ - Support for reading URLs from file with `--file` flag
23
+ - Async fetching with configurable concurrency
24
+ - Meta-refresh redirect handling
25
+ - Automatic tracking parameter cleaning
26
+ - Never overwrites existing files (appends suffix `-2.md`, `-3.md`, etc.)
27
+ - `preprocess` command for cleaning and preparing articles
28
+ - URL cleaning (tracking parameters, redirect unwrapping)
29
+ - Filename sanitization with configurable replacements
30
+ - Date prefix addition (from frontmatter or content extraction)
31
+ - Frontmatter fixing (multi-line fields, wikilinks, YAML validation)
32
+ - Duplicate detection during preprocessing
33
+ - `extract` command for generating LLM-optimized metadata
34
+ - Multiple output formats: markdown, json, yaml
35
+ - Configurable description/content preview length
36
+ - Optional word count and language detection with `--include-stats`
37
+ - Folder list inclusion with `--folders`
38
+ - `move` command for executing categorization decisions
39
+ - Automatic folder creation
40
+ - System trash integration for TRASH category
41
+ - Cache updates after moves
42
+ - Dry run mode
43
+ - `trash` command for moving files to system trash
44
+ - Glob pattern support
45
+ - Cache updates marking files as removed
46
+ - `stats` command for folder statistics
47
+ - Configurable warning thresholds
48
+ - Multiple output formats: table, json, yaml
49
+ - Optional special folder inclusion
50
+ - `duplicates` command for finding duplicate articles
51
+ - Detection by URL (default)
52
+ - Detection by content hash
53
+ - Detection by filename similarity
54
+ - Multiple output formats: markdown, json
55
+
56
+ #### Core Features
57
+ - XDG-compliant configuration file search (project root, .clipmd/, ~/.config/clipmd/)
58
+ - Pydantic-based configuration validation
59
+ - Flexible frontmatter field mapping (supports multiple field name variants)
60
+ - Date parsing with multiple input formats
61
+ - Date extraction from article content using regex patterns
62
+ - URL cache for duplicate detection and history tracking
63
+ - Content hashing for duplicate detection
64
+ - Rich-formatted console output with progress bars and colored status
65
+ - Shell completion support (bash, zsh, fish)
66
+ - Global options: `--verbose`, `--quiet`, `--config`, `--no-color`
67
+ - Custom exception hierarchy with exit codes (0=success, 1=error, 2=partial success)
68
+
69
+ #### Development Infrastructure
70
+ - Python 3.13+ support
71
+ - UV package management
72
+ - Ruff linting and formatting
73
+ - Ty type checking
74
+ - Pre-commit hooks
75
+ - Pytest test suite with ≥89% coverage
76
+ - Comprehensive unit, CLI, and integration tests
77
+ - GitHub Actions CI/CD pipeline
78
+ - Makefile for common development tasks
79
+
80
+ #### Documentation
81
+ - Complete specification in SPEC.md
82
+ - Development guide with architecture patterns
83
+ - Claude Code integration examples
84
+ - LLM workflow examples
85
+
86
+ ### Dependencies
87
+ - click>=8.1 (CLI framework)
88
+ - pyyaml>=6.0 (config file parsing)
89
+ - pydantic>=2.10 (config validation)
90
+ - send2trash>=1.8 (system trash integration)
91
+ - python-dateutil>=2.9 (date parsing)
92
+ - python-frontmatter>=1.1.0 (frontmatter parsing)
93
+ - rich>=13.9 (terminal formatting)
94
+ - httpx>=0.28 (async HTTP client)
95
+ - beautifulsoup4>=4.12 (HTML parsing)
96
+ - trafilatura>=2.0 (content extraction)
97
+ - markdownify>=0.14 (HTML to markdown conversion)
98
+ - feedparser>=6.0 (RSS/Atom feed parsing)
99
+
100
+ ### Optional Dependencies
101
+ - langdetect>=1.0 (language detection for `--include-stats`)
102
+
103
+ [Unreleased]: https://github.com/jmlrt/clipmd/compare/v0.1.0...HEAD
104
+ [0.1.0]: https://github.com/jmlrt/clipmd/releases/tag/v0.1.0
clipmd-0.1.0/CLAUDE.md ADDED
@@ -0,0 +1,229 @@
1
+ # CLAUDE.md
2
+
3
+ Project-specific guidance for the **clipmd** CLI tool.
4
+
5
+ For general Python development patterns, see the **python-development** skill in Claude Code.
6
+
7
+ ---
8
+
9
+ # clipmd
10
+
11
+ CLI tool for saving, organizing, and managing markdown articles with YAML frontmatter.
12
+
13
+ **See `SPEC.md` for full specification (source of truth).**
14
+
15
+ ## Quick Reference
16
+
17
+ ```bash
18
+ # Development setup
19
+ make dev # Install all dependencies (including dev extras)
20
+ make install # Install dependencies without extras
21
+
22
+ # Quality checks (must pass before commit)
23
+ make check # Run lint, typecheck, tests with coverage
24
+ make lint # Run ruff linter
25
+ make format # Format code with ruff
26
+ make typecheck # Run ty type checker
27
+
28
+ # Testing
29
+ uv run pytest tests/unit # Run only unit tests
30
+ uv run pytest tests/cli # Run only CLI tests
31
+ uv run pytest tests/integration # Run only integration tests
32
+ uv run pytest tests/unit/test_config.py # Run specific test file
33
+ uv run pytest -k test_function_name # Run specific test by name
34
+ uv run pytest --cov # Run with coverage report
35
+ make test # Run all tests
36
+ make test-cov # Run with coverage (89% minimum)
37
+
38
+ # Running the CLI
39
+ uv run clipmd --help # Show help
40
+ uv run clipmd init # Initialize new vault
41
+ uv run clipmd --config ./test-config.yaml extract # Use specific config
42
+ ```
43
+
44
+ ## Implementation Approach
45
+
46
+ - Work on feature branch
47
+ - Phase-by-phase implementation (see spec for 9 phases)
48
+ - Atomic commits: each commit must pass `make check`
49
+
50
+ ## Architecture Overview
51
+
52
+ **Separation of Concerns:**
53
+ - `cli.py` - Click CLI application entry point, global options, command registration
54
+ - `context.py` - Context object holding config, verbosity, vault path (passed via Click context)
55
+ - `config.py` - Pydantic-based configuration loading and validation (XDG-compliant paths)
56
+ - `commands/` - Thin CLI wrappers (50-150 lines): parse args → call core → display output
57
+ - `core/` - Pure business logic: no Click dependencies, returns dataclass results
58
+ - `exceptions.py` - Custom exception hierarchy with exit codes
59
+
60
+ **Key clipmd Decisions:**
61
+ - **Config as parameter**: Core functions take `Config` as a parameter, not from global context
62
+ - **Async fetching**: `core/fetcher.py` uses `httpx` async with semaphore for concurrent URL fetching (see [Async Fetching Architecture](#async-fetching-architecture) below)
63
+ - **Result dataclasses**: Core functions return typed dataclasses, commands format them for display
64
+ - **TYPE_CHECKING imports**: Core modules import `Config` under `TYPE_CHECKING` to avoid circular imports
65
+
66
+ ## Key Paths
67
+
68
+ | Path | Purpose |
69
+ |------|---------|
70
+ | `SPEC.md` | Full specification (source of truth) |
71
+ | `CHANGELOG.md` | Project changelog (Keep a Changelog format) |
72
+ | `src/clipmd/cli.py` | CLI entry point, global options |
73
+ | `src/clipmd/context.py` | Context object (config, verbosity) |
74
+ | `src/clipmd/config.py` | Pydantic config models and loading |
75
+ | `src/clipmd/commands/` | CLI command modules (thin wrappers) |
76
+ | `src/clipmd/core/` | Business logic (pure functions) |
77
+ | `src/clipmd/exceptions.py` | Custom exceptions with exit codes |
78
+ | `tests/unit/` | Unit tests for core modules |
79
+ | `tests/cli/` | CLI command tests |
80
+ | `tests/integration/` | End-to-end workflow tests |
81
+ | `tests/fixtures/sample-vault/` | Test data and config |
82
+
83
+ ## Configuration
84
+
85
+ **Config Location (XDG-compliant search order):**
86
+ 1. `./config.yaml` (project root)
87
+ 2. `./.clipmd/config.yaml` (project .clipmd directory)
88
+ 3. `~/.config/clipmd/config.yaml` (user-wide config)
89
+ 4. `--config PATH` flag overrides all
90
+
91
+ **Validation:**
92
+ - Config uses Pydantic v2 models for validation (`config.py`)
93
+ - Invalid config raises `ConfigError` with helpful messages
94
+ - Missing config falls back to sensible defaults
95
+ - All paths in config are resolved relative to vault root
96
+
97
+ ## Error Handling
98
+
99
+ **Exception Hierarchy** (see `exceptions.py`):
100
+ - `ClipmdError` - Base exception (exit code 1)
101
+ - `ConfigError` - Configuration errors
102
+ - `FetchError` - URL fetching errors
103
+ - `ParseError` - Frontmatter/content parsing errors
104
+ - `CacheError` - Cache read/write errors
105
+ - `ValidationError` - Input validation errors
106
+ - `PartialSuccessError` - Some operations succeeded, some failed (exit code 2)
107
+
108
+ **Exit Codes:**
109
+ - `0` - Success
110
+ - `1` - Error (operation failed)
111
+ - `2` - Partial success (some items succeeded, some failed)
112
+
113
+ **Pattern**: Core functions return Result dataclasses with `success: bool` and optional `error: str | None`. Commands check results and raise `SystemExit(1)` on failure, or print Rich-formatted errors and exit.
114
+
115
+ ## Development Practices
116
+
117
+ ### When Fixing Bugs in Similar Commands
118
+
119
+ **clipmd-specific practice**: When fixing a bug or inconsistency in one CLI command, proactively check ALL similar commands for the same issue before considering the task done. Do not wait for the user to ask twice.
120
+
121
+ ### Library Selection Principle
122
+
123
+ Prefer well-maintained, actively-developed libraries from PyPI over custom implementations:
124
+
125
+ - **Example**: Adopted `python-frontmatter` (20M+ downloads/month) to replace custom regex parsing
126
+ - **Evaluation**: Does it handle our specific requirements? (normalization, truncation, etc.)
127
+ - **Trade-off**: `python-slugify` NOT used because it doesn't support NFD normalization needed for sanitizer
128
+
129
+ ### Testing Strategy
130
+
131
+ - `tests/unit/` - Test core business logic (frontmatter, config, sanitizer, etc.)
132
+ - `tests/cli/` - Test CLI interfaces (argument parsing, output formatting)
133
+ - `tests/integration/` - Test complete workflows (fetch → preprocess → extract → move)
134
+ - Target coverage: ≥89%
135
+
136
+ ### Architecture Reference
137
+
138
+ For generic architectural patterns, see the **python-development** skill in Claude Code.
139
+
140
+ All clipmd commands follow these patterns with core logic in `core/` modules and thin CLI wrappers (50-150 lines) in `commands/`.
141
+
142
+ ## Git Workflow
143
+
144
+ When staging and committing changes, ensure ONLY changes from the current task are included. Review staged files against the current session scope before committing.
145
+
146
+ ### Addressing PR Review Comments
147
+
148
+ When asked to address PR review comments (from Copilot, human reviewers, etc.), follow this workflow to avoid excessive GitHub API calls:
149
+
150
+ 1. **Fetch all comments once**: Use `gh api` to retrieve all PR comments (including outdated/resolved ones) and save to a temporary file
151
+ ```bash
152
+ gh api repos/:owner/:repo/pulls/{PR_NUMBER}/comments --paginate > pr-comments-review.json
153
+ ```
154
+
155
+ 2. **Create human-readable summary**: Extract relevant fields into a readable format
156
+ ```bash
157
+ cat pr-comments-review.json | jq -r '.[] | select(.in_reply_to_id == null) | "---\nID: \(.id)\nFile: \(.path)\nLine: \(.line // .original_line // "N/A")\nUser: \(.user.login)\nCreated: \(.created_at)\n\n\(.body)\n"' > pr-comments-summary.txt
158
+ ```
159
+
160
+ 3. **Work from the file**: Review each comment systematically, checking current code state against the issues raised
161
+
162
+ 4. **Track progress**: Create an analysis file (e.g. `pr-comments-analysis.md`) to track which issues are fixed, no longer applicable (due to refactoring), or still need work
163
+
164
+ 5. **Clean up**: Remove temporary files when all issues are addressed
165
+ ```bash
166
+ rm pr-comments-review.json pr-comments-summary.txt pr-comments-analysis.md
167
+ ```
168
+
169
+ **Rationale**: This approach minimizes API calls, provides a stable reference while working, and creates a clear audit trail of what was addressed.
170
+
171
+ ### Changelog Maintenance
172
+
173
+ **IMPORTANT**: Update `CHANGELOG.md` for all user-facing changes:
174
+
175
+ **When to update:**
176
+ - New features (commands, options, functionality)
177
+ - Bug fixes that affect behavior
178
+ - Breaking changes to CLI or config format
179
+ - Dependency updates (major versions)
180
+ - Deprecations or removals
181
+
182
+ **When NOT to update:**
183
+ - Internal refactoring (no behavior change)
184
+ - Test-only changes
185
+ - Documentation updates (unless major)
186
+ - Code formatting/linting
187
+
188
+ **How to update:**
189
+ 1. Add entry under `[Unreleased]` section
190
+ 2. Use Keep a Changelog categories: `Added`, `Changed`, `Deprecated`, `Removed`, `Fixed`, `Security`
191
+ 3. Write user-facing descriptions (not technical implementation details)
192
+ 4. Group related changes together
193
+
194
+ **Example:**
195
+ ```markdown
196
+ ## [Unreleased]
197
+
198
+ ### Added
199
+ - `extract` command now supports `--include-tags` option for tag filtering
200
+
201
+ ### Fixed
202
+ - `fetch` command no longer crashes on malformed HTML meta tags
203
+ ```
204
+
205
+ **On release:**
206
+ - Move `[Unreleased]` entries to new version section with date
207
+ - Update version links at bottom of file
208
+ - Bump version in `pyproject.toml` following Semantic Versioning
209
+
210
+ ## Async Fetching Architecture
211
+
212
+ The `core/fetcher.py` module uses async/await with `httpx` for concurrent URL fetching:
213
+
214
+ **Key Functions:**
215
+ - `fetch_url()` - Async function to fetch single URL with timeout and retries
216
+ - `fetch_urls()` - Async orchestrator using `asyncio.Semaphore` to limit concurrency
217
+ - `fetch_rss_feed()` - Async RSS/Atom feed parser
218
+ - `orchestrate_fetch()` - Main entry point coordinating all fetch operations
219
+
220
+ **Concurrency Control:**
221
+ - `max_concurrent` setting controls semaphore limit (default: 5)
222
+ - Uses `asyncio.gather()` for parallel execution
223
+ - Each fetch operation is independent (failures don't block others)
224
+
225
+ **Important Behaviors:**
226
+ 1. Meta-refresh redirects are handled automatically
227
+ 2. Tracking URL parameters are cleaned (utm_*, fbclid, etc.)
228
+ 3. Never overwrites existing files (appends suffix like `-2.md`, `-3.md`)
229
+ 4. Content extraction uses trafilatura for readability mode
clipmd-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Julien Mailleret
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
clipmd-0.1.0/Makefile ADDED
@@ -0,0 +1,38 @@
1
+ .PHONY: install dev lint format typecheck test test-cov clean build publish check
2
+
3
+ # Development
4
+ install:
5
+ uv sync
6
+
7
+ dev:
8
+ uv sync --all-extras
9
+
10
+ # Quality
11
+ lint:
12
+ uv run ruff check src tests
13
+
14
+ format:
15
+ uv run ruff format src tests
16
+
17
+ typecheck:
18
+ uv run ty check src
19
+
20
+ # Testing
21
+ test:
22
+ uv run pytest
23
+
24
+ test-cov:
25
+ uv run pytest --cov=clipmd --cov-report=term-missing --cov-fail-under=89
26
+
27
+ # Build & Publish
28
+ clean:
29
+ rm -rf dist build *.egg-info
30
+
31
+ build: clean
32
+ uv build
33
+
34
+ publish: build
35
+ uv publish
36
+
37
+ # All checks (lint, typecheck, tests with coverage); must pass before every commit
38
+ check: lint typecheck test-cov