fresh-docs 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. fresh_docs-0.1.8/.claude/settings.local.json +18 -0
  2. fresh_docs-0.1.8/.github/workflows/mypy.yml +19 -0
  3. fresh_docs-0.1.8/.github/workflows/pr-review.yml +96 -0
  4. fresh_docs-0.1.8/.github/workflows/pytest.yml +18 -0
  5. fresh_docs-0.1.8/.github/workflows/release.yml +51 -0
  6. fresh_docs-0.1.8/.github/workflows/ruff.yml +14 -0
  7. fresh_docs-0.1.8/.pre-commit-config.yaml +24 -0
  8. fresh_docs-0.1.8/CLAUDE.md +35 -0
  9. fresh_docs-0.1.8/LICENSE +21 -0
  10. fresh_docs-0.1.8/PKG-INFO +70 -0
  11. fresh_docs-0.1.8/README.md +44 -0
  12. fresh_docs-0.1.8/docs/PROJECT.md +52 -0
  13. fresh_docs-0.1.8/docs/features/FUTURE.md +62 -0
  14. fresh_docs-0.1.8/pyproject.toml +57 -0
  15. fresh_docs-0.1.8/src/fresh/__init__.py +29 -0
  16. fresh_docs-0.1.8/src/fresh/__main__.py +6 -0
  17. fresh_docs-0.1.8/src/fresh/commands/__init__.py +5 -0
  18. fresh_docs-0.1.8/src/fresh/commands/get.py +166 -0
  19. fresh_docs-0.1.8/src/fresh/commands/list.py +142 -0
  20. fresh_docs-0.1.8/src/fresh/scraper/__init__.py +47 -0
  21. fresh_docs-0.1.8/src/fresh/scraper/crawler.py +222 -0
  22. fresh_docs-0.1.8/src/fresh/scraper/filter.py +181 -0
  23. fresh_docs-0.1.8/src/fresh/scraper/http.py +448 -0
  24. fresh_docs-0.1.8/src/fresh/scraper/sitemap.py +161 -0
  25. fresh_docs-0.1.8/tests/test_commands_get.py +202 -0
  26. fresh_docs-0.1.8/tests/test_commands_list.py +224 -0
  27. fresh_docs-0.1.8/tests/test_main.py +14 -0
  28. fresh_docs-0.1.8/tests/test_scraper_crawler.py +301 -0
  29. fresh_docs-0.1.8/tests/test_scraper_filter.py +241 -0
  30. fresh_docs-0.1.8/tests/test_scraper_http.py +444 -0
  31. fresh_docs-0.1.8/tests/test_scraper_sitemap.py +168 -0
  32. fresh_docs-0.1.8/uv.lock +644 -0
@@ -0,0 +1,18 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(git add src/fresh/scraper/__init__.py)",
5
+ "Bash(git commit:*)",
6
+ "Bash(git add:*)",
7
+ "Bash(uv sync)",
8
+ "Bash(uv run python:*)",
9
+ "Bash(uv run ruff:*)",
10
+ "Bash(uv run mypy:*)",
11
+ "Bash(uv sync:*)",
12
+ "Bash(uv run pytest src/fresh/scraper/ -v)",
13
+ "Bash(gh issue edit:*)",
14
+ "Bash(gh issue view:*)",
15
+ "Bash(gh issue create:*)"
16
+ ]
17
+ }
18
+ }
@@ -0,0 +1,19 @@
1
+ name: Mypy
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ mypy:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: '3.12'
17
+ - run: pip install -e .
18
+ - run: pip install mypy
19
+ - run: mypy src
@@ -0,0 +1,96 @@
1
+ name: PR Review
2
+
3
+ on:
4
+ pull_request:
5
+ types: [opened, synchronize, ready_for_review, reopened]
6
+
7
+ permissions:
8
+ contents: read
9
+ pull-requests: write
10
+ id-token: write
11
+
12
+ jobs:
13
+ review:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - name: Checkout PR branch
17
+ uses: actions/checkout@v4
18
+ with:
19
+ fetch-depth: 1
20
+
21
+ - name: Generate Marty token
22
+ id: marty-token
23
+ uses: actions/create-github-app-token@v2
24
+ with:
25
+ app-id: ${{ secrets.MARTY_APP_ID }}
26
+ private-key: ${{ secrets.MARTY_APP_PRIVATE_KEY }}
27
+
28
+ - name: Run Marty PR Review
29
+ uses: nesalia-inc/marty-action@1.0.0
30
+ with:
31
+ github_token: ${{ steps.marty-token.outputs.token }}
32
+ prompt: |
33
+ REPO: ${{ github.repository }}
34
+ PR NUMBER: ${{ github.event.pull_request.number }}
35
+ AUTHOR: ${{ github.event.pull_request.user.login }}
36
+ TITLE: ${{ github.event.pull_request.title }}
37
+
38
+ Please review this pull request comprehensively:
39
+
40
+ ## Review Focus Areas
41
+
42
+ ### Code Quality
43
+ - Code follows best practices and design patterns
44
+ - Proper error handling and edge cases
45
+ - No code duplication (DRY principle)
46
+ - Clear and maintainable code structure
47
+
48
+ ### Security
49
+ - No hardcoded secrets or credentials
50
+ - Proper input validation and sanitization
51
+ - SQL injection and XSS vulnerabilities
52
+ - Authentication and authorization checks
53
+ - Sensitive data handling
54
+
55
+ ### Performance
56
+ - Potential performance bottlenecks
57
+ - Efficient database queries
58
+ - Proper caching strategies
59
+ - Resource cleanup and memory leaks
60
+
61
+ ### Testing
62
+ - Adequate test coverage for changes
63
+ - Edge cases covered
64
+ - Test quality and assertions
65
+
66
+ ### Documentation
67
+ - README updated if needed
68
+ - Inline comments for complex logic
69
+ - API documentation updated
70
+ - Breaking changes documented
71
+
72
+ ## Review Output Format
73
+
74
+ Use inline comments for specific code issues (highlight exact lines).
75
+ Use top-level PR comments for general observations and summary.
76
+
77
+ Structure your top-level comment as:
78
+ - **Summary**: Brief overview
79
+ - **Critical Issues**: Must-fix items
80
+ - **Recommendations**: Suggestions for improvement
81
+ - **Positive Notes**: Good practices observed
82
+
83
+ Note: The PR branch is already checked out in the current working directory.
84
+
85
+ Only post GitHub comments - don't submit review text as messages.
86
+
87
+ claude_args: |
88
+ --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*)"
89
+ --max-turns 100
90
+
91
+ env:
92
+ ANTHROPIC_BASE_URL: https://api.minimax.io/anthropic
93
+ ANTHROPIC_AUTH_TOKEN: ${{ secrets.MINIMAX_API_KEY }}
94
+ ANTHROPIC_DEFAULT_SONNET_MODEL: MiniMax-M2.5
95
+ ANTHROPIC_DEFAULT_HAIKU_MODEL: MiniMax-M2.5
96
+ ANTHROPIC_DEFAULT_OPUS_MODEL: MiniMax-M2.5
@@ -0,0 +1,18 @@
1
+ name: Pytest
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ pytest:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: '3.12'
17
+ - run: pip install -e .[dev]
18
+ - run: pytest
@@ -0,0 +1,51 @@
1
+ name: Release
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ lint:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v4
12
+ - uses: astral-sh/ruff-action@v3
13
+
14
+ typecheck:
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - uses: actions/setup-python@v5
19
+ with:
20
+ python-version: '3.12'
21
+ - run: pip install -e .[dev]
22
+ - run: mypy src/
23
+
24
+ test:
25
+ runs-on: ubuntu-latest
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - uses: actions/setup-python@v5
29
+ with:
30
+ python-version: '3.12'
31
+ - run: pip install -e .[dev]
32
+ - run: pytest
33
+
34
+ pypi:
35
+ runs-on: ubuntu-latest
36
+ needs: [lint, typecheck, test]
37
+ environment: production
38
+ permissions:
39
+ id-token: write
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+ - uses: actions/setup-python@v5
43
+ with:
44
+ python-version: '3.12'
45
+ - run: pip install build uv
46
+ - run: rm -rf dist/
47
+ - run: python -m build
48
+ - run: ls -la dist/
49
+ - run: |
50
+ uv publish \
51
+ --token "${{ secrets.PYPI_TOKEN }}"
@@ -0,0 +1,14 @@
1
+ name: Ruff
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ ruff:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: astral-sh/ruff-action@v3
@@ -0,0 +1,24 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.8.4
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+
8
+ - repo: https://github.com/pre-commit/mirrors-mypy
9
+ rev: v1.14.1
10
+ hooks:
11
+ - id: mypy
12
+ pass_filenames: false
13
+ entry: uv
14
+ args: ['run', 'mypy', 'src']
15
+
16
+ - repo: local
17
+ hooks:
18
+ - id: pytest
19
+ name: pytest
20
+ entry: uv
21
+ args: ['run', 'pytest']
22
+ pass_filenames: false
23
+ language: system
24
+ stages: [pre-commit]
@@ -0,0 +1,35 @@
1
+ ## Language
2
+
3
+ You MUST always respond in English, regardless of the language used by the user.
4
+
5
+ ## Git Commit Convention
6
+
7
+ All commits MUST include both co-authors:
8
+ - `martty-code <nesalia.inc@gmail.com>`
9
+ - `Claude Sonnet <noreply@anthropic.com>`
10
+
11
+ ### Commit Message Format
12
+
13
+ ```bash
14
+ git commit -m "$(cat <<'EOF'
15
+ Your commit message here
16
+
17
+ Co-Authored-By: martty-code <nesalia.inc@gmail.com>
18
+ Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
19
+ EOF
20
+ )"
21
+ ```
22
+
23
+ ### Example
24
+
25
+ ```bash
26
+ git commit -m "$(cat <<'EOF'
27
+ feat: add employee search functionality
28
+
29
+ Implement search bar with real-time filtering for employee list
30
+
31
+ Co-Authored-By: martty-code <nesalia.inc@gmail.com>
32
+ Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
33
+ EOF
34
+ )"
35
+ ```
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Nesalia Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.4
2
+ Name: fresh-docs
3
+ Version: 0.1.8
4
+ Summary: CLI to get the latest and freshest documentation from any website in Markdown format
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.12
7
+ Requires-Dist: beautifulsoup4>=4.14.3
8
+ Requires-Dist: httpx>=0.27.0
9
+ Requires-Dist: markdownify>=0.12.0
10
+ Requires-Dist: typer>=0.12.0
11
+ Requires-Dist: typing-extensions>=4.0
12
+ Provides-Extra: all
13
+ Requires-Dist: pyyaml>=6.0; extra == 'all'
14
+ Requires-Dist: rich>=13.0.0; extra == 'all'
15
+ Provides-Extra: dev
16
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
17
+ Requires-Dist: pre-commit>=4.0.0; extra == 'dev'
18
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
19
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
20
+ Requires-Dist: ruff>=0.4.0; extra == 'dev'
21
+ Provides-Extra: rich
22
+ Requires-Dist: rich>=13.0.0; extra == 'rich'
23
+ Provides-Extra: yaml
24
+ Requires-Dist: pyyaml>=6.0; extra == 'yaml'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # fresh
28
+
29
+ A Python CLI application created with @nesalia/create.
30
+
31
+ ## Requirements
32
+
33
+ - Python 3.12+
34
+ - uv (recommended) or pip
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ # Using uv (recommended)
40
+ uv sync
41
+
42
+ # Or using pip
43
+ pip install -e .
44
+ ```
45
+
46
+ ## Usage
47
+
48
+ ```bash
49
+ # Run the CLI
50
+ fresh --help
51
+
52
+ # Say hello
53
+ fresh hello --name World
54
+
55
+ # Run tests
56
+ pytest
57
+ ```
58
+
59
+ ## Development
60
+
61
+ ```bash
62
+ # Install dev dependencies
63
+ uv sync --extra dev
64
+
65
+ # Run linter
66
+ ruff check .
67
+
68
+ # Run tests
69
+ pytest
70
+ ```
@@ -0,0 +1,44 @@
1
+ # fresh
2
+
3
+ A Python CLI application created with @nesalia/create.
4
+
5
+ ## Requirements
6
+
7
+ - Python 3.12+
8
+ - uv (recommended) or pip
9
+
10
+ ## Installation
11
+
12
+ ```bash
13
+ # Using uv (recommended)
14
+ uv sync
15
+
16
+ # Or using pip
17
+ pip install -e .
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ ```bash
23
+ # Run the CLI
24
+ fresh --help
25
+
26
+ # Say hello
27
+ fresh hello --name World
28
+
29
+ # Run tests
30
+ pytest
31
+ ```
32
+
33
+ ## Development
34
+
35
+ ```bash
36
+ # Install dev dependencies
37
+ uv sync --extra dev
38
+
39
+ # Run linter
40
+ ruff check .
41
+
42
+ # Run tests
43
+ pytest
44
+ ```
@@ -0,0 +1,52 @@
1
+ # Fresh - Documentation Fetcher CLI
2
+
3
+ Agent-first CLI to get the latest and freshest documentation from any website in Markdown format.
4
+
5
+ ## ⚠️ Work in Progress
6
+
7
+ This project is currently under development. Features and API may change.
8
+
9
+ ## Purpose
10
+
11
+ This CLI scrapes documentation websites and converts pages to Markdown, making it easy to keep documentation fresh and accessible for AI agents.
12
+
13
+ ## Commands
14
+
15
+ ### `fresh list <url>`
16
+
17
+ Lists all documentation pages available on a given website.
18
+
19
+ ```bash
20
+ fresh list https://nextjs.org
21
+ # Output: /docs /api-reference /guides /tutorials ...
22
+ ```
23
+
24
+ ### `fresh get <url>`
25
+
26
+ Fetches a specific documentation page and outputs it in Markdown format.
27
+
28
+ ```bash
29
+ fresh get https://nextjs.org/docs/app/api-reference/file-conventions/page
30
+ # Output: # Page API Reference\ncontent...
31
+ ```
32
+
33
+ ## Features
34
+
35
+ - **Documentation discovery**: Automatically find all doc pages on a website
36
+ - **Markdown output**: Convert HTML documentation pages to clean Markdown
37
+ - **Public sources only**: Works with publicly accessible documentation sites
38
+ - **Implicit cache**: Pages are cached automatically. Use `--no-cache` to bypass the cache
39
+ - **Flexible output**: Write to file or output to STDOUT
40
+
41
+ ## Use Cases
42
+
43
+ - Keep local documentation updated for offline access
44
+ - Feed documentation to AI agents for context-aware assistance
45
+ - Quickly explore the structure of unfamiliar documentation sites
46
+
47
+ ## Technical Stack
48
+
49
+ - **Language**: Python
50
+ - **CLI Framework**: Typer
51
+ - **Package Manager**: uv
52
+ - **License**: MIT
@@ -0,0 +1,62 @@
1
+ # Future Features
2
+
3
+ This document outlines potential features for future releases.
4
+
5
+ ## Core Features
6
+
7
+ ### Sitemap Parsing
8
+ Use website sitemaps for more reliable page discovery in the `list` command, instead of relying solely on HTML scraping.
9
+
10
+ ### Retry & Rate Limiting
11
+ Automatic retry with exponential backoff and rate limiting to avoid getting blocked by websites.
12
+
13
+ ### Config File
14
+ Support for `~/.freshrc` configuration file to remember frequently accessed sites and default settings.
15
+
16
+ ### Custom Output Path
17
+ `--output /path/to/save` flag to choose where to save downloaded documentation.
18
+
19
+ ### Format Choice
20
+ `--format md` is the default; HTML and JSON output formats would also be supported.
21
+
22
+ ### Diff Mode
23
+ `--diff` flag to see what has changed since the last fetch.
24
+
25
+ ### Parallel Fetching
26
+ Multi-threaded fetching to download multiple pages simultaneously for better performance.
27
+
28
+ ### Proxy Support
29
+ `--proxy http://...` flag to route requests through a proxy for sites that block scraping.
30
+
31
+ ### Verbose Mode
32
+ `-v` flag for detailed debug output when something doesn't work.
33
+
34
+ ## Nice to Have
35
+
36
+ ### Watch Mode
37
+ `fresh watch <url>` command to monitor a documentation site and notify when changes occur.
38
+
39
+ ### Plugins
40
+ Plugin system to add custom extractors for specific websites with unique structures.
41
+
42
+ ### CI Integration
43
+ CI-friendly output to verify if documentation has changed between builds.
44
+
45
+ ### Version Pinning
46
+ Ability to lock documentation to a specific date or version.
47
+
48
+ ## Implementation Priority
49
+
50
+ 1. **Sitemap parsing** - Improves reliability of the `list` command
51
+ 2. **Retry + rate limiting** - Essential for production use
52
+ 3. **Config file** - Better UX for repeated use
53
+ 4. **Custom output path** - Common need
54
+ 5. **Format choice** - Depends on use case
55
+ 6. **Diff mode** - Very useful for change tracking
56
+ 7. **Parallel fetching** - Performance optimization
57
+ 8. **Proxy support** - Edge case but important when needed
58
+ 9. **Verbose mode** - Debugging tool
59
+ 10. **Watch mode** - Advanced feature
60
+ 11. **Plugins** - Only if needed for specific sites
61
+ 12. **CI integration** - Niche use case
62
+ 13. **Version pinning** - Low priority for MVP
@@ -0,0 +1,57 @@
1
+ [project]
2
+ name = "fresh-docs"
3
+ version = "0.1.8"
4
+ description = "CLI to get the latest and freshest documentation from any website in Markdown format"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "typer>=0.12.0",
9
+ "typing-extensions>=4.0",
10
+ "httpx>=0.27.0",
11
+ "beautifulsoup4>=4.14.3",
12
+ "markdownify>=0.12.0",
13
+ ]
14
+
15
+ [build-system]
16
+ requires = ["hatchling"]
17
+ build-backend = "hatchling.build"
18
+
19
+ [project.scripts]
20
+ fresh = "fresh.__main__:main"
21
+
22
+ [project.optional-dependencies]
23
+ dev = [
24
+ "pytest>=8.0.0",
25
+ "ruff>=0.4.0",
26
+ "pytest-cov>=4.0",
27
+ "mypy>=1.0.0",
28
+ "pre-commit>=4.0.0",
29
+ ]
30
+ rich = [
31
+ "rich>=13.0.0",
32
+ ]
33
+ yaml = [
34
+ "pyyaml>=6.0",
35
+ ]
36
+ all = [
37
+ "rich>=13.0.0",
38
+ "pyyaml>=6.0",
39
+ ]
40
+
41
+ [tool.ruff]
42
+ line-length = 100
43
+
44
+ [tool.mypy]
45
+ python_version = "3.12"
46
+ warn_return_any = true
47
+ warn_unused_configs = true
48
+ disallow_untyped_defs = false
49
+
50
+ [tool.pytest.ini_options]
51
+ testpaths = ["tests"]
52
+
53
+ [tool.hatch.version]
54
+ path = "src/fresh/__init__.py"
55
+
56
+ [tool.hatch.build.targets.wheel]
57
+ packages = ["src/fresh"]
@@ -0,0 +1,29 @@
1
+ """fresh - CLI application."""
2
+
3
+ __version__ = "0.1.8"
4
+
5
+ import typer
6
+
7
+ from .commands.get import get
8
+ from .commands.list import list_urls
9
+
10
# Root Typer application object; all CLI subcommands are attached to it.
app = typer.Typer(help="fresh - A CLI application")

# Attach the command callables imported above under their CLI names.
# ``command()`` returns a decorator, so it is applied by calling it directly
# on the already-defined functions.
app.command("list")(list_urls)
app.command("get")(get)
15
+
16
+
17
@app.command()
def hello(name: str = "World") -> None:
    """Say hello to someone."""
    # The docstring above is left untouched: Typer surfaces a command's
    # docstring as its help text, so it is user-facing output.
    greeting = f"Hello, {name}!"
    print(greeting)
21
+
22
+
23
def main() -> None:
    """Run the Typer application; this is the CLI entry point."""
    # Calling the Typer object hands control to Typer, which dispatches to
    # the registered command.
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,6 @@
1
"""Module entry point: lets ``python -m fresh`` start the CLI."""

# ``main`` is defined in the package ``__init__``; importing it here also
# makes it reachable as ``fresh.__main__.main``.
from . import main

# Start the CLI only when this module is executed, not when it is imported.
if __name__ == "__main__":
    main()
@@ -0,0 +1,5 @@
1
"""Command callables that make up the fresh CLI."""

from .list import list_urls

# NOTE(review): only ``list_urls`` is re-exported. Re-exporting the ``get``
# function under its own name would rebind the package attribute
# ``fresh.commands.get`` from the submodule to the function -- confirm the
# impact on imports and patch targets before adding it here.
__all__ = ["list_urls"]