llmsbrieftxt 1.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. llmsbrieftxt-1.8.2/.github/ISSUE_TEMPLATE/bug_report.yml +104 -0
  2. llmsbrieftxt-1.8.2/.github/ISSUE_TEMPLATE/config.yml +8 -0
  3. llmsbrieftxt-1.8.2/.github/ISSUE_TEMPLATE/feature_request.yml +87 -0
  4. llmsbrieftxt-1.8.2/.github/ISSUE_TEMPLATE/question.yml +63 -0
  5. llmsbrieftxt-1.8.2/.github/copilot-instructions.md +115 -0
  6. llmsbrieftxt-1.8.2/.github/workflows/ci.yml +40 -0
  7. llmsbrieftxt-1.8.2/.github/workflows/claude-cli-qa.yml +360 -0
  8. llmsbrieftxt-1.8.2/.github/workflows/claude-doc-review.yml +292 -0
  9. llmsbrieftxt-1.8.2/.github/workflows/claude.yml +50 -0
  10. llmsbrieftxt-1.8.2/.github/workflows/pr-title-check.yml +48 -0
  11. llmsbrieftxt-1.8.2/.github/workflows/release.yml +111 -0
  12. llmsbrieftxt-1.8.2/.gitignore +14 -0
  13. llmsbrieftxt-1.8.2/CLAUDE.md +388 -0
  14. llmsbrieftxt-1.8.2/CONTRIBUTING.md +264 -0
  15. llmsbrieftxt-1.8.2/LICENSE +21 -0
  16. llmsbrieftxt-1.8.2/PKG-INFO +457 -0
  17. llmsbrieftxt-1.8.2/PRODUCTION_CLEANUP_PLAN.md +339 -0
  18. llmsbrieftxt-1.8.2/README.md +424 -0
  19. llmsbrieftxt-1.8.2/docs/USER_JOURNEYS.md +700 -0
  20. llmsbrieftxt-1.8.2/llmsbrieftxt/__init__.py +1 -0
  21. llmsbrieftxt-1.8.2/llmsbrieftxt/cli.py +282 -0
  22. llmsbrieftxt-1.8.2/llmsbrieftxt/constants.py +62 -0
  23. llmsbrieftxt-1.8.2/llmsbrieftxt/crawler.py +366 -0
  24. llmsbrieftxt-1.8.2/llmsbrieftxt/doc_loader.py +150 -0
  25. llmsbrieftxt-1.8.2/llmsbrieftxt/extractor.py +69 -0
  26. llmsbrieftxt-1.8.2/llmsbrieftxt/main.py +424 -0
  27. llmsbrieftxt-1.8.2/llmsbrieftxt/schema.py +42 -0
  28. llmsbrieftxt-1.8.2/llmsbrieftxt/summarizer.py +300 -0
  29. llmsbrieftxt-1.8.2/llmsbrieftxt/url_filters.py +75 -0
  30. llmsbrieftxt-1.8.2/llmsbrieftxt/url_utils.py +73 -0
  31. llmsbrieftxt-1.8.2/pyproject.toml +103 -0
  32. llmsbrieftxt-1.8.2/pytest.ini +17 -0
  33. llmsbrieftxt-1.8.2/scripts/bump_version.py +187 -0
  34. llmsbrieftxt-1.8.2/tests/__init__.py +1 -0
  35. llmsbrieftxt-1.8.2/tests/conftest.py +46 -0
  36. llmsbrieftxt-1.8.2/tests/fixtures/__init__.py +1 -0
  37. llmsbrieftxt-1.8.2/tests/integration/__init__.py +1 -0
  38. llmsbrieftxt-1.8.2/tests/integration/test_doc_loader_integration.py +181 -0
  39. llmsbrieftxt-1.8.2/tests/unit/__init__.py +1 -0
  40. llmsbrieftxt-1.8.2/tests/unit/test_cli.py +418 -0
  41. llmsbrieftxt-1.8.2/tests/unit/test_doc_loader.py +120 -0
  42. llmsbrieftxt-1.8.2/tests/unit/test_extractor.py +70 -0
  43. llmsbrieftxt-1.8.2/tests/unit/test_robustness.py +83 -0
  44. llmsbrieftxt-1.8.2/tests/unit/test_summarizer.py +197 -0
  45. llmsbrieftxt-1.8.2/uv.lock +1136 -0
@@ -0,0 +1,104 @@
1
+ name: Bug Report
2
+ description: Report a bug or unexpected behavior
3
+ title: "[Bug]: "
4
+ labels: ["bug", "triage"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Thanks for taking the time to report a bug! Please fill out the information below to help us resolve the issue.
10
+
11
+ - type: textarea
12
+ id: description
13
+ attributes:
14
+ label: Bug Description
15
+ description: A clear and concise description of what the bug is.
16
+ placeholder: When I run llmtxt with..., I expect..., but instead...
17
+ validations:
18
+ required: true
19
+
20
+ - type: textarea
21
+ id: reproduction
22
+ attributes:
23
+ label: Steps to Reproduce
24
+ description: Steps to reproduce the behavior
25
+ placeholder: |
26
+ 1. Run command: llmtxt https://example.com
27
+ 2. Observe error...
28
+ 3. ...
29
+ validations:
30
+ required: true
31
+
32
+ - type: textarea
33
+ id: expected
34
+ attributes:
35
+ label: Expected Behavior
36
+ description: What you expected to happen
37
+ placeholder: I expected the tool to...
38
+ validations:
39
+ required: true
40
+
41
+ - type: textarea
42
+ id: actual
43
+ attributes:
44
+ label: Actual Behavior
45
+ description: What actually happened
46
+ placeholder: Instead, the tool...
47
+ validations:
48
+ required: true
49
+
50
+ - type: textarea
51
+ id: logs
52
+ attributes:
53
+ label: Error Logs / Output
54
+ description: Please paste any relevant error messages or output
55
+ render: shell
56
+ placeholder: |
57
+ Paste error output here...
58
+
59
+ - type: input
60
+ id: version
61
+ attributes:
62
+ label: llmsbrieftxt Version
63
+ description: Run `pip show llmsbrieftxt` to get the version
64
+ placeholder: "1.0.0"
65
+ validations:
66
+ required: true
67
+
68
+ - type: input
69
+ id: python-version
70
+ attributes:
71
+ label: Python Version
72
+ description: Run `python --version` to get your Python version
73
+ placeholder: "3.11.0"
74
+ validations:
75
+ required: true
76
+
77
+ - type: input
78
+ id: os
79
+ attributes:
80
+ label: Operating System
81
+ description: Which OS are you using?
82
+ placeholder: "macOS 14.0, Ubuntu 22.04, Windows 11, etc."
83
+ validations:
84
+ required: true
85
+
86
+ - type: textarea
87
+ id: context
88
+ attributes:
89
+ label: Additional Context
90
+ description: Any other context, screenshots, or information that might be helpful
91
+ placeholder: Add any other context about the problem here
92
+
93
+ - type: checkboxes
94
+ id: checks
95
+ attributes:
96
+ label: Pre-submission Checklist
97
+ description: Please confirm the following before submitting
98
+ options:
99
+ - label: I have searched existing issues to ensure this is not a duplicate
100
+ required: true
101
+ - label: I have tested with the latest version of llmsbrieftxt
102
+ required: true
103
+ - label: I have included complete error messages and logs
104
+ required: true
@@ -0,0 +1,8 @@
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: 💬 GitHub Discussions
4
+ url: https://github.com/stevennevins/llmsbrief/discussions
5
+ about: Ask questions and discuss ideas with the community
6
+ - name: 📚 Documentation
7
+ url: https://github.com/stevennevins/llmsbrief#readme
8
+ about: Read the documentation and guides
@@ -0,0 +1,87 @@
1
+ name: Feature Request
2
+ description: Suggest a new feature or enhancement
3
+ title: "[Feature]: "
4
+ labels: ["enhancement", "triage"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Thanks for suggesting a new feature! Please provide as much detail as possible to help us understand your request.
10
+
11
+ **Note:** llmsbrieftxt follows the Unix philosophy of doing one thing well. Features should align with the core mission: generating llms-brief.txt files from documentation websites.
12
+
13
+ - type: textarea
14
+ id: problem
15
+ attributes:
16
+ label: Problem or Use Case
17
+ description: What problem does this feature solve? What use case does it enable?
18
+ placeholder: |
19
+ I want to be able to... because...
20
+ Currently, I can't... which means...
21
+ validations:
22
+ required: true
23
+
24
+ - type: textarea
25
+ id: solution
26
+ attributes:
27
+ label: Proposed Solution
28
+ description: How would you like this feature to work?
29
+ placeholder: |
30
+ Add a new option `--example` that would...
31
+ The behavior should be...
32
+ validations:
33
+ required: true
34
+
35
+ - type: textarea
36
+ id: alternatives
37
+ attributes:
38
+ label: Alternatives Considered
39
+ description: What alternatives have you considered? Are there workarounds?
40
+ placeholder: |
41
+ I considered using X, but...
42
+ As a workaround, I currently...
43
+
44
+ - type: textarea
45
+ id: examples
46
+ attributes:
47
+ label: Usage Examples
48
+ description: Provide examples of how this feature would be used
49
+ render: bash
50
+ placeholder: |
51
+ # Example usage
52
+ llmtxt https://example.com --new-option value
53
+
54
+ - type: dropdown
55
+ id: alignment
56
+ attributes:
57
+ label: Alignment with Project Philosophy
58
+ description: Does this feature align with the Unix philosophy of doing one thing well?
59
+ options:
60
+ - "Yes - Directly enhances llms-brief.txt generation"
61
+ - "Maybe - Could be considered a core feature"
62
+ - "No - This is a complementary tool/feature"
63
+ validations:
64
+ required: true
65
+
66
+ - type: textarea
67
+ id: impact
68
+ attributes:
69
+ label: Impact
70
+ description: |
71
+ Who benefits from this feature? How critical is it?
72
+ placeholder: |
73
+ This would benefit users who...
74
+ Impact: High/Medium/Low
75
+
76
+ - type: checkboxes
77
+ id: checks
78
+ attributes:
79
+ label: Pre-submission Checklist
80
+ description: Please confirm the following before submitting
81
+ options:
82
+ - label: I have searched existing issues and discussions to ensure this hasn't been requested
83
+ required: true
84
+ - label: I have considered whether this aligns with the project's Unix philosophy
85
+ required: true
86
+ - label: I have provided a clear use case and motivation
87
+ required: true
@@ -0,0 +1,63 @@
1
+ name: Question or Support
2
+ description: Ask a question or get help using llmsbrieftxt
3
+ title: "[Question]: "
4
+ labels: ["question", "support"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Have a question about using llmsbrieftxt? We're here to help!
10
+
11
+ **Tip:** For general discussions and community help, consider using [GitHub Discussions](https://github.com/stevennevins/llmsbrief/discussions) instead.
12
+
13
+ - type: textarea
14
+ id: question
15
+ attributes:
16
+ label: Question
17
+ description: What would you like to know?
18
+ placeholder: How do I...? What is the best way to...?
19
+ validations:
20
+ required: true
21
+
22
+ - type: textarea
23
+ id: context
24
+ attributes:
25
+ label: Context
26
+ description: Provide any relevant context about what you're trying to accomplish
27
+ placeholder: |
28
+ I'm trying to...
29
+ My use case is...
30
+
31
+ - type: textarea
32
+ id: attempted
33
+ attributes:
34
+ label: What Have You Tried?
35
+ description: What have you already attempted? Include commands, configurations, etc.
36
+ render: bash
37
+ placeholder: |
38
+ I tried running:
39
+ llmtxt https://example.com --option value
40
+
41
+ But I'm not sure if...
42
+
43
+ - type: textarea
44
+ id: documentation
45
+ attributes:
46
+ label: Documentation Consulted
47
+ description: Which documentation have you already checked?
48
+ placeholder: |
49
+ - [ ] README.md
50
+ - [ ] CLAUDE.md
51
+ - [ ] CONTRIBUTING.md
52
+ - [ ] Searched existing issues
53
+
54
+ - type: checkboxes
55
+ id: checks
56
+ attributes:
57
+ label: Pre-submission Checklist
58
+ description: Please confirm the following
59
+ options:
60
+ - label: I have checked the README and documentation
61
+ required: true
62
+ - label: I have searched existing issues and discussions
63
+ required: true
@@ -0,0 +1,115 @@
1
+ # GitHub Copilot Instructions for llmsbrieftxt
2
+
3
+ ## Project Overview
4
+
5
+ This is `llmsbrieftxt`, a Python package that generates llms-brief.txt files by crawling documentation websites and using OpenAI to create structured descriptions. The CLI command is `llmtxt` (not `llmsbrieftxt`).
6
+
7
+ ## Architecture and Code Patterns
8
+
9
+ ### Async-First Design
10
+ All main functions use async/await patterns. Use `asyncio.gather()` for concurrent operations and semaphore control for rate limiting. The processing pipeline flows: URL Discovery → Content Extraction → LLM Summarization → File Generation.
11
+
12
+ ### Module Organization
13
+ - **cli.py**: Simple CLI with positional URL argument (no subcommands)
14
+ - **main.py**: Orchestrates the async generation pipeline
15
+ - **crawler.py**: RobustDocCrawler for breadth-first URL discovery
16
+ - **doc_loader.py**: DocLoader wraps crawler with document loading
17
+ - **extractor.py**: HTML to markdown via trafilatura
18
+ - **summarizer.py**: OpenAI integration with retry logic (tenacity)
19
+ - **url_utils.py**: URLNormalizer for deduplication
20
+ - **url_filters.py**: Filter non-documentation URLs
21
+ - **schema.py**: Pydantic models (PageSummary)
22
+ - **constants.py**: Configuration constants
23
+
24
+ ### Type Safety
25
+ Use Pydantic models for all structured data. The OpenAI integration uses structured output with the PageSummary model.
26
+
27
+ ### Error Handling
28
+ Failed URL loads should be logged but not stop processing. LLM failures use exponential backoff retries via tenacity. Never let one failure break the entire pipeline.
29
+
30
+ ## Development Practices
31
+
32
+ ### Testing Requirements
33
+ Write tests before implementing features. Use pytest with these markers:
34
+ - `@pytest.mark.unit` for fast, isolated tests
35
+ - `@pytest.mark.requires_openai` for tests needing OPENAI_API_KEY
36
+ - `@pytest.mark.slow` for tests making external API calls
37
+
38
+ Tests go in:
39
+ - `tests/unit/` for fast tests with no external dependencies
40
+ - `tests/integration/` for tests requiring OPENAI_API_KEY
41
+
42
+ ### Code Quality Tools
43
+ Before committing, always run:
44
+ 1. Format: `uv run ruff format llmsbrieftxt/ tests/`
45
+ 2. Lint: `uv run ruff check llmsbrieftxt/ tests/`
46
+ 3. Type check: `uv run pyright llmsbrieftxt/`
47
+ 4. Tests: `uv run pytest tests/unit/`
48
+
49
+ ### Package Management
50
+ Use `uv` for all package operations:
51
+ - Install: `uv sync --group dev`
52
+ - Add dependency: `uv add package-name`
53
+ - Build: `uv build`
54
+
55
+ ## Design Philosophy
56
+
57
+ ### Unix Philosophy
58
+ This project follows "do one thing and do it well":
59
+ - Generate llms-brief.txt files only (no built-in search/list features)
60
+ - Compose with standard Unix tools (rg, grep, ls)
61
+ - Simple CLI: URL is a positional argument, no subcommands
62
+ - Plain text output for scriptability
63
+
64
+ ### Simplicity Over Features
65
+ Avoid adding functionality that duplicates mature Unix tools. Every line of code must serve the core mission of generating llms-brief.txt files.
66
+
67
+ ## Configuration Defaults
68
+
69
+ - **Crawl Depth**: 3 levels (hardcoded in crawler.py)
70
+ - **Output**: `~/.claude/docs/<domain>.txt` (override with `--output`)
71
+ - **Cache**: `.llmsbrieftxt_cache/` for intermediate results
72
+ - **OpenAI Model**: `gpt-5-mini` (override with `--model`)
73
+ - **Concurrency**: 10 concurrent LLM requests (prevents rate limiting)
74
+
75
+ ## Commit Convention
76
+
77
+ Use conventional commits for automated versioning:
78
+ - `fix:` → patch bump (1.0.0 → 1.0.1)
79
+ - `feat:` → minor bump (1.0.0 → 1.1.0)
80
+ - `BREAKING CHANGE` or `feat!:`/`fix!:` → major bump (1.0.0 → 2.0.0)
81
+
82
+ Examples:
83
+ ```bash
84
+ git commit -m "fix: handle empty sitemap gracefully"
85
+ git commit -m "feat: add --depth option for custom crawl depth"
86
+ git commit -m "feat!: change default output location"
87
+ ```
88
+
89
+ ## Non-Obvious Behaviors
90
+
91
+ 1. URL discovery follows links recursively and collects ALL pages up to depth 3, not just pages linked directly from the starting URL
92
+ 2. URLs like `/page`, `/page/`, and `/page#section` are deduplicated as the same URL
93
+ 3. Summaries are automatically cached in `.llmsbrieftxt_cache/summaries.json`
94
+ 4. Content extraction uses trafilatura to convert HTML to markdown while preserving the document's structure
95
+ 5. File I/O is synchronous (uses standard `Path.write_text()` for simplicity)
96
+
97
+ ## Known Limitations
98
+
99
+ 1. Only supports OpenAI API (no other LLM providers)
100
+ 2. Crawl depth is hardcoded to 3 in crawler.py
101
+ 3. No CLI flag to force resuming from the cache (although the cache exists)
102
+ 4. No progress persistence if interrupted
103
+ 5. Prompts and parsing assume English documentation
104
+
105
+ ## Code Review Checklist
106
+
107
+ When reviewing code changes:
108
+ - Ensure async patterns are used correctly (no blocking network I/O in async functions; file writes are intentionally synchronous)
109
+ - Verify all functions have type hints
110
+ - Check that tests are included for new functionality
111
+ - Confirm error handling doesn't break the pipeline
112
+ - Validate that conventional commit format is used
113
+ - Ensure code follows Unix philosophy (simplicity, composability)
114
+ - Check that ruff and pyright pass without errors
115
+ - **IMPORTANT**: Always include specific file names and line numbers when providing review feedback (e.g., "main.py:165" or "line 182 in cli.py")
@@ -0,0 +1,40 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, master ]
6
+ pull_request:
7
+ branches: [ main, master ]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v5
21
+ with:
22
+ version: "latest"
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ uv python install ${{ matrix.python-version }}
27
+ uv sync --group dev
28
+
29
+ - name: Run unit tests
30
+ run: |
31
+ uv run pytest tests/unit -v --tb=short
32
+
33
+ - name: Run linting and formatting checks
34
+ run: |
35
+ uv run ruff check llmsbrieftxt/ tests/
36
+ uv run ruff format --check llmsbrieftxt/ tests/
37
+
38
+ - name: Type checking
39
+ run: |
40
+ uv run pyright llmsbrieftxt/