intake-ai-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. intake_ai_cli-0.1.0/.gitignore +96 -0
  2. intake_ai_cli-0.1.0/.intake.yaml.example +55 -0
  3. intake_ai_cli-0.1.0/CHANGELOG.md +120 -0
  4. intake_ai_cli-0.1.0/PKG-INFO +365 -0
  5. intake_ai_cli-0.1.0/README.md +328 -0
  6. intake_ai_cli-0.1.0/SEGUIMIENTO-V0.md +423 -0
  7. intake_ai_cli-0.1.0/docs/README.md +91 -0
  8. intake_ai_cli-0.1.0/docs/arquitectura.md +229 -0
  9. intake_ai_cli-0.1.0/docs/buenas-practicas.md +296 -0
  10. intake_ai_cli-0.1.0/docs/configuracion.md +271 -0
  11. intake_ai_cli-0.1.0/docs/exportacion.md +224 -0
  12. intake_ai_cli-0.1.0/docs/formatos-entrada.md +293 -0
  13. intake_ai_cli-0.1.0/docs/github-notes/v0.1.0.md +285 -0
  14. intake_ai_cli-0.1.0/docs/guia-cli.md +383 -0
  15. intake_ai_cli-0.1.0/docs/pipeline.md +289 -0
  16. intake_ai_cli-0.1.0/docs/solucion-problemas.md +401 -0
  17. intake_ai_cli-0.1.0/docs/verificacion.md +331 -0
  18. intake_ai_cli-0.1.0/examples/from-jira/README.md +37 -0
  19. intake_ai_cli-0.1.0/examples/from-jira/jira-export.json +73 -0
  20. intake_ai_cli-0.1.0/examples/from-markdown/README.md +40 -0
  21. intake_ai_cli-0.1.0/examples/from-markdown/requirements.md +69 -0
  22. intake_ai_cli-0.1.0/examples/from-scratch/README.md +37 -0
  23. intake_ai_cli-0.1.0/examples/from-scratch/idea.txt +26 -0
  24. intake_ai_cli-0.1.0/examples/multi-source/README.md +38 -0
  25. intake_ai_cli-0.1.0/examples/multi-source/api-decisions.json +68 -0
  26. intake_ai_cli-0.1.0/examples/multi-source/notes.txt +28 -0
  27. intake_ai_cli-0.1.0/examples/multi-source/user-stories.md +65 -0
  28. intake_ai_cli-0.1.0/pyproject.toml +97 -0
  29. intake_ai_cli-0.1.0/src/intake/__init__.py +6 -0
  30. intake_ai_cli-0.1.0/src/intake/__main__.py +7 -0
  31. intake_ai_cli-0.1.0/src/intake/analyze/__init__.py +8 -0
  32. intake_ai_cli-0.1.0/src/intake/analyze/analyzer.py +224 -0
  33. intake_ai_cli-0.1.0/src/intake/analyze/conflicts.py +63 -0
  34. intake_ai_cli-0.1.0/src/intake/analyze/dedup.py +115 -0
  35. intake_ai_cli-0.1.0/src/intake/analyze/design.py +136 -0
  36. intake_ai_cli-0.1.0/src/intake/analyze/extraction.py +113 -0
  37. intake_ai_cli-0.1.0/src/intake/analyze/models.py +152 -0
  38. intake_ai_cli-0.1.0/src/intake/analyze/prompts.py +208 -0
  39. intake_ai_cli-0.1.0/src/intake/analyze/questions.py +59 -0
  40. intake_ai_cli-0.1.0/src/intake/analyze/risks.py +70 -0
  41. intake_ai_cli-0.1.0/src/intake/cli.py +670 -0
  42. intake_ai_cli-0.1.0/src/intake/config/__init__.py +8 -0
  43. intake_ai_cli-0.1.0/src/intake/config/defaults.py +21 -0
  44. intake_ai_cli-0.1.0/src/intake/config/loader.py +143 -0
  45. intake_ai_cli-0.1.0/src/intake/config/presets.py +99 -0
  46. intake_ai_cli-0.1.0/src/intake/config/schema.py +84 -0
  47. intake_ai_cli-0.1.0/src/intake/diff/__init__.py +12 -0
  48. intake_ai_cli-0.1.0/src/intake/diff/differ.py +314 -0
  49. intake_ai_cli-0.1.0/src/intake/doctor/__init__.py +7 -0
  50. intake_ai_cli-0.1.0/src/intake/doctor/checks.py +355 -0
  51. intake_ai_cli-0.1.0/src/intake/export/__init__.py +17 -0
  52. intake_ai_cli-0.1.0/src/intake/export/architect.py +212 -0
  53. intake_ai_cli-0.1.0/src/intake/export/base.py +37 -0
  54. intake_ai_cli-0.1.0/src/intake/export/generic.py +183 -0
  55. intake_ai_cli-0.1.0/src/intake/export/registry.py +70 -0
  56. intake_ai_cli-0.1.0/src/intake/generate/__init__.py +8 -0
  57. intake_ai_cli-0.1.0/src/intake/generate/lock.py +154 -0
  58. intake_ai_cli-0.1.0/src/intake/generate/spec_builder.py +193 -0
  59. intake_ai_cli-0.1.0/src/intake/ingest/__init__.py +24 -0
  60. intake_ai_cli-0.1.0/src/intake/ingest/base.py +233 -0
  61. intake_ai_cli-0.1.0/src/intake/ingest/confluence.py +216 -0
  62. intake_ai_cli-0.1.0/src/intake/ingest/docx.py +192 -0
  63. intake_ai_cli-0.1.0/src/intake/ingest/image.py +125 -0
  64. intake_ai_cli-0.1.0/src/intake/ingest/jira.py +231 -0
  65. intake_ai_cli-0.1.0/src/intake/ingest/markdown.py +114 -0
  66. intake_ai_cli-0.1.0/src/intake/ingest/pdf.py +165 -0
  67. intake_ai_cli-0.1.0/src/intake/ingest/plaintext.py +95 -0
  68. intake_ai_cli-0.1.0/src/intake/ingest/registry.py +207 -0
  69. intake_ai_cli-0.1.0/src/intake/ingest/yaml_input.py +142 -0
  70. intake_ai_cli-0.1.0/src/intake/llm/__init__.py +7 -0
  71. intake_ai_cli-0.1.0/src/intake/llm/adapter.py +250 -0
  72. intake_ai_cli-0.1.0/src/intake/templates/acceptance.yaml.j2 +35 -0
  73. intake_ai_cli-0.1.0/src/intake/templates/context.md.j2 +43 -0
  74. intake_ai_cli-0.1.0/src/intake/templates/design.md.j2 +51 -0
  75. intake_ai_cli-0.1.0/src/intake/templates/requirements.md.j2 +68 -0
  76. intake_ai_cli-0.1.0/src/intake/templates/sources.md.j2 +29 -0
  77. intake_ai_cli-0.1.0/src/intake/templates/tasks.md.j2 +37 -0
  78. intake_ai_cli-0.1.0/src/intake/utils/__init__.py +3 -0
  79. intake_ai_cli-0.1.0/src/intake/utils/cost.py +109 -0
  80. intake_ai_cli-0.1.0/src/intake/utils/file_detect.py +56 -0
  81. intake_ai_cli-0.1.0/src/intake/utils/logging.py +34 -0
  82. intake_ai_cli-0.1.0/src/intake/utils/project_detect.py +104 -0
  83. intake_ai_cli-0.1.0/src/intake/verify/__init__.py +24 -0
  84. intake_ai_cli-0.1.0/src/intake/verify/engine.py +363 -0
  85. intake_ai_cli-0.1.0/src/intake/verify/reporter.py +206 -0
  86. intake_ai_cli-0.1.0/tests/__init__.py +0 -0
  87. intake_ai_cli-0.1.0/tests/conftest.py +130 -0
  88. intake_ai_cli-0.1.0/tests/fixtures/confluence_page.html +68 -0
  89. intake_ai_cli-0.1.0/tests/fixtures/jira_export.json +73 -0
  90. intake_ai_cli-0.1.0/tests/fixtures/jira_export_multi.json +22 -0
  91. intake_ai_cli-0.1.0/tests/fixtures/simple_spec.md +38 -0
  92. intake_ai_cli-0.1.0/tests/fixtures/slack_thread.txt +20 -0
  93. intake_ai_cli-0.1.0/tests/fixtures/structured_reqs.yaml +39 -0
  94. intake_ai_cli-0.1.0/tests/fixtures/wireframe.png +0 -0
  95. intake_ai_cli-0.1.0/tests/test_analyze/__init__.py +0 -0
  96. intake_ai_cli-0.1.0/tests/test_analyze/test_analyzer.py +250 -0
  97. intake_ai_cli-0.1.0/tests/test_analyze/test_conflicts.py +69 -0
  98. intake_ai_cli-0.1.0/tests/test_analyze/test_dedup.py +118 -0
  99. intake_ai_cli-0.1.0/tests/test_analyze/test_design.py +128 -0
  100. intake_ai_cli-0.1.0/tests/test_analyze/test_extraction.py +132 -0
  101. intake_ai_cli-0.1.0/tests/test_analyze/test_llm_adapter.py +190 -0
  102. intake_ai_cli-0.1.0/tests/test_analyze/test_risks.py +118 -0
  103. intake_ai_cli-0.1.0/tests/test_cli.py +256 -0
  104. intake_ai_cli-0.1.0/tests/test_config/__init__.py +0 -0
  105. intake_ai_cli-0.1.0/tests/test_config/test_loader.py +68 -0
  106. intake_ai_cli-0.1.0/tests/test_config/test_presets.py +46 -0
  107. intake_ai_cli-0.1.0/tests/test_config/test_schema.py +66 -0
  108. intake_ai_cli-0.1.0/tests/test_diff/__init__.py +0 -0
  109. intake_ai_cli-0.1.0/tests/test_diff/test_differ.py +195 -0
  110. intake_ai_cli-0.1.0/tests/test_doctor/__init__.py +0 -0
  111. intake_ai_cli-0.1.0/tests/test_doctor/test_checks.py +170 -0
  112. intake_ai_cli-0.1.0/tests/test_export/__init__.py +0 -0
  113. intake_ai_cli-0.1.0/tests/test_export/test_architect.py +156 -0
  114. intake_ai_cli-0.1.0/tests/test_export/test_generic.py +141 -0
  115. intake_ai_cli-0.1.0/tests/test_export/test_registry.py +56 -0
  116. intake_ai_cli-0.1.0/tests/test_generate/__init__.py +0 -0
  117. intake_ai_cli-0.1.0/tests/test_generate/test_lock.py +148 -0
  118. intake_ai_cli-0.1.0/tests/test_generate/test_spec_builder.py +274 -0
  119. intake_ai_cli-0.1.0/tests/test_ingest/__init__.py +0 -0
  120. intake_ai_cli-0.1.0/tests/test_ingest/test_confluence.py +71 -0
  121. intake_ai_cli-0.1.0/tests/test_ingest/test_hardening.py +142 -0
  122. intake_ai_cli-0.1.0/tests/test_ingest/test_image.py +72 -0
  123. intake_ai_cli-0.1.0/tests/test_ingest/test_jira.py +88 -0
  124. intake_ai_cli-0.1.0/tests/test_ingest/test_markdown.py +69 -0
  125. intake_ai_cli-0.1.0/tests/test_ingest/test_plaintext.py +62 -0
  126. intake_ai_cli-0.1.0/tests/test_ingest/test_registry.py +107 -0
  127. intake_ai_cli-0.1.0/tests/test_ingest/test_yaml_input.py +71 -0
  128. intake_ai_cli-0.1.0/tests/test_utils/__init__.py +0 -0
  129. intake_ai_cli-0.1.0/tests/test_utils/test_cost.py +55 -0
  130. intake_ai_cli-0.1.0/tests/test_utils/test_file_detect.py +71 -0
  131. intake_ai_cli-0.1.0/tests/test_utils/test_project_detect.py +60 -0
  132. intake_ai_cli-0.1.0/tests/test_verify/__init__.py +0 -0
  133. intake_ai_cli-0.1.0/tests/test_verify/test_engine.py +350 -0
  134. intake_ai_cli-0.1.0/tests/test_verify/test_reporter.py +157 -0
@@ -0,0 +1,96 @@
1
+ # build output
2
+ dist/
3
+
4
+ # generated types
5
+ .astro/
6
+
7
+ # dependencies
8
+ node_modules/
9
+
10
+ # logs
11
+ npm-debug.log*
12
+ yarn-debug.log*
13
+ yarn-error.log*
14
+ pnpm-debug.log*
15
+
16
+ # environment variables
17
+ .env
18
+ .env.production
19
+
20
+ # macOS-specific files
21
+ .DS_Store
22
+
23
+ # jetbrains setting folder
24
+ .idea/
25
+
26
+ # Python
27
+ __pycache__/
28
+ *.py[cod]
29
+ *$py.class
30
+ *.so
31
+ .Python
32
+ build/
33
+ develop-eggs/
34
+ dist/
35
+ downloads/
36
+ eggs/
37
+ .eggs/
38
+ lib/
39
+ lib64/
40
+ parts/
41
+ sdist/
42
+ var/
43
+ wheels/
44
+ pip-wheel-metadata/
45
+ share/python-wheels/
46
+ *.egg-info/
47
+ .installed.cfg
48
+ *.egg
49
+ MANIFEST
50
+
51
+ # Virtual environments
52
+ venv/
53
+ env/
54
+ ENV/
55
+ env.bak/
56
+ venv.bak/
57
+ .venv/
58
+
59
+ # IDEs
60
+ .vscode/
61
+ .idea/
62
+ *.swp
63
+ *.swo
64
+ *~
65
+ .DS_Store
66
+
67
+ # Testing
68
+ .pytest_cache/
69
+ .coverage
70
+ htmlcov/
71
+ .tox/
72
+ .nox/
73
+
74
+ # Type checking
75
+ .mypy_cache/
76
+ .dmypy.json
77
+ dmypy.json
78
+ .pyre/
79
+ .pytype/
80
+
81
+ # Config files with sensitive data
82
+ config.yaml
83
+ *.local.yaml
84
+ .env
85
+ .env.local
86
+
87
+ # Logs
88
+ logs/
89
+ *.log
90
+ *.jsonl
91
+
92
+ tmp/
93
+ CLAUDE.md
94
+ .coverage
95
+
96
+ __pycache__/
@@ -0,0 +1,55 @@
1
+ # intake configuration
2
+ # Copy this file to .intake.yaml and customize as needed.
3
+ # All values shown below are the defaults — only override what you need.
4
+ #
5
+ # Configuration priority (highest wins):
6
+ # CLI flags > .intake.yaml > preset > hardcoded defaults
7
+
8
+ # LLM settings for requirement analysis
9
+ llm:
10
+ model: claude-sonnet-4 # Any model supported by LiteLLM
11
+ api_key_env: ANTHROPIC_API_KEY # Environment variable holding the API key
12
+ max_cost_per_spec: 0.50 # Budget limit per spec generation (USD)
13
+ temperature: 0.2 # Lower = more deterministic
14
+ max_retries: 3 # Retry count on LLM failures
15
+ timeout: 120 # Timeout per LLM call (seconds)
16
+
17
+ # Project metadata
18
+ project:
19
+ name: "" # Auto-detected from description if empty
20
+ stack: [] # Auto-detected from project files if empty
21
+ language: en # Language for generated spec content
22
+ conventions: {} # Custom conventions: { "testing": "pytest", ... }
23
+
24
+ # Spec generation settings
25
+ spec:
26
+ output_dir: ./specs # Where to write generated specs
27
+ requirements_format: ears # ears | user-stories | bdd | free
28
+ design_depth: moderate # minimal | moderate | detailed
29
+ task_granularity: medium # coarse | medium | fine
30
+ include_sources: true # Include source traceability in spec
31
+ version_specs: true # Create versioned spec directories
32
+ generate_lock: true # Generate spec.lock.yaml for reproducibility
33
+ risk_assessment: true # Include risk matrix in analysis
34
+
35
+ # Verification settings
36
+ verification:
37
+ auto_generate_tests: true # Generate acceptance checks from requirements
38
+ test_output_dir: ./tests/generated # Where to write generated tests
39
+ checks: [] # Additional custom checks
40
+ timeout_per_check: 120 # Timeout per acceptance check (seconds)
41
+
42
+ # Export settings
43
+ export:
44
+ default_format: generic # architect | generic (claude-code, cursor, kiro: planned)
45
+ architect_include_guardrails: true # Include guardrails in architect pipeline
46
+ architect_pipeline_template: standard
47
+ claude_code_generate_claude_md: true
48
+
49
+ # Security settings
50
+ security:
51
+ redact_patterns: [] # Regex patterns to redact from spec output
52
+ redact_files: # File patterns to never include
53
+ - "*.env"
54
+ - "*.pem"
55
+ - "*.key"
@@ -0,0 +1,120 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2026-03-02
9
+
10
+ ### Added
11
+
12
+ #### Phase 1 — Ingest (8 parsers + registry)
13
+
14
+ - **Project scaffolding**: `pyproject.toml` with hatchling build system, `src/intake/` package layout with 10 subpackages.
15
+ - **CLI framework**: Click-based CLI with 8 commands (`init`, `add`, `verify`, `export`, `show`, `list`, `diff`, `doctor`).
16
+ - **`intake doctor` command**: Full environment health check — validates Python version (3.12+), LLM API keys, optional dependencies (pdfplumber, python-docx, bs4, markdownify, litellm, jinja2), and `.intake.yaml` config validity. Outputs a Rich table with PASS/FAIL status and fix hints.
17
+ - **Configuration system**: Pydantic v2 models for all config (`LLMConfig`, `ProjectConfig`, `SpecConfig`, `VerificationConfig`, `ExportConfig`, `SecurityConfig`). Layered merge: defaults → preset → `.intake.yaml` → CLI flags.
18
+ - **Configuration presets**: `--preset minimal|standard|enterprise` for quick setup. Minimal is cheap/fast for prototyping, standard is balanced, enterprise is detailed with full traceability.
19
+ - **8 input parsers** — all producing normalized `ParsedContent`:
20
+ - `MarkdownParser` — `.md` files with YAML front matter support and heading-based section extraction.
21
+ - `PlaintextParser` — `.txt` files, stdin (`-`), Slack thread dumps. Paragraph-level sections.
22
+ - `YamlInputParser` — `.yaml`/`.yml`/`.json` structured requirements. Section extraction from top-level keys.
23
+ - `PdfParser` — `.pdf` files via pdfplumber. Text + table extraction (tables converted to Markdown).
24
+ - `DocxParser` — `.docx` files via python-docx. Text, tables, heading-based sections, document metadata (author, title, date).
25
+ - `JiraParser` — Jira JSON exports (both `{"issues":[...]}` API format and `[{"key":...}]` list format). Extracts issues with summary, description, comments (last 5), labels, priority, status, and inter-issue links.
26
+ - `ConfluenceParser` — Confluence HTML exports via BeautifulSoup4 + markdownify. Detects Confluence markers, extracts main content, converts to clean Markdown.
27
+ - `ImageParser` — `.png`/`.jpg`/`.jpeg`/`.webp`/`.gif` via injectable LLM vision callable. Ships with a stub; real vision analysis enabled when LLM adapter is configured.
28
+ - **Parser registry**: `ParserRegistry` with automatic format detection by file extension and content inspection. JSON subtype detection (Jira vs generic), HTML subtype detection (Confluence vs generic). Factory function `create_default_registry()` registers all 8 parsers.
29
+ - **Utilities**:
30
+ - `file_detect.py` — Extension-based format detection with `EXTENSION_MAP`.
31
+ - `project_detect.py` — Auto-detects project tech stack from 20+ marker files (pyproject.toml, package.json, Dockerfile, etc.) and content patterns (fastapi, django, react, etc.).
32
+ - `cost.py` — `CostTracker` for LLM cost accumulation with per-phase breakdown.
33
+ - `logging.py` — structlog configuration with console/JSON rendering.
34
+
35
+ #### Phase 2 — Analyze (LLM orchestration)
36
+
37
+ - **LLM Adapter** (`llm/adapter.py`): LiteLLM wrapper with async completion, retry with exponential backoff, cost tracking per call, budget enforcement (`CostLimitError`), JSON response parsing with markdown fence stripping, API key validation. Custom exceptions: `LLMError`, `CostLimitError`, `APIKeyMissingError`.
38
+ - **Analysis pipeline** (`analyze/`):
39
+ - `analyzer.py` — Orchestrator: coordinates extraction → deduplication → conflict validation → risk assessment → design phases. Supports multi-source analysis with automatic text combining.
40
+ - `prompts.py` — Three system prompts: `EXTRACTION_PROMPT` (requirements, conflicts, questions), `RISK_ASSESSMENT_PROMPT` (risk analysis per requirement), `DESIGN_PROMPT` (architecture, tasks, acceptance checks).
41
+ - `extraction.py` — Parses LLM JSON output into typed `AnalysisResult` with requirements, conflicts, and open questions.
42
+ - `dedup.py` — Jaccard word similarity deduplication across sources (threshold: 0.75).
43
+ - `conflicts.py` — Validates extracted conflicts, filters incomplete entries.
44
+ - `questions.py` — Validates extracted open questions, filters incomplete entries.
45
+ - `risks.py` — Parses LLM risk assessment into typed `RiskItem` list.
46
+ - `design.py` — Parses LLM design output into `DesignResult` with components, file actions, tasks, and acceptance checks.
47
+ - `models.py` — 10 dataclasses: `Requirement`, `Conflict`, `OpenQuestion`, `RiskItem`, `TechDecision`, `TaskItem`, `FileAction`, `AcceptanceCheck`, `DesignResult`, `AnalysisResult`.
48
+
49
+ #### Phase 3 — Generate (spec files + lock)
50
+
51
+ - **Generation module** (`generate/`):
52
+ - `spec_builder.py` — Orchestrates rendering of 6 spec files via Jinja2 templates + optional `spec.lock.yaml`.
53
+ - `lock.py` — `SpecLock` dataclass with SHA-256 source/spec hashing, staleness detection, YAML serialization. `create_lock()` factory function.
54
+ - **6 Jinja2 templates** (`templates/`): `requirements.md.j2`, `design.md.j2`, `tasks.md.j2`, `acceptance.yaml.j2`, `context.md.j2`, `sources.md.j2`.
55
+
56
+ #### Phase 4 — Verify (acceptance check engine)
57
+
58
+ - **Verification engine** (`verify/engine.py`): Runs acceptance.yaml checks against a project directory. Four check types: `command` (shell exit code), `files_exist` (path checks), `pattern_present` (regex in files), `pattern_absent` (forbidden patterns). Tag-based filtering, fail-fast mode, configurable timeout per check. `VerifyError` exception with reason + suggestion.
59
+ - **Report formatters** (`verify/reporter.py`):
60
+ - `TerminalReporter` — Rich table with colors, pass/fail status, duration, details.
61
+ - `JsonReporter` — Machine-readable JSON output.
62
+ - `JunitReporter` — JUnit XML for CI integration (GitHub Actions, Jenkins, etc.).
63
+ - `Reporter` Protocol with `@runtime_checkable` for extensibility.
64
+
65
+ #### Phase 5 — Export (agent-ready output)
66
+
67
+ - **Exporter framework** (`export/`):
68
+ - `base.py` — `Exporter` Protocol with `@runtime_checkable` for structural subtyping.
69
+ - `registry.py` — `ExporterRegistry` with format-based dispatch. Factory function `create_default_registry()` registers both built-in exporters.
70
+ - **Architect exporter** (`export/architect.py`): Generates `pipeline.yaml` with one step per task, checkpoint flags, project context injection, final verification step with required command checks. Copies all spec files to `output/spec/`.
71
+ - **Generic exporter** (`export/generic.py`): Generates `SPEC.md` (consolidated Markdown with all spec sections), `verify.sh` (executable bash script with `check()` helper, `set -euo pipefail`, exit code reporting). Copies all spec files to `output/spec/`.
72
+
73
+ #### Spec differ
74
+
75
+ - **Diff module** (`diff/differ.py`): Compares two spec versions by extracting sections from Markdown headings (FR/NFR IDs, task numbers) and acceptance check IDs. Reports added, removed, and modified items per section. `SpecDiff` dataclass with `added`, `removed`, `modified`, `has_changes` properties. `DiffError` exception.
76
+
77
+ #### CLI — Full pipeline wiring
78
+
79
+ - **`intake init`**: End-to-end pipeline: ingest → analyze → generate → (optional) export. Auto-detects tech stack, slugifies description for directory name, `--dry-run` support.
80
+ - **`intake add`**: Incremental source addition to existing spec. Parses new sources, re-analyzes, regenerates spec files.
81
+ - **`intake verify`**: Loads `acceptance.yaml`, runs checks via `VerificationEngine`, displays report in chosen format (terminal/json/junit), exits with semantic code (0/1/2).
82
+ - **`intake export`**: Exports spec to chosen format via `ExporterRegistry`.
83
+ - **`intake show`**: Displays spec summary from `spec.lock.yaml` (model, cost, counts) and file listing.
84
+ - **`intake list`**: Scans specs directory for valid spec subdirectories, shows table with metadata from lock files.
85
+ - **`intake diff`**: Compares two spec directories, shows Rich table with added/removed/modified items.
86
+
87
+ #### Test suite
88
+
89
+ - **313 tests**, **83% overall coverage**. 7 realistic fixture files (Markdown, plaintext, YAML, Jira JSON x2, Confluence HTML, PNG image).
90
+
91
+ #### Documentation and examples
92
+
93
+ - **`.intake.yaml.example`**: Fully documented configuration example with all options and defaults.
94
+ - **4 example projects** in `examples/`: `from-markdown`, `from-jira`, `from-scratch`, `multi-source` — each with README and realistic source files.
95
+
96
+ #### Error hardening
97
+
98
+ - **`EmptySourceError`** and **`FileTooLargeError`** exceptions for better error messages on edge cases.
99
+ - **`validate_file_readable()`** and **`read_text_safe()`** utilities — centralized file validation with UTF-8 → latin-1 encoding fallback, size limits (50MB), and empty file detection.
100
+ - All 8 parsers updated to use hardened file validation utilities.
101
+ - 16 new hardening tests covering empty files, encoding fallback, size limits, and directory rejection.
102
+
103
+ #### Doctor --fix
104
+
105
+ - **`intake doctor --fix`** command: auto-installs missing Python packages and creates default `.intake.yaml` configuration.
106
+ - `FixResult` dataclass for structured fix result reporting.
107
+ - `_find_pip()` detects `pip3.12`, `pip3`, or `pip` for package installation.
108
+ - `IMPORT_TO_PIP` mapping for correct PyPI package names (e.g., `bs4` → `beautifulsoup4`, `docx` → `python-docx`).
109
+
110
+ #### Code quality
111
+
112
+ - **0 ruff errors**: Fixed 88 lint issues (TC001/TC003, F401, I001, SIM103, RUF022, E501, SIM117, RUF043).
113
+ - **0 mypy --strict errors**: Fixed 26 type errors across 12 files. Proper isinstance narrowing, type-safe dict extraction, correct bool return types.
114
+
115
+ ### Fixed
116
+
117
+ - **structlog test isolation**: Replaced `StringIO` sink with persistent `_NullWriter` class and yield-based autouse fixture. Fixed `cache_logger_on_first_use=True` in `setup_logging()` that caused "I/O operation on closed file" errors when CLI tests ran before module tests.
118
+ - **`_get_list` type safety**: Split into `_get_list` (for dict lists) and `_get_str_list` (for string lists) to fix mypy --strict without breaking acceptance criteria extraction.
119
+ - **bs4 `find()` kwargs**: Changed `**selector` to `attrs=selector` in Confluence parser for correct BeautifulSoup4 type narrowing.
120
+
@@ -0,0 +1,365 @@
1
+ Metadata-Version: 2.4
2
+ Name: intake-ai-cli
3
+ Version: 0.1.0
4
+ Summary: From requirements in any format to verified implementation
5
+ Author: intake contributors
6
+ License: MIT
7
+ Keywords: ai,automation,cli,requirements,spec-driven
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: Software Development :: Build Tools
14
+ Classifier: Topic :: Software Development :: Code Generators
15
+ Requires-Python: >=3.12
16
+ Requires-Dist: beautifulsoup4>=4.12
17
+ Requires-Dist: click>=8.1
18
+ Requires-Dist: httpx>=0.27
19
+ Requires-Dist: jinja2>=3.1
20
+ Requires-Dist: litellm>=1.40
21
+ Requires-Dist: markdownify>=0.13
22
+ Requires-Dist: pdfplumber>=0.11
23
+ Requires-Dist: pydantic>=2.0
24
+ Requires-Dist: python-docx>=1.1
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: rich>=13.0
27
+ Requires-Dist: structlog>=24.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: mypy>=1.10; extra == 'dev'
30
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
31
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
32
+ Requires-Dist: pytest>=8.0; extra == 'dev'
33
+ Requires-Dist: ruff>=0.5; extra == 'dev'
34
+ Requires-Dist: types-beautifulsoup4>=4.12; extra == 'dev'
35
+ Requires-Dist: types-pyyaml>=6.0; extra == 'dev'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # intake
39
+
40
+ > From requirements in any format to verified implementation.
41
+
42
+ [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue.svg)](https://python.org)
43
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
44
+
45
+ **intake** is an open-source CLI tool that acts as a universal bridge between real-world requirements and AI coding agents. It accepts requirements from multiple sources and formats — Jira exports, Confluence pages, PDFs, Markdown, YAML, images, DOCX, free text — and transforms them into a normalized, verifiable spec that any AI agent can consume.
46
+
47
+ It's not an IDE. It's not an agent. It doesn't generate code. intake is **preparation infrastructure**: the missing step between "we have some requirements somewhere" and "an agent implements with automatic verification."
48
+
49
+ ```
50
+ intake = Chaotic requirements (N sources, N formats) → Executable spec → Any AI agent
51
+ ```
52
+
53
+ ---
54
+
55
+ ## How It Works
56
+
57
+ ```
58
+ INGEST (parsers) → ANALYZE (LLM) → GENERATE (spec files) → VERIFY (acceptance checks) → EXPORT (agent-ready output)
59
+ ```
60
+
61
+ intake processes requirements through a 5-phase pipeline:
62
+
63
+ 1. **Ingest** — Parse any input format into normalized `ParsedContent`
64
+ 2. **Analyze** — LLM extracts structured requirements, detects conflicts, deduplicates
65
+ 3. **Generate** — Produce 6 spec files + `spec.lock.yaml`
66
+ 4. **Verify** — Run executable acceptance checks against the implementation
67
+ 5. **Export** — Generate agent-ready output (architect, generic; Claude Code, Cursor, and Kiro exporters are planned)
68
+
69
+ ### The 6 Spec Files
70
+
71
+ | File | Purpose |
72
+ |------|---------|
73
+ | `requirements.md` | What to build. Functional and non-functional requirements in EARS format. |
74
+ | `design.md` | How to build it. Architecture, interfaces, technical decisions. |
75
+ | `tasks.md` | In what order. Atomic tasks with dependencies. |
76
+ | `acceptance.yaml` | How to verify. Executable checks: commands, patterns, file existence. |
77
+ | `context.md` | Project context for the agent: stack, conventions, current state. |
78
+ | `sources.md` | Full traceability: every requirement mapped to its original source. |
79
+
80
+ ---
81
+
82
+ ## Installation
83
+
84
+ ```bash
85
+ pip install intake-ai-cli
86
+ ```
87
+
88
+ Requires Python 3.12+. The CLI command is `intake`.
89
+
90
+ ### Development Setup
91
+
92
+ ```bash
93
+ git clone https://github.com/your-org/intake-cli.git
94
+ cd intake-cli
95
+ pip install -e ".[dev]"
96
+ ```
97
+
98
+ ---
99
+
100
+ ## Quick Start
101
+
102
+ ```bash
103
+ # Check your environment
104
+ intake doctor
105
+
106
+ # Generate a spec from a single source
107
+ intake init "OAuth2 authentication system" -s requirements.md
108
+
109
+ # Generate from multiple sources
110
+ intake init "Payments feature" -s jira.json -s confluence.html -s notes.md
111
+
112
+ # Use a preset for quick configuration
113
+ intake init "API gateway" -s reqs.yaml --preset enterprise
114
+
115
+ # Export for a specific agent
116
+ intake init "User endpoint" -s reqs.pdf --format architect
117
+ ```
118
+
119
+ ---
120
+
121
+ ## Supported Input Formats
122
+
123
+ | Format | Extensions | Parser |
124
+ |--------|-----------|--------|
125
+ | Markdown | `.md` | Front matter, heading-based sections |
126
+ | Plain text | `.txt`, stdin (`-`) | Paragraph sections, Slack dumps |
127
+ | YAML / JSON | `.yaml`, `.yml`, `.json` | Structured requirements |
128
+ | PDF | `.pdf` | Text + tables via pdfplumber |
129
+ | DOCX | `.docx` | Text, tables, headings, metadata via python-docx |
130
+ | Jira export | `.json` (auto-detected) | Issues, comments, links, priorities |
131
+ | Confluence export | `.html` (auto-detected) | Clean Markdown via BS4 + markdownify |
132
+ | Images | `.png`, `.jpg`, `.jpeg`, `.webp`, `.gif` | LLM vision analysis |
133
+
134
+ Format is auto-detected by file extension and content inspection. Jira JSON exports and Confluence HTML exports are distinguished automatically from generic JSON/HTML files.
135
+
136
+ ---
137
+
138
+ ## Commands
139
+
140
+ | Command | Description | Status |
141
+ |---------|-------------|--------|
142
+ | `intake init` | Generate a spec from requirement sources | **Available** |
143
+ | `intake add` | Add sources to an existing spec (incremental) | **Available** |
144
+ | `intake verify` | Verify implementation against the spec | **Available** |
145
+ | `intake export` | Export spec to agent-ready format | **Available** |
146
+ | `intake show` | Show spec summary | **Available** |
147
+ | `intake list` | List all specs in the project | **Available** |
148
+ | `intake diff` | Compare two spec versions | **Available** |
149
+ | `intake doctor` | Check environment and configuration health | **Available** |
150
+ | `intake doctor --fix` | Auto-fix environment issues (install deps, create config) | **Available** |
151
+
152
+ ---
153
+
154
+ ## Configuration
155
+
156
+ intake works with zero configuration — only an LLM API key is needed. For customization, create a `.intake.yaml`:
157
+
158
+ ```yaml
159
+ llm:
160
+ model: claude-sonnet-4
161
+ max_cost_per_spec: 0.50
162
+ temperature: 0.2
163
+
164
+ project:
165
+ name: my-project
166
+ language: en
167
+
168
+ spec:
169
+ output_dir: ./specs
170
+ requirements_format: ears # ears | user-stories | bdd | free
171
+ design_depth: moderate # minimal | moderate | detailed
172
+ task_granularity: medium # coarse | medium | fine
173
+ risk_assessment: true
174
+
175
+ export:
176
+ default_format: generic # architect | generic (claude-code, cursor, kiro: planned)
177
+ ```
178
+
179
+ ### Presets
180
+
181
+ Skip the config file and use a preset:
182
+
183
+ ```bash
184
+ intake init "My feature" -s reqs.md --preset minimal # Fast, cheap, prototyping
185
+ intake init "My feature" -s reqs.md --preset standard # Balanced (default)
186
+ intake init "My feature" -s reqs.md --preset enterprise # Detailed, full traceability
187
+ ```
188
+
189
+ ### Configuration Priority
190
+
191
+ ```
192
+ CLI flags > .intake.yaml > preset > hardcoded defaults
193
+ ```
194
+
195
+ ---
196
+
197
+ ## Examples
198
+
199
+ See the [`examples/`](examples/) directory for ready-to-run scenarios:
200
+
201
+ | Example | Description |
202
+ |---------|-------------|
203
+ | [`from-markdown`](examples/from-markdown/) | Single Markdown file with OAuth2 requirements |
204
+ | [`from-jira`](examples/from-jira/) | Jira JSON export with 3 issues |
205
+ | [`from-scratch`](examples/from-scratch/) | Free-text meeting notes |
206
+ | [`multi-source`](examples/multi-source/) | Combining Markdown + Jira JSON + text notes |
207
+
208
+ ---
209
+
210
+ ## Architecture
211
+
212
+ ```
213
+ src/intake/
214
+ ├── cli.py # Click CLI — thin adapter, no logic
215
+ ├── config/ # Pydantic v2 models, presets, layered loader
216
+ │ ├── schema.py # 6 config models (LLM, Project, Spec, Verification, Export, Security)
217
+ │ ├── presets.py # minimal / standard / enterprise presets
218
+ │ ├── loader.py # Layered merge: defaults → preset → YAML → CLI
219
+ │ └── defaults.py # Centralized constants
220
+ ├── ingest/ # Phase 1 — 8 parsers, registry, auto-detection
221
+ │ ├── base.py # ParsedContent dataclass + Parser Protocol
222
+ │ ├── registry.py # Auto-detection + parser dispatch
223
+ │ ├── markdown.py # .md with YAML front matter
224
+ │ ├── plaintext.py # .txt, stdin, Slack dumps
225
+ │ ├── yaml_input.py # .yaml/.yml/.json structured input
226
+ │ ├── pdf.py # .pdf via pdfplumber
227
+ │ ├── docx.py # .docx via python-docx
228
+ │ ├── jira.py # Jira JSON exports (API + list format)
229
+ │ ├── confluence.py # Confluence HTML via BS4 + markdownify
230
+ │ └── image.py # Image analysis via LLM vision
231
+ ├── analyze/ # Phase 2 — LLM orchestration (async)
232
+ │ ├── analyzer.py # Orchestrator: extraction → dedup → risk → design
233
+ │ ├── prompts.py # 3 system prompts (extraction, risk, design)
234
+ │ ├── models.py # 10 dataclasses for analysis pipeline
235
+ │ ├── extraction.py # LLM JSON → typed AnalysisResult
236
+ │ ├── dedup.py # Jaccard word similarity deduplication
237
+ │ ├── conflicts.py # Conflict validation
238
+ │ ├── questions.py # Open question validation
239
+ │ ├── risks.py # Risk assessment parsing
240
+ │ └── design.py # Design output parsing (tasks, checks)
241
+ ├── generate/ # Phase 3 — Jinja2 template rendering
242
+ │ ├── spec_builder.py # Orchestrates 6 spec files + lock
243
+ │ └── lock.py # spec.lock.yaml for reproducibility
244
+ ├── verify/ # Phase 4 — Acceptance check engine
245
+ │ ├── engine.py # 4 check types: command, files_exist, pattern_*
246
+ │ └── reporter.py # Terminal (Rich), JSON, JUnit XML reporters
247
+ ├── export/ # Phase 5 — Agent-ready output
248
+ │ ├── base.py # Exporter Protocol
249
+ │ ├── registry.py # Format-based exporter dispatch
250
+ │ ├── architect.py # pipeline.yaml generation
251
+ │ └── generic.py # SPEC.md + verify.sh generation
252
+ ├── diff/ # Spec comparison
253
+ │ └── differ.py # Compare two specs by requirement/task IDs
254
+ ├── doctor/ # Environment health checks
255
+ │ └── checks.py # Python, API keys, deps, config validation
256
+ ├── llm/ # LiteLLM wrapper (used by analyze/ only)
257
+ │ └── adapter.py # Async completion, retry, cost tracking, budget
258
+ ├── templates/ # Jinja2 templates for spec generation
259
+ │ ├── requirements.md.j2 # FR, NFR, conflicts, open questions
260
+ │ ├── design.md.j2 # Components, files, tech decisions
261
+ │ ├── tasks.md.j2 # Task summary + detailed sections
262
+ │ ├── acceptance.yaml.j2 # Executable acceptance checks
263
+ │ ├── context.md.j2 # Project context for agents
264
+ │ └── sources.md.j2 # Source traceability mapping
265
+ └── utils/ # Shared utilities (logging, cost, detection)
266
+     ├── file_detect.py # Extension-based format detection
267
+     ├── project_detect.py # Auto-detect tech stack from project files
268
+     ├── cost.py # Cost accumulation with per-phase breakdown
269
+     └── logging.py # structlog configuration
270
+ ```
271
+
272
+ **Key design principles:**
273
+
274
+ - **Protocol over ABC** — All extension points use `typing.Protocol`
275
+ - **Dataclasses for pipeline data, Pydantic for config** — Never mixed
276
+ - **Async only in analyze/** — Together with the `llm/` adapter it calls; everything else is synchronous
277
+ - **Offline mode** — Parsing (except image analysis, which uses LLM vision), verification, export, diff, and doctor all work without an LLM
278
+ - **No magic strings** — All constants defined explicitly
279
+ - **Budget enforcement** — LLM cost tracked per call with configurable limits
280
+
281
+ ---
282
+
283
+ ## Integration
284
+
285
+ ### With architect
286
+
287
+ ```bash
288
+ intake init "Auth system" -s reqs.md --format architect
289
+ architect pipeline specs/auth-system/pipeline.yaml
290
+ ```
291
+
292
+ ### With Claude Code
293
+
294
+ ```bash
295
+ intake init "Payments" -s reqs.pdf --format claude-code
296
+ # Generates CLAUDE.md + tasks + verify.sh
297
+ ```
298
+
299
+ ### With CI/CD
300
+
301
+ ```yaml
302
+ # GitHub Actions
303
+ - name: Verify spec compliance
304
+   run: |
305
+     pip install intake-ai-cli
306
+     intake verify specs/auth-system/ -p . --format junit
307
+ ```
308
+
309
+ ---
310
+
311
+ ## Development
312
+
313
+ ```bash
314
+ # Run tests
315
+ python -m pytest tests/ -v
316
+
317
+ # Run tests with coverage
318
+ python -m pytest tests/ --cov=intake --cov-report=term-missing
319
+
320
+ # Lint
321
+ ruff check src/ tests/
322
+
323
+ # Type check (strict)
324
+ mypy src/ --strict
325
+ ```
326
+
327
+ Current test suite: **313 tests**, **83% coverage**.
328
+
329
+ ### Implementation Status
330
+
331
+ | Phase | Module | Status |
332
+ |-------|--------|--------|
333
+ | Phase 1 — Ingest | `ingest/` (8 parsers + registry) | Implemented |
334
+ | Phase 2 — Analyze | `analyze/` (orchestrator + 8 sub-modules) | Implemented |
335
+ | Phase 3 — Generate | `generate/` (spec builder + 6 templates + lock) | Implemented |
336
+ | Phase 4 — Verify | `verify/` (engine + 3 reporters) | Implemented |
337
+ | Phase 5 — Export | `export/` (architect + generic) | Implemented |
338
+ | Standalone | `doctor/`, `config/`, `llm/`, `utils/` | Implemented |
339
+ | Standalone | `diff/` (spec differ) | Implemented |
340
+ | CLI | All 8 commands wired end-to-end | Implemented |
341
+
342
+ ---
343
+
344
+ ## Model Support
345
+
346
+ intake uses [LiteLLM](https://github.com/BerriAI/litellm) for LLM abstraction, supporting 100+ models:
347
+
348
+ - **Anthropic**: Claude Sonnet, Claude Opus, Claude Haiku
349
+ - **OpenAI**: GPT-4o, GPT-4, GPT-3.5
350
+ - **Google**: Gemini Pro, Gemini Flash
351
+ - **Local models**: Ollama, vLLM, etc.
352
+
353
+ Set your API key:
354
+
355
+ ```bash
356
+ export ANTHROPIC_API_KEY=sk-ant-...
357
+ # or
358
+ export OPENAI_API_KEY=sk-...
359
+ ```
360
+
361
+ ---
362
+
363
+ ## License
364
+
365
+ MIT