archex 0.1.0.dev0__tar.gz → 0.4.0__tar.gz

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (221)
  1. {archex-0.1.0.dev0 → archex-0.4.0}/.github/workflows/ci.yml +3 -1
  2. archex-0.4.0/CHANGELOG.md +148 -0
  3. archex-0.4.0/PKG-INFO +301 -0
  4. archex-0.4.0/README.md +241 -0
  5. {archex-0.1.0.dev0 → archex-0.4.0}/docs/OVERVIEW.md +2 -2
  6. {archex-0.1.0.dev0 → archex-0.4.0}/docs/ROADMAP.md +161 -79
  7. {archex-0.1.0.dev0 → archex-0.4.0}/pyproject.toml +12 -2
  8. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/__init__.py +1 -1
  9. archex-0.4.0/src/archex/acquire/__init__.py +9 -0
  10. archex-0.4.0/src/archex/acquire/discovery.py +127 -0
  11. archex-0.4.0/src/archex/acquire/git.py +70 -0
  12. archex-0.4.0/src/archex/acquire/local.py +27 -0
  13. archex-0.4.0/src/archex/analyze/decisions.py +185 -0
  14. archex-0.4.0/src/archex/analyze/interfaces.py +188 -0
  15. archex-0.4.0/src/archex/analyze/modules.py +164 -0
  16. archex-0.4.0/src/archex/analyze/patterns.py +480 -0
  17. archex-0.4.0/src/archex/api.py +344 -0
  18. archex-0.4.0/src/archex/cache.py +162 -0
  19. archex-0.4.0/src/archex/cli/analyze_cmd.py +53 -0
  20. archex-0.4.0/src/archex/cli/cache_cmd.py +47 -0
  21. archex-0.4.0/src/archex/cli/compare_cmd.py +124 -0
  22. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/cli/main.py +2 -0
  23. archex-0.4.0/src/archex/cli/mcp_cmd.py +24 -0
  24. archex-0.4.0/src/archex/cli/query_cmd.py +62 -0
  25. archex-0.4.0/src/archex/config.py +68 -0
  26. archex-0.4.0/src/archex/index/bm25.py +93 -0
  27. archex-0.4.0/src/archex/index/chunker.py +274 -0
  28. archex-0.4.0/src/archex/index/embeddings/__init__.py +9 -0
  29. archex-0.4.0/src/archex/index/embeddings/api.py +78 -0
  30. archex-0.4.0/src/archex/index/embeddings/base.py +15 -0
  31. archex-0.4.0/src/archex/index/embeddings/nomic.py +103 -0
  32. archex-0.4.0/src/archex/index/embeddings/sentence_tf.py +64 -0
  33. archex-0.4.0/src/archex/index/graph.py +208 -0
  34. archex-0.4.0/src/archex/index/store.py +206 -0
  35. archex-0.4.0/src/archex/index/vector.py +147 -0
  36. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/integrations/__init__.py +0 -2
  37. archex-0.4.0/src/archex/integrations/langchain.py +73 -0
  38. archex-0.4.0/src/archex/integrations/llamaindex.py +73 -0
  39. archex-0.4.0/src/archex/integrations/mcp.py +244 -0
  40. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/models.py +121 -16
  41. archex-0.4.0/src/archex/parse/__init__.py +17 -0
  42. archex-0.4.0/src/archex/parse/adapters/__init__.py +76 -0
  43. archex-0.4.0/src/archex/parse/adapters/base.py +48 -0
  44. archex-0.4.0/src/archex/parse/adapters/go.py +381 -0
  45. archex-0.4.0/src/archex/parse/adapters/python.py +451 -0
  46. archex-0.4.0/src/archex/parse/adapters/rust.py +610 -0
  47. archex-0.4.0/src/archex/parse/adapters/ts_node.py +47 -0
  48. archex-0.4.0/src/archex/parse/adapters/typescript.py +524 -0
  49. archex-0.4.0/src/archex/parse/engine.py +94 -0
  50. archex-0.4.0/src/archex/parse/imports.py +149 -0
  51. archex-0.4.0/src/archex/parse/symbols.py +111 -0
  52. archex-0.4.0/src/archex/providers/anthropic.py +87 -0
  53. archex-0.4.0/src/archex/providers/base.py +44 -0
  54. archex-0.4.0/src/archex/providers/openai.py +81 -0
  55. archex-0.4.0/src/archex/providers/openrouter.py +92 -0
  56. archex-0.4.0/src/archex/serve/compare.py +263 -0
  57. archex-0.4.0/src/archex/serve/context.py +184 -0
  58. archex-0.4.0/src/archex/serve/profile.py +91 -0
  59. archex-0.4.0/src/archex/serve/renderers/json.py +13 -0
  60. archex-0.4.0/src/archex/serve/renderers/markdown.py +72 -0
  61. archex-0.4.0/src/archex/serve/renderers/xml.py +72 -0
  62. archex-0.4.0/src/archex/utils.py +12 -0
  63. archex-0.4.0/tests/acquire/test_discovery.py +261 -0
  64. archex-0.4.0/tests/acquire/test_git.py +172 -0
  65. archex-0.4.0/tests/acquire/test_local.py +35 -0
  66. archex-0.4.0/tests/analyze/test_decisions.py +171 -0
  67. archex-0.4.0/tests/analyze/test_interfaces.py +230 -0
  68. archex-0.4.0/tests/analyze/test_modules.py +321 -0
  69. archex-0.4.0/tests/analyze/test_patterns.py +310 -0
  70. archex-0.4.0/tests/conftest.py +56 -0
  71. archex-0.4.0/tests/fixtures/go_simple/handlers.go +34 -0
  72. archex-0.4.0/tests/fixtures/go_simple/main.go +16 -0
  73. archex-0.4.0/tests/fixtures/go_simple/models.go +44 -0
  74. archex-0.4.0/tests/fixtures/go_simple/utils.go +20 -0
  75. archex-0.4.0/tests/fixtures/rust_simple/src/lib.rs +21 -0
  76. archex-0.4.0/tests/fixtures/rust_simple/src/main.rs +16 -0
  77. archex-0.4.0/tests/fixtures/rust_simple/src/models.rs +55 -0
  78. archex-0.4.0/tests/fixtures/rust_simple/src/utils.rs +26 -0
  79. archex-0.4.0/tests/index/embeddings/__init__.py +0 -0
  80. archex-0.4.0/tests/index/embeddings/test_embeddings.py +146 -0
  81. archex-0.4.0/tests/index/test_bm25.py +224 -0
  82. archex-0.4.0/tests/index/test_chunker.py +534 -0
  83. archex-0.4.0/tests/index/test_graph.py +140 -0
  84. archex-0.4.0/tests/index/test_store.py +224 -0
  85. archex-0.4.0/tests/index/test_vector.py +331 -0
  86. archex-0.4.0/tests/integrations/__init__.py +0 -0
  87. archex-0.4.0/tests/integrations/test_langchain.py +154 -0
  88. archex-0.4.0/tests/integrations/test_llamaindex.py +166 -0
  89. archex-0.4.0/tests/integrations/test_mcp.py +291 -0
  90. archex-0.4.0/tests/parse/adapters/test_go.py +363 -0
  91. archex-0.4.0/tests/parse/adapters/test_python.py +308 -0
  92. archex-0.4.0/tests/parse/adapters/test_rust.py +557 -0
  93. archex-0.4.0/tests/parse/adapters/test_typescript.py +463 -0
  94. archex-0.4.0/tests/parse/test_adapter_registry.py +96 -0
  95. archex-0.4.0/tests/parse/test_engine.py +90 -0
  96. archex-0.4.0/tests/parse/test_imports.py +257 -0
  97. archex-0.4.0/tests/parse/test_symbols.py +164 -0
  98. archex-0.4.0/tests/providers/__init__.py +0 -0
  99. archex-0.4.0/tests/providers/test_providers.py +239 -0
  100. archex-0.4.0/tests/serve/test_compare.py +528 -0
  101. archex-0.4.0/tests/serve/test_context.py +223 -0
  102. archex-0.4.0/tests/serve/test_profile.py +134 -0
  103. archex-0.4.0/tests/serve/test_renderers.py +192 -0
  104. archex-0.4.0/tests/test_api_robustness.py +101 -0
  105. archex-0.4.0/tests/test_cache.py +180 -0
  106. archex-0.4.0/tests/test_cli.py +161 -0
  107. archex-0.4.0/tests/test_config.py +165 -0
  108. archex-0.4.0/tests/test_integration.py +224 -0
  109. archex-0.4.0/tests/test_performance.py +402 -0
  110. {archex-0.1.0.dev0 → archex-0.4.0}/uv.lock +865 -3
  111. archex-0.1.0.dev0/PKG-INFO +0 -52
  112. archex-0.1.0.dev0/README.md +0 -2
  113. archex-0.1.0.dev0/src/archex/acquire/__init__.py +0 -5
  114. archex-0.1.0.dev0/src/archex/acquire/discovery.py +0 -5
  115. archex-0.1.0.dev0/src/archex/acquire/git.py +0 -5
  116. archex-0.1.0.dev0/src/archex/acquire/local.py +0 -5
  117. archex-0.1.0.dev0/src/archex/analyze/decisions.py +0 -5
  118. archex-0.1.0.dev0/src/archex/analyze/interfaces.py +0 -5
  119. archex-0.1.0.dev0/src/archex/analyze/modules.py +0 -5
  120. archex-0.1.0.dev0/src/archex/analyze/patterns.py +0 -5
  121. archex-0.1.0.dev0/src/archex/api.py +0 -45
  122. archex-0.1.0.dev0/src/archex/cache.py +0 -5
  123. archex-0.1.0.dev0/src/archex/cli/analyze_cmd.py +0 -12
  124. archex-0.1.0.dev0/src/archex/cli/cache_cmd.py +0 -11
  125. archex-0.1.0.dev0/src/archex/cli/compare_cmd.py +0 -13
  126. archex-0.1.0.dev0/src/archex/cli/query_cmd.py +0 -12
  127. archex-0.1.0.dev0/src/archex/config.py +0 -11
  128. archex-0.1.0.dev0/src/archex/index/bm25.py +0 -5
  129. archex-0.1.0.dev0/src/archex/index/chunker.py +0 -5
  130. archex-0.1.0.dev0/src/archex/index/embeddings/__init__.py +0 -5
  131. archex-0.1.0.dev0/src/archex/index/embeddings/api.py +0 -5
  132. archex-0.1.0.dev0/src/archex/index/embeddings/base.py +0 -5
  133. archex-0.1.0.dev0/src/archex/index/embeddings/nomic.py +0 -5
  134. archex-0.1.0.dev0/src/archex/index/embeddings/sentence_tf.py +0 -5
  135. archex-0.1.0.dev0/src/archex/index/graph.py +0 -5
  136. archex-0.1.0.dev0/src/archex/index/store.py +0 -5
  137. archex-0.1.0.dev0/src/archex/index/vector.py +0 -5
  138. archex-0.1.0.dev0/src/archex/integrations/langchain.py +0 -5
  139. archex-0.1.0.dev0/src/archex/integrations/llamaindex.py +0 -5
  140. archex-0.1.0.dev0/src/archex/integrations/mcp.py +0 -5
  141. archex-0.1.0.dev0/src/archex/parse/__init__.py +0 -5
  142. archex-0.1.0.dev0/src/archex/parse/adapters/__init__.py +0 -5
  143. archex-0.1.0.dev0/src/archex/parse/adapters/base.py +0 -5
  144. archex-0.1.0.dev0/src/archex/parse/adapters/go.py +0 -5
  145. archex-0.1.0.dev0/src/archex/parse/adapters/python.py +0 -5
  146. archex-0.1.0.dev0/src/archex/parse/adapters/rust.py +0 -5
  147. archex-0.1.0.dev0/src/archex/parse/adapters/typescript.py +0 -5
  148. archex-0.1.0.dev0/src/archex/parse/engine.py +0 -5
  149. archex-0.1.0.dev0/src/archex/parse/imports.py +0 -5
  150. archex-0.1.0.dev0/src/archex/parse/symbols.py +0 -5
  151. archex-0.1.0.dev0/src/archex/providers/anthropic.py +0 -5
  152. archex-0.1.0.dev0/src/archex/providers/base.py +0 -5
  153. archex-0.1.0.dev0/src/archex/providers/openai.py +0 -5
  154. archex-0.1.0.dev0/src/archex/providers/openrouter.py +0 -5
  155. archex-0.1.0.dev0/src/archex/serve/compare.py +0 -5
  156. archex-0.1.0.dev0/src/archex/serve/context.py +0 -5
  157. archex-0.1.0.dev0/src/archex/serve/profile.py +0 -5
  158. archex-0.1.0.dev0/src/archex/serve/renderers/json.py +0 -5
  159. archex-0.1.0.dev0/src/archex/serve/renderers/markdown.py +0 -5
  160. archex-0.1.0.dev0/src/archex/serve/renderers/xml.py +0 -5
  161. archex-0.1.0.dev0/tests/acquire/test_discovery.py +0 -5
  162. archex-0.1.0.dev0/tests/acquire/test_git.py +0 -5
  163. archex-0.1.0.dev0/tests/acquire/test_local.py +0 -5
  164. archex-0.1.0.dev0/tests/analyze/test_modules.py +0 -5
  165. archex-0.1.0.dev0/tests/analyze/test_patterns.py +0 -5
  166. archex-0.1.0.dev0/tests/conftest.py +0 -23
  167. archex-0.1.0.dev0/tests/index/test_bm25.py +0 -5
  168. archex-0.1.0.dev0/tests/index/test_chunker.py +0 -5
  169. archex-0.1.0.dev0/tests/index/test_graph.py +0 -5
  170. archex-0.1.0.dev0/tests/parse/adapters/test_python.py +0 -5
  171. archex-0.1.0.dev0/tests/parse/adapters/test_typescript.py +0 -5
  172. archex-0.1.0.dev0/tests/parse/test_engine.py +0 -5
  173. archex-0.1.0.dev0/tests/parse/test_imports.py +0 -5
  174. archex-0.1.0.dev0/tests/parse/test_symbols.py +0 -5
  175. archex-0.1.0.dev0/tests/serve/test_context.py +0 -5
  176. archex-0.1.0.dev0/tests/serve/test_profile.py +0 -5
  177. archex-0.1.0.dev0/tests/test_cli.py +0 -23
  178. {archex-0.1.0.dev0 → archex-0.4.0}/.gitignore +0 -0
  179. {archex-0.1.0.dev0 → archex-0.4.0}/.pre-commit-config.yaml +0 -0
  180. {archex-0.1.0.dev0 → archex-0.4.0}/AGENTS.md +0 -0
  181. {archex-0.1.0.dev0 → archex-0.4.0}/LICENSE +0 -0
  182. {archex-0.1.0.dev0 → archex-0.4.0}/docs/SYSTEM_DESIGN.md +0 -0
  183. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/analyze/__init__.py +0 -0
  184. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/cli/__init__.py +0 -0
  185. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/exceptions.py +0 -0
  186. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/index/__init__.py +0 -0
  187. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/providers/__init__.py +0 -0
  188. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/serve/__init__.py +0 -0
  189. {archex-0.1.0.dev0 → archex-0.4.0}/src/archex/serve/renderers/__init__.py +0 -0
  190. {archex-0.1.0.dev0 → archex-0.4.0}/tests/__init__.py +0 -0
  191. {archex-0.1.0.dev0 → archex-0.4.0}/tests/acquire/__init__.py +0 -0
  192. {archex-0.1.0.dev0 → archex-0.4.0}/tests/analyze/__init__.py +0 -0
  193. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/monorepo_simple/packages/cli/pyproject.toml +0 -0
  194. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/monorepo_simple/packages/cli/src/__init__.py +0 -0
  195. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/monorepo_simple/packages/cli/src/main.py +0 -0
  196. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/monorepo_simple/packages/core/pyproject.toml +0 -0
  197. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/monorepo_simple/packages/core/src/__init__.py +0 -0
  198. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/monorepo_simple/packages/core/src/index.py +0 -0
  199. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/monorepo_simple/pyproject.toml +0 -0
  200. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_patterns/events.py +0 -0
  201. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_patterns/middleware.py +0 -0
  202. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_patterns/plugins.py +0 -0
  203. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_patterns/repository.py +0 -0
  204. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_patterns/strategies.py +0 -0
  205. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_simple/main.py +0 -0
  206. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_simple/models.py +0 -0
  207. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_simple/pyproject.toml +0 -0
  208. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_simple/services/__init__.py +0 -0
  209. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_simple/services/auth.py +0 -0
  210. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/python_simple/utils.py +0 -0
  211. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/typescript_simple/package.json +0 -0
  212. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/typescript_simple/src/handlers/auth.ts +0 -0
  213. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/typescript_simple/src/index.ts +0 -0
  214. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/typescript_simple/src/types.ts +0 -0
  215. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/typescript_simple/src/utils.ts +0 -0
  216. {archex-0.1.0.dev0 → archex-0.4.0}/tests/fixtures/typescript_simple/tsconfig.json +0 -0
  217. {archex-0.1.0.dev0 → archex-0.4.0}/tests/index/__init__.py +0 -0
  218. {archex-0.1.0.dev0 → archex-0.4.0}/tests/parse/__init__.py +0 -0
  219. {archex-0.1.0.dev0 → archex-0.4.0}/tests/parse/adapters/__init__.py +0 -0
  220. {archex-0.1.0.dev0 → archex-0.4.0}/tests/serve/__init__.py +0 -0
  221. {archex-0.1.0.dev0 → archex-0.4.0}/tests/test_models.py +0 -0
@@ -23,9 +23,11 @@ jobs:
23
23
 
24
24
  - name: Install uv
25
25
  uses: astral-sh/setup-uv@v5
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
26
28
 
27
29
  - name: Install dependencies
28
- run: uv sync --dev
30
+ run: uv sync --all-extras
29
31
 
30
32
  - name: Lint
31
33
  run: uv run ruff check .
@@ -0,0 +1,148 @@
1
+ # Changelog
2
+
3
+ ## 0.4.0 (2026-03-01)
4
+
5
+ ### Refactoring
6
+
7
+ - **Shared tree-sitter helpers:** Extract duplicate `_text`/`_type`/`_children`/`_field`/`_start_line`/`_end_line` accessors from all four language adapters into a single `ts_node` module
8
+ - **Dead code removal:** Remove unused `get_adapter()`, `_extract_interfaces()`, redundant `add_node` calls in `DependencyGraph.from_edges()`, unused `index_config` param from `api.analyze()`
9
+ - **Dependency deduplication:** Use sets instead of lists in `_build_module_from_community` for O(1) membership checks
10
+ - **Chunker optimization:** Remove unnecessary `sorted()` call on already-ordered covered ranges
11
+ - **Vector load:** `copy=False` on `numpy.astype` for zero-copy when array is already float32
12
+
13
+ ### Error handling & logging
14
+
15
+ - **`infer_decisions()`:** Log LLM enrichment failures with `logger.warning()` instead of silently catching
16
+ - **`BM25Index.search()`:** Log FTS5 query failures instead of silently returning empty results
17
+ - **`SentenceTransformerEmbedder.dimension`:** Replace bare `assert` with explicit `ArchexIndexError`
18
+
19
+ ### Configuration & standards
20
+
21
+ - **`DEFAULT_CACHE_DIR` constant:** Centralized in `config.py`, used by all CLI cache commands
22
+ - **Config validation:** `model_fields` over `hasattr` for Pydantic v2 correctness
23
+ - **`validate_dimensions()`:** Extracted from `compare_repos()` for reuse by MCP integration
24
+ - **Install instructions:** All `pip install` references replaced with `uv add`
25
+
26
+ ### Testing
27
+
28
+ - 3 new test files: `test_config.py`, `test_adapter_registry.py`, `test_renderers.py`
29
+ - 7 extended test files covering parse, index, analyze, serve, and acquire layers
30
+ - 538 → 641 tests (+103), 84% → 90% coverage
31
+
32
+ ## 0.3.0 (2026-03-01)
33
+
34
+ ### Phase 6a — Harden
35
+
36
+ - **Git URL validation:** `_validate_url()` restricts to `http://`, `https://`, local paths only
37
+ - **Branch name validation:** Regex guard rejects injection characters and `-` prefix
38
+ - **FTS5 query escaping:** Strip non-alphanumeric characters from BM25 query tokens
39
+ - **Cache key validation:** Enforce `^[0-9a-f]{64}$` pattern in `db_path()` and `meta_path()`
40
+ - **Vector safety:** `allow_pickle=False` and `dtype='U512'` for `.npz` persistence, length validation on load
41
+ - **File size guard:** `max_file_size` config in `discover_files()` and `parse_file()`
42
+ - **Store safety:** `IndexStore.__init__` wrapped in try/except for connection cleanup on failure
43
+ - **Parse logging:** `symbols.py` and `imports.py` log warnings on parse failures
44
+ - **MCP validation:** Dimension list validated against `SUPPORTED_DIMENSIONS` before `compare()`
45
+ - **MCP event loop:** `asyncio.get_event_loop()` → `asyncio.get_running_loop()`
46
+ - **CLI error handling:** API calls wrapped in `try/except ArchexError` → `click.ClickException`
47
+ - **Embeddings timeout:** `timeout=30` added to API `urlopen()`
48
+ - **Compare CLI:** `assert isinstance(...)` replaced with explicit type check
49
+
50
+ ### Phase 6b — Performance
51
+
52
+ - **Cache-first query:** `query()` checks cache BEFORE parsing — cache hit skips entire parse pipeline
53
+ - **Graph round-trip:** `DependencyGraph.from_edges()` classmethod reconstructs graph from stored edges
54
+ - **Batch fetch:** `IndexStore.get_chunks_by_ids()` with `WHERE id IN (...)`, used in `BM25Index.search()`
55
+ - **Parallel config:** `Config.parallel` flag passed to `extract_symbols()` and `parse_imports()`
56
+ - **Parallel compare:** `ThreadPoolExecutor(max_workers=2)` runs both `analyze()` calls concurrently
57
+ - **O(N) top-k:** `np.argpartition` replaces `np.argsort` in VectorIndex search
58
+ - **Vector cache:** `CacheManager.vector_path()` persists vector indices across queries
59
+ - **Centrality cache:** Lazy `_centrality_cache` on `DependencyGraph`, invalidated on mutation
60
+ - **Chunker optimization:** Source split once in `chunk_file()`, pre-split lines passed downstream
61
+ - **Git-aware cache:** Cache key includes git HEAD commit hash for local repos
62
+
63
+ ### Phase 6c — Wire & Polish
64
+
65
+ - **Hybrid retrieval wired:** VectorIndex built and searched in cache-miss query path, results passed through RRF to `assemble_context()`
66
+ - **`resolve_source()` utility:** Extracted from 4 inline copies, fixes `query_cmd` bug (`startswith("http")` → `startswith("http://")`)
67
+ - **Compare CLI routing:** Routes through `api.compare()` instead of making two manual `analyze()` calls
68
+ - **MCP dimension fix:** `testing_strategy` → `testing`, `dependency_management` → `state_management`, `configuration_management` → `configuration`
69
+ - **Dead field removal:** `CodeChunk.module` removed from models, store schema, and chunker
70
+ - **RepoSource validator:** `model_validator(mode="after")` requires `url` or `local_path`
71
+ - **`load_config()`:** Reads `~/.archex/config.toml` via `tomllib` + `ARCHEX_*` env vars
72
+ - **Provider model IDs:** Centralized in `DEFAULT_MODELS` dict in `config.py`
73
+ - **Pipeline logging:** `logging.getLogger(__name__)` with timing at all stage boundaries
74
+ - **Test improvements:** Cache CLI tests, `__version__` import in test_cli
75
+
76
+ ### Phase 6d — Extensibility
77
+
78
+ - **`ScoringWeights` model:** Parameterized context scoring (relevance=0.6, structural=0.3, type_coverage=0.1) with sum-to-1 validator, accepted in `assemble_context()` and `query()`
79
+ - **`PatternRegistry`:** `register()` decorator, `load_entry_points()` for `archex.pattern_detectors` group, optional `registry` param in `detect_patterns()`
80
+ - **`AdapterRegistry`:** `register()`, `build_all()`, `load_entry_points()` for `archex.language_adapters` group, public `adapter_classes` property
81
+ - **`Chunker` Protocol:** `runtime_checkable`, accepted as optional `chunker` param in `query()`
82
+ - **Entry points:** `archex.language_adapters` and `archex.pattern_detectors` groups declared in `pyproject.toml`
83
+ - **Integration tests:** 12 end-to-end tests covering analyze, query (BM25, caching, custom weights, hybrid fallback), compare (default + specific dimensions), full analyze→query pipeline
84
+ - 538 tests, 84% coverage
85
+
86
+ ## 0.2.0 (2026-02-28)
87
+
88
+ ### Phase 5 — Ecosystem
89
+
90
+ - **MCP server:** 3 tools (analyze_repo, query_repo, compare_repos) with async stdio transport, `archex mcp` CLI command
91
+ - **LangChain integration:** `ArchexRetriever(BaseRetriever)` mapping RankedChunks to Documents
92
+ - **LlamaIndex integration:** `ArchexRetriever(BaseRetriever)` mapping RankedChunks to NodeWithScore
93
+ - **Parallel parsing:** `extract_symbols()` and `parse_imports()` accept `parallel=True` for ProcessPoolExecutor concurrency
94
+ - **ONNX model caching:** `NomicCodeEmbedder` supports `cache_dir` for persistent model storage at `~/.archex/models/`
95
+ - **New optional deps:** `archex[mcp]`, `archex[langchain]`, `archex[llamaindex]`
96
+ - 422 tests, 81% coverage
97
+
98
+ ## 0.1.0 (2026-02-28)
99
+
100
+ ### Phase 0 — Scaffold
101
+
102
+ - Project structure with hatchling build system, CLI stub, CI config
103
+ - Test fixtures: `python_simple`, `python_patterns`, `typescript_simple`, `monorepo_simple`
104
+ - Tooling: ruff, pyright (strict), pytest + pytest-cov, pre-commit
105
+
106
+ ### Phase 1 — Foundation
107
+
108
+ - **Acquire:** `clone_repo()`, `open_local()`, `discover_files()` with git ls-files + rglob fallback
109
+ - **Parse:** `TreeSitterEngine` with cached Language/Parser, `PythonAdapter` (full AST walk)
110
+ - **Index:** `DependencyGraph` wrapping dual NetworkX DiGraphs (file-level, symbol-level), SQLite round-trip, PageRank centrality
111
+ - **Serve:** Basic `ArchProfile` assembly with stats, dependency summary, interface surface
112
+ - **API:** `analyze()` pipeline — discover, parse, resolve, graph, profile
113
+ - **CLI:** `archex analyze <source> --format json|markdown`
114
+ - **Models:** Complete Pydantic v2 model hierarchy, exception classes, StrEnum types
115
+ - 107 tests, 88% coverage
116
+
117
+ ### Phase 2 — Retrieval
118
+
119
+ - **Chunker:** `ASTChunker` with symbol-based boundaries, import context, small-chunk merging, tiktoken counting
120
+ - **BM25:** SQLite FTS5 index with OR-joined query tokens
121
+ - **Store:** `IndexStore` with WAL mode, chunks/edges/metadata tables
122
+ - **Cache:** `CacheManager` with TTL, WAL checkpoint before copy, FTS rebuild on load
123
+ - **Context assembly:** BM25 search, graph neighborhood expansion, composite scoring (0.6 relevance + 0.3 structural + 0.1 type), greedy bin-packing
124
+ - **Renderers:** XML (CDATA), JSON (model_dump), Markdown
125
+ - **API:** `query()` pipeline — acquire, parse, chunk, index, search, assemble
126
+ - **CLI:** `archex query`, `archex cache list|clean|info`
127
+ - 174 tests, 85% coverage
128
+
129
+ ### Phase 3 — Intelligence
130
+
131
+ - **Modules:** Louvain community detection on file-level dependency graph
132
+ - **Patterns:** Rule-based detection — middleware chain, plugin system, event bus, repository/DAO, strategy
133
+ - **Interfaces:** Public API surface extraction with usage counts
134
+ - **TypeScript adapter:** ES modules, CommonJS, type-only imports, re-exports, index resolution
135
+ - **LLM enrichment:** Optional `Provider` protocol (Anthropic, OpenAI, OpenRouter), structured output
136
+ - **Decisions:** `infer_decisions()` with structural evidence + optional LLM inference
137
+ - Full `ArchProfile` assembly with modules, patterns, interfaces, decisions
138
+
139
+ ### Phase 4 — Compare + Polish
140
+
141
+ - **Go adapter:** Functions, methods (pointer/value receivers), structs, interfaces, const/var, Go visibility (uppercase = public)
142
+ - **Rust adapter:** fn, struct, enum, trait, impl blocks, const/static, macro_rules, pub/pub(crate)/pub(super) visibility
143
+ - **Vector index:** Numpy-based cosine similarity (L2-norm at build, dot at search), `.npz` persistence
144
+ - **Embedder protocol:** `encode(texts) -> list[list[float]]`, `dimension -> int` — Nomic Code ONNX, API (OpenAI-compatible), SentenceTransformers backends
145
+ - **Hybrid retrieval:** Reciprocal rank fusion merging BM25 + vector results by chunk ID
146
+ - **Comparison engine:** `compare_repos()` across 6 structural dimensions (api_surface, concurrency, configuration, error_handling, state_management, testing), no LLM required
147
+ - **CLI polish:** `--timing` flag on analyze/query, `--strategy bm25|hybrid` on query, `--dimensions` on compare
148
+ - 372 tests, 81% coverage
archex-0.4.0/PKG-INFO ADDED
@@ -0,0 +1,301 @@
1
+ Metadata-Version: 2.4
2
+ Name: archex
3
+ Version: 0.4.0
4
+ Summary: Architecture extraction & codebase intelligence for the agentic era
5
+ Author: Tom
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Keywords: architecture,ast,code-intelligence,codebase,rag,retrieval
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Software Development :: Libraries
15
+ Requires-Python: >=3.11
16
+ Requires-Dist: click>=8.1
17
+ Requires-Dist: networkx>=3.3
18
+ Requires-Dist: pydantic>=2.7
19
+ Requires-Dist: tiktoken>=0.7
20
+ Requires-Dist: tree-sitter-go>=0.23
21
+ Requires-Dist: tree-sitter-javascript>=0.23
22
+ Requires-Dist: tree-sitter-python>=0.23
23
+ Requires-Dist: tree-sitter-rust>=0.23
24
+ Requires-Dist: tree-sitter-typescript>=0.23
25
+ Requires-Dist: tree-sitter>=0.23
26
+ Provides-Extra: all
27
+ Requires-Dist: anthropic>=0.30; extra == 'all'
28
+ Requires-Dist: langchain-core>=0.2; extra == 'all'
29
+ Requires-Dist: llama-index-core>=0.10; extra == 'all'
30
+ Requires-Dist: mcp>=1.0; extra == 'all'
31
+ Requires-Dist: onnxruntime>=1.17; extra == 'all'
32
+ Requires-Dist: openai>=1.0; extra == 'all'
33
+ Requires-Dist: tokenizers>=0.15; extra == 'all'
34
+ Requires-Dist: voyageai>=0.3; extra == 'all'
35
+ Provides-Extra: anthropic
36
+ Requires-Dist: anthropic>=0.30; extra == 'anthropic'
37
+ Provides-Extra: dev
38
+ Requires-Dist: pre-commit>=3.7; extra == 'dev'
39
+ Requires-Dist: pyright>=1.1; extra == 'dev'
40
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
41
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
42
+ Requires-Dist: pytest>=8.0; extra == 'dev'
43
+ Requires-Dist: ruff>=0.5; extra == 'dev'
44
+ Provides-Extra: langchain
45
+ Requires-Dist: langchain-core>=0.2; extra == 'langchain'
46
+ Provides-Extra: llamaindex
47
+ Requires-Dist: llama-index-core>=0.10; extra == 'llamaindex'
48
+ Provides-Extra: mcp
49
+ Requires-Dist: mcp>=1.0; extra == 'mcp'
50
+ Provides-Extra: openai
51
+ Requires-Dist: openai>=1.0; extra == 'openai'
52
+ Provides-Extra: vector
53
+ Requires-Dist: onnxruntime>=1.17; extra == 'vector'
54
+ Requires-Dist: tokenizers>=0.15; extra == 'vector'
55
+ Provides-Extra: vector-torch
56
+ Requires-Dist: sentence-transformers>=2.6; extra == 'vector-torch'
57
+ Provides-Extra: voyage
58
+ Requires-Dist: voyageai>=0.3; extra == 'voyage'
59
+ Description-Content-Type: text/markdown
60
+
61
+ # archex
62
+
63
+ Architecture extraction and codebase intelligence for the agentic era.
64
+
65
+ archex is a Python library and CLI that transforms any Git repository into structured architectural intelligence and token-budget-aware code context. It serves two consumers from a single index: **human architects** receive an `ArchProfile` with module boundaries, dependency graphs, detected patterns, and interface surfaces; **AI agents** receive a `ContextBundle` with relevance-ranked, syntax-aligned code chunks assembled to fit within a specified token budget.
66
+
67
+ ## Features
68
+
69
+ - **4 language adapters** — Python, TypeScript/JavaScript, Go, Rust (tree-sitter AST parsing), extensible via entry points
70
+ - **3 public APIs** — `analyze()`, `query()`, `compare()`
71
+ - **Hybrid retrieval** — BM25 keyword search + optional vector embeddings with reciprocal rank fusion
72
+ - **Token budget assembly** — AST-aware chunking, dependency-graph expansion, greedy bin-packing with configurable `ScoringWeights`
73
+ - **Structural analysis** — module detection (Louvain), pattern recognition (extensible `PatternRegistry`), interface extraction
74
+ - **Cross-repo comparison** — 6 architectural dimensions, no LLM required
75
+ - **Performance** — cache-first query (skips parse on cache hit), parallel parsing, parallel compare, git-aware cache keys
76
+ - **Extensibility** — plugin APIs for language adapters, pattern detectors, chunkers, and scoring weights via entry points and protocols
77
+ - **Security** — input validation on git URLs/branches, FTS5 query escaping, cache key validation, `allow_pickle=False` for vector persistence
78
+ - **LLM-optional** — entire structural pipeline runs without API calls; LLM enrichment is opt-in
79
+
80
+ ## Installation
81
+
82
+ ```bash
83
+ uv add archex
84
+ ```
85
+
86
+ ### Extras
87
+
88
+ | Extra | What it adds |
89
+ | ---------------------- | ---------------------------------------- |
90
+ | `archex[vector]` | ONNX-based local embeddings (Nomic Code) |
91
+ | `archex[vector-torch]` | Torch-backed sentence-transformers |
92
+ | `archex[voyage]` | Voyage Code API embeddings |
93
+ | `archex[openai]` | OpenAI API embeddings + LLM enrichment |
94
+ | `archex[anthropic]` | Anthropic API LLM enrichment |
95
+ | `archex[mcp]` | MCP server for agent integration |
96
+ | `archex[langchain]` | LangChain retriever integration |
97
+ | `archex[llamaindex]` | LlamaIndex retriever integration |
98
+ | `archex[all]` | All optional dependencies |
99
+
100
+ ## Quick Start
101
+
102
+ ### Python API
103
+
104
+ ```python
105
+ from archex import analyze, query, compare
106
+ from archex.models import RepoSource
107
+
108
+ # Architectural analysis
109
+ profile = analyze(RepoSource(local_path="./my-project"))
110
+ for module in profile.module_map:
111
+ print(f"{module.name}: {len(module.files)} files")
112
+ for pattern in profile.pattern_catalog:
113
+ print(f"[{pattern.confidence:.0%}] {pattern.name}")
114
+
115
+ # Implementation context for an agent
116
+ bundle = query(
117
+ RepoSource(local_path="./my-project"),
118
+ "How does authentication work?",
119
+ token_budget=8192,
120
+ )
121
+ print(bundle.to_prompt(format="xml"))
122
+
123
+ # Query with custom scoring weights
124
+ from archex.models import ScoringWeights
125
+
126
+ bundle = query(
127
+ RepoSource(local_path="./my-project"),
128
+ "database connection pooling",
129
+ scoring_weights=ScoringWeights(relevance=0.8, structural=0.1, type_coverage=0.1),
130
+ )
131
+
132
+ # Cross-repo comparison
133
+ result = compare(
134
+ RepoSource(local_path="./project-a"),
135
+ RepoSource(local_path="./project-b"),
136
+ dimensions=["error_handling", "api_surface"],
137
+ )
138
+ ```
139
+
140
+ ### CLI
141
+
142
+ ```bash
143
+ # Analyze a local repo or remote URL
144
+ archex analyze ./my-project --format json
145
+ archex analyze https://github.com/org/repo --format markdown -l python --timing
146
+
147
+ # Query for implementation context
148
+ archex query ./my-project "How does auth work?" --budget 8192 --format xml
149
+ archex query ./my-project "connection pooling" --strategy hybrid --timing
150
+
151
+ # Compare two repositories
152
+ archex compare ./project-a ./project-b --dimensions error_handling,api_surface --format markdown
153
+
154
+ # Manage the analysis cache
155
+ archex cache list
156
+ archex cache clean --max-age 168
157
+ archex cache info
158
+ ```
159
+
160
+ ## CLI Reference
161
+
162
+ ### `archex analyze <source>`
163
+
164
+ Analyze a repository and produce an architecture profile.
165
+
166
+ | Option | Default | Description |
167
+ | ------------------- | ------- | --------------------------------- |
168
+ | `--format` | `json` | Output format: `json`, `markdown` |
169
+ | `-l` / `--language` | all | Filter by language (repeatable) |
170
+ | `--timing` | off | Print timing breakdown to stderr |
171
+
172
+ ### `archex query <source> <question>`
173
+
174
+ Query a repository and return a context bundle.
175
+
176
+ | Option | Default | Description |
177
+ | ------------------- | ------- | ---------------------------------------- |
178
+ | `--budget` | `8192` | Token budget for context assembly |
179
+ | `--format` | `xml` | Output format: `xml`, `json`, `markdown` |
180
+ | `-l` / `--language` | all | Filter by language (repeatable) |
181
+ | `--strategy` | `bm25` | Retrieval strategy: `bm25`, `hybrid` |
182
+ | `--timing` | off | Print timing breakdown to stderr |
183
+
184
+ ### `archex compare <source_a> <source_b>`
185
+
186
+ Compare two repositories across architectural dimensions.
187
+
188
+ | Option | Default | Description |
189
+ | ------------------- | ------- | --------------------------------- |
190
+ | `--dimensions` | all 6 | Comma-separated dimension list |
191
+ | `--format` | `json` | Output format: `json`, `markdown` |
192
+ | `-l` / `--language` | all | Filter by language (repeatable) |
193
+ | `--timing` | off | Print timing breakdown to stderr |
194
+
195
+ Supported dimensions: `api_surface`, `concurrency`, `configuration`, `error_handling`, `state_management`, `testing`.
196
+
197
+ ### `archex mcp`
198
+
199
+ Start the MCP (Model Context Protocol) server for agent integration.
200
+
201
+ Tools exposed: `analyze_repo`, `query_repo`, `compare_repos`.
202
+
203
+ ### `archex cache <subcommand>`
204
+
205
+ Manage the local analysis cache.
206
+
207
+ | Subcommand | Options | Description |
208
+ | ---------- | ------------------------------------ | ---------------------- |
209
+ | `list` | `--cache-dir` | List cached entries |
210
+ | `clean` | `--max-age N` (hours), `--cache-dir` | Remove expired entries |
211
+ | `info` | `--cache-dir` | Show cache summary |
212
+
213
+ ## Extensibility
214
+
215
+ archex can be extended through Python entry points (language adapters, pattern detectors) and protocols (chunkers).
216
+
217
+ ### Custom Language Adapters
218
+
219
+ Register a language adapter in your package's `pyproject.toml`:
220
+
221
+ ```toml
222
+ [project.entry-points."archex.language_adapters"]
223
+ java = "mypackage.adapters:JavaAdapter"
224
+ ```
225
+
226
+ The adapter class must implement the `LanguageAdapter` protocol (see `archex.parse.adapters.base`).
227
+
228
+ ### Custom Pattern Detectors
229
+
230
+ Register a pattern detector function in your package's `pyproject.toml`:
231
+
232
+ ```toml
233
+ [project.entry-points."archex.pattern_detectors"]
234
+ my_pattern = "mypackage.patterns:detect_my_pattern"
235
+ ```
236
+
237
+ Detector signature: `(list[ParsedFile], DependencyGraph) -> DetectedPattern | None`.
238
+
239
+ ### Custom Chunkers
240
+
241
+ Implement the `Chunker` protocol and pass an instance of your class to `query()`:
242
+
243
+ ```python
244
+ from archex.index.chunker import Chunker
245
+
246
+ class MyChunker:
247
+ def chunk_file(self, parsed_file, source): ...
248
+ def chunk_files(self, parsed_files, sources): ...
249
+
250
+ bundle = query(source, question, chunker=MyChunker())
251
+ ```
252
+
253
+ ### Custom Scoring Weights
254
+
255
+ Adjust the retrieval ranking formula:
256
+
257
+ ```python
258
+ from archex.models import ScoringWeights
259
+
260
+ # Boost relevance, lower structural weight
261
+ weights = ScoringWeights(relevance=0.8, structural=0.1, type_coverage=0.1)
262
+ bundle = query(source, question, scoring_weights=weights)
263
+ ```
264
+
265
+ ## Configuration
266
+
267
+ archex reads configuration from `~/.archex/config.toml` and `ARCHEX_*` environment variables.
268
+
269
+ ```toml
270
+ # ~/.archex/config.toml
271
+ [default]
272
+ languages = ["python", "typescript"]
273
+ cache = true
274
+ cache_dir = "~/.archex/cache"
275
+ parallel = true
276
+ max_file_size = 10000000
277
+ ```
278
+
279
+ Environment variables override the corresponding settings in the config file: `ARCHEX_CACHE_DIR`, `ARCHEX_PARALLEL`, `ARCHEX_MAX_FILE_SIZE`.
280
+
281
+ ## Development
282
+
283
+ ```bash
284
+ git clone https://github.com/Mathews-Tom/archex.git
285
+ cd archex
286
+ uv sync --all-extras
287
+
288
+ # Run tests (538 tests)
289
+ uv run pytest
290
+
291
+ # Lint and format
292
+ uv run ruff check .
293
+ uv run ruff format .
294
+
295
+ # Type check (strict mode)
296
+ uv run pyright
297
+ ```
298
+
299
+ ## License
300
+
301
+ Apache 2.0 — see [LICENSE](LICENSE).