PyPI - crumbs-cli - Versions diffs - 0.3.0__tar.gz - Mend

crumbs-cli 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

crumbs_cli-0.3.0/LICENSE +21 -0
crumbs_cli-0.3.0/PKG-INFO +110 -0
crumbs_cli-0.3.0/README.md +86 -0
crumbs_cli-0.3.0/crumbs/__init__.py +9 -0
crumbs_cli-0.3.0/crumbs/__main__.py +6 -0
crumbs_cli-0.3.0/crumbs/cli.py +186 -0
crumbs_cli-0.3.0/crumbs/digest.py +75 -0
crumbs_cli-0.3.0/crumbs/extractors.py +255 -0
crumbs_cli-0.3.0/crumbs/indexer.py +133 -0
crumbs_cli-0.3.0/crumbs/mcp.py +291 -0
crumbs_cli-0.3.0/crumbs/query.py +80 -0
crumbs_cli-0.3.0/crumbs/store.py +117 -0
crumbs_cli-0.3.0/crumbs_cli.egg-info/PKG-INFO +110 -0
crumbs_cli-0.3.0/crumbs_cli.egg-info/SOURCES.txt +18 -0
crumbs_cli-0.3.0/crumbs_cli.egg-info/dependency_links.txt +1 -0
crumbs_cli-0.3.0/crumbs_cli.egg-info/entry_points.txt +2 -0
crumbs_cli-0.3.0/crumbs_cli.egg-info/top_level.txt +1 -0
crumbs_cli-0.3.0/pyproject.toml +37 -0
crumbs_cli-0.3.0/setup.cfg +4 -0
crumbs_cli-0.3.0/tests/test_crumbs.py +161 -0

crumbs_cli-0.3.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 SufyanShaik
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

crumbs_cli-0.3.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,110 @@
+Metadata-Version: 2.4
+Name: crumbs-cli
+Version: 0.3.0
+Summary: Local, token-efficient cross-repo context for LLMs. CLI + MCP server.
+Author: crumbs
+License: MIT
+Project-URL: Homepage, https://github.com/crumbs1505/crumbs
+Project-URL: Repository, https://github.com/crumbs1505/crumbs
+Project-URL: Issues, https://github.com/crumbs1505/crumbs/issues
+Keywords: llm,context,claude,code,repo,tokens,mcp
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Topic :: Software Development :: Documentation
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Dynamic: license-file
+# crumbs
+**Local, token-efficient cross-repo context for LLMs.**
+`crumbs` indexes your repositories into compact *context crumbs* — file maps and
+symbol signatures (typed function/class/type declarations + one-line docs + line
+ranges), **never the full file bodies**. An assistant like Claude can then
+understand many repos at once by reading a tiny map instead of paying tokens to
+read the entire source tree.
+Indexing this very tool produces a map of **~1,200 tokens** standing in for
+**~8,400 tokens** of source — an **~86% reduction** — while still naming every
+file and symbol. Each symbol carries its full type signature and a source line
+range (e.g. `def build_parser() -> ArgumentParser [L125-168]`), so the assistant
+can open *just that slice* of a file rather than the whole thing.
+- 🪶 **Zero dependencies.** Pure Python 3.8+ stdlib. Runs on any device.
+- 🔒 **Fully local.** Crumbs live in `~/.crumbs`. Nothing leaves your machine.
+- 🧠 **Cross-repo.** Search and pull context across every repo you've indexed.
+- 🎯 **High signal.** Python is parsed via `ast`; JS/TS/Go/Rust/etc. via fast
+  regex. Skips `node_modules`, `.git`, build dirs, lockfiles, and binaries.
+## Install
+```bash
+pip install -e .        # provides the `crumbs` command
+# or run without installing:
+python3 -m crumbs --help
+```
+## Usage
+```bash
+crumbs index ~/code/my-api ~/code/my-web   # index one or more repos
+crumbs list                                # show indexed repos + stats
+crumbs map my-api --stats                  # compact map of one repo (+ token estimate)
+crumbs search "auth token"                 # rank matching symbols across all repos
+crumbs context "rate limiting" --repo my-api   # LLM-ready context slice
+crumbs refresh                             # re-index everything
+crumbs remove my-web                       # drop a repo from the index
+```
+A repo can be referenced by name, id, or path.
+## Workflow with Claude
+1. `crumbs index` the repos you work across (once, or on a `crumbs refresh` cron).
+2. Ask Claude to run `crumbs map <repo>` or `crumbs context "<topic>"` instead of
+   reading whole files. It gets the structure and the relevant symbols for a
+   fraction of the tokens, then reads full files only where it actually needs to.
+## How it stays cheap
+| | Full repo read | `crumbs map` |
+|---|---|---|
+| What | every byte of every file | file tree + typed signatures + 1-line docs + line ranges |
+| Bodies | yes | no |
+| Cost | grows with codebase | grows with *interface* size |
+Because every symbol records its line range, the follow-up step is cheap too: the
+assistant reads `path:start-end` for the one function it needs instead of opening
+the entire file.
+Storage layout (`~/.crumbs`, override with `CRUMBS_HOME`):
+```
+registry.json        # id -> {name, path, indexed_at, stats}
+repos/<id>.json      # full crumb data for one repo
+```
+## Supported languages
+Python (AST), JavaScript/TypeScript, Go, Rust, and a generic declaration
+matcher for Java, Ruby, PHP, C/C++, C#, Swift, Kotlin. Markdown is indexed by
+heading. Files of any other type are skipped during symbol extraction and are
+safely ignored.
+## Tests
+```bash
+python3 -m unittest discover -s tests -v
+```
+## License
+MIT

crumbs_cli-0.3.0/README.md ADDED Viewed

@@ -0,0 +1,86 @@
+# crumbs
+**Local, token-efficient cross-repo context for LLMs.**
+`crumbs` indexes your repositories into compact *context crumbs* — file maps and
+symbol signatures (typed function/class/type declarations + one-line docs + line
+ranges), **never the full file bodies**. An assistant like Claude can then
+understand many repos at once by reading a tiny map instead of paying tokens to
+read the entire source tree.
+Indexing this very tool produces a map of **~1,200 tokens** standing in for
+**~8,400 tokens** of source — an **~86% reduction** — while still naming every
+file and symbol. Each symbol carries its full type signature and a source line
+range (e.g. `def build_parser() -> ArgumentParser [L125-168]`), so the assistant
+can open *just that slice* of a file rather than the whole thing.
+- 🪶 **Zero dependencies.** Pure Python 3.8+ stdlib. Runs on any device.
+- 🔒 **Fully local.** Crumbs live in `~/.crumbs`. Nothing leaves your machine.
+- 🧠 **Cross-repo.** Search and pull context across every repo you've indexed.
+- 🎯 **High signal.** Python is parsed via `ast`; JS/TS/Go/Rust/etc. via fast
+  regex. Skips `node_modules`, `.git`, build dirs, lockfiles, and binaries.
+## Install
+```bash
+pip install -e .        # provides the `crumbs` command
+# or run without installing:
+python3 -m crumbs --help
+```
+## Usage
+```bash
+crumbs index ~/code/my-api ~/code/my-web   # index one or more repos
+crumbs list                                # show indexed repos + stats
+crumbs map my-api --stats                  # compact map of one repo (+ token estimate)
+crumbs search "auth token"                 # rank matching symbols across all repos
+crumbs context "rate limiting" --repo my-api   # LLM-ready context slice
+crumbs refresh                             # re-index everything
+crumbs remove my-web                       # drop a repo from the index
+```
+A repo can be referenced by name, id, or path.
+## Workflow with Claude
+1. `crumbs index` the repos you work across (once, or on a `crumbs refresh` cron).
+2. Ask Claude to run `crumbs map <repo>` or `crumbs context "<topic>"` instead of
+   reading whole files. It gets the structure and the relevant symbols for a
+   fraction of the tokens, then reads full files only where it actually needs to.
+## How it stays cheap
+| | Full repo read | `crumbs map` |
+|---|---|---|
+| What | every byte of every file | file tree + typed signatures + 1-line docs + line ranges |
+| Bodies | yes | no |
+| Cost | grows with codebase | grows with *interface* size |
+Because every symbol records its line range, the follow-up step is cheap too: the
+assistant reads `path:start-end` for the one function it needs instead of opening
+the entire file.
+Storage layout (`~/.crumbs`, override with `CRUMBS_HOME`):
+```
+registry.json        # id -> {name, path, indexed_at, stats}
+repos/<id>.json      # full crumb data for one repo
+```
+## Supported languages
+Python (AST), JavaScript/TypeScript, Go, Rust, and a generic declaration
+matcher for Java, Ruby, PHP, C/C++, C#, Swift, Kotlin. Markdown is indexed by
+heading. Files of any other type are skipped during symbol extraction and are
+safely ignored.
+## Tests
+```bash
+python3 -m unittest discover -s tests -v
+```
+## License
+MIT

crumbs_cli-0.3.0/crumbs/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""crumbs - local, token-efficient cross-repo context for LLMs.
+crumbs indexes repositories into compact "context crumbs" (file maps and symbol
+signatures, not full file bodies) stored locally. An assistant can query these
+crumbs to understand many repos at once without reading -- and paying tokens for
+-- the entire source tree.
+"""
+# Package version string; the CLI prints it for ``crumbs --version``.
+__version__ = "0.3.0"

crumbs_cli-0.3.0/crumbs/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+import sys
+from .cli import main
+# Entry point for ``python -m crumbs``: run the CLI and use the integer it
+# returns as the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())

crumbs_cli-0.3.0/crumbs/cli.py ADDED Viewed

@@ -0,0 +1,186 @@
+"""crumbs command-line interface."""
+from __future__ import annotations
+import argparse
+import json
+import sys
+import time
+from typing import List, Optional
+from . import __version__, digest, indexer, query, store
+def _fmt_age(ts: float) -> str:
+    """Describe how long ago the Unix timestamp ``ts`` was, in one coarse unit.
+
+    Uses the largest of days, hours or minutes that fits (whole units, rounded
+    down) and returns e.g. ``"3d ago"``; anything under a minute -- including a
+    timestamp in the future -- is reported as ``"just now"``.
+    """
+    secs = max(0, int(time.time() - ts))
+    units = (("d", 86400), ("h", 3600), ("m", 60))
+    # Units are ordered largest first, so the first one that fits wins.
+    fitting = [(unit, n) for unit, n in units if secs >= n]
+    if not fitting:
+        return "just now"
+    unit, n = fitting[0]
+    return f"{secs // n}{unit} ago"
+def cmd_index(args: argparse.Namespace) -> int:
+    """Index each requested repo path and print a one-line summary per repo.
+
+    With no paths given, the current directory (``"."``) is indexed. If
+    ``indexer.index_repo`` raises ``NotADirectoryError`` or
+    ``FileNotFoundError`` the error is printed to stderr and the command stops
+    with exit code 1 without touching the remaining paths; otherwise returns 0.
+    """
+    for p in (args.paths if args.paths else ["."]):
+        try:
+            data = indexer.index_repo(p, name=args.name)
+        except (NotADirectoryError, FileNotFoundError) as e:
+            print(f"error: {e}", file=sys.stderr)
+            return 1
+        st = data["stats"]
+        # Render the map for the repo id index_repo returned so the summary can
+        # compare the map's estimated token cost with that of the source.
+        sav = digest.savings(data, digest.repo_map(data["id"]))
+        summary = (
+            f"indexed {data['name']}  "
+            f"{st['files']} files, {st['symbols']} symbols  "
+            f"(map ~{sav['map_tokens']} tok vs ~{sav['source_tokens']} tok source, "
+            f"-{sav['saved_pct']}%)"
+        )
+        print(summary)
+    return 0
+def cmd_list(args: argparse.Namespace) -> int:
+    """Print every indexed repo, either as JSON or as an aligned text table.
+
+    With ``--json`` the registry is always emitted as JSON -- an empty registry
+    prints ``{}`` -- so scripts consuming stdout never receive the
+    human-readable hint instead of a JSON document. Without ``--json`` an empty
+    registry prints a hint on how to index a repo; otherwise one row per repo
+    (name, id, file count, symbol count, age of the index), sorted by name.
+
+    Returns:
+        Always 0.
+    """
+    reg = store.load_registry()
+    if args.json:
+        # Checked before the empty-registry hint so stdout stays parseable,
+        # matching how `crumbs search --json` prints `[]` for no hits.
+        print(json.dumps(reg or {}, indent=2))
+        return 0
+    if not reg:
+        print("no repos indexed. run: crumbs index <path>")
+        return 0
+    rows = sorted(reg.items(), key=lambda kv: kv[1]["name"])
+    # Pad every name to the longest one so the columns line up.
+    name_w = max((len(m["name"]) for _, m in rows), default=4)
+    for rid, m in rows:
+        st = m["stats"]
+        print(
+            f"{m['name']:<{name_w}}  {rid}  "
+            f"{st['files']:>4} files  {st['symbols']:>5} symbols  "
+            f"{_fmt_age(m['indexed_at'])}"
+        )
+    return 0
+def cmd_map(args: argparse.Namespace) -> int:
+    """Print the compact map of one repo, optionally with a token estimate.
+
+    The map goes to stdout. With ``--stats`` a one-line comparison of the map's
+    estimated tokens against the full source is written to stderr, so it never
+    mixes into piped map output.
+
+    Returns:
+        1 if ``args.repo`` matches no indexed repo, otherwise 0.
+    """
+    rid = store.resolve(args.repo)
+    if not rid:
+        print(f"error: no indexed repo matches '{args.repo}'", file=sys.stderr)
+        return 1
+    text = digest.repo_map(rid, max_symbols_per_file=args.max_symbols)
+    print(text)
+    if args.stats:
+        data = store.load_repo(rid)
+        if not data:
+            # repo_map() already tolerates load_repo() returning nothing, but
+            # savings() subscripts the data and would raise TypeError here.
+            print(
+                f"warning: no crumb data stored for '{args.repo}'; skipping stats",
+                file=sys.stderr,
+            )
+            return 0
+        sav = digest.savings(data, text)
+        print(
+            f"\n_~{sav['map_tokens']} tokens (vs ~{sav['source_tokens']} for full source, "
+            f"-{sav['saved_pct']}%)_",
+            file=sys.stderr,
+        )
+    return 0
+def cmd_search(args: argparse.Namespace) -> int:
+    """Search indexed symbols and print the hits; always returns 0.
+
+    With ``--json`` the hit list is dumped as JSON (also when it is empty).
+    Otherwise prints ``no matches`` or one ``repo:path[:line]  signature`` line
+    per hit.
+    """
+    hits = query.search(args.query, repo=args.repo, limit=args.limit)
+    if args.json:
+        print(json.dumps(hits, indent=2))
+    elif not hits:
+        print("no matches")
+    else:
+        for h in hits:
+            sig = h["sig"]
+            if not sig:
+                # No signature recorded: fall back to "<kind> <name>".
+                sig = f"{h['kind']} {h['name']}"
+            loc = ""
+            if h.get("line"):
+                loc = f":{h['line']}"
+            print(f"{h['repo']}:{h['path']}{loc}  {sig}")
+    return 0
+def cmd_context(args: argparse.Namespace) -> int:
+    """Print what ``query.context`` returns for the query; always returns 0."""
+    slice_text = query.context(args.query, repo=args.repo, limit=args.limit)
+    print(slice_text)
+    return 0
+def cmd_remove(args: argparse.Namespace) -> int:
+    """Drop one repo from the index and confirm it by name.
+
+    Returns 1 (with an error on stderr) when ``args.repo`` matches no indexed
+    repo, otherwise 0.
+    """
+    rid = store.resolve(args.repo)
+    if rid:
+        # Read the display name before removing the repo; fall back to the id
+        # when the registry entry or its name is absent.
+        entry = store.load_registry().get(rid, {})
+        name = entry.get("name", rid)
+        store.remove_repo(rid)
+        print(f"removed {name}")
+        return 0
+    print(f"error: no indexed repo matches '{args.repo}'", file=sys.stderr)
+    return 1
+def cmd_mcp(args: argparse.Namespace) -> int:
+    """Run the MCP server and return whatever its ``serve()`` returns."""
+    # Imported inside the function, so this command only loads the module
+    # when it is actually invoked.
+    from .mcp import serve
+    return serve()
+def cmd_refresh(args: argparse.Namespace) -> int:
+    """Re-index every repo in the registry; always returns 0.
+
+    A repo whose ``index_repo`` call raises ``NotADirectoryError`` or
+    ``FileNotFoundError`` is reported on stderr and skipped, so one missing
+    path does not stop the remaining repos from being refreshed.
+    """
+    reg = store.load_registry()
+    if reg:
+        # Walk a snapshot of the entries taken before any re-indexing starts.
+        for m in list(reg.values()):
+            try:
+                indexer.index_repo(m["path"], name=m["name"])
+            except (NotADirectoryError, FileNotFoundError):
+                print(f"skip {m['name']} (path missing: {m['path']})", file=sys.stderr)
+            else:
+                print(f"refreshed {m['name']}")
+    else:
+        print("nothing to refresh")
+    return 0
+def build_parser() -> argparse.ArgumentParser:
+    """Build the ``crumbs`` argument parser with one subparser per subcommand.
+
+    A subcommand is required. Each subparser stores its handler function under
+    ``func`` via ``set_defaults``, so the parsed namespace carries the callable
+    to run.
+    """
+    parser = argparse.ArgumentParser(
+        prog="crumbs",
+        description="Local, token-efficient cross-repo context for LLMs.",
+    )
+    parser.add_argument("--version", action="version", version=f"crumbs {__version__}")
+    commands = parser.add_subparsers(dest="cmd", required=True)
+    # crumbs index [paths ...] [--name NAME]
+    index_cmd = commands.add_parser("index", help="index one or more repos")
+    index_cmd.add_argument("paths", nargs="*", help="repo paths (default: .)")
+    index_cmd.add_argument("--name", help="override repo name")
+    index_cmd.set_defaults(func=cmd_index)
+    # crumbs list [--json]
+    list_cmd = commands.add_parser("list", help="list indexed repos")
+    list_cmd.add_argument("--json", action="store_true")
+    list_cmd.set_defaults(func=cmd_list)
+    # crumbs map REPO [--max-symbols N] [--stats]
+    map_cmd = commands.add_parser("map", help="print compact map of a repo")
+    map_cmd.add_argument("repo", help="repo name, id, or path")
+    map_cmd.add_argument("--max-symbols", type=int, default=12)
+    map_cmd.add_argument("--stats", action="store_true", help="print token estimate to stderr")
+    map_cmd.set_defaults(func=cmd_map)
+    # crumbs search QUERY [--repo REPO] [--limit N] [--json]
+    search_cmd = commands.add_parser("search", help="search symbols across repos")
+    search_cmd.add_argument("query")
+    search_cmd.add_argument("--repo", help="limit to one repo")
+    search_cmd.add_argument("--limit", type=int, default=30)
+    search_cmd.add_argument("--json", action="store_true")
+    search_cmd.set_defaults(func=cmd_search)
+    # crumbs context QUERY [--repo REPO] [--limit N]
+    context_cmd = commands.add_parser("context", help="LLM-ready context slice for a query")
+    context_cmd.add_argument("query")
+    context_cmd.add_argument("--repo", help="limit to one repo")
+    context_cmd.add_argument("--limit", type=int, default=20)
+    context_cmd.set_defaults(func=cmd_context)
+    # crumbs remove REPO
+    remove_cmd = commands.add_parser("remove", help="remove a repo from the index")
+    remove_cmd.add_argument("repo")
+    remove_cmd.set_defaults(func=cmd_remove)
+    # crumbs refresh
+    refresh_cmd = commands.add_parser("refresh", help="re-index all known repos")
+    refresh_cmd.set_defaults(func=cmd_refresh)
+    # crumbs mcp
+    mcp_cmd = commands.add_parser("mcp", help="run as an MCP server over stdio")
+    mcp_cmd.set_defaults(func=cmd_mcp)
+    return parser
+def main(argv: Optional[List[str]] = None) -> int:
+    """Parse ``argv`` and run the selected subcommand.
+
+    ``argv`` is handed to ``parse_args`` unchanged, so ``None`` makes argparse
+    read the process arguments. Returns the subcommand handler's return value.
+    """
+    args = build_parser().parse_args(argv)
+    handler = args.func
+    return handler(args)
+# Allow running this module directly as a script; main()'s return value
+# becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())

crumbs_cli-0.3.0/crumbs/digest.py ADDED Viewed

@@ -0,0 +1,75 @@
+"""Render a compact, token-efficient map of an indexed repo."""
+from __future__ import annotations
+from typing import Any, Dict, List
+from . import store
+def _est_tokens(chars: int) -> int:
+    """Estimate a token count from a character count (~4 chars/token, rounded down)."""
+    tokens, _remainder = divmod(chars, 4)
+    return tokens
+def loc(sym: Dict[str, Any]) -> str:
+    """Compact source location tag, e.g. ``L40-92`` or ``L40``.
+
+    Args:
+        sym: Symbol record; ``line`` is read as the start line and ``end_line``
+            as the optional last line.
+
+    Returns:
+        ``""`` when the symbol has no (truthy) start line, ``L<start>`` when the
+        end line is missing, empty or equal to the start, and
+        ``L<start>-<end>`` otherwise.
+    """
+    start = sym.get("line")
+    if not start:
+        return ""
+    # `or` rather than a .get() default: a record that carries an explicit
+    # None/0 end_line would otherwise render as "L40-None".
+    end = sym.get("end_line") or start
+    return f"L{start}" if end == start else f"L{start}-{end}"
+def repo_map(rid: str, max_symbols_per_file: int = 12) -> str:
+    """Render the stored crumb data of repo ``rid`` as a compact text map.
+
+    Layout: a ``# <name>`` title, an optional git remote/branch line, a
+    file/symbol count, the stored readme text as a ``>`` blockquote (if any),
+    then one ``### <path>`` section per file that has symbols, listing each
+    symbol's signature, line range and doc text.
+
+    Args:
+        rid: Repo id passed to ``store.load_repo``.
+        max_symbols_per_file: Maximum number of symbols listed per file; the
+            remainder is summarised as ``… +N more``. Negative values are
+            treated as 0.
+
+    Returns:
+        The map text, or ``""`` when ``store.load_repo`` returns nothing.
+    """
+    data = store.load_repo(rid)
+    if not data:
+        return ""
+    # A negative limit would turn the slice below into "all but the last N"
+    # and inflate the "+N more" count, so clamp it to zero.
+    limit = max(0, max_symbols_per_file)
+    lines: List[str] = []
+    g = data.get("git", {})
+    lines.append(f"# {data['name']}")
+    meta = []
+    if g.get("remote"):
+        meta.append(g["remote"])
+    if g.get("branch"):
+        meta.append(f"@{g['branch']}")
+    if meta:
+        lines.append(" ".join(meta))
+    st = data["stats"]
+    lines.append(
+        f"_{st['files']} files, {st['symbols']} symbols indexed_"
+    )
+    lines.append("")
+    if data.get("readme"):
+        # Collapse blank-line pairs, then quote every remaining line.
+        excerpt = data["readme"].strip().replace("\n\n", "\n")
+        lines.append("> " + excerpt.replace("\n", "\n> "))
+        lines.append("")
+    for f in data["files"]:
+        syms = f["symbols"]
+        if not syms:
+            # Files without symbols get no section at all.
+            continue
+        lines.append(f"### {f['path']}")
+        for sym in syms[:limit]:
+            # Fall back to "<kind> <name>" when no signature was recorded.
+            sig = sym["sig"] or f"{sym['kind']} {sym['name']}"
+            tag = loc(sym)
+            where = f" [{tag}]" if tag else ""
+            doc = f"  — {sym['doc']}" if sym.get("doc") else ""
+            lines.append(f"- {sig}{where}{doc}")
+        if len(syms) > limit:
+            lines.append(f"- … +{len(syms) - limit} more")
+        lines.append("")
+    return "\n".join(lines)
+def savings(data: Dict[str, Any], map_text: str) -> Dict[str, int]:
+    """Compare the estimated token cost of a map with that of the full source.
+
+    Args:
+        data: Repo data; ``data["stats"]["source_bytes"]`` is read as the
+            source size.
+        map_text: The rendered map whose length is measured.
+
+    Returns:
+        A dict with ``source_tokens``, ``map_tokens`` and ``saved_pct`` (the
+        rounded percentage by which the map is smaller; 0 when the source
+        estimate is 0).
+    """
+    src_tokens = _est_tokens(data["stats"]["source_bytes"])
+    map_tokens = _est_tokens(len(map_text))
+    if src_tokens == 0:
+        # Nothing to compare against; also avoids dividing by zero.
+        pct = 0
+    else:
+        pct = round(100 * (1 - map_tokens / src_tokens))
+    result = {
+        "source_tokens": src_tokens,
+        "map_tokens": map_tokens,
+        "saved_pct": pct,
+    }
+    return result