siftd 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- siftd-0.1.1/.claude-plugin/marketplace.json +28 -0
- siftd-0.1.1/.githooks/pre-commit +2 -0
- siftd-0.1.1/.github/workflows/ci.yml +53 -0
- siftd-0.1.1/.github/workflows/publish.yml +30 -0
- siftd-0.1.1/.gitignore +12 -0
- siftd-0.1.1/CHANGELOG.md +96 -0
- siftd-0.1.1/CLAUDE.md +69 -0
- siftd-0.1.1/LICENSE +21 -0
- siftd-0.1.1/PKG-INFO +88 -0
- siftd-0.1.1/README.md +57 -0
- siftd-0.1.1/bench/build.py +122 -0
- siftd-0.1.1/bench/corpus_analysis.py +233 -0
- siftd-0.1.1/bench/queries.json +114 -0
- siftd-0.1.1/bench/run.py +569 -0
- siftd-0.1.1/bench/strategies/exchange-window.json +11 -0
- siftd-0.1.1/bench/view.py +251 -0
- siftd-0.1.1/docs/cli.md +449 -0
- siftd-0.1.1/plugin/.claude-plugin/plugin.json +15 -0
- siftd-0.1.1/plugin/README.md +89 -0
- siftd-0.1.1/plugin/hooks/hooks.json +36 -0
- siftd-0.1.1/plugin/scripts/session-start.sh +29 -0
- siftd-0.1.1/plugin/scripts/skill-reminder.sh +18 -0
- siftd-0.1.1/plugin/scripts/skill-required.sh +19 -0
- siftd-0.1.1/plugin/skills/siftd/SKILL.md +258 -0
- siftd-0.1.1/plugin/skills/siftd/reference/ask.md +223 -0
- siftd-0.1.1/plugin/skills/siftd/reference/query.md +177 -0
- siftd-0.1.1/plugin/skills/siftd/reference/tags.md +109 -0
- siftd-0.1.1/plugin/skills/siftd-api/SKILL.md +202 -0
- siftd-0.1.1/pyproject.toml +69 -0
- siftd-0.1.1/scripts/examples/bulk-tag.py +113 -0
- siftd-0.1.1/scripts/examples/find-decisions.py +113 -0
- siftd-0.1.1/scripts/examples/weekly-cost.sql +27 -0
- siftd-0.1.1/scripts/gen-cli-docs.sh +40 -0
- siftd-0.1.1/scripts/lint.sh +24 -0
- siftd-0.1.1/src/siftd/__init__.py +54 -0
- siftd-0.1.1/src/siftd/adapters/__init__.py +5 -0
- siftd-0.1.1/src/siftd/adapters/_jsonl.py +33 -0
- siftd-0.1.1/src/siftd/adapters/aider.py +305 -0
- siftd-0.1.1/src/siftd/adapters/claude_code.py +259 -0
- siftd-0.1.1/src/siftd/adapters/codex_cli.py +268 -0
- siftd-0.1.1/src/siftd/adapters/gemini_cli.py +260 -0
- siftd-0.1.1/src/siftd/adapters/registry.py +183 -0
- siftd-0.1.1/src/siftd/api/__init__.py +152 -0
- siftd-0.1.1/src/siftd/api/adapters.py +127 -0
- siftd-0.1.1/src/siftd/api/conversations.py +647 -0
- siftd-0.1.1/src/siftd/api/doctor.py +19 -0
- siftd-0.1.1/src/siftd/api/export.py +313 -0
- siftd-0.1.1/src/siftd/api/file_refs.py +111 -0
- siftd-0.1.1/src/siftd/api/peek.py +21 -0
- siftd-0.1.1/src/siftd/api/resources.py +132 -0
- siftd-0.1.1/src/siftd/api/search.py +173 -0
- siftd-0.1.1/src/siftd/api/stats.py +155 -0
- siftd-0.1.1/src/siftd/api/tools.py +130 -0
- siftd-0.1.1/src/siftd/backfill.py +285 -0
- siftd-0.1.1/src/siftd/builtin_queries/__init__.py +7 -0
- siftd-0.1.1/src/siftd/builtin_queries/cost.sql +27 -0
- siftd-0.1.1/src/siftd/builtin_queries/shell-analysis.sql +104 -0
- siftd-0.1.1/src/siftd/cli.py +1516 -0
- siftd-0.1.1/src/siftd/cli_ask.py +370 -0
- siftd-0.1.1/src/siftd/cli_install.py +216 -0
- siftd-0.1.1/src/siftd/config.py +101 -0
- siftd-0.1.1/src/siftd/doctor/__init__.py +21 -0
- siftd-0.1.1/src/siftd/doctor/checks.py +637 -0
- siftd-0.1.1/src/siftd/doctor/runner.py +130 -0
- siftd-0.1.1/src/siftd/domain/__init__.py +25 -0
- siftd-0.1.1/src/siftd/domain/models.py +77 -0
- siftd-0.1.1/src/siftd/domain/shell_categories.py +120 -0
- siftd-0.1.1/src/siftd/domain/source.py +22 -0
- siftd-0.1.1/src/siftd/embeddings/__init__.py +37 -0
- siftd-0.1.1/src/siftd/embeddings/availability.py +56 -0
- siftd-0.1.1/src/siftd/embeddings/base.py +68 -0
- siftd-0.1.1/src/siftd/embeddings/chunker.py +274 -0
- siftd-0.1.1/src/siftd/embeddings/fastembed_backend.py +41 -0
- siftd-0.1.1/src/siftd/embeddings/indexer.py +159 -0
- siftd-0.1.1/src/siftd/embeddings/ollama_backend.py +83 -0
- siftd-0.1.1/src/siftd/ids.py +38 -0
- siftd-0.1.1/src/siftd/ingestion/__init__.py +12 -0
- siftd-0.1.1/src/siftd/ingestion/discovery.py +23 -0
- siftd-0.1.1/src/siftd/ingestion/orchestration.py +384 -0
- siftd-0.1.1/src/siftd/math.py +11 -0
- siftd-0.1.1/src/siftd/models.py +133 -0
- siftd-0.1.1/src/siftd/output/__init__.py +45 -0
- siftd-0.1.1/src/siftd/output/formatters.py +680 -0
- siftd-0.1.1/src/siftd/output/registry.py +170 -0
- siftd-0.1.1/src/siftd/paths.py +75 -0
- siftd-0.1.1/src/siftd/peek/__init__.py +23 -0
- siftd-0.1.1/src/siftd/peek/reader.py +214 -0
- siftd-0.1.1/src/siftd/peek/scanner.py +142 -0
- siftd-0.1.1/src/siftd/search.py +398 -0
- siftd-0.1.1/src/siftd/storage/__init__.py +43 -0
- siftd-0.1.1/src/siftd/storage/embeddings.py +245 -0
- siftd-0.1.1/src/siftd/storage/filters.py +94 -0
- siftd-0.1.1/src/siftd/storage/fts.py +157 -0
- siftd-0.1.1/src/siftd/storage/queries.py +139 -0
- siftd-0.1.1/src/siftd/storage/schema.sql +287 -0
- siftd-0.1.1/src/siftd/storage/sqlite.py +991 -0
- siftd-0.1.1/src/siftd/storage/tags.py +255 -0
- siftd-0.1.1/tests/conftest.py +303 -0
- siftd-0.1.1/tests/fixtures/.aider.chat.history.md +49 -0
- siftd-0.1.1/tests/fixtures/claude_code_minimal.jsonl +4 -0
- siftd-0.1.1/tests/fixtures/codex_cli_minimal.jsonl +6 -0
- siftd-0.1.1/tests/fixtures/gemini_cli_minimal.json +35 -0
- siftd-0.1.1/tests/test_adapters.py +378 -0
- siftd-0.1.1/tests/test_api.py +431 -0
- siftd-0.1.1/tests/test_chunker.py +70 -0
- siftd-0.1.1/tests/test_cli.py +90 -0
- siftd-0.1.1/tests/test_config.py +186 -0
- siftd-0.1.1/tests/test_derivative.py +249 -0
- siftd-0.1.1/tests/test_doctor.py +384 -0
- siftd-0.1.1/tests/test_embeddings_availability.py +197 -0
- siftd-0.1.1/tests/test_embeddings_storage.py +151 -0
- siftd-0.1.1/tests/test_exclude_active.py +81 -0
- siftd-0.1.1/tests/test_export.py +323 -0
- siftd-0.1.1/tests/test_formatters.py +328 -0
- siftd-0.1.1/tests/test_ingestion.py +357 -0
- siftd-0.1.1/tests/test_integration.py +364 -0
- siftd-0.1.1/tests/test_mmr.py +189 -0
- siftd-0.1.1/tests/test_models.py +48 -0
- siftd-0.1.1/tests/test_peek.py +376 -0
- siftd-0.1.1/tests/test_query_files.py +238 -0
- siftd-0.1.1/tests/test_shell_categorization.py +100 -0
- siftd-0.1.1/uv.lock +898 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "siftd",
|
|
3
|
+
"owner": {
|
|
4
|
+
"name": "kaygee",
|
|
5
|
+
"url": "https://github.com/kgruel"
|
|
6
|
+
},
|
|
7
|
+
"metadata": {
|
|
8
|
+
"description": "Siftd plugin marketplace — conversation research for Claude Code agents",
|
|
9
|
+
"version": "1.0.0"
|
|
10
|
+
},
|
|
11
|
+
"plugins": [
|
|
12
|
+
{
|
|
13
|
+
"name": "siftd",
|
|
14
|
+
"source": "./plugin",
|
|
15
|
+
"description": "Search and research past conversations from CLI coding sessions",
|
|
16
|
+
"version": "1.0.0",
|
|
17
|
+
"author": {
|
|
18
|
+
"name": "kaygee",
|
|
19
|
+
"url": "https://github.com/kgruel"
|
|
20
|
+
},
|
|
21
|
+
"repository": "https://github.com/kgruel/siftd",
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"keywords": ["conversations", "search", "research", "history", "analytics"],
|
|
24
|
+
"category": "productivity",
|
|
25
|
+
"tags": ["research", "history", "memory"]
|
|
26
|
+
}
|
|
27
|
+
]
|
|
28
|
+
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v4
|
|
21
|
+
with:
|
|
22
|
+
version: "latest"
|
|
23
|
+
|
|
24
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
25
|
+
run: uv python install ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: uv sync --extra dev
|
|
29
|
+
|
|
30
|
+
- name: Run lint
|
|
31
|
+
run: uv run ruff check src/
|
|
32
|
+
|
|
33
|
+
- name: Run tests
|
|
34
|
+
run: uv run pytest tests/ -v --tb=short
|
|
35
|
+
|
|
36
|
+
test-with-embeddings:
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
steps:
|
|
39
|
+
- uses: actions/checkout@v4
|
|
40
|
+
|
|
41
|
+
- name: Install uv
|
|
42
|
+
uses: astral-sh/setup-uv@v4
|
|
43
|
+
with:
|
|
44
|
+
version: "latest"
|
|
45
|
+
|
|
46
|
+
- name: Set up Python
|
|
47
|
+
run: uv python install 3.12
|
|
48
|
+
|
|
49
|
+
- name: Install dependencies with embeddings
|
|
50
|
+
run: uv sync --extra dev --extra embed
|
|
51
|
+
|
|
52
|
+
- name: Run tests with embeddings
|
|
53
|
+
run: uv run pytest tests/ -v --tb=short
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
environment: pypi
|
|
12
|
+
permissions:
|
|
13
|
+
id-token: write # Required for trusted publishing
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Install uv
|
|
19
|
+
uses: astral-sh/setup-uv@v4
|
|
20
|
+
with:
|
|
21
|
+
version: "latest"
|
|
22
|
+
|
|
23
|
+
- name: Set up Python
|
|
24
|
+
run: uv python install 3.12
|
|
25
|
+
|
|
26
|
+
- name: Build package
|
|
27
|
+
run: uv build
|
|
28
|
+
|
|
29
|
+
- name: Publish to PyPI
|
|
30
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
siftd-0.1.1/.gitignore
ADDED
siftd-0.1.1/CHANGELOG.md
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.1] - 2026-01-29
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- `siftd install embed` — Convenience command to auto-detect installation method and install embedding dependencies
|
|
15
|
+
- `:var` parameterized syntax for query files — safe quoting via sqlite3, alongside existing `$var` text substitution
|
|
16
|
+
- `ADAPTER_INTERFACE_VERSION = 1` — Required attribute for all adapters, enables future interface migrations
|
|
17
|
+
- `ON DELETE CASCADE` on schema foreign keys — Child records now cascade on parent delete
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- Adapter `discover()` function now requires `locations` keyword argument (fallback removed)
|
|
22
|
+
- Error messages for missing `[embed]` extra now reference `siftd install embed` and suggest FTS5 alternative
|
|
23
|
+
|
|
24
|
+
### Removed
|
|
25
|
+
|
|
26
|
+
- `Conversation.default_model` field — Was defined but never populated or used
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
|
|
30
|
+
- Type checker (`ty`) configuration for optional dependencies — No longer blocks commits
|
|
31
|
+
- `bench/corpus_analysis.py` type annotation bug
|
|
32
|
+
|
|
33
|
+
## [0.1.0] - 2026-01-28
|
|
34
|
+
|
|
35
|
+
Initial public release.
|
|
36
|
+
|
|
37
|
+
### Added
|
|
38
|
+
|
|
39
|
+
#### Core Features
|
|
40
|
+
- **Ingestion** — Aggregate conversation logs from multiple CLI coding tools
|
|
41
|
+
- **FTS5 Search** — Full-text search across all conversations via `siftd query -s`
|
|
42
|
+
- **Semantic Search** — Vector similarity search via `siftd ask` (requires `[embed]` extra)
|
|
43
|
+
- **Tagging** — Apply tags to conversations, workspaces, and tool calls for organization
|
|
44
|
+
|
|
45
|
+
#### Adapters
|
|
46
|
+
- Claude Code (Anthropic) — `~/.claude/projects`
|
|
47
|
+
- Aider — `~/.aider`
|
|
48
|
+
- Gemini CLI (Google) — `~/.gemini/tmp`
|
|
49
|
+
- Codex CLI (OpenAI) — `~/.codex/sessions`
|
|
50
|
+
- Drop-in adapter support via `~/.config/siftd/adapters/`
|
|
51
|
+
- Entry-point adapter registration for pip-installable adapters
|
|
52
|
+
|
|
53
|
+
#### CLI Commands
|
|
54
|
+
- `siftd ingest` — Ingest logs from all discovered sources
|
|
55
|
+
- `siftd status` — Show database statistics
|
|
56
|
+
- `siftd query` — List/filter conversations with flexible filters
|
|
57
|
+
- `siftd ask` — Semantic search over conversations (optional `[embed]` extra)
|
|
58
|
+
- `siftd tag` — Apply or remove tags on entities
|
|
59
|
+
- `siftd tags` — List, rename, or delete tags
|
|
60
|
+
- `siftd tools` — Summarize tool usage by category
|
|
61
|
+
- `siftd export` — Export conversations for PR review workflows
|
|
62
|
+
- `siftd doctor` — Run health checks and maintenance
|
|
63
|
+
- `siftd peek` — Inspect live sessions from disk (bypasses SQLite)
|
|
64
|
+
- `siftd path` — Show XDG paths
|
|
65
|
+
- `siftd config` — View or modify configuration
|
|
66
|
+
- `siftd adapters` — List discovered adapters
|
|
67
|
+
- `siftd copy` — Copy built-in resources for customization
|
|
68
|
+
- `siftd backfill` — Backfill derived data from existing records
|
|
69
|
+
|
|
70
|
+
#### Query System
|
|
71
|
+
- User-defined SQL queries via `~/.config/siftd/queries/*.sql`
|
|
72
|
+
- `$var` syntax for text substitution
|
|
73
|
+
- Built-in queries: `cost.sql`, `shell-analysis.sql`
|
|
74
|
+
|
|
75
|
+
#### Python API
|
|
76
|
+
- `siftd.api.list_conversations()` — Query conversations with filters
|
|
77
|
+
- `siftd.api.get_conversation()` — Get full conversation detail
|
|
78
|
+
- `siftd.api.export_conversations()` — Export for external tools
|
|
79
|
+
- `siftd.api.hybrid_search()` — Combined FTS5 + semantic search
|
|
80
|
+
|
|
81
|
+
#### Storage
|
|
82
|
+
- SQLite with FTS5 for full-text search
|
|
83
|
+
- ULID primary keys throughout
|
|
84
|
+
- Normalized schema with proper foreign key constraints
|
|
85
|
+
- Extensible `*_attributes` tables for variable metadata
|
|
86
|
+
|
|
87
|
+
#### Developer Experience
|
|
88
|
+
- XDG Base Directory compliance for paths
|
|
89
|
+
- `--db PATH` override for all commands
|
|
90
|
+
- JSON output mode for scripting (`--json`)
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
[Unreleased]: https://github.com/kgruel/siftd/compare/v0.1.1...HEAD
|
|
95
|
+
[0.1.1]: https://github.com/kgruel/siftd/compare/v0.1.0...v0.1.1
|
|
96
|
+
[0.1.0]: https://github.com/kgruel/siftd/releases/tag/v0.1.0
|
siftd-0.1.1/CLAUDE.md
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Personal LLM usage analytics. Ingests conversation logs from CLI coding tools, stores in SQLite, queries via FTS5 and user-defined SQL files.
|
|
2
|
+
|
|
3
|
+
## Install
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
uv pip install . # core (FTS5 search, tags, queries)
|
|
7
|
+
uv pip install .[embed] # with semantic search (siftd ask)
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Architecture
|
|
11
|
+
|
|
12
|
+
Core loop: **Ingest → Store → Query**
|
|
13
|
+
|
|
14
|
+
- **Adapters** own parsing and raw format knowledge. Storage is adapter-agnostic.
|
|
15
|
+
- **Storage** is normalized SQLite. Schema is fixed for core entities, extensible via `*_attributes` tables.
|
|
16
|
+
- **Queries** are user-defined `.sql` files with `$var` substitution. The system is a data platform, not a reporting tool.
|
|
17
|
+
|
|
18
|
+
## Design Principles
|
|
19
|
+
|
|
20
|
+
1. **Manual first, automate when patterns emerge** — labels are user-applied, enrichment is deferred, cost is approximate. Don't build automation until real usage reveals what's worth automating.
|
|
21
|
+
2. **Query-time computation over stored redundancy** — cost is derived via JOIN, not pre-computed. Avoids stale data and schema coupling.
|
|
22
|
+
3. **Attributes for variable metadata** — when the field set varies by provider or adapter, use key/value `*_attributes` tables instead of adding nullable columns.
|
|
23
|
+
4. **Adapters are the parsing boundary** — each adapter knows its raw format, dedup strategy, and provider source. Everything downstream is normalized.
|
|
24
|
+
5. **Approximate is fine when labeled** — approximate cost is useful. Don't over-engineer precision until billing context demands it.
|
|
25
|
+
|
|
26
|
+
## Branching
|
|
27
|
+
|
|
28
|
+
- Work in a `wip/<topic>` branch during sessions. Handoff updates, test scaffolding, and iterative commits go here.
|
|
29
|
+
- Subtask merges target `main` independently (they're isolated worktrees).
|
|
30
|
+
- Merge/rebase WIP to main when the work is ready to ship.
|
|
31
|
+
|
|
32
|
+
## Conventions
|
|
33
|
+
|
|
34
|
+
- `commit=False` default on storage functions; caller controls transaction boundaries
|
|
35
|
+
- ULIDs for all primary keys
|
|
36
|
+
- XDG paths: data `~/.local/share/siftd`, config `~/.config/siftd`
|
|
37
|
+
- New CLI commands follow existing argparse patterns in `src/siftd/cli.py`
|
|
38
|
+
- New adapters implement `can_handle(source)`, `parse(source)`, `discover(locations=...)` (the `locations` keyword is required), and set `HARNESS_SOURCE` and `ADAPTER_INTERFACE_VERSION`
|
|
39
|
+
- Queries go in `~/.config/siftd/queries/*.sql`; use `$var` for text substitution or `:var` for safely quoted parameters
|
|
40
|
+
|
|
41
|
+
## Agent Memory (siftd)
|
|
42
|
+
|
|
43
|
+
Search past conversations:
|
|
44
|
+
```
|
|
45
|
+
siftd ask "your query" # semantic search
|
|
46
|
+
siftd ask -w projectname "query" # filter by workspace
|
|
47
|
+
siftd query <id> # drill down into conversation
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Tag useful findings:
|
|
51
|
+
```
|
|
52
|
+
siftd tag <id> research:<topic> # bookmark for later
|
|
53
|
+
siftd query -l research:<topic> # retrieve tagged
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Tag conventions:
|
|
57
|
+
- `research:*` — Investigation findings worth preserving
|
|
58
|
+
- `useful:*` — General bookmarks (useful:pattern, useful:example)
|
|
59
|
+
- `decision:*` — Key architectural/design decisions
|
|
60
|
+
- `handoff:update` — Sessions that modified HANDOFF.md
|
|
61
|
+
- `rationale:*` — Why we chose X over Y
|
|
62
|
+
- `genesis:*` — First mention of a concept
|
|
63
|
+
|
|
64
|
+
When you find something useful via `siftd ask`, tag it before moving on.
|
|
65
|
+
|
|
66
|
+
Before ending a session that updates HANDOFF.md:
|
|
67
|
+
```
|
|
68
|
+
siftd tag --last handoff:update
|
|
69
|
+
```
|
siftd-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 siftd contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
siftd-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: siftd
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Personal LLM usage analytics. Ingest conversation logs from CLI coding tools, query via FTS5 and semantic search.
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: analytics,claude,conversation,llm,search
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Environment :: Console
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
14
|
+
Requires-Python: >=3.12
|
|
15
|
+
Requires-Dist: httpx
|
|
16
|
+
Requires-Dist: loguru
|
|
17
|
+
Requires-Dist: pyyaml
|
|
18
|
+
Requires-Dist: tomlkit
|
|
19
|
+
Requires-Dist: tqdm
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
23
|
+
Requires-Dist: ty; extra == 'dev'
|
|
24
|
+
Provides-Extra: embed
|
|
25
|
+
Requires-Dist: fastembed; extra == 'embed'
|
|
26
|
+
Requires-Dist: huggingface-hub; extra == 'embed'
|
|
27
|
+
Requires-Dist: numpy; extra == 'embed'
|
|
28
|
+
Requires-Dist: onnxruntime; extra == 'embed'
|
|
29
|
+
Requires-Dist: tokenizers; extra == 'embed'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# siftd
|
|
33
|
+
|
|
34
|
+
Ingest and query conversation logs from LLM coding tools. Stores in SQLite, searches via FTS5 and embeddings.
|
|
35
|
+
|
|
36
|
+
Warning: This project is under active development and breaking changes may occur.
|
|
37
|
+
|
|
38
|
+
## Install
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install siftd # core (query, tags, ingest)
|
|
42
|
+
pip install siftd[embed] # with semantic search
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
# Ingest logs from Claude Code, Gemini CLI, Codex, Aider
|
|
49
|
+
siftd ingest
|
|
50
|
+
|
|
51
|
+
# List recent conversations
|
|
52
|
+
siftd query -w . # current workspace
|
|
53
|
+
siftd query --since 7d # last week
|
|
54
|
+
|
|
55
|
+
# Semantic search (requires [embed])
|
|
56
|
+
siftd ask "how did I handle auth"
|
|
57
|
+
siftd ask -w myproject "error handling"
|
|
58
|
+
|
|
59
|
+
# Tag and filter
|
|
60
|
+
siftd tag 01JGK3 decision:auth
|
|
61
|
+
siftd query -l decision:
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Supported Tools
|
|
65
|
+
|
|
66
|
+
- Claude Code
|
|
67
|
+
- Gemini CLI
|
|
68
|
+
- Codex CLI
|
|
69
|
+
- Aider
|
|
70
|
+
|
|
71
|
+
## Commands
|
|
72
|
+
|
|
73
|
+
| Command | Description |
|
|
74
|
+
|---------|-------------|
|
|
75
|
+
| `ingest` | Import conversation logs |
|
|
76
|
+
| `query` | List/filter conversations |
|
|
77
|
+
| `ask` | Semantic search |
|
|
78
|
+
| `tag` | Apply tags to conversations |
|
|
79
|
+
| `peek` | View conversation contents |
|
|
80
|
+
| `doctor` | Check configuration |
|
|
81
|
+
|
|
82
|
+
## Documentation
|
|
83
|
+
|
|
84
|
+
- [CLI Reference](docs/cli.md)
|
|
85
|
+
|
|
86
|
+
## License
|
|
87
|
+
|
|
88
|
+
MIT
|
siftd-0.1.1/README.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# siftd
|
|
2
|
+
|
|
3
|
+
Ingest and query conversation logs from LLM coding tools. Stores in SQLite, searches via FTS5 and embeddings.
|
|
4
|
+
|
|
5
|
+
Warning: This project is under active development and breaking changes may occur.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install siftd # core (query, tags, ingest)
|
|
11
|
+
pip install siftd[embed] # with semantic search
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Ingest logs from Claude Code, Gemini CLI, Codex, Aider
|
|
18
|
+
siftd ingest
|
|
19
|
+
|
|
20
|
+
# List recent conversations
|
|
21
|
+
siftd query -w . # current workspace
|
|
22
|
+
siftd query --since 7d # last week
|
|
23
|
+
|
|
24
|
+
# Semantic search (requires [embed])
|
|
25
|
+
siftd ask "how did I handle auth"
|
|
26
|
+
siftd ask -w myproject "error handling"
|
|
27
|
+
|
|
28
|
+
# Tag and filter
|
|
29
|
+
siftd tag 01JGK3 decision:auth
|
|
30
|
+
siftd query -l decision:
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Supported Tools
|
|
34
|
+
|
|
35
|
+
- Claude Code
|
|
36
|
+
- Gemini CLI
|
|
37
|
+
- Codex CLI
|
|
38
|
+
- Aider
|
|
39
|
+
|
|
40
|
+
## Commands
|
|
41
|
+
|
|
42
|
+
| Command | Description |
|
|
43
|
+
|---------|-------------|
|
|
44
|
+
| `ingest` | Import conversation logs |
|
|
45
|
+
| `query` | List/filter conversations |
|
|
46
|
+
| `ask` | Semantic search |
|
|
47
|
+
| `tag` | Apply tags to conversations |
|
|
48
|
+
| `peek` | View conversation contents |
|
|
49
|
+
| `doctor` | Check configuration |
|
|
50
|
+
|
|
51
|
+
## Documentation
|
|
52
|
+
|
|
53
|
+
- [CLI Reference](docs/cli.md)
|
|
54
|
+
|
|
55
|
+
## License
|
|
56
|
+
|
|
57
|
+
MIT
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Build an embeddings database from a strategy file.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python bench/build.py --strategy bench/strategies/exchange-window.json
|
|
5
|
+
python bench/build.py --strategy bench/strategies/exchange-window.json --output /tmp/test.db
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import argparse
|
|
9
|
+
import json
|
|
10
|
+
import sqlite3
|
|
11
|
+
import sys
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
# bench/ is not a package — add src/ to path so siftd imports work
|
|
16
|
+
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
|
17
|
+
|
|
18
|
+
from siftd.embeddings.fastembed_backend import FastEmbedBackend
|
|
19
|
+
from siftd.paths import data_dir
|
|
20
|
+
from siftd.storage.embeddings import open_embeddings_db, store_chunk, set_meta
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def extract_chunks(main_conn: sqlite3.Connection, params: dict) -> list[dict]:
    """Pull exchange-window chunks out of the main database.

    Args:
        main_conn: Open connection to the main siftd database; passed through
            unchanged to ``extract_exchange_window_chunks``.
        params: Strategy parameters. ``target_tokens`` (default 256),
            ``max_tokens`` (default 512) and ``overlap_tokens`` (default 25)
            are read; other keys are not consulted here.

    Returns:
        The result of ``extract_exchange_window_chunks`` (annotated as a list
        of chunk dicts).
    """
    from fastembed import TextEmbedding
    from siftd.embeddings.chunker import extract_exchange_window_chunks

    # (parameter name, fallback) pairs, looked up in the strategy params.
    defaults = (("target_tokens", 256), ("max_tokens", 512), ("overlap_tokens", 25))
    token_limits = {key: params.get(key, fallback) for key, fallback in defaults}

    # Only the model's tokenizer is used here; this function embeds nothing.
    tokenizer = TextEmbedding("BAAI/bge-small-en-v1.5").model.tokenizer

    return extract_exchange_window_chunks(main_conn, tokenizer, **token_limits)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def build(strategy_path: Path, output_path: Path, db_path: Path) -> None:
    """Build an embeddings database from a strategy file.

    Steps: load the strategy JSON, extract chunks from the main database,
    embed the chunk texts in batches of 64, then write backend metadata and
    every chunk/embedding pair to the output database and commit once.

    Args:
        strategy_path: JSON strategy file; its ``"params"`` object is passed
            to ``extract_chunks``.
        output_path: Path of the embeddings database to write.
        db_path: Path of the main siftd database to read chunks from.

    Raises:
        KeyError: If the strategy file has no ``"params"`` key.
        ValueError: If the backend returns a different number of embeddings
            than there are chunks (nothing is written in that case).
    """
    from contextlib import closing

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    strategy = json.loads(strategy_path.read_text(encoding="utf-8"))
    params = strategy["params"]

    # Extract chunks from main DB; closing() releases the connection even if
    # extraction raises.
    with closing(sqlite3.connect(db_path)) as main_conn:
        chunks = extract_chunks(main_conn, params)

    if not chunks:
        print("No chunks extracted. Check strategy params and main DB.")
        return

    print(f"Extracted {len(chunks)} chunks")

    # Embed in batches
    backend = FastEmbedBackend()
    batch_size = 64
    total_batches = (len(chunks) + batch_size - 1) // batch_size
    all_embeddings: list[list[float]] = []

    for batch_no, start in enumerate(range(0, len(chunks), batch_size), start=1):
        batch_texts = [c["text"] for c in chunks[start : start + batch_size]]
        all_embeddings.extend(backend.embed(batch_texts))
        print(f" Embedded batch {batch_no}/{total_batches}")

    # Fail before touching the output DB rather than letting zip() silently
    # drop chunks that have no embedding.
    if len(all_embeddings) != len(chunks):
        raise ValueError(
            f"Backend returned {len(all_embeddings)} embeddings for {len(chunks)} chunks"
        )

    # Store in embeddings DB; the connection is closed even if a write fails,
    # and nothing is committed unless every chunk was stored.
    with closing(open_embeddings_db(output_path)) as embed_conn:
        set_meta(embed_conn, "backend", backend.model)
        set_meta(embed_conn, "dimension", str(backend.dimension))

        for chunk, embedding in zip(chunks, all_embeddings, strict=True):
            store_chunk(
                embed_conn,
                chunk["conversation_id"],
                chunk["chunk_type"],
                chunk["text"],
                embedding,
                token_count=chunk.get("token_count"),
            )

        embed_conn.commit()

    print(f"Built {len(chunks)} chunks → {output_path}")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def main():
    """Command-line entry point: resolve paths, then call ``build``.

    Options:
        --strategy: Required path to the strategy JSON file.
        --output: Optional embeddings DB path. When omitted, the file is
            placed under ``data_dir()`` as ``embeddings_<name>_<timestamp>.db``,
            where ``<name>`` is the strategy's ``"name"`` value or, failing
            that, the strategy file's stem.
        --db: Optional path to the main database; defaults to
            ``data_dir() / "siftd.db"``.

    Exits with status 1 after printing a message when the strategy file or
    the main database does not exist.
    """
    cli = argparse.ArgumentParser(description="Build embeddings DB from a strategy file")
    cli.add_argument("--strategy", type=Path, required=True, help="Path to strategy JSON file")
    cli.add_argument("--output", type=Path, default=None, help="Output embeddings DB path")
    cli.add_argument("--db", type=Path, default=None, help="Path to main siftd.db")
    opts = cli.parse_args()

    strategy_file = opts.strategy
    if not strategy_file.exists():
        print(f"Strategy file not found: {strategy_file}")
        sys.exit(1)

    # Main database: explicit --db wins, otherwise the default data directory.
    main_db = opts.db if opts.db else data_dir() / "siftd.db"
    if not main_db.exists():
        print(f"Main DB not found: {main_db}")
        sys.exit(1)

    # Output database: explicit --output wins, otherwise derive a timestamped name.
    target = opts.output
    if not target:
        spec = json.loads(strategy_file.read_text())
        label = spec.get("name", strategy_file.stem)
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        target = data_dir() / f"embeddings_{label}_{stamp}.db"

    build(strategy_file, target, main_db)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
if __name__ == "__main__":
|
|
122
|
+
main()
|