ietf-llm 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ietf_llm-0.5.0/LICENSE.md +19 -0
- ietf_llm-0.5.0/MANIFEST.in +1 -0
- ietf_llm-0.5.0/PKG-INFO +504 -0
- ietf_llm-0.5.0/README.md +468 -0
- ietf_llm-0.5.0/ietf_llm/__init__.py +1 -0
- ietf_llm-0.5.0/ietf_llm/__main__.py +514 -0
- ietf_llm-0.5.0/ietf_llm/config.py +106 -0
- ietf_llm-0.5.0/ietf_llm/data/skill/SKILL.md +346 -0
- ietf_llm-0.5.0/ietf_llm/digest/__init__.py +118 -0
- ietf_llm-0.5.0/ietf_llm/digest/events.py +25 -0
- ietf_llm-0.5.0/ietf_llm/digest/helpers.py +43 -0
- ietf_llm-0.5.0/ietf_llm/digest/index.py +170 -0
- ietf_llm-0.5.0/ietf_llm/digest/issues.py +167 -0
- ietf_llm-0.5.0/ietf_llm/digest/overview.py +493 -0
- ietf_llm-0.5.0/ietf_llm/digest/query.py +382 -0
- ietf_llm-0.5.0/ietf_llm/digest/summarizer.py +115 -0
- ietf_llm-0.5.0/ietf_llm/digest/threads.py +111 -0
- ietf_llm-0.5.0/ietf_llm/digest/timeline.py +522 -0
- ietf_llm-0.5.0/ietf_llm/embeddings/__init__.py +55 -0
- ietf_llm-0.5.0/ietf_llm/embeddings/chunking.py +518 -0
- ietf_llm-0.5.0/ietf_llm/embeddings/models.py +145 -0
- ietf_llm-0.5.0/ietf_llm/embeddings/search.py +437 -0
- ietf_llm-0.5.0/ietf_llm/embeddings/snippet.py +229 -0
- ietf_llm-0.5.0/ietf_llm/embeddings/storage.py +278 -0
- ietf_llm-0.5.0/ietf_llm/export.py +355 -0
- ietf_llm-0.5.0/ietf_llm/export_cli.py +143 -0
- ietf_llm-0.5.0/ietf_llm/freshness.py +106 -0
- ietf_llm-0.5.0/ietf_llm/gather/__init__.py +0 -0
- ietf_llm-0.5.0/ietf_llm/gather/ballots.py +420 -0
- ietf_llm-0.5.0/ietf_llm/gather/charter.py +81 -0
- ietf_llm-0.5.0/ietf_llm/gather/citations.py +226 -0
- ietf_llm-0.5.0/ietf_llm/gather/datatracker.py +156 -0
- ietf_llm-0.5.0/ietf_llm/gather/datatracker_history.py +300 -0
- ietf_llm-0.5.0/ietf_llm/gather/draft_authors.py +184 -0
- ietf_llm-0.5.0/ietf_llm/gather/drafts.py +285 -0
- ietf_llm-0.5.0/ietf_llm/gather/github.py +281 -0
- ietf_llm-0.5.0/ietf_llm/gather/github_users.py +271 -0
- ietf_llm-0.5.0/ietf_llm/gather/issue_files.py +352 -0
- ietf_llm-0.5.0/ietf_llm/gather/mail_threads.py +522 -0
- ietf_llm-0.5.0/ietf_llm/gather/mbox.py +448 -0
- ietf_llm-0.5.0/ietf_llm/gather/meetings.py +333 -0
- ietf_llm-0.5.0/ietf_llm/gather/pdf_extract.py +281 -0
- ietf_llm-0.5.0/ietf_llm/gather/session_polls.py +267 -0
- ietf_llm-0.5.0/ietf_llm/gather/transcript_context.py +216 -0
- ietf_llm-0.5.0/ietf_llm/gather/transcripts.py +122 -0
- ietf_llm-0.5.0/ietf_llm/mcp_server.py +1812 -0
- ietf_llm-0.5.0/ietf_llm/notebooklm.py +167 -0
- ietf_llm-0.5.0/ietf_llm/paths.py +252 -0
- ietf_llm-0.5.0/ietf_llm/people.py +914 -0
- ietf_llm-0.5.0/ietf_llm/positions.py +833 -0
- ietf_llm-0.5.0/ietf_llm/py.typed +0 -0
- ietf_llm-0.5.0/ietf_llm/search_cli.py +92 -0
- ietf_llm-0.5.0/ietf_llm/skill_install.py +81 -0
- ietf_llm-0.5.0/ietf_llm/text.py +61 -0
- ietf_llm-0.5.0/ietf_llm/utils.py +297 -0
- ietf_llm-0.5.0/ietf_llm.egg-info/PKG-INFO +504 -0
- ietf_llm-0.5.0/ietf_llm.egg-info/SOURCES.txt +91 -0
- ietf_llm-0.5.0/ietf_llm.egg-info/dependency_links.txt +1 -0
- ietf_llm-0.5.0/ietf_llm.egg-info/entry_points.txt +5 -0
- ietf_llm-0.5.0/ietf_llm.egg-info/requires.txt +24 -0
- ietf_llm-0.5.0/ietf_llm.egg-info/top_level.txt +1 -0
- ietf_llm-0.5.0/pyproject.toml +78 -0
- ietf_llm-0.5.0/setup.cfg +4 -0
- ietf_llm-0.5.0/tests/test_ballots.py +326 -0
- ietf_llm-0.5.0/tests/test_citations.py +200 -0
- ietf_llm-0.5.0/tests/test_config.py +121 -0
- ietf_llm-0.5.0/tests/test_datatracker_history.py +328 -0
- ietf_llm-0.5.0/tests/test_digest_helpers.py +145 -0
- ietf_llm-0.5.0/tests/test_digest_index.py +76 -0
- ietf_llm-0.5.0/tests/test_digest_issues.py +151 -0
- ietf_llm-0.5.0/tests/test_digest_query.py +283 -0
- ietf_llm-0.5.0/tests/test_digest_threads.py +117 -0
- ietf_llm-0.5.0/tests/test_draft_authors.py +190 -0
- ietf_llm-0.5.0/tests/test_embeddings_chunking.py +253 -0
- ietf_llm-0.5.0/tests/test_export.py +242 -0
- ietf_llm-0.5.0/tests/test_extra_inputs.py +171 -0
- ietf_llm-0.5.0/tests/test_freshness.py +117 -0
- ietf_llm-0.5.0/tests/test_github_canonical.py +59 -0
- ietf_llm-0.5.0/tests/test_github_users.py +307 -0
- ietf_llm-0.5.0/tests/test_issue_files.py +523 -0
- ietf_llm-0.5.0/tests/test_mail_threads.py +537 -0
- ietf_llm-0.5.0/tests/test_mcp_server.py +597 -0
- ietf_llm-0.5.0/tests/test_overview.py +304 -0
- ietf_llm-0.5.0/tests/test_pdf_extract.py +218 -0
- ietf_llm-0.5.0/tests/test_people.py +609 -0
- ietf_llm-0.5.0/tests/test_positions.py +469 -0
- ietf_llm-0.5.0/tests/test_read_topic.py +301 -0
- ietf_llm-0.5.0/tests/test_search_filters.py +824 -0
- ietf_llm-0.5.0/tests/test_session_polls.py +335 -0
- ietf_llm-0.5.0/tests/test_skill_install.py +107 -0
- ietf_llm-0.5.0/tests/test_snippet.py +242 -0
- ietf_llm-0.5.0/tests/test_timeline.py +284 -0
- ietf_llm-0.5.0/tests/test_transcript_context.py +151 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) Mark Nottingham
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
|
11
|
+
all copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
19
|
+
THE SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
include ietf_llm/py.typed
|
ietf_llm-0.5.0/PKG-INFO
ADDED
|
@@ -0,0 +1,504 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ietf-llm
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Maintain a local, LLM-queryable corpus of an IETF Working Group's public record (drafts, mailing list, GitHub issues, meetings), with an MCP server, semantic search, and NotebookLM export.
|
|
5
|
+
Author-email: Mark Nottingham <mnot@mnot.net>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: homepage, https://github.com/mnot/ietf-llm
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE.md
|
|
13
|
+
Requires-Dist: requests
|
|
14
|
+
Requires-Dist: beautifulsoup4
|
|
15
|
+
Requires-Dist: pypdf
|
|
16
|
+
Requires-Dist: google-auth
|
|
17
|
+
Requires-Dist: google-auth-oauthlib
|
|
18
|
+
Requires-Dist: google-auth-httplib2
|
|
19
|
+
Requires-Dist: llm
|
|
20
|
+
Requires-Dist: llm-sentence-transformers
|
|
21
|
+
Requires-Dist: numpy
|
|
22
|
+
Requires-Dist: mcp
|
|
23
|
+
Provides-Extra: certs
|
|
24
|
+
Requires-Dist: pip-system-certs; extra == "certs"
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: mypy; extra == "dev"
|
|
27
|
+
Requires-Dist: black; extra == "dev"
|
|
28
|
+
Requires-Dist: pylint; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest-md; extra == "dev"
|
|
31
|
+
Requires-Dist: validate-pyproject; extra == "dev"
|
|
32
|
+
Requires-Dist: build; extra == "dev"
|
|
33
|
+
Requires-Dist: types-requests; extra == "dev"
|
|
34
|
+
Requires-Dist: types-beautifulsoup4; extra == "dev"
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
|
|
37
|
+
# ietf-llm
|
|
38
|
+
|
|
39
|
+
Maintain a local, queryable corpus of an [IETF](https://www.ietf.org/)
|
|
40
|
+
Working Group's public record — charter, drafts, RFCs, meeting minutes,
|
|
41
|
+
slides, transcripts, mailing list archives, and GitHub issues — for use
|
|
42
|
+
with LLM-based tools.
|
|
43
|
+
|
|
44
|
+
Two supported workflows:
|
|
45
|
+
|
|
46
|
+
1. **[Use it as an MCP server](#1-use-as-an-mcp-server)** — register
|
|
47
|
+
`ietf-llm-mcp` with Claude, Codex, Gemini, Cursor, Zed, etc. and
|
|
48
|
+
ask questions across any WG you've gathered.
|
|
49
|
+
2. **[Use it with NotebookLM](#2-use-with-notebooklm)** — export the
|
|
50
|
+
gathered corpus as a directory of clean text files (or push directly
|
|
51
|
+
to NotebookLM Enterprise) and ingest it as a notebook source set.
|
|
52
|
+
|
|
53
|
+
> Also works with [IRTF](https://irtf.org/) Research Groups. Pass the
|
|
54
|
+
> RG's shortname (e.g. `cfrg`, `hrpc`, `pearg`) anywhere this README
|
|
55
|
+
> says `<wg>`.
|
|
56
|
+
|
|
57
|
+
> **Note:** This package was previously published as `ietf-notebook`.
|
|
58
|
+
> That distribution is deprecated. See
|
|
59
|
+
> [Migrating from `ietf-notebook`](#migrating-from-ietf-notebook).
|
|
60
|
+
|
|
61
|
+
## Table of contents
|
|
62
|
+
|
|
63
|
+
- [Installation](#installation)
|
|
64
|
+
- [1. Use as an MCP server](#1-use-as-an-mcp-server)
|
|
65
|
+
- [Register the server](#register-the-server)
|
|
66
|
+
- [Gather a Working Group](#gather-a-working-group)
|
|
67
|
+
- [Ask your agent](#ask-your-agent)
|
|
68
|
+
- [Updating](#updating)
|
|
69
|
+
- [2. Use with NotebookLM](#2-use-with-notebooklm)
|
|
70
|
+
- [Gather a Working Group](#gather-a-working-group-1)
|
|
71
|
+
- [Export to a local directory](#export-to-a-local-directory)
|
|
72
|
+
- [Export to NotebookLM Enterprise](#export-to-notebooklm-enterprise)
|
|
73
|
+
- [Reference](#reference)
|
|
74
|
+
- [Commands](#commands)
|
|
75
|
+
- [Gather options](#gather-options)
|
|
76
|
+
- [Semantic search from the CLI](#semantic-search-from-the-cli)
|
|
77
|
+
- [Digest files](#digest-files)
|
|
78
|
+
- [MCP tools](#mcp-tools)
|
|
79
|
+
- [Migrating from `ietf-notebook`](#migrating-from-ietf-notebook)
|
|
80
|
+
- [Contributing](#contributing)
|
|
81
|
+
|
|
82
|
+
## Installation
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pipx install ietf-llm
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Behind a corporate firewall with TLS interception? Install with the
|
|
89
|
+
`certs` extra:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
pipx install 'ietf-llm[certs]'
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## 1. Use as an MCP server
|
|
98
|
+
|
|
99
|
+
`ietf-llm-mcp` is a stdio [Model Context Protocol](https://modelcontextprotocol.io/)
|
|
100
|
+
server that exposes the local corpus to any MCP-capable agent. Set up
|
|
101
|
+
once, gather each WG you care about once, then ask questions
|
|
102
|
+
indefinitely.
|
|
103
|
+
|
|
104
|
+
### Register the server
|
|
105
|
+
|
|
106
|
+
Pick your client. The snippets below are correct as of writing — if
|
|
107
|
+
your client has changed since, its own MCP docs are authoritative.
|
|
108
|
+
|
|
109
|
+
**Gotcha (all clients):** if `ietf-llm-mcp` was installed via `pipx`,
|
|
110
|
+
the binary is on your shell `PATH` but may not be on the `PATH`
|
|
111
|
+
inherited by a GUI app launched from Finder / Spotlight / Explorer.
|
|
112
|
+
Use the absolute path (`which ietf-llm-mcp`) if the client can't find
|
|
113
|
+
the command.
|
|
114
|
+
|
|
115
|
+
#### Claude Code
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
claude mcp add ietf-llm -- ietf-llm-mcp
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Also install the bundled skill so Claude knows how to drive the tools
|
|
122
|
+
well (digests before raw reads, search before slurping mailing-list
|
|
123
|
+
files, etc.):
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
ietf-llm --install-claude-skill
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Re-run after upgrading the package to pick up improvements.
|
|
130
|
+
|
|
131
|
+
#### Claude Desktop
|
|
132
|
+
|
|
133
|
+
Edit `claude_desktop_config.json` (create it if missing):
|
|
134
|
+
|
|
135
|
+
- **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
136
|
+
- **Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
|
|
137
|
+
- **Linux:** `~/.config/Claude/claude_desktop_config.json`
|
|
138
|
+
|
|
139
|
+
```json
|
|
140
|
+
{
|
|
141
|
+
"mcpServers": {
|
|
142
|
+
"ietf-llm": {
|
|
143
|
+
"command": "ietf-llm-mcp"
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Quit and relaunch Claude Desktop — the config is only read at startup.
|
|
150
|
+
|
|
151
|
+
#### Codex CLI (OpenAI)
|
|
152
|
+
|
|
153
|
+
`~/.codex/config.toml`:
|
|
154
|
+
|
|
155
|
+
```toml
|
|
156
|
+
[mcp_servers.ietf-llm]
|
|
157
|
+
command = "ietf-llm-mcp"
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
#### Gemini CLI
|
|
161
|
+
|
|
162
|
+
`~/.gemini/settings.json`:
|
|
163
|
+
|
|
164
|
+
```json
|
|
165
|
+
{
|
|
166
|
+
"mcpServers": {
|
|
167
|
+
"ietf-llm": {
|
|
168
|
+
"command": "ietf-llm-mcp"
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
#### opencode
|
|
175
|
+
|
|
176
|
+
`~/.config/opencode/opencode.json` (or `opencode.json` in your project
|
|
177
|
+
root):
|
|
178
|
+
|
|
179
|
+
```json
|
|
180
|
+
{
|
|
181
|
+
"$schema": "https://opencode.ai/config.json",
|
|
182
|
+
"mcp": {
|
|
183
|
+
"ietf-llm": {
|
|
184
|
+
"type": "local",
|
|
185
|
+
"command": ["ietf-llm-mcp"],
|
|
186
|
+
"enabled": true
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
#### Cursor
|
|
193
|
+
|
|
194
|
+
In-app MCP settings panel, or `~/.cursor/mcp.json` (global) or
|
|
195
|
+
`.cursor/mcp.json` (per-project):
|
|
196
|
+
|
|
197
|
+
```json
|
|
198
|
+
{
|
|
199
|
+
"mcpServers": {
|
|
200
|
+
"ietf-llm": {
|
|
201
|
+
"command": "ietf-llm-mcp"
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
#### Zed
|
|
208
|
+
|
|
209
|
+
`~/.config/zed/settings.json`:
|
|
210
|
+
|
|
211
|
+
```json
|
|
212
|
+
{
|
|
213
|
+
"context_servers": {
|
|
214
|
+
"ietf-llm": {
|
|
215
|
+
"command": {
|
|
216
|
+
"path": "ietf-llm-mcp",
|
|
217
|
+
"args": []
|
|
218
|
+
},
|
|
219
|
+
"settings": {}
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Gather a Working Group
|
|
226
|
+
|
|
227
|
+
Gathering is a slow, network-heavy job, so it runs from the CLI —
|
|
228
|
+
not silently from the agent. Do it once per WG you want to query:
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
ietf-llm httpbis \
|
|
232
|
+
--github httpwg/http-core \
|
|
233
|
+
--github httpwg/http-extensions \
|
|
234
|
+
--embed
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
- `--github org/repo` — GitHub repos whose issues to include. Repeat
|
|
238
|
+
per repo. Persisted, so you can omit it on later runs.
|
|
239
|
+
- `--embed` — build the local semantic search index that backs the
|
|
240
|
+
`search_corpus` MCP tool. **Required if you want the agent to
|
|
241
|
+
search.** Downloads ~130 MB of model weights once on first run.
|
|
242
|
+
|
|
243
|
+
Everything goes to `~/.cache/ietf-llm/<wg>/`. The MCP server reads
|
|
244
|
+
from there — no separate destination to manage.
|
|
245
|
+
|
|
246
|
+
### Ask your agent
|
|
247
|
+
|
|
248
|
+
```text
|
|
249
|
+
"What's open in httpbis right now?"
|
|
250
|
+
"Anyone on the list raised concerns about cookie partitioning?"
|
|
251
|
+
"How did the debate on MLKEM evolve in TLS?"
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
The agent uses `list_working_groups`, `overview`, `read_digest`,
|
|
255
|
+
`search_corpus`, and `read_topic` to answer — no need to point at
|
|
256
|
+
files. See [MCP tools](#mcp-tools) for the full surface.
|
|
257
|
+
|
|
258
|
+
### Updating
|
|
259
|
+
|
|
260
|
+
Just re-run the gather. All per-WG settings (GitHub repos, embedding
|
|
261
|
+
choice) are remembered:
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
ietf-llm httpbis
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
Embedding is incremental — only changed files are re-embedded. Run on
|
|
268
|
+
a cron or whenever you want fresh data; the agent picks up the new
|
|
269
|
+
state on its next tool call.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## 2. Use with NotebookLM
|
|
274
|
+
|
|
275
|
+
NotebookLM ingests a corpus as a set of source files. `ietf-llm-export`
|
|
276
|
+
turns the gathered cache into an upload-ready directory, or pushes
|
|
277
|
+
straight to a NotebookLM Enterprise notebook.
|
|
278
|
+
|
|
279
|
+
> **Workflow note:** export always produces a complete fresh dump.
|
|
280
|
+
> Create a new notebook on each refresh rather than trying to merge
|
|
281
|
+
> updates into an existing one.
|
|
282
|
+
|
|
283
|
+
### Gather a Working Group
|
|
284
|
+
|
|
285
|
+
Same as the MCP path, but `--embed` is optional (NotebookLM does its
|
|
286
|
+
own indexing):
|
|
287
|
+
|
|
288
|
+
```bash
|
|
289
|
+
ietf-llm httpbis \
|
|
290
|
+
--github httpwg/http-core \
|
|
291
|
+
--github httpwg/http-extensions
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
### Export to a local directory
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
ietf-llm-export httpbis --destination ~/notebooklm/httpbis
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
Drag the directory's contents into NotebookLM as sources. Per-thread
|
|
301
|
+
mailing list conversations and per-issue GitHub records are bundled
|
|
302
|
+
by year / repo to stay under NotebookLM's per-notebook source limit
|
|
303
|
+
(50 sources on the free tier, 300 on Plus).
|
|
304
|
+
|
|
305
|
+
### Export to NotebookLM Enterprise
|
|
306
|
+
|
|
307
|
+
If you have Google Workspace Enterprise with NotebookLM enabled,
|
|
308
|
+
`ietf-llm-export` can create the notebook and upload sources directly:
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
ietf-llm-export httpbis --create my-gcp-project-id
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
One-time setup:
|
|
315
|
+
|
|
316
|
+
1. **Google Cloud Project** with the **Discovery Engine API** enabled.
|
|
317
|
+
2. **OAuth credentials**: create an "OAuth 2.0 Client ID" (Desktop
|
|
318
|
+
App) in the [Cloud Console](https://console.cloud.google.com/apis/credentials).
|
|
319
|
+
3. **Save the JSON** as `client_secrets.json` in
|
|
320
|
+
`~/.config/ietf-llm/` (or pass `--credentials-file PATH`).
|
|
321
|
+
|
|
322
|
+
First run opens a browser to authorise; the token is cached at
|
|
323
|
+
`~/.config/ietf-llm/token.json`.
|
|
324
|
+
|
|
325
|
+
Per-WG export settings are persisted at
|
|
326
|
+
`~/.config/ietf-llm/<wg>/export.json` — subsequent runs of the same
|
|
327
|
+
mode need only `ietf-llm-export <wg>`.
|
|
328
|
+
|
|
329
|
+
---
|
|
330
|
+
|
|
331
|
+
## Reference
|
|
332
|
+
|
|
333
|
+
### Commands
|
|
334
|
+
|
|
335
|
+
| Command | Job | Reads | Writes |
|
|
336
|
+
|---|---|---|---|
|
|
337
|
+
| `ietf-llm` | Gather / refresh a WG | network | cache |
|
|
338
|
+
| `ietf-llm-export` | Mirror cache to dir, or push to NotebookLM Enterprise | cache | dir / NotebookLM |
|
|
339
|
+
| `ietf-llm-search` | Semantic search over the cache | cache | stdout |
|
|
340
|
+
| `ietf-llm-mcp` | Expose the cache to MCP clients | cache | stdio (MCP) |
|
|
341
|
+
|
|
342
|
+
All four are independent. The cache (`~/.cache/ietf-llm/<wg>/`) is
|
|
343
|
+
the single source of truth; everything else reads from it.
|
|
344
|
+
|
|
345
|
+
### Gather options
|
|
346
|
+
|
|
347
|
+
```bash
|
|
348
|
+
ietf-llm [OPTIONS] <wg_shortname>
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
- `--github OWNER/REPO` — repeat per GitHub repo whose issues to gather.
|
|
352
|
+
- `--draft DRAFT-NAME` — extra Internet-Draft to track, beyond the
|
|
353
|
+
WG's auto-discovered documents (repeatable, persisted). Version
|
|
354
|
+
suffix is stripped; every revision is gathered.
|
|
355
|
+
- `--mailing-list LIST` — extra IETF-hosted mailing list to sync,
|
|
356
|
+
beyond the WG's auto-discovered one (repeatable, persisted).
|
|
357
|
+
Accepts `foo` or `foo@ietf.org`.
|
|
358
|
+
- `--github-label LABEL` / `--exclude-github-label LABEL` — filter
|
|
359
|
+
issues by label; repeatable.
|
|
360
|
+
- `--months N` — months of mailing list / meeting history (default 12).
|
|
361
|
+
- `--summarize` / `--summarize-model MODEL` — add LLM-generated
|
|
362
|
+
one-liners to digests via the `llm` package.
|
|
363
|
+
- `--embed` / `--embed-model MODEL` — build / refresh the semantic
|
|
364
|
+
search index (required for `ietf-llm-search` and the MCP
|
|
365
|
+
`search_corpus` tool).
|
|
366
|
+
- `--rebuild-embeddings` — with `--embed`, drop and re-embed instead
|
|
367
|
+
of incremental update.
|
|
368
|
+
- `--clear-cache` — wipe the cache for this WG and re-download.
|
|
369
|
+
- `--clear-config` — clear persisted config for this WG.
|
|
370
|
+
- `--quiet` / `--verbose`.
|
|
371
|
+
|
|
372
|
+
Per-WG settings are persisted at `~/.config/ietf-llm/<wg>/gather.json`.
|
|
373
|
+
|
|
374
|
+
**GitHub auth.** Set `GITHUB_TOKEN` on the gather invocation (a
|
|
375
|
+
fine-scoped read-only token is plenty); without one you'll hit anonymous
|
|
376
|
+
API rate limits quickly on large WGs. Prefer inline-passing over
|
|
377
|
+
exporting in your shell rc so the token doesn't leak into every other
|
|
378
|
+
subprocess:
|
|
379
|
+
|
|
380
|
+
```bash
|
|
381
|
+
GITHUB_TOKEN=ghp_... ietf-llm httpbis
|
|
382
|
+
# or, from a secret manager:
|
|
383
|
+
GITHUB_TOKEN=$(security find-generic-password -s github-readonly -w) \
|
|
384
|
+
ietf-llm httpbis
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
### Semantic search from the CLI
|
|
388
|
+
|
|
389
|
+
```bash
|
|
390
|
+
ietf-llm-search httpbis "skepticism about cookie partitioning" -k 8
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
Chunks are content-aware: one chunk per mailing list message, one per
|
|
394
|
+
issue comment, and a windowed slice of drafts/RFCs/transcripts. The
|
|
395
|
+
index lives at `~/.cache/ietf-llm/<wg>/embeddings.db` and updates
|
|
396
|
+
incrementally on each `--embed` run.
|
|
397
|
+
|
|
398
|
+
Default model: **`sentence-transformers/BAAI/bge-small-en-v1.5`** —
|
|
399
|
+
small (~33M params), MPS-accelerated, runs entirely on your machine.
|
|
400
|
+
Override with `--embed-model <id>` for any model the `llm` package
|
|
401
|
+
recognises.
|
|
402
|
+
|
|
403
|
+
### Digest files
|
|
404
|
+
|
|
405
|
+
Every gather produces small markdown digests under
|
|
406
|
+
`~/.cache/ietf-llm/<wg>/files/digests/`:
|
|
407
|
+
|
|
408
|
+
- `index.md` — categorised inventory of all cached files.
|
|
409
|
+
- `issues.md` — one row per GitHub issue (state, title, labels,
|
|
410
|
+
comments, last updated), sorted open-first.
|
|
411
|
+
- `threads.md` — one row per mailing list thread (subject, message
|
|
412
|
+
count, participants, date range).
|
|
413
|
+
- `people.md` — participants with roles + message counts.
|
|
414
|
+
- `timeline.md` — chronological events (draft publications, issue
|
|
415
|
+
open/close, meetings, polls, WGLC, …).
|
|
416
|
+
|
|
417
|
+
Generated deterministically from the cache. Pass `--summarize` to
|
|
418
|
+
also include LLM-generated one-liners per row.
|
|
419
|
+
|
|
420
|
+
### MCP tools
|
|
421
|
+
|
|
422
|
+
`ietf-llm-mcp` exposes:
|
|
423
|
+
|
|
424
|
+
- `list_working_groups()` — WGs gathered locally.
|
|
425
|
+
- `overview(wg)` — chairs, active drafts, top open issues, recent
|
|
426
|
+
threads, latest meeting. First call for "tell me about X."
|
|
427
|
+
- `list_labels(wg)` — GitHub issue labels with frequencies.
|
|
428
|
+
- `list_files(wg, pattern?)` — file inventory with chunk counts.
|
|
429
|
+
- `read_digest(wg, kind, ...filters)` — `index` / `issues` /
|
|
430
|
+
`threads` / `people` / `timeline`. Filters compose (state, label,
|
|
431
|
+
author, role, since/until, event_kind, …).
|
|
432
|
+
- `search_corpus(wg, query, ...)` — semantic search with optional
|
|
433
|
+
`state`, `label`, `file_pattern`, `since`/`until`, `sort="date"`,
|
|
434
|
+
`group_by="file"`.
|
|
435
|
+
- `read_topic(wg, query, include_replies=False)` — chronological
|
|
436
|
+
narrative view: full message bodies across threads and issues in
|
|
437
|
+
date order, optionally walking reply descendants.
|
|
438
|
+
- `get_chunk_text(wg, file, chunk_idx, end_chunk_idx?)` — full text
|
|
439
|
+
of one chunk (or a range).
|
|
440
|
+
- `get_chunks_batch(wg, [{file, chunk_idx, end_chunk_idx?}, …])` —
|
|
441
|
+
multi-file batch fetch.
|
|
442
|
+
- `fetch_by_url(wg, url)` — resolve a GitHub or mail-archive URL to
|
|
443
|
+
its cached content.
|
|
444
|
+
- `read_file_section(wg, file, start_line, max_lines)` — bounded raw
|
|
445
|
+
read (default 400 lines, hard cap 5000).
|
|
446
|
+
|
|
447
|
+
---
|
|
448
|
+
|
|
449
|
+
## Migrating from `ietf-notebook`
|
|
450
|
+
|
|
451
|
+
If you previously used the `ietf-notebook` distribution:
|
|
452
|
+
|
|
453
|
+
```bash
|
|
454
|
+
pipx uninstall ietf-notebook
|
|
455
|
+
pipx install ietf-llm
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
Cache and config directories changed names. To preserve a gathered
|
|
459
|
+
cache, move it by hand:
|
|
460
|
+
|
|
461
|
+
```bash
|
|
462
|
+
mv ~/.cache/ietf-notebook ~/.cache/ietf-llm
|
|
463
|
+
mv ~/.config/ietf-notebook ~/.config/ietf-llm
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
Otherwise the old directories are simply ignored.
|
|
467
|
+
|
|
468
|
+
### Command renames
|
|
469
|
+
|
|
470
|
+
| Before | After |
|
|
471
|
+
|---|---|
|
|
472
|
+
| `ietf-notebook <wg>` | `ietf-llm <wg>` |
|
|
473
|
+
| (no equivalent) | `ietf-llm-export <wg>` (split out) |
|
|
474
|
+
| (no equivalent) | `ietf-llm-search <wg> <query>` (new) |
|
|
475
|
+
| (no equivalent) | `ietf-llm-mcp` (new) |
|
|
476
|
+
|
|
477
|
+
### Flags moved off the gather CLI
|
|
478
|
+
|
|
479
|
+
These now live on `ietf-llm-export`:
|
|
480
|
+
|
|
481
|
+
| Old: `ietf-notebook <wg> ...` | New |
|
|
482
|
+
|---|---|
|
|
483
|
+
| `--destination DIR` | `ietf-llm-export <wg> --destination DIR` |
|
|
484
|
+
| `--create GCP_PROJECT` | `ietf-llm-export <wg> --create GCP_PROJECT` |
|
|
485
|
+
| `--credentials-file PATH` | `ietf-llm-export <wg> --credentials-file PATH` |
|
|
486
|
+
| `--token-file PATH` | `ietf-llm-export <wg> --token-file PATH` |
|
|
487
|
+
|
|
488
|
+
If you pass any of these to `ietf-llm`, you'll get a redirect error.
|
|
489
|
+
|
|
490
|
+
### `--update` is gone
|
|
491
|
+
|
|
492
|
+
The gather CLI is now idempotent — re-run it whenever you want fresh
|
|
493
|
+
data. The export CLI always produces a complete fresh dump; for
|
|
494
|
+
NotebookLM, create a new notebook each refresh rather than trying to
|
|
495
|
+
merge updates.
|
|
496
|
+
|
|
497
|
+
## Contributing
|
|
498
|
+
|
|
499
|
+
Pull requests welcome. For major changes, please open an issue first.
|
|
500
|
+
|
|
501
|
+
[ARCHITECTURE.md](ARCHITECTURE.md) is the read-this-first for anyone
|
|
502
|
+
poking at the code: package layout, cache and config conventions,
|
|
503
|
+
data flow, and the key design decisions worth knowing before you
|
|
504
|
+
change anything.
|