cobrain 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cobrain-0.1.0/LICENSE +21 -0
- cobrain-0.1.0/PKG-INFO +98 -0
- cobrain-0.1.0/README.md +85 -0
- cobrain-0.1.0/pyproject.toml +29 -0
- cobrain-0.1.0/setup.cfg +4 -0
- cobrain-0.1.0/src/cobrain/__init__.py +1 -0
- cobrain-0.1.0/src/cobrain/cli/__init__.py +167 -0
- cobrain-0.1.0/src/cobrain/cli/ingest/__init__.py +4 -0
- cobrain-0.1.0/src/cobrain/cli/ingest/chatgpt.py +260 -0
- cobrain-0.1.0/src/cobrain/cli/ingest/x/__init__.py +8 -0
- cobrain-0.1.0/src/cobrain/cli/ingest/x/cmd.py +69 -0
- cobrain-0.1.0/src/cobrain/cli/ingest/x/ingest.py +224 -0
- cobrain-0.1.0/src/cobrain/cli/ingest/x/parse.py +37 -0
- cobrain-0.1.0/src/cobrain/cli/show.py +26 -0
- cobrain-0.1.0/src/cobrain/cli/sources.py +117 -0
- cobrain-0.1.0/src/cobrain/cli/sync.py +16 -0
- cobrain-0.1.0/src/cobrain/cli/utils.py +86 -0
- cobrain-0.1.0/src/cobrain/cli/vault.py +300 -0
- cobrain-0.1.0/src/cobrain/config.py +121 -0
- cobrain-0.1.0/src/cobrain/directories.py +84 -0
- cobrain-0.1.0/src/cobrain/graph/__init__.py +33 -0
- cobrain-0.1.0/src/cobrain/graph/backup.py +54 -0
- cobrain-0.1.0/src/cobrain/graph/builder.py +197 -0
- cobrain-0.1.0/src/cobrain/graph/category.py +73 -0
- cobrain-0.1.0/src/cobrain/graph/diffs.py +191 -0
- cobrain-0.1.0/src/cobrain/graph/validation.py +26 -0
- cobrain-0.1.0/src/cobrain/html/html.py +91 -0
- cobrain-0.1.0/src/cobrain/models.py +44 -0
- cobrain-0.1.0/src/cobrain/parsers/chatgpt/__init__.py +68 -0
- cobrain-0.1.0/src/cobrain/parsers/chatgpt/extract.py +143 -0
- cobrain-0.1.0/src/cobrain/parsers/chatgpt/format.py +183 -0
- cobrain-0.1.0/src/cobrain/parsers/chatgpt/load.py +35 -0
- cobrain-0.1.0/src/cobrain/parsers/chatgpt/lookups.py +119 -0
- cobrain-0.1.0/src/cobrain/parsers/chatgpt/models.py +53 -0
- cobrain-0.1.0/src/cobrain/parsers/chatgpt/transform.py +84 -0
- cobrain-0.1.0/src/cobrain/parsers/chatgpt/traverse.py +200 -0
- cobrain-0.1.0/src/cobrain/parsers/chatgpt/utils.py +82 -0
- cobrain-0.1.0/src/cobrain/parsers/x/__init__.py +58 -0
- cobrain-0.1.0/src/cobrain/parsers/x/auth.py +123 -0
- cobrain-0.1.0/src/cobrain/parsers/x/client.py +161 -0
- cobrain-0.1.0/src/cobrain/parsers/x/helpers.py +57 -0
- cobrain-0.1.0/src/cobrain/parsers/x/merge.py +141 -0
- cobrain-0.1.0/src/cobrain/parsers/x/models.py +109 -0
- cobrain-0.1.0/src/cobrain/parsers/x/parse.py +131 -0
- cobrain-0.1.0/src/cobrain/parsers/x/storage.py +93 -0
- cobrain-0.1.0/src/cobrain/parsers/x/transform.py +66 -0
- cobrain-0.1.0/src/cobrain/parsers/x/tree.py +124 -0
- cobrain-0.1.0/src/cobrain/templates.py +71 -0
- cobrain-0.1.0/src/cobrain/topics/__init__.py +27 -0
- cobrain-0.1.0/src/cobrain/topics/frontmatter.py +13 -0
- cobrain-0.1.0/src/cobrain/topics/topic.py +211 -0
- cobrain-0.1.0/src/cobrain/yaml_utils.py +52 -0
- cobrain-0.1.0/src/cobrain.egg-info/PKG-INFO +98 -0
- cobrain-0.1.0/src/cobrain.egg-info/SOURCES.txt +61 -0
- cobrain-0.1.0/src/cobrain.egg-info/dependency_links.txt +1 -0
- cobrain-0.1.0/src/cobrain.egg-info/entry_points.txt +3 -0
- cobrain-0.1.0/src/cobrain.egg-info/requires.txt +4 -0
- cobrain-0.1.0/src/cobrain.egg-info/top_level.txt +1 -0
- cobrain-0.1.0/tests/test_chatgpt.py +600 -0
- cobrain-0.1.0/tests/test_diffs.py +170 -0
- cobrain-0.1.0/tests/test_html.py +72 -0
- cobrain-0.1.0/tests/test_validation.py +122 -0
- cobrain-0.1.0/tests/test_x.py +248 -0
cobrain-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Igor Akulov
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
cobrain-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cobrain
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge locally on device. Use it to back up and organize your knowledge, help AI agents read your mind, or map and track your learning progress.
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: build>=1.5.0
|
|
9
|
+
Requires-Dist: pyyaml>=6.0
|
|
10
|
+
Requires-Dist: twine>=6.2.0
|
|
11
|
+
Requires-Dist: xdk>=0.9.0
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# Cobrain
|
|
15
|
+
|
|
16
|
+
## Overview
|
|
17
|
+
|
|
18
|
+
Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge in a local vault: files (topics, sources) + graph (structured metadata).
|
|
19
|
+
|
|
20
|
+
- Vault locally stores and organizes knowledge, tracks learning, and creates persistent shared context between owner/user/agent for collaboration and decisions
|
|
21
|
+
- Agent manages vault autonomously
|
|
22
|
+
- Cobrain CLI performs deterministic tasks and helps AI agent perform vault-related tasks reliably and token-efficiently
|
|
23
|
+
|
|
24
|
+
## Installation & Setup
|
|
25
|
+
|
|
26
|
+
Install agent skill `skills/cobrain-vault`. Agent does the rest.
|
|
27
|
+
|
|
28
|
+
Alternative:
|
|
29
|
+
```bash
|
|
30
|
+
pip install cobrain
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Vault
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
vault/
|
|
37
|
+
├── vault.html # Visual graph with search/filters for human user
|
|
38
|
+
├── topics/ # Topic files (.md), source of truth for CLI
|
|
39
|
+
├── sources/
|
|
40
|
+
│ ├── chats/ # ChatGPT conversations (.md)
|
|
41
|
+
│ ├── x/ # X conversations (.yaml)
|
|
42
|
+
│ └── ... # Add more for other sources
|
|
43
|
+
└── .cobrain/ # App internals, read but never edit directly
|
|
44
|
+
├── vault.yaml # Derived graph of topics, with metadata
|
|
45
|
+
├── categories.yaml # Customizable topic category colors / titles
|
|
46
|
+
├── backups/ # Rolling backups
|
|
47
|
+
├── diffs/ # Diffs from backups
|
|
48
|
+
└── logs/ # Ingest logs
|
|
49
|
+
├── chatgpt/
|
|
50
|
+
└── x/
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
`vault/sources/`: raw content ingested from external systems (ChatGPT, X, user-provided documents, your own chat with user), users never read this.
|
|
54
|
+
`vault/topics/`: curated summaries users read, source of truth for everything.
|
|
55
|
+
|
|
56
|
+
## CLI reference
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
brn version # show version
|
|
60
|
+
brn vault --dir <path> # initialize new/existing vault, write config
|
|
61
|
+
brn sync [--warnings] # build graph from files + show warnings
|
|
62
|
+
brn show # build and open vault.html in browser
|
|
63
|
+
brn vault [--ids <ids>] [--minimal | --full | --full+] [--flow | --block] # get graph as YAML (select ids, topic metadata fields, YAML format)
|
|
64
|
+
brn vault --ids <ids> --set field=value... # update topic frontmatter + sync
|
|
65
|
+
brn vault --from <id> [--depth N] # subtree
|
|
66
|
+
brn vault --from <id> --to <id2> # shortest path (parent links only)
|
|
67
|
+
brn sources [--warnings] # view source stats + warnings
|
|
68
|
+
brn sources --ingest chatgpt --paths <path...> [--since <dt>] [--until <dt>] [--titles <titles>] # ingest ChatGPT conversations.json
|
|
69
|
+
brn sources --ingest x --ids <post_ids> # ingest X posts by ID/URL/xurl
|
|
70
|
+
brn sources --ingest x --own [--count <N> | --new | --since-id <id> --until-id <id>] # fetch own posts (default 10, count, all new until hit existing, or target range)
|
|
71
|
+
brn sources --ingest x --own --authorization-code <code> # first-time X auth
|
|
72
|
+
brn sources --ingest x --likes [--count <N> | --new] # ingest liked posts
|
|
73
|
+
brn sources --ingest x --bookmarks [--count <N> | --new] # ingest bookmarked posts
|
|
74
|
+
brn backup # copy vault.yaml + categories.yaml (up to 20)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
All `--ingest x` commands pull full conversation above the target post, and minimize API credits spent by always checking locally stored posts first.
|
|
78
|
+
|
|
79
|
+
## Topics
|
|
80
|
+
|
|
81
|
+
## Sources
|
|
82
|
+
|
|
83
|
+
X and ChatGPT: integrated, agent ingests with CLI command.
|
|
84
|
+
Other sources (webpage, file, chat): agent reads directly.
|
|
85
|
+
|
|
86
|
+
### X
|
|
87
|
+
|
|
88
|
+
- Built with XDK (official X SDK), with their affordable [pay-per-use pricing](https://docs.x.com/x-api/getting-started/pricing) and 24h retrieved post caching to save your credits.
|
|
89
|
+
- Uses your own app's OAuth2.0 credentials.
|
|
90
|
+
- Ingest your bookmarks, likes, posts and replies, or any post by id/url. Official endpoint filters supported as flags.
|
|
91
|
+
- CLI automatically builds coherent conversations (one per file) from target post up to original (root) post. Locally stored posts are re-used whenever possible to save your credits. New posts from an existing conversation merge into the same file seamlessly.
|
|
92
|
+
- Output: `vault/sources/x/<conversation_author>_<conversation_id>.yaml`.
|
|
93
|
+
|
|
94
|
+
### ChatGPT
|
|
95
|
+
|
|
96
|
+
- Request data export from your ChatGPT app and run ingest command on `conversations.json`.
|
|
97
|
+
- Filter by dates and title supported as flags.
|
|
98
|
+
- Output: `vault/sources/chats/<title>.md`.
|
cobrain-0.1.0/README.md
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Cobrain
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge in a local vault: files (topics, sources) + graph (structured metadata).
|
|
6
|
+
|
|
7
|
+
- Vault locally stores and organizes knowledge, tracks learning, and creates persistent shared context between owner/user/agent for collaboration and decisions
|
|
8
|
+
- Agent manages vault autonomously
|
|
9
|
+
- Cobrain CLI performs deterministic tasks and helps AI agent perform vault-related tasks reliably and token-efficiently
|
|
10
|
+
|
|
11
|
+
## Installation & Setup
|
|
12
|
+
|
|
13
|
+
Install agent skill `skills/cobrain-vault`. Agent does the rest.
|
|
14
|
+
|
|
15
|
+
Alternative:
|
|
16
|
+
```bash
|
|
17
|
+
pip install cobrain
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Vault
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
vault/
|
|
24
|
+
├── vault.html # Visual graph with search/filters for human user
|
|
25
|
+
├── topics/ # Topic files (.md), source of truth for CLI
|
|
26
|
+
├── sources/
|
|
27
|
+
│ ├── chats/ # ChatGPT conversations (.md)
|
|
28
|
+
│ ├── x/ # X conversations (.yaml)
|
|
29
|
+
│ └── ... # Add more for other sources
|
|
30
|
+
└── .cobrain/ # App internals, read but never edit directly
|
|
31
|
+
├── vault.yaml # Derived graph of topics, with metadata
|
|
32
|
+
├── categories.yaml # Customizable topic category colors / titles
|
|
33
|
+
├── backups/ # Rolling backups
|
|
34
|
+
├── diffs/ # Diffs from backups
|
|
35
|
+
└── logs/ # Ingest logs
|
|
36
|
+
├── chatgpt/
|
|
37
|
+
└── x/
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
`vault/sources/`: raw content ingested from external systems (ChatGPT, X, user-provided documents, your own chat with user), users never read this.
|
|
41
|
+
`vault/topics/`: curated summaries users read, source of truth for everything.
|
|
42
|
+
|
|
43
|
+
## CLI reference
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
brn version # show version
|
|
47
|
+
brn vault --dir <path> # initialize new/existing vault, write config
|
|
48
|
+
brn sync [--warnings] # build graph from files + show warnings
|
|
49
|
+
brn show # build and open vault.html in browser
|
|
50
|
+
brn vault [--ids <ids>] [--minimal | --full | --full+] [--flow | --block] # get graph as YAML (select ids, topic metadata fields, YAML format)
|
|
51
|
+
brn vault --ids <ids> --set field=value... # update topic frontmatter + sync
|
|
52
|
+
brn vault --from <id> [--depth N] # subtree
|
|
53
|
+
brn vault --from <id> --to <id2> # shortest path (parent links only)
|
|
54
|
+
brn sources [--warnings] # view source stats + warnings
|
|
55
|
+
brn sources --ingest chatgpt --paths <path...> [--since <dt>] [--until <dt>] [--titles <titles>] # ingest ChatGPT conversations.json
|
|
56
|
+
brn sources --ingest x --ids <post_ids> # ingest X posts by ID/URL/xurl
|
|
57
|
+
brn sources --ingest x --own [--count <N> | --new | --since-id <id> --until-id <id>] # fetch own posts (default 10, count, all new until hit existing, or target range)
|
|
58
|
+
brn sources --ingest x --own --authorization-code <code> # first-time X auth
|
|
59
|
+
brn sources --ingest x --likes [--count <N> | --new] # ingest liked posts
|
|
60
|
+
brn sources --ingest x --bookmarks [--count <N> | --new] # ingest bookmarked posts
|
|
61
|
+
brn backup # copy vault.yaml + categories.yaml (up to 20)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
All `--ingest x` commands pull full conversation above the target post, and minimize API credits spent by always checking locally stored posts first.
|
|
65
|
+
|
|
66
|
+
## Topics
|
|
67
|
+
|
|
68
|
+
## Sources
|
|
69
|
+
|
|
70
|
+
X and ChatGPT: integrated, agent ingests with CLI command.
|
|
71
|
+
Other sources (webpage, file, chat): agent reads directly.
|
|
72
|
+
|
|
73
|
+
### X
|
|
74
|
+
|
|
75
|
+
- Built with XDK (official X SDK), with their affordable [pay-per-use pricing](https://docs.x.com/x-api/getting-started/pricing) and 24h retrieved post caching to save your credits.
|
|
76
|
+
- Uses your own app's OAuth2.0 credentials.
|
|
77
|
+
- Ingest your bookmarks, likes, posts and replies, or any post by id/url. Official endpoint filters supported as flags.
|
|
78
|
+
- CLI automatically builds coherent conversations (one per file) from target post up to original (root) post. Locally stored posts are re-used whenever possible to save your credits. New posts from an existing conversation merge into the same file seamlessly.
|
|
79
|
+
- Output: `vault/sources/x/<conversation_author>_<conversation_id>.yaml`.
|
|
80
|
+
|
|
81
|
+
### ChatGPT
|
|
82
|
+
|
|
83
|
+
- Request data export from your ChatGPT app and run ingest command on `conversations.json`.
|
|
84
|
+
- Filter by dates and title supported as flags.
|
|
85
|
+
- Output: `vault/sources/chats/<title>.md`.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "cobrain"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge locally on device. Use it to back up and organize your knowledge, help AI agents read your mind, or map and track your learning progress."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"build>=1.5.0",
|
|
9
|
+
"pyyaml>=6.0",
|
|
10
|
+
"twine>=6.2.0",
|
|
11
|
+
"xdk>=0.9.0",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[project.scripts]
|
|
15
|
+
cobrain = "cobrain.cli:main"
|
|
16
|
+
brn = "cobrain.cli:main"
|
|
17
|
+
|
|
18
|
+
[build-system]
|
|
19
|
+
requires = ["setuptools>=61.0"]
|
|
20
|
+
build-backend = "setuptools.build_meta"
|
|
21
|
+
|
|
22
|
+
[tool.ruff]
|
|
23
|
+
target-version = "py311"
|
|
24
|
+
|
|
25
|
+
[tool.pyright]
|
|
26
|
+
pythonVersion = "3.11"
|
|
27
|
+
|
|
28
|
+
[tool.deptry.per_rule_ignores]
|
|
29
|
+
DEP003 = ["cobrain"]
|
cobrain-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import signal
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from cobrain import __version__
|
|
6
|
+
|
|
7
|
+
from .vault import cmd_backup, cmd_vault
|
|
8
|
+
from .show import cmd_show
|
|
9
|
+
from .sources import cmd_sources
|
|
10
|
+
from .sync import cmd_sync
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def cmd_version(args: argparse.Namespace) -> None:
    """Print the package version to stdout.

    ``args`` is not read; it is accepted so this handler can be dispatched
    like the other subcommand handlers via ``args.func(args)``.
    """
    print(__version__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _build_parser(prog: str) -> argparse.ArgumentParser:
    """Build the top-level parser with every subcommand and its flags.

    Each subparser stores its handler under ``func`` so the caller can
    dispatch with ``args.func(args)``.
    """
    parser = argparse.ArgumentParser(
        prog=prog,
        description="Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge locally on device. Use it to back up and organize your knowledge, help AI agents read your mind, or map and track your learning progress.",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    version_parser = subparsers.add_parser("version", help="Show version")
    version_parser.set_defaults(func=cmd_version)

    sync_parser = subparsers.add_parser("sync", help="Rebuild vault graph from files")
    sync_parser.add_argument("--warnings", action="store_true", help="Show warnings")
    sync_parser.set_defaults(func=cmd_sync)

    vault_parser = subparsers.add_parser("vault", help="List or update topics")

    vault_parser.add_argument(
        "--dir",
        dest="vault_dir",
        help="Path to vault directory (initializes if not found)",
    )

    vault_parser.add_argument(
        "--set", nargs="+", help="Set metadata for --ids (also syncs)"
    )

    traversal_group = vault_parser.add_argument_group("Discovery")
    traversal_group.add_argument("--ids", help="Comma-separated topic IDs")
    traversal_group.add_argument("--from", dest="from_topic", help="Starting topic")
    traversal_group.add_argument("--depth", type=int, default=1, help="Traversal depth")
    traversal_group.add_argument("--to", dest="to_topic", help="Target topic for path")

    fields_group = vault_parser.add_argument_group("Fields")
    fields_group.add_argument(
        "--minimal",
        action="store_true",
        help="id, aliases, category, parent, related",
    )
    fields_group.add_argument(
        "--full",
        action="store_true",
        help="minimal + title, created_at, updated_at",
    )
    fields_group.add_argument(
        "--full+",
        dest="full_plus",
        action="store_true",
        help="full + sources, word_count",
    )

    format_group = vault_parser.add_argument_group("Format")
    format_group.add_argument(
        "--flow",
        action="store_true",
        default=True,
        help="Flow style (default, compact)",
    )
    format_group.add_argument(
        "--block", action="store_true", help="Block style (human-readable)"
    )

    vault_parser.set_defaults(func=cmd_vault)

    show_parser = subparsers.add_parser("show", help="Show vault in browser")
    show_parser.set_defaults(func=cmd_show)

    backup_parser = subparsers.add_parser("backup", help="Create rolling backup")
    backup_parser.set_defaults(func=cmd_backup)

    sources_parser = subparsers.add_parser("sources", help="Manage sources")

    sources_parser.add_argument("--warnings", action="store_true", help="Show warnings")

    chatgpt_group = sources_parser.add_argument_group("ChatGPT")
    chatgpt_group.add_argument(
        "--ingest",
        choices=["chatgpt", "x"],
        help="Ingest source (e.g., chatgpt, x)",
    )
    chatgpt_group.add_argument(
        "--paths", nargs="+", help="Path(s) to conversations.json files"
    )
    chatgpt_group.add_argument(
        "--since", dest="since_datetime", help="Start datetime (ISO 8601, inclusive)"
    )
    chatgpt_group.add_argument(
        "--until", dest="until_datetime", help="End datetime (ISO 8601, inclusive)"
    )
    chatgpt_group.add_argument("--titles", help="Filter by titles (comma-separated)")

    x_endpoints_group = sources_parser.add_argument_group("X endpoints")
    x_endpoints_group.add_argument(
        "--ids", help="Comma-separated post IDs, URLs, or xurls"
    )
    x_endpoints_group.add_argument(
        "--own",
        action="store_true",
        help="Ingest own posts",
    )
    x_endpoints_group.add_argument(
        "--likes", action="store_true", help="Ingest liked posts"
    )
    x_endpoints_group.add_argument(
        "--bookmarks", action="store_true", help="Ingest bookmarked posts"
    )

    x_filters_group = sources_parser.add_argument_group("X filters")
    x_filters_group.add_argument(
        "--new",
        action="store_true",
        help="Loop until hitting existing posts",
    )
    x_filters_group.add_argument("--count", type=int, help="Total posts to fetch")
    x_filters_group.add_argument(
        "--since-id",
        dest="since_id",
        help="Oldest post ID (excl., --own only)",
    )
    x_filters_group.add_argument(
        "--until-id",
        dest="until_id",
        help="Newest post ID (excl., --own only)",
    )

    x_auth_group = sources_parser.add_argument_group("X auth")
    x_auth_group.add_argument(
        "--authorization-code",
        help="Authorization code from redirect URL",
    )

    sources_parser.set_defaults(func=cmd_sources)

    return parser


def main() -> None:
    """Entry point for the ``brn`` / ``cobrain`` console scripts.

    Installs SIGINT/SIGTERM handlers, parses the command line and dispatches
    to the selected subcommand handler. A ``RuntimeError`` raised by a handler
    is reported on stderr and turned into exit status 1.
    """
    from pathlib import Path

    def _signal_handler(signum, frame):
        print("\nInterrupted", file=sys.stderr)
        # Conventional "killed by signal" status: 130 for SIGINT (unchanged),
        # 143 for SIGTERM instead of the SIGINT code.
        sys.exit(128 + signum)

    signal.signal(signal.SIGINT, _signal_handler)
    signal.signal(signal.SIGTERM, _signal_handler)

    # Look only at the executable's file name: matching against the whole
    # path would pick "brn" whenever any parent directory contains "brn".
    prog = "brn" if "brn" in Path(sys.argv[0]).name else "cobrain"
    args = _build_parser(prog).parse_args()

    try:
        args.func(args)
    except RuntimeError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import re
|
|
3
|
+
import sys
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from cobrain.cli.utils import _parse_iso_datetime
|
|
8
|
+
from cobrain.directories import (
|
|
9
|
+
get_chats_dir,
|
|
10
|
+
get_chats_logs_dir,
|
|
11
|
+
get_chat_log_path,
|
|
12
|
+
)
|
|
13
|
+
from cobrain.yaml_utils import write_yaml
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Separator written between messages in a chat markdown body.
_MESSAGE_SEPARATOR = "\n\n***\n\n"


def _utc_now() -> datetime:
    """Return the current UTC time as a naive datetime.

    Produces the same values and string formats as ``datetime.utcnow()``
    without calling that deprecated function.
    """
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


def _parse_datetime_filter(value: str | None, flag: str) -> float | None:
    """Convert an ISO 8601 CLI value to a POSIX timestamp, or exit(1) if invalid."""
    if not value:
        return None
    try:
        return _parse_iso_datetime(value).timestamp()
    except ValueError:
        print(f"ERROR: Invalid {flag} datetime: {value}", file=sys.stderr)
        sys.exit(1)


def cmd_ingest_chat(args: argparse.Namespace) -> None:
    """Ingest ChatGPT ``conversations.json`` exports into markdown chat files.

    Reads ``args.paths``, ``args.since_datetime``, ``args.until_datetime`` and
    ``args.titles``. For every conversation that passes the filters it either
    creates a new markdown file in the chats directory or appends the newly
    parsed messages to the file that already exists for that conversation.
    One YAML log with an entry per written conversation is saved per run, and
    a created/updated summary is printed.

    Exits with status 1 when an input path does not exist or a datetime
    filter cannot be parsed.
    """
    from cobrain.parsers.chatgpt import (
        load_conversations,
        filter_conversations,
        parse_conversation_expand,
        conversation_to_markdown,
        compute_word_count,
        get_last_message_id,
        get_output_filename,
        get_existing_file_for_conversation,
        message_to_markdown,
    )

    paths = [Path(p).expanduser().resolve() for p in args.paths]
    for path in paths:
        if not path.exists():
            print(f"ERROR: File not found: {path}", file=sys.stderr)
            sys.exit(1)

    from_time = _parse_datetime_filter(args.since_datetime, "--since")
    till_time = _parse_datetime_filter(args.until_datetime, "--until")

    titles = None
    if args.titles:
        titles = [t.strip() for t in args.titles.split(",") if t.strip()]

    conversations = []
    for path in paths:
        conversations.extend(load_conversations(path))

    filtered = filter_conversations(conversations, from_time, till_time, titles)

    chats_dir = get_chats_dir()
    logs_dir = get_chats_logs_dir()
    chats_dir.mkdir(parents=True, exist_ok=True)
    logs_dir.mkdir(parents=True, exist_ok=True)

    created_files = []
    updated_files = []

    log_entries = []
    ingest_timestamp = _utc_now().strftime("%Y%m%dT%H%M%S")

    for conv_data in filtered:
        conv_id = conv_data.get("conversation_id") or conv_data.get("id", "")

        existing_file = get_existing_file_for_conversation(chats_dir, conv_id)
        existing_last_msg_id = None
        created_at = _utc_now().isoformat()
        existing_body = ""
        existing_title = None

        if existing_file:
            existing_last_msg_id = _find_last_message_id_from_log(conv_id)
            existing_content = existing_file.read_text(encoding="utf-8")
            fm, existing_body = _split_frontmatter(existing_content)
            created_at = _get_created_at_from_file(existing_file)
            existing_title = fm.get("title") if fm else None

        # The logged last message id is passed through so the parser can
        # take it into account when re-ingesting a known conversation.
        conv = parse_conversation_expand(conv_data, existing_last_msg_id)

        if existing_file:
            # Write back to the file that already holds this conversation.
            # Deriving the path from the parsed conversation again could
            # point at a different file than the one just read.
            output_path = Path(existing_file)
            updated_files.append(str(existing_file))
        elif conv.messages:
            output_filename = get_output_filename(conv)
            output_path = chats_dir / output_filename
            created_files.append(output_filename)
        else:
            # Nothing on disk and nothing to write.
            continue

        updated_at = _utc_now().isoformat()
        last_msg_id = (
            get_last_message_id(conv) if conv.messages else (existing_last_msg_id or "")
        )

        if existing_file and existing_body.strip():
            existing_word_count = _get_word_count_from_file(existing_file)
            word_count = existing_word_count + compute_word_count(conv)

            body_content = existing_body.rstrip("\n")
            if conv.messages:
                pieces = [body_content]
                # A body written by an earlier append already ends with the
                # "***" rule (its trailing separator minus the stripped
                # whitespace); avoid emitting a second rule right after it.
                if body_content.rstrip().endswith("***"):
                    pieces.append("\n\n")
                else:
                    pieces.append(_MESSAGE_SEPARATOR)
                for msg in conv.messages:
                    pieces.append(message_to_markdown(msg))
                    pieces.append(_MESSAGE_SEPARATOR)
                body_content = "".join(pieces)

            new_fm = _build_frontmatter(
                conv_id=conv_id,
                conv_title=existing_title or conv.title,
                created_at=created_at,
                updated_at=updated_at,
                original_create_time=conv.create_time,
                word_count=word_count,
                urls=conv.sources,
            )
            content = new_fm + "\n" + body_content.rstrip()
        else:
            word_count = compute_word_count(conv)
            title = existing_title or conv.title
            content = conversation_to_markdown(
                conv,
                source_path=None,
                word_count=word_count,
                created_at=created_at,
                updated_at=updated_at,
                title=title,
            )

        output_path.write_text(content, encoding="utf-8")

        log_entries.append(
            {
                "conversation_id": conv_id,
                "output_file": str(output_path),
                "last_message_id": last_msg_id,
                "ingested_at": ingest_timestamp,
                "filters": {
                    "since": args.since_datetime,
                    "until": args.until_datetime,
                    "titles": args.titles,
                },
            }
        )

    if log_entries:
        log_path = get_chat_log_path(ingest_timestamp)
        write_yaml(log_path, log_entries)

    total_created = len(created_files)
    total_updated = len(updated_files)
    print(f"Ingest complete: {total_created} created, {total_updated} updated")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _find_last_message_id_from_log(conv_id: str) -> str | None:
    """Return the most recently logged ``last_message_id`` for a conversation.

    Scans the ``ingest_*.yaml`` files in the chat logs directory and returns
    the ``last_message_id`` of the first entry whose ``conversation_id``
    equals ``conv_id``, or ``None`` when the directory is missing or no entry
    matches.
    """
    from cobrain.yaml_utils import read_yaml

    logs_dir = get_chats_logs_dir()
    if not logs_dir.exists():
        return None

    # glob() yields files in arbitrary order, so an older log could win and
    # hand back a stale message id. Visit logs newest-first instead.
    # NOTE(review): assumes log names embed a sortable timestamp
    # (ingest_<YYYYmmddTHHMMSS>.yaml) - confirm against get_chat_log_path.
    for log_file in sorted(logs_dir.glob("ingest_*.yaml"), reverse=True):
        log_data = read_yaml(log_file)
        if not isinstance(log_data, list):
            continue
        # Later entries in one log were appended later in that run.
        for entry in reversed(log_data):
            if isinstance(entry, dict) and entry.get("conversation_id") == conv_id:
                return entry.get("last_message_id")
    return None
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _get_created_at_from_file(file_path: Path) -> str:
    """Return the ``created_at`` value recorded in a chat file.

    The first line of the form ``created_at: <value>`` is used. When the file
    cannot be read, or has no non-empty ``created_at`` value, the current UTC
    time is returned as a naive ISO 8601 string.
    """
    from datetime import timezone

    try:
        content = file_path.read_text(encoding="utf-8")
    except (OSError, ValueError):
        # Unreadable or undecodable file: best effort, fall back to "now".
        content = ""

    # Only spaces/tabs may separate the key from its value. ``\s*`` would
    # also swallow the newline and capture the *next* line when the value is
    # empty.
    match = re.search(r"^created_at:[ \t]*(\S.*)$", content, re.MULTILINE)
    if match:
        return match.group(1).strip()

    # Same naive-UTC format that datetime.utcnow().isoformat() produced.
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _get_word_count_from_file(file_path: Path) -> int:
    """Return the ``word_count`` value recorded in a chat file, or 0.

    The first line of the form ``word_count: <digits>`` is used. 0 is
    returned when the file cannot be read or contains no such line.
    """
    try:
        content = file_path.read_text(encoding="utf-8")
    except (OSError, ValueError):
        # Unreadable or undecodable file: best effort, treat as no count.
        return 0

    # Tolerate trailing spaces/tabs after the number (they used to reset the
    # count to 0) and keep the key/value match on a single line.
    match = re.search(r"^word_count:[ \t]*(\d+)[ \t]*$", content, re.MULTILINE)
    return int(match.group(1)) if match else 0
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _split_frontmatter(content: str) -> tuple[dict, str]:
    """Split a markdown document into flat frontmatter fields and body.

    The frontmatter is the block delimited by ``---`` lines at the very start
    of ``content``. Only top-level ``key: value`` lines are collected, with
    values kept as raw stripped strings; this is not a YAML parser.

    Returns ``(fields, body)`` with leading newlines removed from the body,
    or ``({}, content)`` when no frontmatter block is found.
    """
    # The closing delimiter may be the last thing in the file, so accept end
    # of string as well as a newline after it.
    match = re.match(r"---\s*\n(.*?)\n---\s*(?:\n|\Z)", content, re.DOTALL)
    if match is None:
        return {}, content

    fields = {}
    for line in match.group(1).split("\n"):
        # Indented lines and list items belong to a nested value (for example
        # the URLs under "sources:"); their colons are not key separators.
        if not line or line[0] in " \t-":
            continue
        key, sep, value = line.partition(":")
        if sep:
            fields[key.strip()] = value.strip()

    body = content[match.end():].lstrip("\n")
    return fields, body
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _build_frontmatter(
    conv_id: str,
    conv_title: str,
    created_at: str,
    updated_at: str,
    original_create_time: float,
    word_count: int,
    urls: list[str],
) -> str:
    """Render the frontmatter block for a chat markdown file.

    The result starts and ends with a ``---`` line and has no trailing
    newline. ``original_create_time`` is rendered through
    ``format_timestamp``; every entry of ``urls`` becomes one list item
    under ``sources:``.
    """
    from cobrain.parsers.chatgpt import format_timestamp

    original_created = format_timestamp(original_create_time)
    header = [
        "---",
        f"id: {conv_id}",
        f"title: {conv_title}",
        f"created_at: {created_at}",
        f"updated_at: {updated_at}",
        f"original_conversation_created_at: {original_created}",
        f"word_count: {word_count}",
        "sources:",
    ]
    source_items = [f" - {url}" for url in urls]
    return "\n".join([*header, *source_items, "---"])
|