cobrain 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. cobrain-0.1.0/LICENSE +21 -0
  2. cobrain-0.1.0/PKG-INFO +98 -0
  3. cobrain-0.1.0/README.md +85 -0
  4. cobrain-0.1.0/pyproject.toml +29 -0
  5. cobrain-0.1.0/setup.cfg +4 -0
  6. cobrain-0.1.0/src/cobrain/__init__.py +1 -0
  7. cobrain-0.1.0/src/cobrain/cli/__init__.py +167 -0
  8. cobrain-0.1.0/src/cobrain/cli/ingest/__init__.py +4 -0
  9. cobrain-0.1.0/src/cobrain/cli/ingest/chatgpt.py +260 -0
  10. cobrain-0.1.0/src/cobrain/cli/ingest/x/__init__.py +8 -0
  11. cobrain-0.1.0/src/cobrain/cli/ingest/x/cmd.py +69 -0
  12. cobrain-0.1.0/src/cobrain/cli/ingest/x/ingest.py +224 -0
  13. cobrain-0.1.0/src/cobrain/cli/ingest/x/parse.py +37 -0
  14. cobrain-0.1.0/src/cobrain/cli/show.py +26 -0
  15. cobrain-0.1.0/src/cobrain/cli/sources.py +117 -0
  16. cobrain-0.1.0/src/cobrain/cli/sync.py +16 -0
  17. cobrain-0.1.0/src/cobrain/cli/utils.py +86 -0
  18. cobrain-0.1.0/src/cobrain/cli/vault.py +300 -0
  19. cobrain-0.1.0/src/cobrain/config.py +121 -0
  20. cobrain-0.1.0/src/cobrain/directories.py +84 -0
  21. cobrain-0.1.0/src/cobrain/graph/__init__.py +33 -0
  22. cobrain-0.1.0/src/cobrain/graph/backup.py +54 -0
  23. cobrain-0.1.0/src/cobrain/graph/builder.py +197 -0
  24. cobrain-0.1.0/src/cobrain/graph/category.py +73 -0
  25. cobrain-0.1.0/src/cobrain/graph/diffs.py +191 -0
  26. cobrain-0.1.0/src/cobrain/graph/validation.py +26 -0
  27. cobrain-0.1.0/src/cobrain/html/html.py +91 -0
  28. cobrain-0.1.0/src/cobrain/models.py +44 -0
  29. cobrain-0.1.0/src/cobrain/parsers/chatgpt/__init__.py +68 -0
  30. cobrain-0.1.0/src/cobrain/parsers/chatgpt/extract.py +143 -0
  31. cobrain-0.1.0/src/cobrain/parsers/chatgpt/format.py +183 -0
  32. cobrain-0.1.0/src/cobrain/parsers/chatgpt/load.py +35 -0
  33. cobrain-0.1.0/src/cobrain/parsers/chatgpt/lookups.py +119 -0
  34. cobrain-0.1.0/src/cobrain/parsers/chatgpt/models.py +53 -0
  35. cobrain-0.1.0/src/cobrain/parsers/chatgpt/transform.py +84 -0
  36. cobrain-0.1.0/src/cobrain/parsers/chatgpt/traverse.py +200 -0
  37. cobrain-0.1.0/src/cobrain/parsers/chatgpt/utils.py +82 -0
  38. cobrain-0.1.0/src/cobrain/parsers/x/__init__.py +58 -0
  39. cobrain-0.1.0/src/cobrain/parsers/x/auth.py +123 -0
  40. cobrain-0.1.0/src/cobrain/parsers/x/client.py +161 -0
  41. cobrain-0.1.0/src/cobrain/parsers/x/helpers.py +57 -0
  42. cobrain-0.1.0/src/cobrain/parsers/x/merge.py +141 -0
  43. cobrain-0.1.0/src/cobrain/parsers/x/models.py +109 -0
  44. cobrain-0.1.0/src/cobrain/parsers/x/parse.py +131 -0
  45. cobrain-0.1.0/src/cobrain/parsers/x/storage.py +93 -0
  46. cobrain-0.1.0/src/cobrain/parsers/x/transform.py +66 -0
  47. cobrain-0.1.0/src/cobrain/parsers/x/tree.py +124 -0
  48. cobrain-0.1.0/src/cobrain/templates.py +71 -0
  49. cobrain-0.1.0/src/cobrain/topics/__init__.py +27 -0
  50. cobrain-0.1.0/src/cobrain/topics/frontmatter.py +13 -0
  51. cobrain-0.1.0/src/cobrain/topics/topic.py +211 -0
  52. cobrain-0.1.0/src/cobrain/yaml_utils.py +52 -0
  53. cobrain-0.1.0/src/cobrain.egg-info/PKG-INFO +98 -0
  54. cobrain-0.1.0/src/cobrain.egg-info/SOURCES.txt +61 -0
  55. cobrain-0.1.0/src/cobrain.egg-info/dependency_links.txt +1 -0
  56. cobrain-0.1.0/src/cobrain.egg-info/entry_points.txt +3 -0
  57. cobrain-0.1.0/src/cobrain.egg-info/requires.txt +4 -0
  58. cobrain-0.1.0/src/cobrain.egg-info/top_level.txt +1 -0
  59. cobrain-0.1.0/tests/test_chatgpt.py +600 -0
  60. cobrain-0.1.0/tests/test_diffs.py +170 -0
  61. cobrain-0.1.0/tests/test_html.py +72 -0
  62. cobrain-0.1.0/tests/test_validation.py +122 -0
  63. cobrain-0.1.0/tests/test_x.py +248 -0
cobrain-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Igor Akulov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
cobrain-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.4
2
+ Name: cobrain
3
+ Version: 0.1.0
4
+ Summary: Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge locally on device. Use it to back up and organize your knowledge, help AI agents read your mind, or map and track your learning progress.
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Requires-Dist: build>=1.5.0
9
+ Requires-Dist: pyyaml>=6.0
10
+ Requires-Dist: twine>=6.2.0
11
+ Requires-Dist: xdk>=0.9.0
12
+ Dynamic: license-file
13
+
14
+ # Cobrain
15
+
16
+ ## Overview
17
+
18
+ Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge in a local vault: files (topics, sources) + graph (structured metadata).
19
+
20
+ - Vault locally stores and organizes knowledge, tracks learning, and creates persistent shared context between owner/user/agent for collaboration and decisions
21
+ - Agent manages vault autonomously
22
+ - Cobrain CLI performs deterministic tasks and helps AI agent perform vault-related tasks reliably and token-efficiently
23
+
24
+ ## Installation & Setup
25
+
26
+ Install agent skill `skills/cobrain-vault`. Agent does the rest.
27
+
28
+ Alternative:
29
+ ```bash
30
+ pip install cobrain
31
+ ```
32
+
33
+ ## Vault
34
+
35
+ ```
36
+ vault/
37
+ ├── vault.html # Visual graph with search/filters for human user
38
+ ├── topics/ # Topic files (.md), source of truth for CLI
39
+ ├── sources/
40
+ │ ├── chats/ # ChatGPT conversations (.md)
41
+ │ ├── x/ # X conversations (.yaml)
42
+ │ └── ... # Add more for other sources
43
+ └── .cobrain/ # App internals, read but never edit directly
44
+ ├── vault.yaml # Derived graph of topics, with metadata
45
+ ├── categories.yaml # Customizable topic category colors / titles
46
+ ├── backups/ # Rolling backups
47
+ ├── diffs/ # Diffs from backups
48
+ └── logs/ # Ingest logs
49
+ ├── chatgpt/
50
+ └── x/
51
+ ```
52
+
53
+ `vault/sources/`: raw content ingested from external systems (ChatGPT, X, user-provided documents, your own chat with user), users never read this.
54
+ `vault/topics/`: curated summaries users read, source of truth for everything.
55
+
56
+ ## CLI reference
57
+
58
+ ```bash
59
+ brn version # show version
60
+ brn vault --dir <path> # initialize new/existing vault, write config
61
+ brn sync [--warnings] # build graph from files + show warnings
62
+ brn show # build and open vault.html in browser
63
+ brn vault [--ids <ids>] [--minimal | --full | --full+] [--flow | --block] # get graph as YAML (select ids, topic metadata fields, YAML format)
64
+ brn vault --ids <ids> --set field=value... # update topic frontmatter + sync
65
+ brn vault --from <id> [--depth N] # subtree
66
+ brn vault --from <id> --to <id2> # shortest path (parent links only)
67
+ brn sources [--warnings] # view source stats + warnings
68
+ brn sources --ingest chatgpt --paths <path...> [--since <dt>] [--until <dt>] [--titles <titles>] # ingest ChatGPT conversations.json
69
+ brn sources --ingest x --ids <post_ids> # ingest X posts by ID/URL/xurl
70
+ brn sources --ingest x --own [--count <N> | --new | --since-id <id> --until-id <id>] # fetch own posts (default 10, count, all new until hit existing, or target range)
71
+ brn sources --ingest x --own --authorization-code <code> # first-time X auth
72
+ brn sources --ingest x --likes [--count <N> | --new] # ingest liked posts
73
+ brn sources --ingest x --bookmarks [--count <N> | --new] # ingest bookmarked posts
74
+ brn backup # copy vault.yaml + categories.yaml (up to 20)
75
+ ```
76
+
77
+ All `--ingest x` commands pull full conversation above the target post, and minimize API credits spent by always checking locally stored posts first.
78
+
79
+ ## Topics
80
+
81
+ ## Sources
82
+
83
+ X and ChatGPT: integrated, agent ingests with CLI command.
84
+ Other sources (webpage, file, chat): agent reads directly.
85
+
86
+ ### X
87
+
88
+ - Built with XDK (official X SDK), with their affordable [pay-per-use pricing](https://docs.x.com/x-api/getting-started/pricing) and 24h retrieved post caching to save your credits.
89
+ - Uses your own app's OAuth2.0 credentials.
90
+ - Ingest your bookmarks, likes, posts and replies, or any post by id/url. Official endpoint filters supported as flags.
91
+ - CLI automatically builds coherent conversations (one per file) from target post up to original (root) post. Locally stored posts are re-used whenever possible to save your credits. New posts from an existing conversation merge into the same file seamlessly.
92
+ - Output: `vault/sources/x/<conversation_author>_<conversation_id>.yaml`.
93
+
94
+ ### ChatGPT
95
+
96
+ - Request data export from your ChatGPT app and run ingest command on `conversations.json`.
97
+ - Filter by dates and title supported as flags.
98
+ - Output: `vault/sources/chats/<title>.md`.
@@ -0,0 +1,85 @@
1
+ # Cobrain
2
+
3
+ ## Overview
4
+
5
+ Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge in a local vault: files (topics, sources) + graph (structured metadata).
6
+
7
+ - Vault locally stores and organizes knowledge, tracks learning, and creates persistent shared context between owner/user/agent for collaboration and decisions
8
+ - Agent manages vault autonomously
9
+ - Cobrain CLI performs deterministic tasks and helps AI agent perform vault-related tasks reliably and token-efficiently
10
+
11
+ ## Installation & Setup
12
+
13
+ Install agent skill `skills/cobrain-vault`. Agent does the rest.
14
+
15
+ Alternative:
16
+ ```bash
17
+ pip install cobrain
18
+ ```
19
+
20
+ ## Vault
21
+
22
+ ```
23
+ vault/
24
+ ├── vault.html # Visual graph with search/filters for human user
25
+ ├── topics/ # Topic files (.md), source of truth for CLI
26
+ ├── sources/
27
+ │ ├── chats/ # ChatGPT conversations (.md)
28
+ │ ├── x/ # X conversations (.yaml)
29
+ │ └── ... # Add more for other sources
30
+ └── .cobrain/ # App internals, read but never edit directly
31
+ ├── vault.yaml # Derived graph of topics, with metadata
32
+ ├── categories.yaml # Customizable topic category colors / titles
33
+ ├── backups/ # Rolling backups
34
+ ├── diffs/ # Diffs from backups
35
+ └── logs/ # Ingest logs
36
+ ├── chatgpt/
37
+ └── x/
38
+ ```
39
+
40
+ `vault/sources/`: raw content ingested from external systems (ChatGPT, X, user-provided documents, your own chat with user), users never read this.
41
+ `vault/topics/`: curated summaries users read, source of truth for everything.
42
+
43
+ ## CLI reference
44
+
45
+ ```bash
46
+ brn version # show version
47
+ brn vault --dir <path> # initialize new/existing vault, write config
48
+ brn sync [--warnings] # build graph from files + show warnings
49
+ brn show # build and open vault.html in browser
50
+ brn vault [--ids <ids>] [--minimal | --full | --full+] [--flow | --block] # get graph as YAML (select ids, topic metadata fields, YAML format)
51
+ brn vault --ids <ids> --set field=value... # update topic frontmatter + sync
52
+ brn vault --from <id> [--depth N] # subtree
53
+ brn vault --from <id> --to <id2> # shortest path (parent links only)
54
+ brn sources [--warnings] # view source stats + warnings
55
+ brn sources --ingest chatgpt --paths <path...> [--since <dt>] [--until <dt>] [--titles <titles>] # ingest ChatGPT conversations.json
56
+ brn sources --ingest x --ids <post_ids> # ingest X posts by ID/URL/xurl
57
+ brn sources --ingest x --own [--count <N> | --new | --since-id <id> --until-id <id>] # fetch own posts (default 10, count, all new until hit existing, or target range)
58
+ brn sources --ingest x --own --authorization-code <code> # first-time X auth
59
+ brn sources --ingest x --likes [--count <N> | --new] # ingest liked posts
60
+ brn sources --ingest x --bookmarks [--count <N> | --new] # ingest bookmarked posts
61
+ brn backup # copy vault.yaml + categories.yaml (up to 20)
62
+ ```
63
+
64
+ All `--ingest x` commands pull full conversation above the target post, and minimize API credits spent by always checking locally stored posts first.
65
+
66
+ ## Topics
67
+
68
+ ## Sources
69
+
70
+ X and ChatGPT: integrated, agent ingests with CLI command.
71
+ Other sources (webpage, file, chat): agent reads directly.
72
+
73
+ ### X
74
+
75
+ - Built with XDK (official X SDK), with their affordable [pay-per-use pricing](https://docs.x.com/x-api/getting-started/pricing) and 24h retrieved post caching to save your credits.
76
+ - Uses your own app's OAuth2.0 credentials.
77
+ - Ingest your bookmarks, likes, posts and replies, or any post by id/url. Official endpoint filters supported as flags.
78
+ - CLI automatically builds coherent conversations (one per file) from target post up to original (root) post. Locally stored posts are re-used whenever possible to save your credits. New posts from an existing conversation merge into the same file seamlessly.
79
+ - Output: `vault/sources/x/<conversation_author>_<conversation_id>.yaml`.
80
+
81
+ ### ChatGPT
82
+
83
+ - Request data export from your ChatGPT app and run ingest command on `conversations.json`.
84
+ - Filter by dates and title supported as flags.
85
+ - Output: `vault/sources/chats/<title>.md`.
@@ -0,0 +1,29 @@
1
+ [project]
2
+ name = "cobrain"
3
+ version = "0.1.0"
4
+ description = "Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge locally on device. Use it to back up and organize your knowledge, help AI agents read your mind, or map and track your learning progress."
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "build>=1.5.0",
9
+ "pyyaml>=6.0",
10
+ "twine>=6.2.0",
11
+ "xdk>=0.9.0",
12
+ ]
13
+
14
+ [project.scripts]
15
+ cobrain = "cobrain.cli:main"
16
+ brn = "cobrain.cli:main"
17
+
18
+ [build-system]
19
+ requires = ["setuptools>=61.0"]
20
+ build-backend = "setuptools.build_meta"
21
+
22
+ [tool.ruff]
23
+ target-version = "py311"
24
+
25
+ [tool.pyright]
26
+ pythonVersion = "3.11"
27
+
28
+ [tool.deptry.per_rule_ignores]
29
+ DEP003 = ["cobrain"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,167 @@
1
+ import argparse
2
+ import signal
3
+ import sys
4
+
5
+ from cobrain import __version__
6
+
7
+ from .vault import cmd_backup, cmd_vault
8
+ from .show import cmd_show
9
+ from .sources import cmd_sources
10
+ from .sync import cmd_sync
11
+
12
+
13
def cmd_version(args: argparse.Namespace) -> None:
    """Print the package version string to stdout.

    ``args`` is not read; it is accepted so this function has the same
    signature as the other subcommand handlers dispatched via ``args.func``.
    """
    version_text = __version__
    print(version_text)
15
+
16
+
17
def main() -> None:
    """Entry point for the ``cobrain`` / ``brn`` console scripts.

    Installs SIGINT/SIGTERM handlers that print "Interrupted" and exit with
    status 130, builds the argparse tree (``version``, ``sync``, ``vault``,
    ``show``, ``backup``, ``sources``) and dispatches to the handler stored
    in ``func`` by ``set_defaults``. A ``RuntimeError`` raised by a handler is
    reported on stderr and turned into exit status 1.
    """
    from pathlib import Path

    def _signal_handler(signum, frame):
        print("\nInterrupted", file=sys.stderr)
        sys.exit(130)

    signal.signal(signal.SIGINT, _signal_handler)
    signal.signal(signal.SIGTERM, _signal_handler)

    # Decide the displayed program name from the script's basename only.
    # Testing the whole argv[0] path would report "brn" for the "cobrain"
    # script whenever any parent directory happens to contain "brn".
    script_name = Path(sys.argv[0]).name
    prog = "brn" if script_name.startswith("brn") else "cobrain"
    parser = argparse.ArgumentParser(
        prog=prog,
        description="Cobrain CLI helps AI agents gather, organize and visualize owner's knowledge locally on device. Use it to back up and organize your knowledge, help AI agents read your mind, or map and track your learning progress.",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # --- version ---
    version_parser = subparsers.add_parser("version", help="Show version")
    version_parser.set_defaults(func=cmd_version)

    # --- sync ---
    sync_parser = subparsers.add_parser("sync", help="Rebuild vault graph from files")
    sync_parser.add_argument("--warnings", action="store_true", help="Show warnings")
    sync_parser.set_defaults(func=cmd_sync)

    # --- vault ---
    vault_parser = subparsers.add_parser("vault", help="List or update topics")

    vault_parser.add_argument(
        "--dir",
        dest="vault_dir",
        help="Path to vault directory (initializes if not found)",
    )

    vault_parser.add_argument(
        "--set", nargs="+", help="Set metadata for --ids (also syncs)"
    )

    traversal_group = vault_parser.add_argument_group("Discovery")
    traversal_group.add_argument("--ids", help="Comma-separated topic IDs")
    traversal_group.add_argument("--from", dest="from_topic", help="Starting topic")
    traversal_group.add_argument("--depth", type=int, default=1, help="Traversal depth")
    traversal_group.add_argument("--to", dest="to_topic", help="Target topic for path")

    fields_group = vault_parser.add_argument_group("Fields")
    fields_group.add_argument(
        "--minimal",
        action="store_true",
        help="id, aliases, category, parent, related",
    )
    fields_group.add_argument(
        "--full",
        action="store_true",
        help="minimal + title, created_at, updated_at",
    )
    fields_group.add_argument(
        "--full+",
        dest="full_plus",
        action="store_true",
        help="full + sources, word_count",
    )

    format_group = vault_parser.add_argument_group("Format")
    # NOTE: store_true combined with default=True means args.flow is always
    # True; only --block carries information about the requested style.
    format_group.add_argument(
        "--flow",
        action="store_true",
        default=True,
        help="Flow style (default, compact)",
    )
    format_group.add_argument(
        "--block", action="store_true", help="Block style (human-readable)"
    )

    vault_parser.set_defaults(func=cmd_vault)

    # --- show / backup ---
    show_parser = subparsers.add_parser("show", help="Show vault in browser")
    show_parser.set_defaults(func=cmd_show)

    backup_parser = subparsers.add_parser("backup", help="Create rolling backup")
    backup_parser.set_defaults(func=cmd_backup)

    # --- sources ---
    sources_parser = subparsers.add_parser("sources", help="Manage sources")

    sources_parser.add_argument("--warnings", action="store_true", help="Show warnings")

    chatgpt_group = sources_parser.add_argument_group("ChatGPT")
    chatgpt_group.add_argument(
        "--ingest",
        choices=["chatgpt", "x"],
        help="Ingest source (e.g., chatgpt, x)",
    )
    chatgpt_group.add_argument(
        "--paths", nargs="+", help="Path(s) to conversations.json files"
    )
    chatgpt_group.add_argument(
        "--since", dest="since_datetime", help="Start datetime (ISO 8601, inclusive)"
    )
    chatgpt_group.add_argument(
        "--until", dest="until_datetime", help="End datetime (ISO 8601, inclusive)"
    )
    chatgpt_group.add_argument("--titles", help="Filter by titles (comma-separated)")

    x_endpoints_group = sources_parser.add_argument_group("X endpoints")
    x_endpoints_group.add_argument(
        "--ids", help="Comma-separated post IDs, URLs, or xurls"
    )
    x_endpoints_group.add_argument(
        "--own",
        action="store_true",
        help="Ingest own posts",
    )
    x_endpoints_group.add_argument(
        "--likes", action="store_true", help="Ingest liked posts"
    )
    x_endpoints_group.add_argument(
        "--bookmarks", action="store_true", help="Ingest bookmarked posts"
    )

    x_filters_group = sources_parser.add_argument_group("X filters")
    x_filters_group.add_argument(
        "--new",
        action="store_true",
        help="Loop until hitting existing posts",
    )
    x_filters_group.add_argument("--count", type=int, help="Total posts to fetch")
    x_filters_group.add_argument(
        "--since-id",
        dest="since_id",
        help="Oldest post ID (excl., --own only)",
    )
    x_filters_group.add_argument(
        "--until-id",
        dest="until_id",
        help="Newest post ID (excl., --own only)",
    )

    x_auth_group = sources_parser.add_argument_group("X auth")
    x_auth_group.add_argument(
        "--authorization-code",
        help="Authorization code from redirect URL",
    )

    sources_parser.set_defaults(func=cmd_sources)

    args = parser.parse_args()

    try:
        args.func(args)
    except RuntimeError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,4 @@
1
+ from cobrain.cli.ingest.chatgpt import cmd_ingest_chat
2
+ from cobrain.cli.ingest.x.cmd import cmd_ingest_x
3
+
4
+ __all__ = ["cmd_ingest_chat", "cmd_ingest_x"]
@@ -0,0 +1,260 @@
1
+ import argparse
2
+ import re
3
+ import sys
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
+ from cobrain.cli.utils import _parse_iso_datetime
8
+ from cobrain.directories import (
9
+ get_chats_dir,
10
+ get_chats_logs_dir,
11
+ get_chat_log_path,
12
+ )
13
+ from cobrain.yaml_utils import write_yaml
14
+
15
+
16
def cmd_ingest_chat(args: argparse.Namespace) -> None:
    """Ingest ChatGPT ``conversations.json`` exports into markdown chat files.

    Reads ``args.paths`` (export files), ``args.since_datetime`` /
    ``args.until_datetime`` (ISO 8601 bounds) and ``args.titles``
    (comma-separated title filter). For every conversation that passes the
    filters it either creates a new markdown file in the chats directory or
    appends the newly parsed messages to the file already holding that
    conversation, then records one log entry per written file and prints a
    created/updated summary.

    Exits with status 1 (after printing to stderr) when a path does not exist
    or a datetime bound cannot be parsed.
    """
    from datetime import timezone

    from cobrain.parsers.chatgpt import (
        load_conversations,
        filter_conversations,
        parse_conversation_expand,
        conversation_to_markdown,
        compute_word_count,
        get_last_message_id,
        get_output_filename,
        get_existing_file_for_conversation,
        message_to_markdown,
    )

    def _utc_now() -> datetime:
        # Naive UTC timestamp: same value and isoformat() output as the
        # deprecated datetime.utcnow(), without the DeprecationWarning.
        return datetime.now(timezone.utc).replace(tzinfo=None)

    paths = [Path(p).expanduser().resolve() for p in args.paths]
    for path in paths:
        if not path.exists():
            print(f"ERROR: File not found: {path}", file=sys.stderr)
            sys.exit(1)

    from_time = None
    till_time = None
    titles = None

    if args.since_datetime:
        try:
            from_time = _parse_iso_datetime(args.since_datetime).timestamp()
        except ValueError:
            print(
                f"ERROR: Invalid --since datetime: {args.since_datetime}",
                file=sys.stderr,
            )
            sys.exit(1)

    if args.until_datetime:
        try:
            till_time = _parse_iso_datetime(args.until_datetime).timestamp()
        except ValueError:
            print(
                f"ERROR: Invalid --until datetime: {args.until_datetime}",
                file=sys.stderr,
            )
            sys.exit(1)

    if args.titles:
        titles = [t.strip() for t in args.titles.split(",") if t.strip()]

    conversations = []
    for path in paths:
        conversations.extend(load_conversations(path))

    filtered = filter_conversations(conversations, from_time, till_time, titles)

    chats_dir = get_chats_dir()
    logs_dir = get_chats_logs_dir()
    chats_dir.mkdir(parents=True, exist_ok=True)
    logs_dir.mkdir(parents=True, exist_ok=True)

    created_files = []
    updated_files = []

    log_entries = []
    ingest_timestamp = _utc_now().strftime("%Y%m%dT%H%M%S")

    for conv_data in filtered:
        conv_id = conv_data.get("conversation_id") or conv_data.get("id", "")

        existing_file = get_existing_file_for_conversation(chats_dir, conv_id)
        existing_last_msg_id = None
        created_at = _utc_now().isoformat()
        existing_body = ""
        existing_title = None

        if existing_file:
            existing_last_msg_id = _find_last_message_id_from_log(conv_id)
            existing_content = existing_file.read_text(encoding="utf-8")
            fm, existing_body = _split_frontmatter(existing_content)
            created_at = _get_created_at_from_file(existing_file)
            existing_title = fm.get("title") if fm else None

        conv = parse_conversation_expand(conv_data, existing_last_msg_id)

        if existing_file:
            # Write back to the file that already holds this conversation.
            # Deriving the path from the current title again would leave a
            # second file for the same conversation if the name differs.
            output_path = existing_file
            updated_files.append(str(existing_file))
        elif conv.messages:
            output_filename = get_output_filename(conv)
            output_path = chats_dir / output_filename
            created_files.append(output_filename)
        else:
            # Nothing on disk and nothing to write: skip this conversation.
            continue

        updated_at = _utc_now().isoformat()
        last_msg_id = (
            get_last_message_id(conv) if conv.messages else (existing_last_msg_id or "")
        )

        if existing_file and existing_body.strip():
            # Append mode: keep the stored body and add only the new messages.
            existing_word_count = _get_word_count_from_file(existing_file)
            word_count = existing_word_count + compute_word_count(conv)

            body_content = existing_body.rstrip("\n")
            if conv.messages:
                parts = [body_content]
                if body_content:
                    parts.append("\n\n***\n\n")
                for msg in conv.messages:
                    parts.append(message_to_markdown(msg))
                    parts.append("\n\n***\n\n")
                body_content = "".join(parts)

            new_fm = _build_frontmatter(
                conv_id=conv_id,
                conv_title=existing_title or conv.title,
                created_at=created_at,
                updated_at=updated_at,
                original_create_time=conv.create_time,
                word_count=word_count,
                urls=conv.sources,
            )
            content = new_fm + "\n" + body_content.rstrip()
        else:
            # Fresh file (or an existing file with an empty body): render the
            # whole conversation.
            word_count = compute_word_count(conv)
            title = existing_title or conv.title
            content = conversation_to_markdown(
                conv,
                source_path=None,
                word_count=word_count,
                created_at=created_at,
                updated_at=updated_at,
                title=title,
            )

        output_path.write_text(content, encoding="utf-8")

        log_entries.append(
            {
                "conversation_id": conv_id,
                "output_file": str(output_path),
                "last_message_id": last_msg_id,
                "ingested_at": ingest_timestamp,
                "filters": {
                    "since": args.since_datetime,
                    "until": args.until_datetime,
                    "titles": args.titles,
                },
            }
        )

    if log_entries:
        log_path = get_chat_log_path(ingest_timestamp)
        write_yaml(log_path, log_entries)

    total_created = len(created_files)
    total_updated = len(updated_files)
    print(f"Ingest complete: {total_created} created, {total_updated} updated")
175
+
176
+
177
def _find_last_message_id_from_log(conv_id: str) -> str | None:
    """Return the logged ``last_message_id`` for ``conv_id``, or ``None``.

    Scans the ``ingest_*.yaml`` files in the chat logs directory and returns
    the ``last_message_id`` of the first list entry whose ``conversation_id``
    equals ``conv_id``. Returns ``None`` when the directory does not exist or
    no entry matches.
    """
    from cobrain.yaml_utils import read_yaml

    logs_dir = get_chats_logs_dir()
    if not logs_dir.exists():
        return None
    # glob() yields files in arbitrary order, so an older log could shadow a
    # newer one. Visit names in descending order instead; log names presumably
    # embed the %Y%m%dT%H%M%S ingest timestamp, which sorts chronologically
    # (NOTE(review): confirm against get_chat_log_path).
    for log_file in sorted(logs_dir.glob("ingest_*.yaml"), reverse=True):
        log_data = read_yaml(log_file)
        if not log_data or not isinstance(log_data, list):
            continue
        for entry in log_data:
            if isinstance(entry, dict) and entry.get("conversation_id") == conv_id:
                return entry.get("last_message_id")
    return None
194
+
195
+
196
def _get_created_at_from_file(file_path: Path) -> str:
    """Return the ``created_at`` value stored in ``file_path``.

    Looks for the first line of the form ``created_at: <value>`` and returns
    the stripped value. When the file cannot be read or decoded, or has no
    non-empty ``created_at`` line, the current UTC time is returned as a naive
    ISO 8601 string instead.
    """
    from datetime import timezone

    try:
        content = file_path.read_text(encoding="utf-8")
    except (OSError, ValueError):
        # Best effort: a missing, unreadable or non-UTF-8 file (decode errors
        # are ValueError subclasses) falls through to the "now" default.
        content = ""

    # [ \t]* rather than \s* so an empty value cannot spill over the newline
    # and capture the following frontmatter line.
    match = re.search(r"^created_at:[ \t]*(\S.*)$", content, re.MULTILINE)
    if match:
        return match.group(1).strip()
    # Same naive-UTC output as the deprecated datetime.utcnow().isoformat().
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
206
+
207
+
208
def _get_word_count_from_file(file_path: Path) -> int:
    """Return the ``word_count`` value stored in ``file_path``.

    Looks for the first line of the form ``word_count: <digits>`` and returns
    it as an int. Returns 0 when the file cannot be read or decoded, when no
    such line exists, or when the digits cannot be converted.
    """
    try:
        content = file_path.read_text(encoding="utf-8")
    except (OSError, ValueError):
        # Best effort: missing, unreadable or non-UTF-8 file counts as 0.
        return 0

    # [ \t]* keeps the match on one line and tolerates trailing spaces.
    match = re.search(r"^word_count:[ \t]*(\d+)[ \t]*$", content, re.MULTILINE)
    if match is None:
        return 0
    try:
        return int(match.group(1))
    except ValueError:
        # int() rejects absurdly long digit strings; treat as unknown.
        return 0
218
+
219
+
220
def _split_frontmatter(content: str) -> tuple[dict, str]:
    """Split a markdown document into its frontmatter fields and its body.

    The frontmatter is the block between a leading ``---`` line and the next
    ``---`` line. Each top-level ``key: value`` line becomes one string entry
    in the returned dict (split on the first colon, both sides stripped).
    The body is everything after the closing ``---`` with leading newlines
    removed.

    Returns ``({}, content)`` unchanged when ``content`` does not start with
    a frontmatter block.
    """
    # Without re.MULTILINE, "^" anchors at the very start of the string only.
    match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL)
    if match is None:
        return {}, content

    fm: dict[str, str] = {}
    for line in match.group(1).split("\n"):
        # Skip blank lines, comments, and nested or list lines. A source URL
        # item such as " - https://example.com" contains a colon and would
        # otherwise be stored under the bogus key "- https".
        if not line or line[0] in " \t-#":
            continue
        key, sep, value = line.partition(":")
        if sep:
            fm[key.strip()] = value.strip()

    body = content[match.end():].lstrip("\n")
    return fm, body
234
+
235
+
236
def _build_frontmatter(
    conv_id: str,
    conv_title: str,
    created_at: str,
    updated_at: str,
    original_create_time: float,
    word_count: int,
    urls: list[str],
) -> str:
    """Render the frontmatter block for a chat markdown file.

    Produces ``---``-delimited text with, in order: ``id``, ``title``,
    ``created_at``, ``updated_at``, ``original_conversation_created_at``
    (``original_create_time`` passed through ``format_timestamp``),
    ``word_count`` and a ``sources:`` list with one item per entry of
    ``urls``. Values are interpolated as-is, without quoting or escaping.
    The result has no trailing newline.
    """
    from cobrain.parsers.chatgpt import format_timestamp

    original_created = format_timestamp(original_create_time)
    header = [
        "---",
        f"id: {conv_id}",
        f"title: {conv_title}",
        f"created_at: {created_at}",
        f"updated_at: {updated_at}",
        f"original_conversation_created_at: {original_created}",
        f"word_count: {word_count}",
        "sources:",
    ]
    source_items = [f" - {url}" for url in urls]
    return "\n".join([*header, *source_items, "---"])
@@ -0,0 +1,8 @@
1
+ from cobrain.cli.ingest.x.cmd import cmd_ingest_x
2
+ from cobrain.cli.ingest.x.parse import _parse_post_args, _extract_post_id
3
+
4
+ __all__ = [
5
+ "cmd_ingest_x",
6
+ "_parse_post_args",
7
+ "_extract_post_id",
8
+ ]