agent-tome 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/CONTRIBUTING.md +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +81 -0
- data/db/migrate/20250601000001_create_initial_schema.rb +85 -0
- data/exe/agent-tome +6 -0
- data/lib/agent/tome/cli.rb +178 -0
- data/lib/agent/tome/commands/addend.rb +158 -0
- data/lib/agent/tome/commands/consolidate.rb +90 -0
- data/lib/agent/tome/commands/create.rb +168 -0
- data/lib/agent/tome/commands/fetch.rb +69 -0
- data/lib/agent/tome/commands/keywords_list.rb +22 -0
- data/lib/agent/tome/commands/related.rb +80 -0
- data/lib/agent/tome/commands/search.rb +62 -0
- data/lib/agent/tome/commands/source_search.rb +60 -0
- data/lib/agent/tome/config.rb +52 -0
- data/lib/agent/tome/database.rb +77 -0
- data/lib/agent/tome/global_id.rb +16 -0
- data/lib/agent/tome/models/application_record.rb +8 -0
- data/lib/agent/tome/models/article.rb +36 -0
- data/lib/agent/tome/models/article_keyword.rb +10 -0
- data/lib/agent/tome/models/article_reference.rb +20 -0
- data/lib/agent/tome/models/consolidation_link.rb +8 -0
- data/lib/agent/tome/models/entry.rb +23 -0
- data/lib/agent/tome/models/entry_file_source.rb +10 -0
- data/lib/agent/tome/models/entry_web_source.rb +10 -0
- data/lib/agent/tome/models/file_source.rb +23 -0
- data/lib/agent/tome/models/keyword.rb +10 -0
- data/lib/agent/tome/models/web_source.rb +21 -0
- data/lib/agent/tome/url_normalizer.rb +33 -0
- data/lib/agent/tome/version.rb +7 -0
- data/lib/agent/tome.rb +46 -0
- data/lib/agent.rb +1 -0
- metadata +120 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 5ea76c93f40a8d4e0a643df1ed73eef19954938d1e2b28977564a82a58284c93
|
|
4
|
+
data.tar.gz: 801d26d865bbe49e1975dd4f8039d008c37de5a12822850a8e32041c8e912a99
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 81fc0b5eedfc6126e23d553abbf8a2359c5e98a118bbba065b4c847be56eb9303d6b3f4885ee2649d11ef02f2c1fae6370f78c205e4ecd884bdbfb1ecb79a27e
|
|
7
|
+
data.tar.gz: 62c870a453dde2d2d06b8e26b2361267f211691a3962a6e477a30e96cec749a69f438f2f4dbd44fe5cdd3e3bf6e0ad4c8df0a2b46449c66c3dc20bbc2489d652
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
## [Unreleased]
|
|
2
|
+
|
|
3
|
+
## [1.0.0] - 2026-04-03
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- **Article management**: Create articles with body, description, keywords, sources, and related article references. Add content addenda to existing articles.
|
|
8
|
+
- **Search**: Keyword-based article search with `--match any` (default) and `--match all` modes, results ordered by keyword relevance, capped at 1000.
|
|
9
|
+
- **Fetch**: Retrieve full article content including all entries, sources, and consolidation history.
|
|
10
|
+
- **Related articles**: Discover related articles via shared keywords, article references (both directions), and consolidation links (both directions).
|
|
11
|
+
- **Consolidation**: Merge all addenda into a single consolidated entry, preserving keywords, sources, and original article IDs for continued lookup.
|
|
12
|
+
- **Keyword discovery**: List keywords matching a prefix/substring for vocabulary exploration.
|
|
13
|
+
- **Source search**: Find articles by web URL or file path, with optional `--system` flag for scoped file path lookups.
|
|
14
|
+
- **Append-only SQLite store**: WAL mode and busy timeout for concurrent access. Automatic migration on first run.
|
|
15
|
+
- **Opaque identifiers**: All user-facing entities use randomly generated 7-character base58 global IDs. Internal integer IDs are never exposed.
|
|
16
|
+
- **Input validation**: Structured JSON error responses for missing fields, invalid URLs, blank bodies, oversized descriptions (350 char limit), self-references, empty stdin, and invalid JSON.
|
|
17
|
+
- **Data integrity**: Keyword singularisation and downcasing, web URL normalisation (strips tracking params), source deduplication, duplicate keyword and reference handling.
|
|
18
|
+
- **CLI**: `agent-tome` executable with JSON stdin/stdout interface, exit code 0 on success and non-zero on error.
|
|
19
|
+
- **Claude Code skills**: Companion `tome-lookup` and `tome-capture` skills for AI agent integration.
|
|
20
|
+
|
|
21
|
+
[Unreleased]: https://github.com/beatmadsen/agent-tome/compare/v1.0.0...HEAD
|
|
22
|
+
[1.0.0]: https://github.com/beatmadsen/agent-tome/releases/tag/v1.0.0
|
data/CONTRIBUTING.md
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Erik T. Madsen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Agent Tome
|
|
2
|
+
|
|
3
|
+
Build a personal encyclopedia of accumulated knowledge, designed for AI agents.
|
|
4
|
+
|
|
5
|
+
AI agents are brilliant researchers, and terrible at remembering what they found. Every new session starts from scratch: the same APIs re-discovered, the same docs re-read. Agent Tome gives your agent a place to store what it learns, so you can build the habits and tooling to stop repeating that work.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
gem install agent-tome
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
On first run, a config directory is created at `~/.agent-tome/` with a SQLite database at `~/.agent-tome/tome.db`. To use a different database location, edit `~/.agent-tome/config.yml` and change `db_path`.
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Your agent just researched Ruby GC internals. Capture what it learned:
|
|
19
|
+
echo '{"description": "Ruby GC internals", "body": "Ruby uses a generational mark-and-sweep garbage collector.", "keywords": ["ruby", "garbage-collection"]}' | agent-tome create
|
|
20
|
+
# => {"global_id": "3xK9mWp", ...}
|
|
21
|
+
|
|
22
|
+
# Two weeks later, a different session hits the same topic.
|
|
23
|
+
# Instead of re-reading the docs, your agent checks the tome:
|
|
24
|
+
agent-tome search ruby gc
|
|
25
|
+
|
|
26
|
+
# The article exists. The agent adds what it learned today:
|
|
27
|
+
echo '{"body": "GC compaction was added in Ruby 2.7 via GC.compact."}' | agent-tome addend 3xK9mWp
|
|
28
|
+
|
|
29
|
+
# Fetch the full article with all entries
|
|
30
|
+
agent-tome fetch 3xK9mWp
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Knowledge Evolution
|
|
34
|
+
|
|
35
|
+
Articles grow over time as your agent adds findings from different sessions:
|
|
36
|
+
|
|
37
|
+
Session 1: "Ruby uses mark-and-sweep GC"
|
|
38
|
+
Session 3: "GC compaction added in Ruby 2.7"
|
|
39
|
+
Session 7: "Tuning GC with RUBY_GC_HEAP_INIT_SLOTS"
|
|
40
|
+
|
|
41
|
+
Eventually, the article has redundant or overlapping entries. `consolidate` lets your agent (or you) synthesize them into a single authoritative entry:
|
|
42
|
+
|
|
43
|
+
"Ruby uses generational mark-and-sweep GC with optional compaction
|
|
44
|
+
(since 2.7). Key tuning env vars: RUBY_GC_HEAP_INIT_SLOTS, ..."
|
|
45
|
+
|
|
46
|
+
The original entries are preserved. The article ID still works. Keywords and sources are carried forward. This is how a tome stays useful over months instead of becoming a pile of notes.
|
|
47
|
+
|
|
48
|
+
## Commands
|
|
49
|
+
|
|
50
|
+
| Command | Description |
|
|
51
|
+
|---------|-------------|
|
|
52
|
+
| `agent-tome create` | Create a new article with its first entry (JSON via stdin) |
|
|
53
|
+
| `agent-tome addend <id>` | Add an addendum to an existing article (JSON via stdin) |
|
|
54
|
+
| `agent-tome search <keywords...>` | Search articles by keywords (`--match any` or `--match all`) |
|
|
55
|
+
| `agent-tome fetch <id>` | Retrieve full article content with all entries and sources |
|
|
56
|
+
| `agent-tome related <id>` | Find articles related through shared keywords or references |
|
|
57
|
+
| `agent-tome consolidate <id>` | Merge all addenda into a single consolidated article (JSON via stdin) |
|
|
58
|
+
| `agent-tome keywords <prefix>` | List keywords matching a prefix for vocabulary discovery |
|
|
59
|
+
| `agent-tome source-search <url-or-path>` | Find articles referencing a given source |
|
|
60
|
+
|
|
61
|
+
## Agent Integration
|
|
62
|
+
|
|
63
|
+
Agent Tome works with any tool that can call a CLI. JSON in, JSON out, exit code 0 on success:
|
|
64
|
+
|
|
65
|
+
- **Claude Code**: Use the companion [tome-lookup](https://github.com/beatmadsen/claude-skills/tree/main/skills/tome-lookup) and [tome-capture](https://github.com/beatmadsen/claude-skills/tree/main/skills/tome-capture) skills from the [claude-skills](https://github.com/beatmadsen/claude-skills) repository
|
|
66
|
+
- **Cursor / Windsurf**: Call `agent-tome` as a custom tool or shell command
|
|
67
|
+
- **MCP servers**: Wrap the CLI as a tool definition
|
|
68
|
+
- **Any agent framework**: If it can `exec` a process and read stdout, it works
|
|
69
|
+
|
|
70
|
+
No SDK, no API keys, no server. The CLI is the interface.
|
|
71
|
+
|
|
72
|
+
## Design Principles
|
|
73
|
+
|
|
74
|
+
- **Append-only**: Your agent can never corrupt or lose knowledge. There are no updates or deletions. When information evolves, `consolidate` synthesizes a clean entry while keeping the full history.
|
|
75
|
+
- **Agent-first**: JSON in, JSON out. Simple commands with minimal flags. Every response is machine-parseable, every error is structured.
|
|
76
|
+
- **Keyword-based discovery**: Retrieval is deterministic. You always know exactly why a result came back and what keywords will find it again. No embeddings to configure, no similarity thresholds to tune.
|
|
77
|
+
- **Opaque identifiers**: All user-facing entities use randomly generated 7-character base58 global IDs. Internal details never leak into your agent's context.
|
|
78
|
+
|
|
79
|
+
## License
|
|
80
|
+
|
|
81
|
+
MIT
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Creates the initial tome schema: articles and their entries, keywords,
# web/file sources, the join tables between them, and the two
# article-to-article link tables (references and consolidation links).
#
# Every table carries a non-null +created_at+; no +updated_at+ column is
# declared anywhere in this migration.
class CreateInitialSchema < ActiveRecord::Migration[7.1]
  # Builds the tables in dependency order: referenced tables first, then the
  # tables that hold foreign keys to them.
  def change
    create_articles
    create_entries
    create_keywords
    create_article_keywords
    create_web_sources
    create_file_sources
    create_entry_web_sources
    create_entry_file_sources
    create_article_references
    create_consolidation_links
  end

  private

  # Articles are addressed by a unique 7-character global_id.
  def create_articles
    create_table :articles do |t|
      t.string :global_id, limit: 7, null: false
      t.text :description, null: false
      t.datetime :created_at, null: false
    end
    add_index :articles, :global_id, unique: true
  end

  # Entries belong to an article; body is nullable.
  def create_entries
    create_table :entries do |t|
      t.string :global_id, limit: 7, null: false
      t.references :article, null: false, foreign_key: true
      t.text :body
      t.datetime :created_at, null: false
    end
    add_index :entries, :global_id, unique: true
  end

  # Keyword terms are unique across the table.
  def create_keywords
    create_table :keywords do |t|
      t.string :term, null: false
      t.datetime :created_at, null: false
    end
    add_index :keywords, :term, unique: true
  end

  # Join table between articles and keywords; each pair may appear once.
  def create_article_keywords
    create_table :article_keywords do |t|
      t.references :article, null: false, foreign_key: true
      t.references :keyword, null: false, foreign_key: true
      t.datetime :created_at, null: false
    end
    add_index :article_keywords, %i[article_id keyword_id], unique: true
  end

  # Web sources are unique per URL; title and fetched_at are optional.
  def create_web_sources
    create_table :web_sources do |t|
      t.string :global_id, limit: 7, null: false
      t.text :url, null: false
      t.string :title
      t.datetime :fetched_at
      t.datetime :created_at, null: false
    end
    add_index :web_sources, :global_id, unique: true
    add_index :web_sources, :url, unique: true
  end

  # File sources are unique per (path, system_name) pair.
  def create_file_sources
    create_table :file_sources do |t|
      t.string :global_id, limit: 7, null: false
      t.text :path, null: false
      t.string :system_name, null: false
      t.datetime :created_at, null: false
    end
    add_index :file_sources, :global_id, unique: true
    add_index :file_sources, %i[path system_name], unique: true
  end

  # Join table between entries and web sources; each pair may appear once.
  def create_entry_web_sources
    create_table :entry_web_sources do |t|
      t.references :entry, null: false, foreign_key: true
      t.references :web_source, null: false, foreign_key: true
      t.datetime :created_at, null: false
    end
    add_index :entry_web_sources, %i[entry_id web_source_id], unique: true
  end

  # Join table between entries and file sources; each pair may appear once.
  def create_entry_file_sources
    create_table :entry_file_sources do |t|
      t.references :entry, null: false, foreign_key: true
      t.references :file_source, null: false, foreign_key: true
      t.datetime :created_at, null: false
    end
    add_index :entry_file_sources, %i[entry_id file_source_id], unique: true
  end

  # Directed article-to-article references. Both columns point at articles,
  # and a given (source, target) pair may appear once.
  def create_article_references
    create_table :article_references do |t|
      t.bigint :source_article_id, null: false
      t.bigint :target_article_id, null: false
      t.datetime :created_at, null: false
    end
    add_index :article_references, :source_article_id
    add_index :article_references, :target_article_id
    add_index :article_references, %i[source_article_id target_article_id], unique: true, name: "idx_article_refs_unique"
    add_foreign_key :article_references, :articles, column: :source_article_id
    add_foreign_key :article_references, :articles, column: :target_article_id
  end

  # Links a consolidated (new) article to the article it replaced (old).
  def create_consolidation_links
    create_table :consolidation_links do |t|
      t.bigint :new_article_id, null: false
      t.bigint :old_article_id, null: false
      t.datetime :created_at, null: false
    end
    add_index :consolidation_links, :new_article_id
    add_index :consolidation_links, :old_article_id
    add_foreign_key :consolidation_links, :articles, column: :new_article_id
    add_foreign_key :consolidation_links, :articles, column: :old_article_id
  end
end
|
data/exe/agent-tome
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
require "json"
|
|
2
|
+
require "optparse"
|
|
3
|
+
|
|
4
|
+
module Agent
  module Tome
    # Command-line front end for agent-tome.
    #
    # The first argument selects a command; the remaining arguments and (for
    # some commands) a JSON document on stdin supply its input. Results and
    # errors are both written to stdout as JSON. The process exits 0 on
    # success and 1 on any error.
    class CLI
      # Builds a CLI for +argv+ and runs it.
      def self.run(argv)
        new(argv).run
      end

      # @param argv [Array<String>] raw command-line arguments. The array is
      #   copied because parsing consumes it with +shift+.
      def initialize(argv)
        @argv = argv.dup
      end

      # Handles --help/--version, loads the config, connects the database and
      # dispatches the command. Never returns normally: every path ends in
      # +exit+.
      def run
        command = @argv.shift

        if command.nil? || command == "--help" || command == "-h"
          print_usage
          exit 0
        end

        if command == "--version" || command == "version"
          puts VERSION
          exit 0
        end

        config = Config.new
        config.load!
        Database.connect!(config.db_path)

        dispatch(command)
      rescue ConfigError, DatabaseError, ValidationError, NotFoundError, StandardError => e
        # Every failure is reported the same way: a JSON error object on
        # stdout and exit status 1. SystemExit raised by +exit+ is not a
        # StandardError and therefore passes through untouched.
        output_error(e.message)
        exit 1
      end

      private

      # Runs +command+, prints its result as JSON and exits 0. An unknown
      # command prints a JSON error and exits 1.
      def dispatch(command)
        result =
          case command
          when "create"
            input = read_stdin_json
            Commands::Create.new.call(input)
          when "addend"
            article_id = shift_required_id!("article_global_id")
            input = read_stdin_json
            Commands::Addend.new(article_global_id: article_id).call(input)
          when "search"
            keywords, match = parse_search_args
            Commands::Search.new(keywords: keywords, match: match).call
          when "fetch"
            global_id = shift_required_id!("global_id")
            Commands::Fetch.new(global_id: global_id).call
          when "consolidate"
            global_id = shift_required_id!("global_id")
            input = read_stdin_json
            Commands::Consolidate.new(global_id: global_id).call(input)
          when "related"
            global_id = shift_required_id!("global_id")
            Commands::Related.new(global_id: global_id).call
          when "keywords"
            prefix = @argv.shift
            Commands::KeywordsList.new(prefix: prefix).call
          when "source-search"
            source, system = parse_source_search_args
            Commands::SourceSearch.new(source: source, system: system).call
          else
            output_error("Unknown command: #{command}")
            exit 1
          end

        puts JSON.generate(result)
        exit 0
      end

      # Takes the next positional argument as an id.
      #
      # @param label [String] field name used in the error message
      # @raise [ValidationError] when the argument is missing or blank
      def shift_required_id!(label)
        id = @argv.shift
        raise ValidationError, "#{label} is required" if id.nil? || id.strip.empty?

        id
      end

      # Reads all of stdin and parses it as a JSON object.
      #
      # @return [Hash] the parsed document
      # @raise [ValidationError] on empty input, malformed JSON, or JSON whose
      #   top-level value is not an object
      def read_stdin_json
        $stderr.puts "Reading JSON from stdin (Ctrl-D to finish)..." if $stdin.tty?

        raw = $stdin.read
        raise ValidationError, "Empty input" if raw.nil? || raw.strip.empty?

        parsed = JSON.parse(raw)
        # The commands index the input by string key; an array or scalar would
        # otherwise fail later with an unrelated NoMethodError/TypeError.
        raise ValidationError, "JSON input must be an object" unless parsed.is_a?(Hash)

        parsed
      rescue JSON::ParserError => e
        raise ValidationError, "Invalid JSON input: #{e.message}"
      end

      # Splits the remaining arguments into search keywords and the match mode.
      # "--match <mode>" may appear anywhere among the keywords; the mode
      # defaults to "any". Does not consume @argv.
      #
      # @return [Array(Array<String>, String)] keywords and match mode
      # @raise [ValidationError] when "--match" is the last argument
      def parse_search_args
        match = "any"
        keywords = []
        pending = @argv.dup

        until pending.empty?
          arg = pending.shift
          if arg == "--match"
            # A dangling flag must not be searched for as a keyword.
            raise ValidationError, "--match requires a value" if pending.empty?

            match = pending.shift
          else
            keywords << arg
          end
        end

        [keywords, match]
      end

      # Takes the source argument and an optional trailing "--system <name>".
      #
      # @return [Array(String, String)] source and system; either may be nil
      def parse_source_search_args
        source = @argv.shift
        system = nil

        if @argv[0] == "--system" && @argv[1]
          @argv.shift
          system = @argv.shift
        end

        [source, system]
      end

      # Writes the usage text to stderr.
      def print_usage
        $stderr.puts <<~USAGE
          Usage: agent-tome <command> [options]

          Commands:
          create Create a new article (JSON via stdin)
          addend <id> Add an addendum to an article (JSON via stdin)
          search <keywords...> Search articles by keywords
          fetch <id> Retrieve full article content
          related <id> Find related articles
          consolidate <id> Merge addenda into one article (JSON via stdin)
          keywords <prefix> List keywords matching a prefix
          source-search <url|path> Find articles by source reference

          Options:
          --help, -h Show this help
          --version Show version

          See https://github.com/beatmadsen/agent-tome for full documentation.
        USAGE
      end

      # Prints +message+ to stdout as {"error": message}.
      def output_error(message)
        puts JSON.generate({ "error" => message })
      end
    end
  end
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
require "active_support/core_ext/string/inflections"
|
|
2
|
+
|
|
3
|
+
module Agent
  module Tome
    module Commands
      # Adds an addendum to an existing article: a new entry plus any
      # keywords, web/file sources and related-article references supplied in
      # the input.
      class Addend
        # @param article_global_id [String] global id of the article to extend
        def initialize(article_global_id:)
          @article_global_id = article_global_id
        end

        # Validates +input+ and writes the addendum inside one transaction.
        #
        # @param input [Hash] parsed JSON with optional "body", "keywords",
        #   "web_sources", "file_sources" and "related_article_ids" keys
        # @return [Hash] "entry_global_id", "web_source_global_ids" and
        #   "file_source_global_ids"
        # @raise [NotFoundError] when the article does not exist
        # @raise [ValidationError] when the input is invalid
        def call(input)
          article = Article.find_by(global_id: @article_global_id)
          raise NotFoundError, "Article not found: #{@article_global_id}" unless article

          validate!(input)

          result = {}

          ActiveRecord::Base.transaction do
            entry = Entry.create!(
              article: article,
              # A blank body is stored as NULL rather than as an empty string.
              body: input["body"].to_s.strip.empty? ? nil : input["body"],
              created_at: Time.now
            )

            process_keywords!(article, input["keywords"] || [])
            web_source_ids = process_web_sources!(entry, input["web_sources"] || [])
            file_source_ids = process_file_sources!(entry, input["file_sources"] || [])
            process_related_articles!(article, input["related_article_ids"] || [])

            result = {
              "entry_global_id" => entry.global_id,
              "web_source_global_ids" => web_source_ids,
              "file_source_global_ids" => file_source_ids
            }
          end

          result
        end

        private

        # Checks that the input carries at least one substantive field and
        # that every supplied field is well formed.
        #
        # Sub-fields are validated whether or not a body is present, so a bad
        # URL or an unknown related id is reported as a ValidationError before
        # anything is written.
        def validate!(input)
          body = input["body"]
          keywords = input["keywords"] || []
          web_sources = input["web_sources"] || []
          file_sources = input["file_sources"] || []
          related = input["related_article_ids"] || []

          body_present = body && !body.to_s.strip.empty?

          unless body_present
            # An explicitly supplied but blank string is an error even when
            # other fields are present.
            raise ValidationError, "body cannot be blank" if body.is_a?(String) && body.strip.empty?

            if keywords.empty? && web_sources.empty? && file_sources.empty? && related.empty?
              raise ValidationError, "At least one field must be substantively present"
            end
          end

          validate_keywords!(keywords) if keywords.any?
          validate_web_sources!(web_sources) if web_sources.any?
          validate_file_sources!(file_sources) if file_sources.any?
          validate_related_ids!(input["related_article_ids"]) if input.key?("related_article_ids")
        end

        # Each keyword must be a string with some content besides whitespace
        # and hyphens. Hyphen-only keywords have no word left to normalise
        # after splitting on "-".
        def validate_keywords!(keywords)
          keywords.each do |kw|
            usable = kw.is_a?(String) && !kw.delete("-").strip.empty?
            raise ValidationError, "keyword must be a non-empty string" unless usable
          end
        end

        # Each web source must carry a URL that UrlNormalizer accepts.
        def validate_web_sources!(sources)
          sources.each do |src|
            raise ValidationError, "invalid URL: #{src["url"]}" unless UrlNormalizer.valid?(src["url"].to_s)
          end
        end

        # Each file source needs a non-blank path and system_name.
        def validate_file_sources!(sources)
          sources.each do |src|
            raise ValidationError, "file_source path cannot be empty" if src["path"].to_s.strip.empty?
            raise ValidationError, "file_source system_name cannot be empty" if src["system_name"].to_s.strip.empty?
          end
        end

        # Every referenced article must exist. A nil list is ignored.
        def validate_related_ids!(ids)
          return unless ids

          ids.each do |id|
            raise ValidationError, "Referenced article not found: #{id}" unless Article.exists?(global_id: id)
          end
        end

        # Attaches each normalised keyword to the article, creating the
        # keyword and the link only when they do not exist yet.
        def process_keywords!(article, keywords)
          keywords.each do |kw|
            normalized = normalize_keyword(kw)
            keyword = Keyword.find_or_create_by!(term: normalized) do |k|
              k.created_at = Time.now
            end
            ArticleKeyword.find_or_create_by!(article: article, keyword: keyword) do |ak|
              ak.created_at = Time.now
            end
          end
        end

        # Links the entry to each web source, looked up by normalised URL and
        # created if missing.
        #
        # @return [Array<String>] global ids of the linked web sources
        def process_web_sources!(entry, sources)
          sources.map do |src|
            normalized_url = UrlNormalizer.normalize(src["url"])
            ws = WebSource.find_or_create_by!(url: normalized_url) do |w|
              w.global_id = GlobalId.generate
              w.title = src["title"]
              w.fetched_at = src["fetched_at"] ? Time.parse(src["fetched_at"]) : nil
              w.created_at = Time.now
            end
            EntryWebSource.find_or_create_by!(entry: entry, web_source: ws) do |ews|
              ews.created_at = Time.now
            end
            ws.global_id
          end
        end

        # Links the entry to each file source, looked up by path and
        # system_name and created if missing.
        #
        # @return [Array<String>] global ids of the linked file sources
        def process_file_sources!(entry, sources)
          sources.map do |src|
            fs = FileSource.find_or_create_by!(path: src["path"], system_name: src["system_name"]) do |f|
              f.global_id = GlobalId.generate
              f.created_at = Time.now
            end
            EntryFileSource.find_or_create_by!(entry: entry, file_source: fs) do |efs|
              efs.created_at = Time.now
            end
            fs.global_id
          end
        end

        # Records a reference from the article to each related article.
        #
        # @raise [ValidationError] when the article references itself
        def process_related_articles!(article, related_ids)
          related_ids.each do |target_id|
            raise ValidationError, "An article cannot reference itself" if target_id == article.global_id

            target = Article.find_by!(global_id: target_id)
            ArticleReference.find_or_create_by!(
              source_article: article,
              target_article: target
            ) do |ref|
              ref.created_at = Time.now
            end
          end
        end

        # Downcases the keyword and singularises its last hyphen-separated
        # word, e.g. "Garbage-Collections" becomes "garbage-collection".
        def normalize_keyword(kw)
          words = kw.downcase.split("-")
          words[-1] = ActiveSupport::Inflector.singularize(words[-1])
          words.join("-")
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
module Agent
  module Tome
    module Commands
      # Replaces an article with a consolidated successor.
      #
      # The existing article is moved to a freshly generated global id. A new
      # article then takes over the original global id with a single entry
      # holding the supplied body. The old article's sources and keywords are
      # copied across, and a consolidation link ties the two articles together.
      class Consolidate
        # @param global_id [String] global id of the article to consolidate
        def initialize(global_id:)
          @global_id = global_id
        end

        # Validates +input+ and performs the consolidation in one transaction.
        #
        # @param input [Hash] parsed JSON with a required "body" and an
        #   optional "description" (defaults to the old article's description)
        # @return [Hash] "new_article_global_id" (the original id) and
        #   "old_article_global_id" (the id the old article was moved to)
        # @raise [NotFoundError] when the article does not exist
        # @raise [ValidationError] when the input is invalid
        def call(input)
          article = Article.find_by(global_id: @global_id)
          raise NotFoundError, "Article not found: #{@global_id}" unless article

          validate!(input)

          result = {}

          ActiveRecord::Base.transaction do
            original_global_id = article.global_id
            new_description = input["description"] || article.description

            # Free the original global id by moving the old article to a new
            # one, so the consolidated article can take it over.
            article.update_columns(global_id: GlobalId.generate)

            new_article = Article.create!(
              global_id: original_global_id,
              description: new_description,
              created_at: Time.now
            )

            # The consolidated article starts with exactly one entry.
            new_entry = Entry.create!(
              article: new_article,
              body: input["body"],
              created_at: Time.now
            )

            copy_sources!(article, new_entry)
            copy_keywords!(article, new_article)

            ConsolidationLink.create!(
              new_article: new_article,
              old_article: article,
              created_at: Time.now
            )

            result = {
              "new_article_global_id" => new_article.global_id,
              "old_article_global_id" => article.global_id
            }
          end

          result
        end

        private

        # Links every web and file source of the old article's entries to the
        # consolidated entry. created_at is set explicitly, as it is at every
        # other place these join rows are created; the column is NOT NULL.
        def copy_sources!(old_article, new_entry)
          old_article.entries.each do |old_entry|
            old_entry.web_sources.each do |ws|
              EntryWebSource.find_or_create_by!(entry: new_entry, web_source: ws) do |ews|
                ews.created_at = Time.now
              end
            end
            old_entry.file_sources.each do |fs|
              EntryFileSource.find_or_create_by!(entry: new_entry, file_source: fs) do |efs|
                efs.created_at = Time.now
              end
            end
          end
        end

        # Attaches every keyword of the old article to the new article.
        def copy_keywords!(old_article, new_article)
          old_article.keywords.each do |keyword|
            ArticleKeyword.find_or_create_by!(article: new_article, keyword: keyword) do |ak|
              ak.created_at = Time.now
            end
          end
        end

        # "body" must be a non-blank string. "description", when given, must
        # be a string of at most 350 characters.
        def validate!(input)
          raise ValidationError, "body is required" unless input.key?("body")

          body = input["body"]
          raise ValidationError, "body must be a string" unless body.is_a?(String)
          raise ValidationError, "body cannot be blank" if body.strip.empty?

          return unless input.key?("description")

          desc = input["description"]
          raise ValidationError, "description must be a string" unless desc.is_a?(String)
          raise ValidationError, "description must be 350 characters or fewer" if desc.length > 350
        end
      end
    end
  end
end
|