agent-tome 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ require "active_support/core_ext/string/inflections"
2
+
3
+ module Agent
4
+ module Tome
5
+ module Commands
6
# Command that creates a new article with its first entry and links the
# supplied keywords, web sources, file sources and related articles to it.
# All writes happen inside a single ActiveRecord transaction.
class Create
  # NOTE(review): not referenced anywhere inside this class; presumably used
  # by URL normalization elsewhere - confirm before removing.
  TRACKING_PARAMS = %w[fbclid gclid fbid mc_cid mc_eid].freeze

  # Validates +input+ and persists the article graph.
  #
  # @param input [Hash] string-keyed payload. "description" and "body" are
  #   required; "keywords", "web_sources", "file_sources" and
  #   "related_article_ids" are optional arrays.
  # @return [Hash] global ids of the created article and entry, plus the
  #   global ids of every linked web source and file source
  # @raise [ValidationError] when the payload is malformed
  def call(input)
    validate!(input)

    result = {}

    ActiveRecord::Base.transaction do
      article = Article.create!(
        description: input["description"],
        created_at: Time.now
      )

      entry = Entry.create!(
        article: article,
        body: input["body"],
        created_at: Time.now
      )

      process_keywords!(article, input["keywords"] || [])
      web_source_ids = process_web_sources!(entry, input["web_sources"] || [])
      file_source_ids = process_file_sources!(entry, input["file_sources"] || [])
      process_related_articles!(article, input["related_article_ids"] || [])

      result = {
        "article_global_id" => article.global_id,
        "entry_global_id" => entry.global_id,
        "web_source_global_ids" => web_source_ids,
        "file_source_global_ids" => file_source_ids
      }
    end

    result
  end

  private

  # Checks the required fields, then each optional collection that is present.
  def validate!(input)
    raise ValidationError, "Missing description" unless input.key?("description")
    raise ValidationError, "Missing body" unless input.key?("body")

    desc = input["description"]
    raise ValidationError, "description must be a string" unless desc.is_a?(String)
    raise ValidationError, "description cannot be blank" if desc.strip.empty?
    raise ValidationError, "description must be 350 characters or fewer" if desc.length > 350

    body = input["body"]
    raise ValidationError, "body must be a string" unless body.is_a?(String)
    raise ValidationError, "body cannot be blank" if body.strip.empty?

    validate_keywords!(input["keywords"]) if input.key?("keywords")
    validate_web_sources!(input["web_sources"]) if input.key?("web_sources")
    validate_file_sources!(input["file_sources"]) if input.key?("file_sources")
    validate_related_ids!(input["related_article_ids"]) if input.key?("related_article_ids")
  end

  # Every keyword must be a non-blank string that still has content once its
  # hyphens are removed; a hyphen-only keyword cannot be normalized.
  def validate_keywords!(keywords)
    return unless keywords

    raise ValidationError, "keywords must be an array" unless keywords.is_a?(Array)

    keywords.each do |kw|
      usable = kw.is_a?(String) && !kw.strip.empty? && !kw.delete("-").empty?
      raise ValidationError, "keyword must be a non-empty string" unless usable
    end
  end

  # Every web source must be a hash with a valid "url" and, when given, a
  # "fetched_at" value that Time.parse accepts.
  def validate_web_sources!(sources)
    return unless sources

    raise ValidationError, "web_sources must be an array" unless sources.is_a?(Array)

    sources.each do |src|
      raise ValidationError, "web_source url is required" unless src.is_a?(Hash) && src["url"]
      raise ValidationError, "invalid URL: #{src["url"]}" unless UrlNormalizer.valid?(src["url"])

      validate_fetched_at!(src["fetched_at"])
    end
  end

  # Rejects timestamps up front so a bad value surfaces as a ValidationError
  # instead of an ArgumentError/TypeError raised mid-transaction.
  def validate_fetched_at!(value)
    return unless value

    Time.parse(value)
  rescue ArgumentError, TypeError
    raise ValidationError, "invalid fetched_at: #{value.inspect}"
  end

  # Every file source must be a hash with non-blank "path" and "system_name".
  def validate_file_sources!(sources)
    return unless sources

    raise ValidationError, "file_sources must be an array" unless sources.is_a?(Array)

    sources.each do |src|
      unless src.is_a?(Hash)
        raise ValidationError, "file_source must be an object with path and system_name"
      end
      raise ValidationError, "file_source path cannot be empty" if src["path"].to_s.strip.empty?
      raise ValidationError, "file_source system_name cannot be empty" if src["system_name"].to_s.strip.empty?
    end
  end

  # Every related id must belong to an existing article.
  def validate_related_ids!(ids)
    return unless ids

    raise ValidationError, "related_article_ids must be an array" unless ids.is_a?(Array)

    ids.each do |id|
      raise ValidationError, "Referenced article not found: #{id}" unless Article.exists?(global_id: id)
    end
  end

  # Finds or creates each normalized keyword and links it to the article.
  def process_keywords!(article, keywords)
    keywords.each do |kw|
      normalized = normalize_keyword(kw)
      keyword = Keyword.find_or_create_by!(term: normalized) do |k|
        k.created_at = Time.now
      end
      ArticleKeyword.find_or_create_by!(article: article, keyword: keyword) do |ak|
        ak.created_at = Time.now
      end
    end
  end

  # Finds or creates a WebSource per normalized URL and links it to the entry.
  #
  # @return [Array<String>] global ids of the linked web sources, in input order
  def process_web_sources!(entry, sources)
    sources.map do |src|
      normalized_url = UrlNormalizer.normalize(src["url"])
      ws = WebSource.find_or_create_by!(url: normalized_url) do |w|
        w.global_id = GlobalId.generate
        w.title = src["title"]
        w.fetched_at = src["fetched_at"] ? Time.parse(src["fetched_at"]) : nil
        w.created_at = Time.now
      end
      EntryWebSource.find_or_create_by!(entry: entry, web_source: ws) do |ews|
        ews.created_at = Time.now
      end
      ws.global_id
    end
  end

  # Finds or creates a FileSource per (path, system_name) pair and links it to
  # the entry.
  #
  # @return [Array<String>] global ids of the linked file sources, in input order
  def process_file_sources!(entry, sources)
    sources.map do |src|
      fs = FileSource.find_or_create_by!(path: src["path"], system_name: src["system_name"]) do |f|
        f.global_id = GlobalId.generate
        f.created_at = Time.now
      end
      EntryFileSource.find_or_create_by!(entry: entry, file_source: fs) do |efs|
        efs.created_at = Time.now
      end
      fs.global_id
    end
  end

  # Records a reference from the new article to each related article.
  def process_related_articles!(article, related_ids)
    related_ids.each do |target_id|
      raise ValidationError, "An article cannot reference itself" if target_id == article.global_id

      target = Article.find_by!(global_id: target_id)
      ArticleReference.find_or_create_by!(
        source_article: article,
        target_article: target
      ) do |ref|
        ref.created_at = Time.now
      end
    end
  end

  # Lower-cases the keyword and singularizes its last hyphen-separated word.
  def normalize_keyword(kw)
    lowered = kw.downcase
    words = lowered.split("-")
    # A hyphen-only keyword splits into an empty array; there is nothing to
    # singularize and assigning to words[-1] would raise IndexError.
    return lowered if words.empty?

    words[-1] = ActiveSupport::Inflector.singularize(words[-1])
    words.join("-")
  end
end
166
+ end
167
+ end
168
+ end
@@ -0,0 +1,69 @@
1
+ module Agent
2
+ module Tome
3
+ module Commands
4
# Read-only command that loads one article by global id and renders it,
# together with its entries and their sources, as a string-keyed hash.
class Fetch
  # @param global_id [String] global id of the article to load
  def initialize(global_id:)
    @global_id = global_id
  end

  # @return [Hash] the article's attributes, sorted keyword terms, entries
  #   ordered by creation time and, when a consolidation link exists, a
  #   "consolidated_from" summary of the old article
  # @raise [NotFoundError] when no article has the given global id
  def call
    article = Article.find_by(global_id: @global_id) ||
              raise(NotFoundError, "Article not found: #{@global_id}")

    payload = {
      "global_id" => article.global_id,
      "description" => article.description,
      "keywords" => article.keywords.pluck(:term).sort,
      "created_at" => article.created_at.iso8601,
      "entries" => format_entries(article)
    }

    with_consolidation(payload, article.consolidation_as_new)
  end

  private

  # Appends the "consolidated_from" summary when a consolidation link exists.
  def with_consolidation(payload, link)
    return payload unless link

    predecessor = link.old_article
    payload.merge(
      "consolidated_from" => {
        "global_id" => predecessor.global_id,
        "description" => predecessor.description
      }
    )
  end

  # Renders the article's entries, oldest first.
  def format_entries(article)
    ordered = article.entries.order(:created_at)
    ordered.map { |entry| format_entry(entry) }
  end

  # Renders a single entry with its web and file sources.
  def format_entry(entry)
    {
      "global_id" => entry.global_id,
      "body" => entry.body,
      "created_at" => entry.created_at.iso8601,
      "web_sources" => format_web_sources(entry),
      "file_sources" => format_file_sources(entry)
    }
  end

  # Renders the entry's web sources; "fetched_at" is nil when never recorded.
  def format_web_sources(entry)
    entry.web_sources.map do |source|
      fetched = source.fetched_at
      {
        "global_id" => source.global_id,
        "url" => source.url,
        "title" => source.title,
        "fetched_at" => fetched&.iso8601
      }
    end
  end

  # Renders the entry's file sources.
  def format_file_sources(entry)
    entry.file_sources.map do |source|
      { "global_id" => source.global_id, "path" => source.path, "system_name" => source.system_name }
    end
  end
end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,22 @@
1
+ module Agent
2
+ module Tome
3
+ module Commands
4
# Read-only command that lists keyword terms containing a given substring,
# compared case-insensitively via LOWER(term).
class KeywordsList
  # @param prefix [String] substring to look for inside keyword terms
  def initialize(prefix:)
    @prefix = prefix
  end

  # @return [Hash] { "keywords" => [String, ...] } ordered by term
  # @raise [ValidationError] when the prefix is nil or blank
  def call
    raise ValidationError, "A prefix/substring argument is required" if @prefix.nil? || @prefix.strip.empty?

    # Escape LIKE metacharacters so a user-supplied "%" or "_" is matched
    # literally instead of acting as a wildcard. SQLite has no default escape
    # character, hence the explicit ESCAPE clause.
    needle = Keyword.sanitize_sql_like(@prefix.downcase)

    terms = Keyword
      .where("LOWER(term) LIKE ? ESCAPE '\\'", "%#{needle}%")
      .order(:term)
      .pluck(:term)

    { "keywords" => terms }
  end
end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,80 @@
1
+ module Agent
2
+ module Tome
3
+ module Commands
4
# Read-only command that gathers every article connected to a given article:
# through shared keywords, explicit references in either direction, and
# consolidation links in either direction.
class Related
  # @param global_id [String] global id of the article to inspect
  def initialize(global_id:)
    @global_id = global_id
  end

  # @return [Hash] five string-keyed lists of formatted articles
  # @raise [NotFoundError] when no article has the given global id
  def call
    article = Article.find_by(global_id: @global_id) ||
              raise(NotFoundError, "Article not found: #{@global_id}")

    {
      "shared_keywords" => find_shared_keywords(article),
      "references_to" => find_references_to(article),
      "referenced_by" => find_referenced_by(article),
      "consolidated_from" => find_consolidated_from(article),
      "consolidated_into" => find_consolidated_into(article)
    }
  end

  private

  # Up to 100 other articles sharing at least one keyword, most overlap first.
  def find_shared_keywords(article)
    ids = article.keywords.pluck(:id)
    return [] if ids.empty?

    candidates = Article
      .joins(:article_keywords)
      .where(article_keywords: { keyword_id: ids })
      .where.not(id: article.id)
      .group("articles.id")
      .select("articles.*, COUNT(DISTINCT article_keywords.keyword_id) AS shared_keyword_count")
      .order("shared_keyword_count DESC")
      .limit(100)

    candidates.map do |other|
      format_article(other, { shared_keyword_count: other.shared_keyword_count.to_i })
    end
  end

  # Articles this article references.
  def find_references_to(article)
    linked_articles(ArticleReference, :source_article, :target_article, article)
  end

  # Articles that reference this article.
  def find_referenced_by(article)
    linked_articles(ArticleReference, :target_article, :source_article, article)
  end

  # Old articles linked to this article as the new one.
  def find_consolidated_from(article)
    linked_articles(ConsolidationLink, :new_article, :old_article, article)
  end

  # New articles linked to this article as the old one.
  def find_consolidated_into(article)
    linked_articles(ConsolidationLink, :old_article, :new_article, article)
  end

  # Loads link rows where +near+ is the given article and formats the article
  # on the +far+ side of each row.
  def linked_articles(link_model, near, far, article)
    rows = link_model.where(near => article).includes(far)
    rows.map { |row| format_article(row.public_send(far)) }
  end

  # Renders one article; +extra+ entries are appended under string keys.
  def format_article(article, extra = {})
    summary = {
      "global_id" => article.global_id,
      "description" => article.description,
      "keywords" => article.keywords.pluck(:term).sort,
      "created_at" => article.created_at.iso8601
    }
    summary.merge(extra.transform_keys(&:to_s))
  end
end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,62 @@
1
+ require "active_support/core_ext/string/inflections"
2
+
3
+ module Agent
4
+ module Tome
5
+ module Commands
6
# Read-only command that finds articles tagged with the requested keywords,
# ranked by how many of the requested keywords each article carries.
class Search
  # @param keywords [Array<String>] raw keywords; each is normalized before lookup
  # @param match [String] "all" requires every requested keyword on an
  #   article; any other value matches articles carrying at least one
  def initialize(keywords:, match: "any")
    @keywords = keywords
    @match = match
  end

  # @return [Hash] { "results" => [Hash, ...] }, capped at 1000 rows by the query
  # @raise [ValidationError] when no keywords were given
  def call
    raise ValidationError, "At least one keyword is required" if @keywords.empty?

    # De-duplicate so "ruby" and "Rubies" count as one requested term.
    normalized = @keywords.map { |kw| normalize_keyword(kw) }.uniq
    keyword_ids = Keyword.where(term: normalized).pluck(:id)

    return { "results" => [] } if keyword_ids.empty?

    # "all" cannot be satisfied when a requested term does not exist at all;
    # without this guard the HAVING clause would only require the terms that
    # happen to exist.
    return { "results" => [] } if @match == "all" && keyword_ids.length < normalized.length

    articles = find_matching_articles(keyword_ids, normalized)

    { "results" => articles.map { |row| format_result(row) } }
  end

  private

  # Builds the grouped query. The second argument is accepted for call
  # compatibility but is not needed to build the query.
  def find_matching_articles(keyword_ids, _normalized_terms)
    base = Article
      .joins(:article_keywords)
      .where(article_keywords: { keyword_id: keyword_ids })
      .group("articles.id")
      .select("articles.*, COUNT(DISTINCT article_keywords.keyword_id) AS matching_keyword_count")
      .order("matching_keyword_count DESC")

    if @match == "all"
      base = base.having("COUNT(DISTINCT article_keywords.keyword_id) = ?", keyword_ids.length)
    end

    base.limit(1000)
  end

  # Renders one result row, including its matching-keyword count.
  def format_result(article)
    {
      "global_id" => article.global_id,
      "description" => article.description,
      "keywords" => article.keywords.pluck(:term).sort,
      "matching_keyword_count" => article.matching_keyword_count.to_i,
      "created_at" => article.created_at.iso8601
    }
  end

  # Lower-cases the keyword and singularizes its last hyphen-separated word.
  def normalize_keyword(kw)
    lowered = kw.downcase
    words = lowered.split("-")
    # A hyphen-only keyword splits into an empty array; there is nothing to
    # singularize and assigning to words[-1] would raise IndexError.
    return lowered if words.empty?

    words[-1] = ActiveSupport::Inflector.singularize(words[-1])
    words.join("-")
  end
end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,60 @@
1
+ module Agent
2
+ module Tome
3
+ module Commands
4
# Read-only command that finds the articles whose entries cite a given
# source: a web source when the argument starts with http:// or https://,
# otherwise a file source matched by path.
class SourceSearch
  # @param source [String] URL or file path to look up
  # @param system [String, nil] optional system name used to narrow a
  #   file-path search
  def initialize(source:, system: nil)
    @source = source
    @system = system
  end

  # @return [Hash] { "results" => [Hash, ...] }
  def call
    matches = url?(@source) ? search_by_url : search_by_path

    { "results" => matches.map { |article| format_article(article) } }
  end

  private

  # True when the string starts with an http or https scheme.
  def url?(str)
    %w[http:// https://].any? { |scheme| str.start_with?(scheme) }
  end

  # Articles with an entry linked to the web source stored under the
  # normalized URL; empty when no such web source exists.
  def search_by_url
    web_source = WebSource.find_by(url: UrlNormalizer.normalize(@source))
    return [] unless web_source

    Article.joins(entries: :web_sources).where(web_sources: { id: web_source.id }).distinct
  end

  # Articles with an entry linked to a file source at the given path,
  # additionally filtered by system name when one was supplied.
  def search_by_path
    criteria = { path: @source }
    criteria[:system_name] = @system if @system

    Article.joins(entries: :file_sources).where(file_sources: criteria).distinct
  end

  # Renders one article with its sorted keyword terms.
  def format_article(article)
    terms = article.keywords.pluck(:term).sort

    {
      "global_id" => article.global_id,
      "description" => article.description,
      "keywords" => terms,
      "created_at" => article.created_at.iso8601
    }
  end
end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,52 @@
1
+ require "yaml"
2
+ require "fileutils"
3
+
4
+ module Agent
5
+ module Tome
6
# Locates the agent-tome configuration directory, creates it with a default
# config.yml when the directory does not exist, and exposes the configured
# database path.
class Config
  DEFAULT_CONFIG_DIR = File.expand_path("~/.agent-tome")

  attr_reader :db_path, :config_dir

  # @param config_dir [String, nil] explicit directory; falls back to the
  #   AGENT_TOME_CONFIG_DIR environment variable, then DEFAULT_CONFIG_DIR
  def initialize(config_dir: nil)
    @config_dir = config_dir || ENV.fetch("AGENT_TOME_CONFIG_DIR", DEFAULT_CONFIG_DIR)
  end

  # Reads config.yml, bootstrapping the directory first when it is missing.
  # An existing directory without a config file is an error, not a trigger
  # for bootstrapping.
  #
  # @return [Config] self, with db_path populated
  # @raise [ConfigError] when the config file is missing, unparseable, or
  #   has no usable db_path
  def load!
    bootstrap! unless File.directory?(@config_dir)
    read_config!
    self
  end

  private

  # Creates the config directory and writes a config.yml that points at
  # tome.db inside that directory.
  def bootstrap!
    FileUtils.mkdir_p(@config_dir)
    default_db = File.join(@config_dir, "tome.db")
    File.write(config_file_path, YAML.dump("db_path" => default_db))
  end

  def config_file_path
    File.join(@config_dir, "config.yml")
  end

  # Loads config.yml and stores its db_path.
  def read_config!
    raise ConfigError, "Config file not found: #{config_file_path}" unless File.exist?(config_file_path)

    data = parse_config_file

    unless data.is_a?(Hash) && data.key?("db_path") && !data["db_path"].to_s.strip.empty?
      raise ConfigError, "db_path is not configured in #{config_file_path}"
    end

    @db_path = data["db_path"]
  end

  # Parses config.yml with the safe loader so the file can only yield plain
  # data, and reports YAML problems as ConfigError rather than leaking
  # Psych exceptions to callers.
  def parse_config_file
    YAML.safe_load(File.read(config_file_path))
  rescue Psych::Exception => e
    raise ConfigError, "Config file is not valid YAML: #{config_file_path} (#{e.message})"
  end
end
49
+
50
# Raised by Config when the config file is missing or does not define a
# usable db_path.
class ConfigError < StandardError
end
51
+ end
52
+ end
@@ -0,0 +1,77 @@
1
+ require "active_record"
2
+ require "fileutils"
3
+
4
+ module Agent
5
+ module Tome
6
# Connection management for the SQLite database: prepares the database
# directory, opens the ActiveRecord connection, applies connection PRAGMAs
# and runs pending migrations.
module Database
  MIGRATIONS_PATH = File.expand_path("../../../../db/migrate", __FILE__)

  @migrations_path_override = nil

  # @return [String] the override when one was set, otherwise MIGRATIONS_PATH
  def self.migrations_path
    @migrations_path_override || MIGRATIONS_PATH
  end

  # @param path [String, nil] directory to use instead of MIGRATIONS_PATH;
  #   nil restores the default
  def self.migrations_path=(path)
    @migrations_path_override = path
  end

  # Opens the SQLite database at +db_path+ (creating its parent directory
  # when needed) and migrates it.
  #
  # @param db_path [String] path of the SQLite database file
  # @raise [DatabaseError] when the path cannot be created or written
  def self.connect!(db_path)
    parent = File.dirname(db_path)

    # Permission failures from mkdir_p are translated by the method-level
    # rescue below, which produces the same message a local rescue would.
    FileUtils.mkdir_p(parent) unless File.directory?(parent)

    raise DatabaseError, "Database path is not writable: #{db_path}" unless writable_path?(db_path)

    ActiveRecord::Base.logger = nil
    ActiveRecord::Base.establish_connection(adapter: "sqlite3", database: db_path)

    [
      "PRAGMA foreign_keys = ON",
      "PRAGMA journal_mode = WAL",
      "PRAGMA busy_timeout = 5000"
    ].each { |pragma| ActiveRecord::Base.connection.execute(pragma) }

    run_migrations!
  rescue Errno::EACCES, Errno::EPERM, Errno::EROFS => e
    raise DatabaseError, "Database path is not writable: #{db_path} (#{e.message})"
  end

  # Best-effort disconnect: any StandardError is swallowed and nil returned.
  def self.disconnect!
    ActiveRecord::Base.remove_connection
  rescue StandardError
    nil
  end

  # Silently applies the migrations found under migrations_path.
  def self.run_migrations!
    ActiveRecord::Migration.verbose = false
    ActiveRecord::MigrationContext.new(migrations_path).migrate
  end

  # An existing file must itself be writable; a file that does not exist yet
  # needs an existing, writable parent directory.
  def self.writable_path?(path)
    return File.writable?(path) if File.exist?(path)

    parent = File.dirname(path)
    File.directory?(parent) && File.writable?(parent)
  end
  private_class_method :writable_path?
end
74
+
75
# Raised by Database.connect! when the database path cannot be created or is
# not writable.
class DatabaseError < StandardError
end
76
+ end
77
+ end
@@ -0,0 +1,16 @@
1
+ module Agent
2
+ module Tome
3
# Generates and validates the short public identifiers used for records:
# seven characters drawn from a Base58 alphabet (no 0, O, I or l).
module GlobalId
  # Frozen so the shared alphabet cannot be mutated at runtime.
  BASE58_ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz".freeze
  # Number of characters in a generated id; PATTERN enforces the same length.
  LENGTH = 7
  PATTERN = /\A[1-9A-HJ-NP-Za-km-z]{7}\z/

  # @return [String] a random id of LENGTH characters from BASE58_ALPHABET.
  #   Uses Kernel#rand, so ids are not cryptographically unpredictable, and
  #   uniqueness is not checked here.
  def self.generate
    Array.new(LENGTH) { BASE58_ALPHABET[rand(BASE58_ALPHABET.length)] }.join
  end

  # @param id [Object] candidate value
  # @return [Boolean] true only for a String that fully matches PATTERN
  def self.valid?(id)
    id.is_a?(String) && PATTERN.match?(id)
  end
end
15
+ end
16
+ end
@@ -0,0 +1,8 @@
1
+ module Agent
2
+ module Tome
3
# Abstract base class for the models in this namespace; it has no table of
# its own.
class ApplicationRecord < ActiveRecord::Base
  self.abstract_class = true
  # NOTE(review): presumably set to an empty string so table names are not
  # prefixed based on the enclosing Agent::Tome modules - confirm.
  self.table_name_prefix = ""
end
7
+ end
8
+ end
@@ -0,0 +1,36 @@
1
+ module Agent
2
+ module Tome
3
# An article: a short description that owns entries, keyword links and
# references to and from other articles, and that may take part in a
# consolidation link as either the new or the old article.
class Article < ApplicationRecord
  has_many :entries, dependent: :destroy
  has_many :article_keywords, dependent: :destroy
  has_many :keywords, through: :article_keywords

  # References where this article is the source.
  has_many :source_references,
           class_name: "Agent::Tome::ArticleReference", foreign_key: :source_article_id,
           dependent: :destroy
  # References where this article is the target.
  has_many :target_references,
           class_name: "Agent::Tome::ArticleReference", foreign_key: :target_article_id,
           dependent: :destroy

  # Consolidation link in which this article is the new article.
  has_one :consolidation_as_new, class_name: "Agent::Tome::ConsolidationLink", foreign_key: :new_article_id
  # Consolidation link in which this article is the old article.
  has_one :consolidation_as_old, class_name: "Agent::Tome::ConsolidationLink", foreign_key: :old_article_id

  validates :global_id, presence: true, length: { is: 7 }, uniqueness: true
  validates :description, presence: true, length: { maximum: 350 }

  before_validation :assign_global_id, on: :create

  private

  # Gives a new record a generated global id unless one was already supplied.
  def assign_global_id
    self.global_id = GlobalId.generate unless global_id.present?
  end
end
35
+ end
36
+ end
@@ -0,0 +1,10 @@
1
+ module Agent
2
+ module Tome
3
# Join record linking one article to one keyword.
class ArticleKeyword < ApplicationRecord
  belongs_to :article
  belongs_to :keyword

  # The same keyword may be linked to a given article only once.
  validates :article_id, uniqueness: { scope: :keyword_id }
end
9
+ end
10
+ end
@@ -0,0 +1,20 @@
1
+ module Agent
2
+ module Tome
3
# Directed reference from one article (the source) to another (the target).
class ArticleReference < ApplicationRecord
  belongs_to :source_article, class_name: "Agent::Tome::Article"
  belongs_to :target_article, class_name: "Agent::Tome::Article"

  # A given source/target pair may be recorded only once.
  validates :source_article_id, uniqueness: { scope: :target_article_id }
  validate :not_self_referencing

  private

  # Adds a base error when both ids are set and identical.
  def not_self_referencing
    source_id = source_article_id
    target_id = target_article_id
    return unless source_id && target_id

    errors.add(:base, "An article cannot reference itself") if source_id == target_id
  end
end
19
+ end
20
+ end