llmemory 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/generators/llmemory/install/templates/create_llmemory_tables.rb +1 -0
- data/lib/llmemory/configuration.rb +2 -0
- data/lib/llmemory/long_term/episodic/episode.rb +94 -0
- data/lib/llmemory/long_term/episodic/memory.rb +93 -0
- data/lib/llmemory/long_term/episodic/storage.rb +31 -0
- data/lib/llmemory/long_term/episodic/storages/base.rb +39 -0
- data/lib/llmemory/long_term/episodic/storages/file_storage.rb +117 -0
- data/lib/llmemory/long_term/episodic/storages/memory_storage.rb +67 -0
- data/lib/llmemory/long_term/episodic.rb +12 -0
- data/lib/llmemory/long_term/file_based/item.rb +4 -2
- data/lib/llmemory/long_term/file_based/memory.rb +21 -2
- data/lib/llmemory/long_term/file_based/storages/active_record_storage.rb +3 -1
- data/lib/llmemory/long_term/file_based/storages/base.rb +1 -1
- data/lib/llmemory/long_term/file_based/storages/database_storage.rb +18 -8
- data/lib/llmemory/long_term/file_based/storages/file_storage.rb +2 -1
- data/lib/llmemory/long_term/file_based/storages/memory_storage.rb +2 -1
- data/lib/llmemory/long_term/graph_based/edge.rb +7 -0
- data/lib/llmemory/long_term/graph_based/memory.rb +8 -6
- data/lib/llmemory/long_term/graph_based/node.rb +7 -0
- data/lib/llmemory/long_term.rb +1 -0
- data/lib/llmemory/provenance.rb +64 -0
- data/lib/llmemory/reflection/reflector.rb +116 -0
- data/lib/llmemory/reflection.rb +8 -0
- data/lib/llmemory/retrieval/engine.rb +1 -0
- data/lib/llmemory/retrieval/temporal_ranker.rb +17 -3
- data/lib/llmemory/version.rb +1 -1
- data/lib/llmemory.rb +2 -0
- metadata +12 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b86647810e47140fff5732a066da4a16188704249d09db14720d8c565b6eaf0e
|
|
4
|
+
data.tar.gz: 7c52c551746d22c29e41015098a68e166788bb614b0e0c2b064fa5fc0824989f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f584d487eca13280b13f7a3e9f4b8eda60ed10ec8dbb45ff72483acbc76b7feb7f79fb5ae572640a31e87ade05b0762f07cacbecb778dffa6d62a7096231fb12
|
|
7
|
+
data.tar.gz: c68e284c21fc22ccdf20160d9082575a47482b03c5a91fb3b52ec5a6c51b7f5cf13a8915bfd3fe2f5933aa6a52fe8c382e2373393b27d68d8ed1c7ac83766e49
|
|
@@ -14,6 +14,7 @@ module Llmemory
|
|
|
14
14
|
:database_url,
|
|
15
15
|
:vector_store,
|
|
16
16
|
:time_decay_half_life_days,
|
|
17
|
+
:importance_weight,
|
|
17
18
|
:max_retrieval_tokens,
|
|
18
19
|
:prune_after_days,
|
|
19
20
|
:compact_max_bytes,
|
|
@@ -56,6 +57,7 @@ module Llmemory
|
|
|
56
57
|
@database_url = ENV["DATABASE_URL"]
|
|
57
58
|
@vector_store = nil
|
|
58
59
|
@time_decay_half_life_days = 30
|
|
60
|
+
@importance_weight = 1.0
|
|
59
61
|
@max_retrieval_tokens = 2000
|
|
60
62
|
@prune_after_days = 90
|
|
61
63
|
@compact_max_bytes = 8192
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
|
|
4
|
+
|
|
5
|
+
module Llmemory
|
|
6
|
+
module LongTerm
|
|
7
|
+
module Episodic
|
|
8
|
+
# An Episode is a trajectory of an agent's experience: an ordered list of
|
|
9
|
+
# steps (observation -> action -> result) plus a summary, an outcome label
|
|
10
|
+
# and an importance score. This is CoALA's "episodic memory" — distinct
|
|
11
|
+
# from semantic memory (facts), it stores what happened so it can later be
|
|
12
|
+
# retrieved as examples or distilled into semantic knowledge (see P2,
|
|
13
|
+
# reflection).
|
|
14
|
+
class Episode
  attr_reader :id, :user_id, :steps, :summary, :outcome, :importance, :provenance, :created_at

  STEP_KEYS = %i[observation action result timestamp].freeze

  # @param id [Object, nil] episode identifier (nil until a storage assigns one)
  # @param user_id [Object] owner of the episode
  # @param steps [Array<Hash>, Hash, nil] step hashes with symbol or string keys;
  #   a single Hash is treated as a one-step trajectory
  # @param summary [String, nil]
  # @param outcome [String, nil]
  # @param importance [Numeric, nil] coerced with #to_f; nil falls back to 0.5
  # @param provenance [Object, nil] stored as given
  # @param created_at [Time, nil] defaults to the current time
  def initialize(id:, user_id:, steps: [], summary: nil, outcome: nil, importance: 0.5, provenance: nil, created_at: nil)
    @id = id
    @user_id = user_id
    @steps = self.class.normalize_steps(steps)
    @summary = summary
    @outcome = outcome
    @importance = importance.nil? ? 0.5 : importance.to_f
    @provenance = provenance
    @created_at = created_at || Time.now
  end

  # Flat text used for keyword matching: summary, outcome, then the
  # observation/action/result of every step, one per line. Nil and empty
  # parts are left out.
  #
  # @return [String]
  def searchable_text
    parts = [summary, outcome]
    steps.each { |step| parts.push(step[:observation], step[:action], step[:result]) }
    parts.compact.map(&:to_s).reject(&:empty?).join("\n")
  end

  # Converts caller-supplied steps into hashes with exactly the STEP_KEYS,
  # reading each field from its symbol key first and its string key second.
  # Entries that are not Hashes are discarded.
  #
  # @param steps [Array<Hash>, Hash, nil]
  # @return [Array<Hash>]
  def self.normalize_steps(steps)
    # Array(hash) would explode a Hash into [key, value] pairs, every one of
    # which fails the Hash check below, so a lone step Hash is wrapped here.
    list = steps.is_a?(Hash) ? [steps] : Array(steps)
    list.filter_map do |step|
      next unless step.is_a?(Hash)

      {
        observation: step[:observation] || step["observation"],
        action: step[:action] || step["action"],
        result: step[:result] || step["result"],
        timestamp: normalize_time(step[:timestamp] || step["timestamp"])
      }
    end
  end

  # @param value [#iso8601, Object, nil]
  # @return [String, nil] ISO 8601 text when the value supports it, otherwise
  #   its #to_s; nil stays nil
  def self.normalize_time(value)
    return nil if value.nil?

    value.respond_to?(:iso8601) ? value.iso8601 : value.to_s
  end

  # Rebuilds an Episode from a hash with symbol or string keys (the shape
  # produced by #to_h or read back from a storage).
  #
  # @param hash [Hash]
  # @return [Episode]
  def self.from_h(hash)
    new(
      id: hash[:id] || hash["id"],
      user_id: hash[:user_id] || hash["user_id"],
      steps: hash[:steps] || hash["steps"] || [],
      summary: hash[:summary] || hash["summary"],
      outcome: hash[:outcome] || hash["outcome"],
      importance: hash[:importance] || hash["importance"] || 0.5,
      provenance: hash[:provenance] || hash["provenance"],
      created_at: parse_created_at(hash[:created_at] || hash["created_at"])
    )
  end

  # @param value [Time, String, nil]
  # @return [Time, nil] nil when the value is nil or cannot be parsed; the
  #   constructor then substitutes the current time
  def self.parse_created_at(value)
    return value if value.nil? || value.is_a?(Time)

    Time.parse(value.to_s)
  rescue ArgumentError
    nil
  end

  # @return [Hash] symbol-keyed representation; created_at is rendered as
  #   ISO 8601 with microseconds when it supports #iso8601
  def to_h
    {
      id: id,
      user_id: user_id,
      steps: steps,
      summary: summary,
      outcome: outcome,
      importance: importance,
      provenance: provenance,
      created_at: created_at.respond_to?(:iso8601) ? created_at.iso8601(6) : created_at
    }
  end
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "episode"
|
|
4
|
+
require_relative "storage"
|
|
5
|
+
|
|
6
|
+
module Llmemory
|
|
7
|
+
module LongTerm
|
|
8
|
+
module Episodic
|
|
9
|
+
# Episodic long-term memory: records agent trajectories and retrieves them
|
|
10
|
+
# by recency, importance and relevance. Designed to coexist with semantic
|
|
11
|
+
# memory (file/graph), not replace it, and to feed reflection (P2), which
|
|
12
|
+
# distills episodes into semantic knowledge.
|
|
13
|
+
#
|
|
14
|
+
# Deliberately LLM-free: recording and retrieval are deterministic. Higher
|
|
15
|
+
# order summarization belongs to reflection.
|
|
16
|
+
class Memory
  attr_reader :user_id, :storage

  # @param user_id [Object] owner whose episodes this instance reads and writes
  # @param storage [Object, nil] episodic storage backend; when omitted one is
  #   obtained from Storages.build
  def initialize(user_id:, storage: nil)
    @user_id = user_id
    @storage = storage || Storages.build
  end

  # Records a trajectory. `steps` is an array of hashes with any of
  # :observation, :action, :result, :timestamp. When no summary is given a
  # deterministic one is derived from the step actions.
  #
  # @return [Object] whatever the storage's save_episode returns (the episode id)
  def record_episode(steps:, summary: nil, outcome: nil, importance: 0.5)
    episode = Episode.new(
      id: nil,
      user_id: @user_id,
      steps: steps,
      summary: summary || derive_summary(steps),
      outcome: outcome,
      importance: importance
    )
    provenance = Llmemory::Provenance.from_text_fingerprint(
      episode.searchable_text, method: "episode_recording", confidence: episode.importance
    )
    @storage.save_episode(@user_id, episode.to_h.merge(provenance: provenance))
  end

  # @param limit [Integer, nil] cap passed to the storage (defaults to 10)
  # @return [Array<Episode>] in the order the storage lists them
  def recent_episodes(limit: 10)
    load_episodes(limit)
  end

  # @param limit [Integer, nil] cap passed to the storage (nil = no cap requested)
  # @return [Array<Episode>] in the order the storage lists them
  def episodes(limit: nil)
    load_episodes(limit)
  end

  # @return [Episode, nil] nil when the storage has no such episode
  def find_episode(id)
    raw = @storage.get_episode(@user_id, id)
    raw && Episode.from_h(raw)
  end

  # @return [Integer] number of episodes the storage reports for this user
  def count
    @storage.count_episodes(@user_id)
  end

  # Retrieval Engine integration: returns candidate hashes (:text,
  # :timestamp, :score, :importance, :evergreen, :provenance) for episodes
  # matching `query`.
  #
  # Matches are ordered newest-first *before* being cut to `top_k`, so the
  # cut never depends on the order in which the storage happened to return
  # them. Episodes with equal timestamps keep the storage's relative order.
  #
  # @param query [String]
  # @param user_id [Object, nil] must equal this memory's user (or be nil),
  #   otherwise an empty array is returned
  # @param top_k [Integer, nil] maximum number of candidates; nil = no cap
  # @return [Array<Hash>]
  def search_candidates(query, user_id: nil, top_k: 20)
    uid = user_id || @user_id
    return [] unless uid == @user_id

    matches = @storage.search_episodes(uid, query).map { |raw| [raw, Episode.from_h(raw)] }
    ordered = matches.each_with_index
                     .sort_by { |(_raw, episode), position| [-episode.created_at.to_f, position] }
                     .map(&:first)
    ordered = ordered.first(top_k) if top_k

    ordered.map do |raw, episode|
      {
        text: episode.summary.to_s.empty? ? episode.searchable_text : episode.summary,
        timestamp: episode.created_at,
        score: 1.0,
        importance: episode.importance,
        evergreen: false,
        provenance: raw[:provenance] || raw["provenance"]
      }
    end
  end

  private

  # Shared body of #recent_episodes and #episodes.
  def load_episodes(limit)
    @storage.list_episodes(@user_id, limit: limit).map { |raw| Episode.from_h(raw) }
  end

  # Cheap, deterministic summary when the caller does not provide one:
  # the step count plus the non-blank actions joined with " -> ".
  # Returns nil when there are no steps or no usable actions.
  def derive_summary(steps)
    normalized = Episode.normalize_steps(steps)
    return nil if normalized.empty?

    actions = normalized.filter_map { |s| s[:action] }.reject { |a| a.to_s.strip.empty? }
    return nil if actions.empty?

    "Episode with #{normalized.size} step(s): #{actions.join(' -> ')}"
  end
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "storages/base"
|
|
4
|
+
require_relative "storages/memory_storage"
|
|
5
|
+
require_relative "storages/file_storage"
|
|
6
|
+
|
|
7
|
+
module Llmemory
|
|
8
|
+
module LongTerm
|
|
9
|
+
module Episodic
|
|
10
|
+
# Backward compatibility: Storage points to the in-memory backend.
|
|
11
|
+
Storage = Storages::MemoryStorage
|
|
12
|
+
|
|
13
|
+
module Storages
  # Instantiates the episodic storage backend named by `store`, falling back
  # to the configured long-term store when `store` is nil.
  #
  # "file" yields a FileStorage rooted at `base_path` (or the configured
  # long-term storage path); the SQL/ActiveRecord names raise because no
  # episodic implementation exists for them; "memory" and every other value
  # yield a MemoryStorage.
  #
  # @param store [Symbol, String, nil]
  # @param base_path [String, nil] only used by the file backend
  # @raise [NotImplementedError] for postgres/database/active_record/activerecord
  def self.build(store: nil, base_path: nil)
    backend = (store || Llmemory.configuration.long_term_store).to_s

    if backend == "file"
      return FileStorage.new(base_path: base_path || Llmemory.configuration.long_term_storage_path)
    end

    if %w[postgres database active_record activerecord].include?(backend)
      raise NotImplementedError,
            "Episodic SQL/ActiveRecord storage is not implemented yet; use :memory or :file " \
            "(or pass an explicit storage instance)."
    end

    MemoryStorage.new
  end
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Llmemory
|
|
4
|
+
module LongTerm
|
|
5
|
+
module Episodic
|
|
6
|
+
module Storages
|
|
7
|
+
# Storage contract for episodic memory. Implementations persist Episode
|
|
8
|
+
# hashes and expose recency-ordered listing plus keyword search so the
|
|
9
|
+
# retrieval Engine can rank episodes alongside other memory types.
|
|
10
|
+
class Base
  # Persist one episode hash for the user.
  def save_episode(user_id, episode)
    abstract!(__method__)
  end

  # Fetch a single episode of the user by id.
  def get_episode(user_id, id)
    abstract!(__method__)
  end

  # Newest first. Optionally capped by limit.
  def list_episodes(user_id, limit: nil)
    abstract!(__method__)
  end

  # Keyword search over the user's episodes.
  def search_episodes(user_id, query)
    abstract!(__method__)
  end

  # Number of episodes held for the user.
  def count_episodes(user_id)
    abstract!(__method__)
  end

  # Every user known to the backend.
  def list_users
    abstract!(__method__)
  end

  private

  # Raises the "must be implemented" error on behalf of the contract method
  # named `name`; subclasses override the public methods above.
  def abstract!(name)
    raise NotImplementedError, "#{self.class}##{name} must be implemented"
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "json"
|
|
5
|
+
require "time"
|
|
6
|
+
require_relative "base"
|
|
7
|
+
|
|
8
|
+
module Llmemory
|
|
9
|
+
module LongTerm
|
|
10
|
+
module Episodic
|
|
11
|
+
module Storages
|
|
12
|
+
# Episodic storage that keeps one JSON file per episode under
# <base_path>/<sanitised user id>/episodes/, plus a meta.json per user that
# holds the counter used for generated ids.
class FileStorage < Base
  # Matches the platform's path separators; used to keep a caller-supplied
  # episode id from addressing a file outside the user's episodes directory.
  PATH_SEPARATORS = Regexp.union([File::SEPARATOR, File::ALT_SEPARATOR].compact).freeze

  # @param base_path [String, nil] root directory; falls back to the configured
  #   long-term storage path and then to "./llmemory_data"
  def initialize(base_path: nil)
    root = base_path || Llmemory.configuration.long_term_storage_path || "./llmemory_data"
    @base_path = File.expand_path(root)
  end

  # Writes the episode as JSON and returns its id. A missing id is generated
  # as "ep_<n>" from the per-user counter; a missing created_at is stamped
  # with the current time (microsecond precision, the same format
  # Episode#to_h emits, so episodes written within one second still sort).
  def save_episode(user_id, episode)
    id = episode[:id] || episode["id"] || "ep_#{next_seq(user_id)}"
    data = stringify_for_json(episode).merge("id" => id, "user_id" => user_id)
    data["created_at"] ||= Time.now.iso8601(6)
    FileUtils.mkdir_p(episodes_dir(user_id))
    File.write(episode_path(user_id, id), JSON.generate(data))
    id
  end

  # @return [Hash, nil] the symbol-keyed episode, or nil when no readable
  #   episode file exists. Does not create any directory.
  def get_episode(user_id, id)
    path = episode_path(user_id, id)
    return nil unless File.file?(path)

    load_episode(path)
  end

  # Newest first. A positive limit caps the result; nil or non-positive
  # limits return everything.
  def list_episodes(user_id, limit: nil)
    sorted = all_episodes(user_id).sort_by { |e| e[:created_at] }.reverse
    limit && limit.to_i.positive? ? sorted.first(limit.to_i) : sorted
  end

  # Case-insensitive substring match over summary, outcome and step fields.
  # A blank query returns every episode. Results are newest first, so the
  # order no longer depends on how the filesystem enumerates the directory.
  def search_episodes(user_id, query)
    needle = query.to_s.downcase
    newest_first = list_episodes(user_id)
    return newest_first if needle.strip.empty?

    newest_first.select { |e| episode_text(e).downcase.include?(needle) }
  end

  # @return [Integer] number of .json files in the user's episodes directory
  def count_episodes(user_id)
    dir = episodes_dir(user_id)
    return 0 unless Dir.exist?(dir)

    Dir.children(dir).count { |f| f.end_with?(".json") }
  end

  # @return [Array<String>] directory names under the base path that contain
  #   an "episodes" directory (these are the sanitised user ids)
  def list_users
    return [] unless Dir.exist?(@base_path)

    Dir.children(@base_path).select { |d| Dir.exist?(File.join(@base_path, d, "episodes")) }
  end

  private

  # Every parseable episode of the user, in directory enumeration order.
  def all_episodes(user_id)
    dir = episodes_dir(user_id)
    return [] unless Dir.exist?(dir)

    Dir.children(dir)
       .select { |f| f.end_with?(".json") }
       .filter_map { |f| load_episode(File.join(dir, f)) }
  end

  # Reads one episode file. Returns nil for malformed JSON and for JSON whose
  # top level is not an object (which cannot be an episode).
  def load_episode(path)
    data = JSON.parse(File.read(path), symbolize_names: true)
    return nil unless data.is_a?(Hash)

    data[:created_at] = parse_time(data[:created_at])
    data
  rescue JSON::ParserError
    nil
  end

  # Text that search_episodes matches against.
  def episode_text(episode)
    parts = [episode[:summary], episode[:outcome]]
    Array(episode[:steps]).each do |s|
      next unless s.is_a?(Hash)

      parts << s[:observation] << s[:action] << s[:result]
    end
    parts.compact.join("\n")
  end

  # JSON round trip: yields a string-keyed copy containing only JSON types.
  def stringify_for_json(episode)
    JSON.parse(JSON.generate(episode))
  end

  # Directory for a user. Characters outside [A-Za-z0-9_.-] become "_"; the
  # names "." and ".." pass that filter but would resolve to the base
  # directory or its parent, so their dots are replaced as well.
  def user_path(user_id, *parts)
    safe = user_id.to_s.gsub(%r{[^\w\-.]}, "_")
    safe = safe.tr(".", "_") if %w[. ..].include?(safe)
    File.join(@base_path, safe, *parts)
  end

  def episodes_dir(user_id)
    user_path(user_id, "episodes")
  end

  # Path of an episode file. Path separators in the id are replaced with "_"
  # so the file always lives directly inside the episodes directory. This
  # method is pure: only save_episode creates directories.
  def episode_path(user_id, id)
    File.join(episodes_dir(user_id), "#{id.to_s.gsub(PATH_SEPARATORS, '_')}.json")
  end

  def meta_path(user_id)
    FileUtils.mkdir_p(user_path(user_id))
    File.join(user_path(user_id), "meta.json")
  end

  # Increments and persists "episode_id_seq" in the user's meta.json, keeping
  # any other keys already stored there. A corrupt meta.json raises
  # JSON::ParserError rather than being reset, because resetting the counter
  # could make a generated id overwrite an existing episode.
  def next_seq(user_id)
    path = meta_path(user_id)
    meta = File.file?(path) ? JSON.parse(File.read(path)) : {}
    meta["episode_id_seq"] = (meta["episode_id_seq"] || 0) + 1
    File.write(path, JSON.generate(meta))
    meta["episode_id_seq"]
  end

  # Time values pass through; anything else is parsed from its string form,
  # with the current time as fallback when parsing fails.
  def parse_time(val)
    return val if val.is_a?(Time)

    Time.parse(val.to_s)
  rescue ArgumentError
    Time.now
  end
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
require_relative "base"
|
|
4
|
+
|
|
5
|
+
module Llmemory
|
|
6
|
+
module LongTerm
|
|
7
|
+
module Episodic
|
|
8
|
+
module Storages
|
|
9
|
+
# Process-local episodic storage: episodes live in a Hash keyed by user id
# and are lost when the object is discarded.
class MemoryStorage < Base
  def initialize
    @episodes = Hash.new { |h, k| h[k] = [] }
    @seq = 0
  end

  # Stores a symbol-keyed copy of the episode and returns its id. A missing
  # id is generated as "ep_<n>". created_at is always stored as a Time:
  # callers hand in ISO 8601 strings (Episode#to_h) or nothing at all, and
  # keeping both Strings and Times in one list would make the sort in
  # list_episodes raise on a String/Time comparison.
  def save_episode(user_id, episode)
    @seq += 1
    id = episode[:id] || episode["id"] || "ep_#{@seq}"
    record = symbolize(episode).merge(id: id, user_id: user_id)
    record[:created_at] = coerce_time(record[:created_at])
    @episodes[user_id] << record
    id
  end

  # @return [Hash, nil]
  def get_episode(user_id, id)
    episodes_for(user_id).find { |e| e[:id] == id }
  end

  # Newest first. A positive limit caps the result; nil or non-positive
  # limits return everything.
  def list_episodes(user_id, limit: nil)
    sorted = episodes_for(user_id).sort_by { |e| e[:created_at] }.reverse
    limit && limit.to_i.positive? ? sorted.first(limit.to_i) : sorted
  end

  # Case-insensitive substring match over summary, outcome and step fields.
  # A blank query returns every episode newest first; otherwise matches are
  # returned in insertion order.
  def search_episodes(user_id, query)
    q = query.to_s.downcase
    return list_episodes(user_id) if q.strip.empty?

    episodes_for(user_id).select { |e| episode_text(e).downcase.include?(q) }
  end

  def count_episodes(user_id)
    episodes_for(user_id).size
  end

  # @return [Array] user ids that have had at least one episode saved
  def list_users
    @episodes.keys
  end

  private

  # Read-only access to a user's episodes. Uses fetch so that merely looking
  # up an unknown user does not trigger the Hash default block and register
  # that user in list_users.
  def episodes_for(user_id)
    @episodes.fetch(user_id) { [] }
  end

  # Shallow copy with top-level keys converted to symbols.
  def symbolize(hash)
    hash.each_with_object({}) { |(k, v), acc| acc[k.to_sym] = v }
  end

  # Time values pass through; nil and unparsable values become the current
  # time; anything else is parsed from its string form.
  def coerce_time(value)
    return value if value.is_a?(Time)
    return Time.now if value.nil?

    Time.parse(value.to_s)
  rescue ArgumentError
    Time.now
  end

  # Text that search_episodes matches against; step fields are read from
  # symbol keys first, string keys second.
  def episode_text(episode)
    parts = [episode[:summary], episode[:outcome]]
    Array(episode[:steps]).each do |s|
      next unless s.is_a?(Hash)

      parts << (s[:observation] || s["observation"])
      parts << (s[:action] || s["action"])
      parts << (s[:result] || s["result"])
    end
    parts.compact.join("\n")
  end
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
@@ -4,14 +4,15 @@ module Llmemory
|
|
|
4
4
|
module LongTerm
|
|
5
5
|
module FileBased
|
|
6
6
|
class Item
|
|
7
|
-
attr_reader :id, :user_id, :category, :content, :source_resource_id, :created_at
|
|
7
|
+
attr_reader :id, :user_id, :category, :content, :source_resource_id, :provenance, :created_at
|
|
8
8
|
|
|
9
|
-
def initialize(id:, user_id:, category:, content:, source_resource_id: nil, created_at: nil)
|
|
9
|
+
def initialize(id:, user_id:, category:, content:, source_resource_id: nil, provenance: nil, created_at: nil)
|
|
10
10
|
@id = id
|
|
11
11
|
@user_id = user_id
|
|
12
12
|
@category = category
|
|
13
13
|
@content = content
|
|
14
14
|
@source_resource_id = source_resource_id
|
|
15
|
+
@provenance = provenance
|
|
15
16
|
@created_at = created_at || Time.now
|
|
16
17
|
end
|
|
17
18
|
|
|
@@ -22,6 +23,7 @@ module Llmemory
|
|
|
22
23
|
category: category,
|
|
23
24
|
content: content,
|
|
24
25
|
source_resource_id: source_resource_id,
|
|
26
|
+
provenance: provenance,
|
|
25
27
|
created_at: created_at.iso8601
|
|
26
28
|
}
|
|
27
29
|
end
|
|
@@ -65,7 +65,8 @@ module Llmemory
|
|
|
65
65
|
out << {
|
|
66
66
|
text: i[:content] || i["content"],
|
|
67
67
|
timestamp: i[:created_at] || i["created_at"],
|
|
68
|
-
score:
|
|
68
|
+
score: 1.0,
|
|
69
|
+
importance: (i[:importance] || i["importance"] || 1.0).to_f,
|
|
69
70
|
evergreen: i[:evergreen] || i["evergreen"]
|
|
70
71
|
}
|
|
71
72
|
end
|
|
@@ -84,6 +85,21 @@ module Llmemory
|
|
|
84
85
|
out
|
|
85
86
|
end
|
|
86
87
|
|
|
88
|
+
# Stores a single fact produced outside the extraction flow (e.g. by
|
|
89
|
+
# reflection over episodes), preserving caller-supplied provenance so the
|
|
90
|
+
# insight remains traceable to its source. Returns the item id.
|
|
91
|
+
# Saves one caller-supplied fact for this memory's user and returns the
# value of the storage's save_item (the item id). Blank content is ignored
# and yields nil. The fact is stored without a source resource; category
# and content are stringified, importance and provenance are passed on
# unchanged.
def remember_fact(content:, category: "general", importance: 0.6, provenance: nil)
  text = content.to_s
  return nil if text.strip.empty?

  attributes = {
    category: category.to_s,
    content: text,
    source_resource_id: nil,
    importance: importance,
    provenance: provenance
  }
  @storage.save_item(@user_id, **attributes)
end
|
|
102
|
+
|
|
87
103
|
attr_reader :storage, :user_id
|
|
88
104
|
|
|
89
105
|
private
|
|
@@ -94,7 +110,10 @@ module Llmemory
|
|
|
94
110
|
|
|
95
111
|
def save_item(category:, item:, source_resource_id:, importance: 0.7)
|
|
96
112
|
content = item.is_a?(Hash) ? item["content"] || item[:content] : item.to_s
|
|
97
|
-
|
|
113
|
+
provenance = Llmemory::Provenance.from_resource(
|
|
114
|
+
source_resource_id, method: "fact_extraction", confidence: importance
|
|
115
|
+
)
|
|
116
|
+
@storage.save_item(@user_id, category: category, content: content, source_resource_id: source_resource_id, importance: importance, provenance: provenance)
|
|
98
117
|
end
|
|
99
118
|
|
|
100
119
|
def append_to_daily_log(conversation_text)
|
|
@@ -30,7 +30,7 @@ module Llmemory
|
|
|
30
30
|
id
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
-
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7)
|
|
33
|
+
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7, provenance: nil)
|
|
34
34
|
id = "item_#{SecureRandom.hex(8)}"
|
|
35
35
|
attrs = {
|
|
36
36
|
id: id,
|
|
@@ -41,6 +41,7 @@ module Llmemory
|
|
|
41
41
|
created_at: Time.current
|
|
42
42
|
}
|
|
43
43
|
attrs[:importance] = importance if LlmemoryItem.column_names.include?("importance")
|
|
44
|
+
attrs[:provenance] = provenance if provenance && LlmemoryItem.column_names.include?("provenance")
|
|
44
45
|
LlmemoryItem.create!(attrs)
|
|
45
46
|
id
|
|
46
47
|
end
|
|
@@ -189,6 +190,7 @@ module Llmemory
|
|
|
189
190
|
created_at: r.created_at
|
|
190
191
|
}
|
|
191
192
|
h[:importance] = r.respond_to?(:importance) ? (r.importance || 0.7).to_f : 0.7
|
|
193
|
+
h[:provenance] = r.provenance if r.respond_to?(:provenance)
|
|
192
194
|
h
|
|
193
195
|
end
|
|
194
196
|
|
|
@@ -9,7 +9,7 @@ module Llmemory
|
|
|
9
9
|
raise NotImplementedError, "#{self.class}#save_resource must be implemented"
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
-
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7)
|
|
12
|
+
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7, provenance: nil)
|
|
13
13
|
raise NotImplementedError, "#{self.class}#save_item must be implemented"
|
|
14
14
|
end
|
|
15
15
|
|
|
@@ -24,12 +24,12 @@ module Llmemory
|
|
|
24
24
|
id
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7)
|
|
27
|
+
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7, provenance: nil)
|
|
28
28
|
ensure_tables!
|
|
29
29
|
id = "item_#{SecureRandom.hex(8)}"
|
|
30
30
|
conn.exec_params(
|
|
31
|
-
"INSERT INTO llmemory_items (id, user_id, category, content, source_resource_id, importance, created_at) VALUES ($1, $2, $3, $4, $5, $6, $7)",
|
|
32
|
-
[id, user_id, category, content, source_resource_id, importance.to_f, Time.now.utc.iso8601]
|
|
31
|
+
"INSERT INTO llmemory_items (id, user_id, category, content, source_resource_id, importance, provenance, created_at) VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8)",
|
|
32
|
+
[id, user_id, category, content, source_resource_id, importance.to_f, provenance ? JSON.generate(provenance) : nil, Time.now.utc.iso8601]
|
|
33
33
|
)
|
|
34
34
|
id
|
|
35
35
|
end
|
|
@@ -67,7 +67,7 @@ module Llmemory
|
|
|
67
67
|
ensure_tables!
|
|
68
68
|
pattern = "%#{conn.escape_string(query.to_s.downcase)}%"
|
|
69
69
|
rows = conn.exec_params(
|
|
70
|
-
"SELECT id, category, content, source_resource_id, importance, created_at FROM llmemory_items WHERE user_id = $1 AND LOWER(content) LIKE $2",
|
|
70
|
+
"SELECT id, category, content, source_resource_id, importance, provenance, created_at FROM llmemory_items WHERE user_id = $1 AND LOWER(content) LIKE $2",
|
|
71
71
|
[user_id, pattern]
|
|
72
72
|
)
|
|
73
73
|
rows_to_items(rows)
|
|
@@ -97,7 +97,7 @@ module Llmemory
|
|
|
97
97
|
ensure_tables!
|
|
98
98
|
cutoff = (Time.now - (days * 86400)).utc.iso8601
|
|
99
99
|
rows = conn.exec_params(
|
|
100
|
-
"SELECT id, category, content, source_resource_id, importance, created_at FROM llmemory_items WHERE user_id = $1 AND created_at < $2 ORDER BY created_at",
|
|
100
|
+
"SELECT id, category, content, source_resource_id, importance, provenance, created_at FROM llmemory_items WHERE user_id = $1 AND created_at < $2 ORDER BY created_at",
|
|
101
101
|
[user_id, cutoff]
|
|
102
102
|
)
|
|
103
103
|
rows_to_items(rows)
|
|
@@ -106,7 +106,7 @@ module Llmemory
|
|
|
106
106
|
def get_all_items(user_id)
|
|
107
107
|
ensure_tables!
|
|
108
108
|
rows = conn.exec_params(
|
|
109
|
-
"SELECT id, category, content, source_resource_id, importance, created_at FROM llmemory_items WHERE user_id = $1 ORDER BY created_at",
|
|
109
|
+
"SELECT id, category, content, source_resource_id, importance, provenance, created_at FROM llmemory_items WHERE user_id = $1 ORDER BY created_at",
|
|
110
110
|
[user_id]
|
|
111
111
|
)
|
|
112
112
|
rows_to_items(rows)
|
|
@@ -125,7 +125,7 @@ module Llmemory
|
|
|
125
125
|
ensure_tables!
|
|
126
126
|
cutoff = (Time.now - (hours * 3600)).utc.iso8601
|
|
127
127
|
rows = conn.exec_params(
|
|
128
|
-
"SELECT id, category, content, source_resource_id, importance, created_at FROM llmemory_items WHERE user_id = $1 AND created_at >= $2 ORDER BY created_at",
|
|
128
|
+
"SELECT id, category, content, source_resource_id, importance, provenance, created_at FROM llmemory_items WHERE user_id = $1 AND created_at >= $2 ORDER BY created_at",
|
|
129
129
|
[user_id, cutoff]
|
|
130
130
|
)
|
|
131
131
|
rows_to_items(rows)
|
|
@@ -179,7 +179,7 @@ module Llmemory
|
|
|
179
179
|
|
|
180
180
|
def list_items(user_id:, category: nil, limit: nil)
|
|
181
181
|
ensure_tables!
|
|
182
|
-
sql = "SELECT id, category, content, source_resource_id, importance, created_at FROM llmemory_items WHERE user_id = $1"
|
|
182
|
+
sql = "SELECT id, category, content, source_resource_id, importance, provenance, created_at FROM llmemory_items WHERE user_id = $1"
|
|
183
183
|
params = [user_id]
|
|
184
184
|
if category
|
|
185
185
|
sql += " AND category = $2"
|
|
@@ -258,11 +258,13 @@ module Llmemory
|
|
|
258
258
|
content TEXT NOT NULL,
|
|
259
259
|
source_resource_id TEXT,
|
|
260
260
|
importance REAL DEFAULT 0.7,
|
|
261
|
+
provenance JSONB,
|
|
261
262
|
created_at TIMESTAMPTZ NOT NULL
|
|
262
263
|
);
|
|
263
264
|
CREATE INDEX IF NOT EXISTS idx_llmemory_items_user_id ON llmemory_items(user_id);
|
|
264
265
|
SQL
|
|
265
266
|
conn.exec("ALTER TABLE llmemory_items ADD COLUMN IF NOT EXISTS importance REAL DEFAULT 0.7") rescue nil
|
|
267
|
+
conn.exec("ALTER TABLE llmemory_items ADD COLUMN IF NOT EXISTS provenance JSONB") rescue nil
|
|
266
268
|
conn.exec(<<~SQL)
|
|
267
269
|
CREATE TABLE IF NOT EXISTS llmemory_categories (
|
|
268
270
|
user_id TEXT NOT NULL,
|
|
@@ -282,11 +284,19 @@ module Llmemory
|
|
|
282
284
|
content: r["content"],
|
|
283
285
|
source_resource_id: r["source_resource_id"],
|
|
284
286
|
importance: (r["importance"] || 0.7).to_f,
|
|
287
|
+
provenance: parse_provenance(r["provenance"]),
|
|
285
288
|
created_at: Time.parse(r["created_at"])
|
|
286
289
|
}
|
|
287
290
|
end
|
|
288
291
|
end
|
|
289
292
|
|
|
293
|
+
# Decodes the provenance column of a row into a symbol-keyed structure.
#
# @param value [String, Hash, nil] raw column value
# @return [Object, nil] the parsed JSON with symbolized names; nil when the
#   value is nil, blank, or not valid JSON
def parse_provenance(value)
  return nil if value.nil?

  # NOTE(review): the column is presumably delivered as JSON text, but a value
  # that is already a decoded Hash would make JSON.parse raise TypeError
  # (which the rescue below does not cover). Round-trip it instead so the
  # result has the same symbol-keyed shape either way.
  return JSON.parse(JSON.generate(value), symbolize_names: true) if value.is_a?(Hash)

  text = value.to_s
  return nil if text.strip.empty?

  JSON.parse(text, symbolize_names: true)
rescue JSON::ParserError
  nil
end
|
|
299
|
+
|
|
290
300
|
def rows_to_resources(rows)
|
|
291
301
|
rows.map do |r|
|
|
292
302
|
{
|
|
@@ -24,7 +24,7 @@ module Llmemory
|
|
|
24
24
|
id
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7)
|
|
27
|
+
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7, provenance: nil)
|
|
28
28
|
ensure_user_dir(user_id)
|
|
29
29
|
seq = next_seq(user_id, "item_id_seq")
|
|
30
30
|
id = "item_#{seq}"
|
|
@@ -35,6 +35,7 @@ module Llmemory
|
|
|
35
35
|
content: content,
|
|
36
36
|
source_resource_id: source_resource_id,
|
|
37
37
|
importance: importance,
|
|
38
|
+
provenance: provenance,
|
|
38
39
|
created_at: Time.now.iso8601
|
|
39
40
|
}
|
|
40
41
|
File.write(path, JSON.generate(data))
|
|
@@ -22,7 +22,7 @@ module Llmemory
|
|
|
22
22
|
id
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7)
|
|
25
|
+
def save_item(user_id, category:, content:, source_resource_id:, importance: 0.7, provenance: nil)
|
|
26
26
|
@item_id_seq += 1
|
|
27
27
|
id = "item_#{@item_id_seq}"
|
|
28
28
|
@items[user_id] << {
|
|
@@ -31,6 +31,7 @@ module Llmemory
|
|
|
31
31
|
content: content,
|
|
32
32
|
source_resource_id: source_resource_id,
|
|
33
33
|
importance: importance,
|
|
34
|
+
provenance: provenance,
|
|
34
35
|
created_at: Time.now
|
|
35
36
|
}
|
|
36
37
|
id
|
|
@@ -31,6 +31,13 @@ module Llmemory
|
|
|
31
31
|
!archived_at.nil?
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
+
# Lineage of this edge, stored within properties so it round-trips
|
|
35
|
+
# through every backend without a schema change. See Llmemory::Provenance.
|
|
36
|
+
# Returns the provenance entry kept inside this edge's properties, looking
# under the symbol key first and the string key second; nil when there are
# no properties or no such entry.
def provenance
  bag = properties
  return nil unless bag

  bag[:provenance] || bag["provenance"]
end
|
|
40
|
+
|
|
34
41
|
def to_h
|
|
35
42
|
{
|
|
36
43
|
id: id,
|
|
@@ -32,6 +32,7 @@ module Llmemory
|
|
|
32
32
|
|
|
33
33
|
return true if entities.empty? && relations.empty?
|
|
34
34
|
|
|
35
|
+
provenance = Llmemory::Provenance.from_text_fingerprint(text, method: "entity_relation_extraction")
|
|
35
36
|
name_to_id = {}
|
|
36
37
|
|
|
37
38
|
entities.each do |e|
|
|
@@ -39,7 +40,7 @@ module Llmemory
|
|
|
39
40
|
entity_type = e[:type] || e["type"] || "concept"
|
|
40
41
|
name = e[:name] || e["name"]
|
|
41
42
|
next if name.nil? || name.to_s.strip.empty?
|
|
42
|
-
id = @kg.add_node(entity_type: entity_type, name: name.to_s.strip, properties: {})
|
|
43
|
+
id = @kg.add_node(entity_type: entity_type, name: name.to_s.strip, properties: { "provenance" => provenance })
|
|
43
44
|
name_to_id[name.to_s.strip] ||= id
|
|
44
45
|
end
|
|
45
46
|
|
|
@@ -50,8 +51,8 @@ module Llmemory
|
|
|
50
51
|
object = (r[:object] || r["object"]).to_s.strip
|
|
51
52
|
next if subject.empty? || predicate.empty? || object.empty?
|
|
52
53
|
|
|
53
|
-
subject_id = name_to_id[subject] || @kg.add_node(entity_type: "concept", name: subject, properties: {})
|
|
54
|
-
object_id = name_to_id[object] || @kg.add_node(entity_type: "concept", name: object, properties: {})
|
|
54
|
+
subject_id = name_to_id[subject] || @kg.add_node(entity_type: "concept", name: subject, properties: { "provenance" => provenance })
|
|
55
|
+
object_id = name_to_id[object] || @kg.add_node(entity_type: "concept", name: object, properties: { "provenance" => provenance })
|
|
55
56
|
|
|
56
57
|
edge = Edge.new(
|
|
57
58
|
id: nil,
|
|
@@ -59,12 +60,12 @@ module Llmemory
|
|
|
59
60
|
subject_id: subject_id,
|
|
60
61
|
predicate: predicate,
|
|
61
62
|
target_id: object_id,
|
|
62
|
-
properties: {},
|
|
63
|
+
properties: { "provenance" => provenance },
|
|
63
64
|
created_at: Time.now,
|
|
64
65
|
archived_at: nil
|
|
65
66
|
)
|
|
66
67
|
@conflict_resolver.resolve(edge)
|
|
67
|
-
edge_id = @kg.add_edge(subject: subject_id, predicate: predicate, object: object_id, properties: {})
|
|
68
|
+
edge_id = @kg.add_edge(subject: subject_id, predicate: predicate, object: object_id, properties: { "provenance" => provenance })
|
|
68
69
|
|
|
69
70
|
text = "#{subject} #{predicate} #{object}"
|
|
70
71
|
embedding = @vector_store.respond_to?(:embed) ? @vector_store.embed(text) : nil
|
|
@@ -89,7 +90,8 @@ module Llmemory
|
|
|
89
90
|
{
|
|
90
91
|
text: r[:text],
|
|
91
92
|
timestamp: r[:created_at] || r[:timestamp],
|
|
92
|
-
score: r[:score] || 1.0
|
|
93
|
+
score: r[:score] || 1.0,
|
|
94
|
+
importance: r[:importance]
|
|
93
95
|
}
|
|
94
96
|
end
|
|
95
97
|
end
|
|
@@ -25,6 +25,13 @@ module Llmemory
|
|
|
25
25
|
)
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
+
# Lineage of this node, stored within properties so it round-trips
|
|
29
|
+
# through every backend without a schema change. See Llmemory::Provenance.
|
|
30
|
+
def provenance
  # Tolerate a missing properties bag, then look the lineage up under the
  # symbol key first and fall back to the string key.
  bag = properties || {}
  by_symbol = bag[:provenance]
  return by_symbol if by_symbol

  bag["provenance"]
end
|
|
34
|
+
|
|
28
35
|
def to_h
|
|
29
36
|
{
|
|
30
37
|
id: id,
|
data/lib/llmemory/long_term.rb
CHANGED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
|
|
4
|
+
|
|
5
|
+
module Llmemory
  # Provenance records the lineage of a long-term memory item: where it came
  # from, how it was produced, and with what confidence. It is stored as a
  # plain Hash so it can be handed to any storage backend without coupling.
  #
  # Shape: { sources: [{ type:, id: }], method:, confidence:, created_at: }
  #
  # `method` identifies the producing process (e.g. "fact_extraction",
  # "entity_relation_extraction", "reflection"), so a semantic datum can be
  # traced back to its raw source.
  module Provenance
    module_function

    # Builds a provenance Hash.
    #
    # @param method [#to_s, nil] name of the producing process; stringified
    # @param sources [Array, Hash, Object, nil] one source or a list of
    #   sources; each entry goes through {normalize_source} and entries that
    #   normalize to nil are dropped
    # @param confidence [#to_f, nil] converted with `to_f` unless nil
    # @param created_at [#iso8601, #to_s, nil] defaults to the current time
    # @return [Hash] with keys :sources, :method, :confidence, :created_at
    def build(method:, sources: [], confidence: nil, created_at: nil)
      # Array(hash) would explode a single source Hash into [key, value]
      # pairs, so a lone Hash is wrapped explicitly as one source.
      source_list = sources.is_a?(Hash) ? [sources] : Array(sources)

      {
        sources: source_list.filter_map { |s| normalize_source(s) },
        method: method&.to_s,
        confidence: confidence.nil? ? nil : confidence.to_f,
        created_at: normalize_time(created_at)
      }
    end

    # Convenience for the file-based path, where the raw text is persisted as a
    # Resource and referenced by id. A nil/false resource_id yields no sources.
    def from_resource(resource_id, method:, confidence: nil, created_at: nil)
      sources = resource_id ? [{ type: "resource", id: resource_id }] : []
      build(method: method, sources: sources, confidence: confidence, created_at: created_at)
    end

    # Convenience for the graph-based path, which does not persist the raw text.
    # Records a fingerprint (the first 16 hex characters of the SHA-256 digest
    # of the text) instead of the document itself. Blank text yields no sources.
    def from_text_fingerprint(text, method:, confidence: nil, created_at: nil)
      require "digest" # loaded lazily; only this path needs it
      sources = []
      unless text.to_s.strip.empty?
        sources = [{ type: "text_sha256", id: Digest::SHA256.hexdigest(text.to_s)[0, 16] }]
      end
      build(method: method, sources: sources, confidence: confidence, created_at: created_at)
    end

    # Normalizes one source entry to { type:, id: }.
    #
    # - nil                      -> nil (dropped by the caller)
    # - Hash (symbol/string keys) -> nil when it has no id; otherwise the type
    #   is stringified, defaulting to "unknown"
    # - anything else            -> treated as a bare id of type "unknown"
    def normalize_source(source)
      return nil if source.nil?
      if source.is_a?(Hash)
        type = source[:type] || source["type"]
        id = source[:id] || source["id"]
        return nil if id.nil?
        { type: type.nil? ? "unknown" : type.to_s, id: id }
      else
        { type: "unknown", id: source }
      end
    end

    # Returns the value as an ISO 8601 string when it responds to `iso8601`,
    # otherwise its `to_s`. A nil/false value is replaced by the current time.
    def normalize_time(value)
      value ||= Time.now
      value.respond_to?(:iso8601) ? value.iso8601 : value.to_s
    end
  end
end
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Llmemory
  module Reflection
    # Reflection distills an agent's recent episodes (episodic memory) into
    # durable, higher-order insights and writes them to semantic memory. This is
    # CoALA's "updating semantic memory with knowledge" (Reflexion / Generative
    # Agents): unlike one-shot extraction from raw text, it reasons over lived
    # experience to generalize lessons and patterns.
    #
    # Each insight is stored with provenance { method: "reflection",
    # sources: [{ type: "episode", id: ... }] } so it stays traceable to the
    # experiences that produced it.
    #
    # `episodic` must respond to `recent_episodes(limit:)` and return objects
    # responding to `id`, `outcome` and `searchable_text`.
    #
    # `semantic` must respond to:
    #   remember_fact(content:, category:, importance:, provenance:)
    # (FileBased::Memory implements this; graph-based is a future target.)
    class Reflector
      DEFAULT_CATEGORY = "insights"
      DEFAULT_IMPORTANCE = 0.6

      # @param episodic source of recent episodes
      # @param semantic destination for the distilled insights
      # @param llm object responding to `invoke(prompt)`; falls back to
      #   `Llmemory::LLM.client` when nil
      def initialize(episodic:, semantic:, llm: nil)
        @episodic = episodic
        @semantic = semantic
        @llm = llm || Llmemory::LLM.client
      end

      # Reflects over the most recent `window` episodes and writes the resulting
      # insights to semantic memory.
      #
      # @param window [Integer] passed as `limit:` to `recent_episodes`
      # @param category [String] category every insight is stored under
      # @return [Array] the truthy values returned by `remember_fact` (the ids
      #   of the stored insights); empty when there are no episodes or insights
      def reflect(window: 10, category: DEFAULT_CATEGORY)
        episodes = @episodic.recent_episodes(limit: window)
        return [] if episodes.empty?

        insights = distill(episodes)
        return [] if insights.empty?

        # Every insight from this pass cites the same set of episodes.
        sources = episodes.map(&:id).compact.map { |id| { type: "episode", id: id } }

        insights.filter_map do |insight|
          provenance = Llmemory::Provenance.build(
            method: "reflection",
            sources: sources,
            confidence: insight[:confidence]
          )
          @semantic.remember_fact(
            content: insight[:content],
            category: category,
            # confidence is nil when absent or invalid, so the default applies
            importance: insight[:confidence] || DEFAULT_IMPORTANCE,
            provenance: provenance
          )
        end
      end

      private

      # Asks the LLM for insights; an LLM failure yields no insights rather
      # than an exception.
      def distill(episodes)
        response = @llm.invoke(build_prompt(episodes))
        parse_insights(response)
      rescue Llmemory::LLMError
        []
      end

      # Renders the episodes (numbered from 1, with their outcome) into the
      # reflection prompt.
      def build_prompt(episodes)
        episodes_text = episodes.each_with_index.map do |ep, i|
          "Episode #{i + 1} (outcome: #{ep.outcome || 'n/a'}):\n#{ep.searchable_text}"
        end.join("\n\n")

        <<~PROMPT
          You are reflecting on an agent's recent experiences to distill durable,
          higher-order insights: lessons learned, recurring patterns, and stable
          preferences that will help in future situations. Generalize; do not
          restate raw events.

          Recent episodes:
          #{episodes_text}

          Return a JSON array of objects with "content" (the insight) and
          "confidence" (0-1) keys. Return an empty array if nothing durable can
          be concluded.
          Example: [{"content": "Rolling back on deploy failure reliably restores service", "confidence": 0.8}]
        PROMPT
      end

      # Turns the raw LLM response into [{ content:, confidence: }, ...],
      # skipping non-Hash entries and entries with blank content.
      def parse_insights(response)
        json = extract_json_array(response)
        return [] unless json

        json.filter_map do |item|
          next nil unless item.is_a?(Hash)
          content = item["content"] || item[:content]
          next nil if content.to_s.strip.empty?
          { content: content.to_s, confidence: normalize_confidence(item["confidence"] || item[:confidence]) }
        end
      end

      # Returns the confidence as a Float in 0..1, or nil when it is missing,
      # out of range, or not numeric. Strict conversion matters here: `to_f`
      # would turn a label such as "high" into 0.0, which is truthy and would
      # override DEFAULT_IMPORTANCE with an importance of zero.
      def normalize_confidence(value)
        number = Float(value, exception: false)
        return nil if number.nil?

        number.between?(0, 1) ? number : nil
      end

      # Parses the text between the first "[" and the last "]" of the response
      # as JSON. Returns nil when no such span exists or it is not valid JSON.
      def extract_json_array(response)
        response = response.to_s.strip
        start_idx = response.index("[")
        end_idx = response.rindex("]")
        return nil unless start_idx && end_idx

        JSON.parse(response[start_idx..end_idx])
      rescue JSON::ParserError
        nil
      end
    end
  end
end
|
|
@@ -54,6 +54,7 @@ module Llmemory
|
|
|
54
54
|
text: c[:text] || c["text"],
|
|
55
55
|
timestamp: parse_timestamp(c[:timestamp] || c["timestamp"] || c[:created_at] || c["created_at"]),
|
|
56
56
|
score: (c[:score] || c["score"] || 1.0).to_f,
|
|
57
|
+
importance: c[:importance] || c["importance"],
|
|
57
58
|
evergreen: c[:evergreen] || c["evergreen"]
|
|
58
59
|
}
|
|
59
60
|
end
|
|
@@ -3,12 +3,14 @@
|
|
|
3
3
|
module Llmemory
|
|
4
4
|
module Retrieval
|
|
5
5
|
class TemporalRanker
|
|
6
|
-
def initialize(half_life_days: nil)
|
|
6
|
+
def initialize(half_life_days: nil, importance_weight: nil)
  # Explicit arguments win; the global configuration is consulted only for
  # values the caller left unset.
  @half_life_days = half_life_days
  @half_life_days ||= Llmemory.configuration.time_decay_half_life_days

  @importance_weight = importance_weight
  @importance_weight ||= Llmemory.configuration.importance_weight
end
|
|
9
10
|
|
|
10
11
|
def rank(candidates, now: Time.now)
|
|
11
12
|
lambda_val = Math.log(2) / @half_life_days.to_f
|
|
13
|
+
weight = [@importance_weight.to_f, 0.0].max
|
|
12
14
|
|
|
13
15
|
candidates.map do |c|
|
|
14
16
|
score = (c[:score] || c["score"] || 1.0).to_f
|
|
@@ -22,10 +24,22 @@ module Llmemory
|
|
|
22
24
|
Math.exp(-lambda_val * age_days.to_f)
|
|
23
25
|
end
|
|
24
26
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
importance = normalize_importance(c[:importance] || c["importance"])
|
|
28
|
+
importance_factor = importance**weight
|
|
29
|
+
|
|
30
|
+
final_score = score * time_decay * importance_factor
|
|
31
|
+
c.merge(score: score, importance: importance, temporal_score: final_score, timestamp: timestamp)
|
|
27
32
|
end.sort_by { |c| -(c[:temporal_score] || 0) }
|
|
28
33
|
end
|
|
34
|
+
|
|
35
|
+
private
|
|
36
|
+
|
|
37
|
+
# Missing importance is neutral (1.0) so candidates that carry no
|
|
38
|
+
# importance signal (resources, graph edges) are never penalised.
|
|
39
|
+
def normalize_importance(value)
  # No importance signal at all maps to the neutral value 1.0.
  return 1.0 if value.nil?

  # Raise to at least 0.0 first, then cap at 1.0.
  floored = [value.to_f, 0.0].max
  [floored, 1.0].min
end
|
|
29
43
|
end
|
|
30
44
|
end
|
|
31
45
|
end
|
data/lib/llmemory/version.rb
CHANGED
data/lib/llmemory.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "llmemory/version"
|
|
4
4
|
require_relative "llmemory/configuration"
|
|
5
|
+
require_relative "llmemory/provenance"
|
|
5
6
|
require_relative "llmemory/llm"
|
|
6
7
|
require_relative "llmemory/short_term"
|
|
7
8
|
require_relative "llmemory/long_term"
|
|
@@ -9,6 +10,7 @@ require_relative "llmemory/retrieval"
|
|
|
9
10
|
require_relative "llmemory/vector_store"
|
|
10
11
|
require_relative "llmemory/maintenance"
|
|
11
12
|
require_relative "llmemory/extractors"
|
|
13
|
+
require_relative "llmemory/reflection"
|
|
12
14
|
require_relative "llmemory/memory"
|
|
13
15
|
|
|
14
16
|
module Llmemory
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llmemory
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.15
|
|
4
|
+
version: 0.1.17
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- llmemory
|
|
@@ -152,6 +152,13 @@ files:
|
|
|
152
152
|
- lib/llmemory/llm/base.rb
|
|
153
153
|
- lib/llmemory/llm/openai.rb
|
|
154
154
|
- lib/llmemory/long_term.rb
|
|
155
|
+
- lib/llmemory/long_term/episodic.rb
|
|
156
|
+
- lib/llmemory/long_term/episodic/episode.rb
|
|
157
|
+
- lib/llmemory/long_term/episodic/memory.rb
|
|
158
|
+
- lib/llmemory/long_term/episodic/storage.rb
|
|
159
|
+
- lib/llmemory/long_term/episodic/storages/base.rb
|
|
160
|
+
- lib/llmemory/long_term/episodic/storages/file_storage.rb
|
|
161
|
+
- lib/llmemory/long_term/episodic/storages/memory_storage.rb
|
|
155
162
|
- lib/llmemory/long_term/file_based.rb
|
|
156
163
|
- lib/llmemory/long_term/file_based/category.rb
|
|
157
164
|
- lib/llmemory/long_term/file_based/item.rb
|
|
@@ -195,6 +202,9 @@ files:
|
|
|
195
202
|
- lib/llmemory/mcp/tools/memory_timeline_context.rb
|
|
196
203
|
- lib/llmemory/memory.rb
|
|
197
204
|
- lib/llmemory/noise_filter.rb
|
|
205
|
+
- lib/llmemory/provenance.rb
|
|
206
|
+
- lib/llmemory/reflection.rb
|
|
207
|
+
- lib/llmemory/reflection/reflector.rb
|
|
198
208
|
- lib/llmemory/retrieval.rb
|
|
199
209
|
- lib/llmemory/retrieval/bm25_scorer.rb
|
|
200
210
|
- lib/llmemory/retrieval/context_assembler.rb
|
|
@@ -240,7 +250,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
240
250
|
- !ruby/object:Gem::Version
|
|
241
251
|
version: '0'
|
|
242
252
|
requirements: []
|
|
243
|
-
rubygems_version: 4.0.
|
|
253
|
+
rubygems_version: 4.0.10
|
|
244
254
|
specification_version: 4
|
|
245
255
|
summary: Persistent memory system for LLM agents
|
|
246
256
|
test_files: []
|