pikuri-memory 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +96 -13
- data/docker/README.md +50 -0
- data/docker/docker-compose.yml +113 -0
- data/docker/qdrant-default-config.patch +24 -0
- data/lib/pikuri/memory/extension.rb +293 -0
- data/lib/pikuri/memory/mem0_client.rb +264 -0
- data/lib/pikuri/memory/mem0_server.rb +551 -0
- data/lib/pikuri/memory/recall.rb +107 -0
- data/lib/pikuri/memory/record.rb +72 -0
- data/lib/pikuri/memory/recorder.rb +134 -0
- data/lib/pikuri-memory.rb +78 -5
- data/prompts/memory-extraction.txt +44 -0
- data/prompts/pikuri-memory.txt +7 -0
- metadata +50 -12
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'erb'
require 'faraday'
require 'json'
|
|
5
|
+
|
|
6
|
+
module Pikuri
|
|
7
|
+
module Memory
|
|
8
|
+
# Thin Faraday HTTP client against a self-hosted mem0 server
|
|
9
|
+
# (the v3 "token-efficient" line — see DESIGN.md). Only
|
|
10
|
+
# the handful of REST endpoints pikuri needs are wired; the client
|
|
11
|
+
# is hand-rolled rather than a dependency on a mem0 Ruby SDK
|
|
12
|
+
# (there isn't a maintained one), Faraday is already in
|
|
13
|
+
# pikuri-core's closure, and a thin first-party client keeps the
|
|
14
|
+
# wire protocol auditable in one readable file. Same shape as
|
|
15
|
+
# +pikuri-vectordb+'s +Backend::Chroma+.
|
|
16
|
+
#
|
|
17
|
+
# == Bring your own server
|
|
18
|
+
#
|
|
19
|
+
# +Mem0Client.new(endpoint:)+ points at an already-running mem0 server
|
|
20
|
+
# (a docker-compose stack, a shared deployment). This release does
|
|
21
|
+
# *not* ship a supervisor that starts one — that's the
|
|
22
|
+
# +Server::Chroma+-style follow-on. The server must be configured for
|
|
23
|
+
# the local stack pikuri assumes: a local OpenAI-compatible LLM +
|
|
24
|
+
# embedder (llama.cpp via +openai_base_url+), the **Qdrant** vector
|
|
25
|
+
# backend (the pgvector path has a top-k inversion bug — see
|
|
26
|
+
# DESIGN.md §"Root cause: the pgvector top-k inversion"), and a non-reasoning
|
|
27
|
+
# extraction model. Endpoints used:
|
|
28
|
+
#
|
|
29
|
+
# * +POST /memories+ — append. Body
|
|
30
|
+
# +{ messages:, user_id:, infer:, prompt? }+. Returns
|
|
31
|
+
# +{ "results": [{ id, memory, event }] }+.
|
|
32
|
+
# * +POST /search+ — semantic recall. Body
|
|
33
|
+
# +{ query:, filters: { user_id: }, top_k:, threshold? }+.
|
|
34
|
+
# Returns +{ "results": [{ id, memory, score, created_at, ... }] }+,
|
|
35
|
+
# ranked nearest-first (Qdrant +score+ = similarity).
|
|
36
|
+
# * +GET /memories?user_id=+ — every memory for a user (search
|
|
37
|
+
# row shape, minus +score+/+event+).
|
|
38
|
+
# * +DELETE /memories/{id}+ — granular erase.
|
|
39
|
+
# * +POST /reset+ — coarse erase (drop everything).
|
|
40
|
+
#
|
|
41
|
+
# == User-role content only (write-side hygiene)
|
|
42
|
+
#
|
|
43
|
+
# {#add} takes a single +content+ String and wraps it as one
|
|
44
|
+
# +role: "user"+ message — it cannot send assistant/tool/system
|
|
45
|
+
# turns. That's deliberate: feeding only the user's own words to
|
|
46
|
+
# extraction structurally removes the dominant junk sources a
|
|
47
|
+
# production mem0 audit measured (assistant restating, recalled-
|
|
48
|
+
# memory feedback loops, involuntary secret leakage) — see
|
|
49
|
+
# DESIGN.md §"Extraction-input discipline". The rule lives
|
|
50
|
+
# in the method signature so it can't be bypassed by accident.
|
|
51
|
+
#
|
|
52
|
+
# == Errors are loud
|
|
53
|
+
#
|
|
54
|
+
# Non-2xx responses (except a 404 on {#delete}, which is treated as
# "already gone") and Faraday transport errors raise
|
|
55
|
+
# +RuntimeError+ with the offending detail. The client doesn't
|
|
56
|
+
# decide what's recoverable — its callers do: {Recall} turns a
|
|
57
|
+
# failure into an +"Error: ..."+ observation the LLM can react to,
|
|
58
|
+
# {Recorder} logs-and-drops so a transient mem0 blip never crashes
|
|
59
|
+
# the capture worker, and {Extension}'s prefetch rescues to "inject
|
|
60
|
+
# nothing this turn."
|
|
61
|
+
class Mem0Client
|
|
62
|
+
# Class-level logger obtained from +Pikuri.logger_for+. The string
# argument presumably labels this class's log lines — confirm against
# +Pikuri.logger_for+, which is defined outside this file.
LOGGER = Pikuri.logger_for('Memory::Mem0Client')
|
|
63
|
+
|
|
64
|
+
# @return [String] default mem0 server base URL — the server's
|
|
65
|
+
# own +:8000+ on localhost. A host running the dev
|
|
66
|
+
# docker-compose (which publishes +8888->8000+) passes that
|
|
67
|
+
# port explicitly; the supervisor follow-on will own the
|
|
68
|
+
# mapping.
|
|
69
|
+
# Used as the +endpoint:+ keyword default in {#initialize}.
DEFAULT_ENDPOINT = 'http://localhost:8000'
|
|
70
|
+
|
|
71
|
+
# @return [Integer] default per-request read timeout, in seconds.
|
|
72
|
+
# Deliberately generous: the first +POST /memories+ (and the
|
|
73
|
+
# first +/search+) on a fresh stack blocks on the local
|
|
74
|
+
# llama.cpp router cold-loading the extraction / embedder model
|
|
75
|
+
# into memory — a one-off wait that can run well past
|
|
76
|
+
# net_http's stock ~60s before any token comes back. A short
|
|
77
|
+
# timeout there turns a normal cold start into a dropped turn
|
|
78
|
+
# (+Recorder+ logs-and-drops). Steady-state extraction is ~3s,
|
|
79
|
+
# so this ceiling only ever bites on the cold path or a genuine
|
|
80
|
+
# hang. Override with +PIKURI_MEMORY_TIMEOUT+ or the +timeout:+
|
|
81
|
+
# kwarg.
|
|
82
|
+
# Used as the +timeout:+ keyword default in {#initialize}; the value is
# in seconds.
DEFAULT_TIMEOUT = 300
|
|
83
|
+
|
|
84
|
+
# Build a client for one mem0 server.
#
# @param endpoint [String] mem0 server base URL. +/memories+,
#   +/search+, etc. are appended internally.
# @param timeout [Integer] per-request timeout in seconds, handed to
#   Faraday's +options.timeout+ (see {DEFAULT_TIMEOUT} for why it's
#   large). Resolves as non-blank +PIKURI_MEMORY_TIMEOUT+ env → this
#   kwarg → {DEFAULT_TIMEOUT}. A blank env value counts as "unset".
# @param connection [Faraday::Connection, nil] dependency-
#   injection hook for tests (wire +Faraday::Adapter::Test+ stubs
#   here). Production callers leave it +nil+; a fresh JSON
#   connection is built against +endpoint+. When supplied, the
#   timeout is neither applied nor validated — the injected connection
#   owns its own options.
# @return [Mem0Client]
# @raise [ArgumentError] on a blank (nil, empty or whitespace-only)
#   +endpoint+, or — when this client builds its own connection — on a
#   timeout that does not resolve to a positive integer (for example
#   +PIKURI_MEMORY_TIMEOUT=abc+ or +0+).
def initialize(endpoint: DEFAULT_ENDPOINT, timeout: DEFAULT_TIMEOUT, connection: nil)
  raise ArgumentError, 'endpoint must be non-empty' if endpoint.nil? || endpoint.to_s.strip.empty?

  @endpoint = endpoint

  # The env var wins over the kwarg, but only when it actually carries a
  # value: `PIKURI_MEMORY_TIMEOUT=` must not silently become 0 seconds.
  env_timeout = ENV['PIKURI_MEMORY_TIMEOUT'].to_s.strip
  raw_timeout = env_timeout.empty? ? timeout : env_timeout

  # Strict parse: String#to_i would turn "abc" into 0 and "5m" into 5.
  # Strings are read as base 10 so "010" means ten, not octal eight.
  @timeout =
    if raw_timeout.is_a?(String)
      Integer(raw_timeout.strip, 10, exception: false)
    else
      Integer(raw_timeout, exception: false)
    end

  if connection
    @connection = connection
    return
  end

  unless @timeout&.positive?
    raise ArgumentError,
          "timeout must be a positive integer number of seconds (got #{raw_timeout.inspect})"
  end

  @connection = Faraday.new(url: endpoint) do |f|
    f.request :json
    f.response :json
    f.options.timeout = @timeout
    f.adapter Faraday.default_adapter
  end
end
|
|
109
|
+
|
|
110
|
+
# @return [String] the mem0 server base URL this client targets.
|
|
111
|
+
# Reader only: this class defines no public writer for +endpoint+.
attr_reader :endpoint
|
|
112
|
+
|
|
113
|
+
# Append a memory. Wraps +content+ as one +role: "user"+ message
# (the user-only rule — see the class header) and posts it to
# +POST /memories+ for extraction. mem0 is append-only: a correction
# is a *newer* add, never a mutation of an earlier row
# (DESIGN.md §"Why mem0").
#
# @param content [String] the user's own words to extract from.
# @param user_id [String] the mem0 namespace (one per user).
# @param infer [Boolean] +true+ (default) stores LLM-extracted
#   facts; +false+ stores the verbatim turn (the raw-log option,
#   unused in v1).
# @param prompt [String, nil] optional per-request
#   +custom_fact_extraction_prompt+ override. {Extension} passes
#   the curated +memory-extraction+ prompt here so a BYO server
#   needn't be reconfigured. +nil+, empty and whitespace-only values
#   are not sent at all.
# @return [Array<Record>] the +add+ outcome rows (each carries an
#   +event+: +"ADD"+ / +"UPDATE"+ / +"NONE"+ / ...). Empty when
#   extraction found nothing worth storing.
# @raise [ArgumentError] on blank (nil, empty or whitespace-only)
#   +content+ or +user_id+.
# @raise [RuntimeError] on HTTP failure.
def add(content:, user_id:, infer: true, prompt: nil)
  raise ArgumentError, 'content must be non-empty' if content.nil? || content.to_s.strip.empty?
  # Whitespace-only ids are rejected too, so " " cannot become a namespace.
  raise ArgumentError, 'user_id must be non-empty' if user_id.nil? || user_id.to_s.strip.empty?

  body = {
    messages: [{ role: 'user', content: content }],
    user_id: user_id,
    infer: infer
  }
  # Omit the key entirely when there is no real prompt text, so the
  # server falls back to its own configured extraction prompt.
  body[:prompt] = prompt if prompt && !prompt.to_s.strip.empty?

  results_of(post_json('/memories', body))
end
|
|
145
|
+
|
|
146
|
+
# Semantic recall via +POST /search+. Returns at most +top_k+
# {Record}s ranked nearest-first (mem0's Qdrant backend returns a
# similarity in +score+; higher = more relevant).
#
# mem0 does *not* resolve contradictions at read time — it
# returns the relevant memories (including a stale fact and its
# correction) near-tied, and the consuming LLM resolves them at
# synthesis. That's why {Record#created_at} rides along. See
# DESIGN.md §"Supersede recall: resolution is the consumer's job".
#
# @param query [String] natural-language recall query (typically
#   the latest user message, or a +recall+ topic).
# @param user_id [String] the mem0 namespace to search.
# @param top_k [Integer] max rows to return.
# @param threshold [Float, nil] optional server-side similarity
#   floor. +nil+ omits the key and lets the server decide; {Extension}
#   also filters client-side so the contract holds regardless.
# @return [Array<Record>] ranked results; empty on no match.
# @raise [ArgumentError] on blank (nil, empty or whitespace-only)
#   +query+/+user_id+, or when +top_k+ is not a positive number
#   (including +nil+ and strings).
# @raise [RuntimeError] on HTTP failure.
def search(query:, user_id:, top_k: 5, threshold: nil)
  raise ArgumentError, 'query must be non-empty' if query.nil? || query.to_s.strip.empty?
  raise ArgumentError, 'user_id must be non-empty' if user_id.nil? || user_id.to_s.strip.empty?

  # Duck-typed check: nil and strings lack #positive?, so they fail here
  # with the documented ArgumentError instead of a NoMethodError or an
  # unrelated "comparison failed" error.
  unless top_k.respond_to?(:positive?) && top_k.positive?
    raise ArgumentError, "top_k must be positive (got #{top_k.inspect})"
  end

  body = { query: query, filters: { user_id: user_id }, top_k: top_k }
  body[:threshold] = threshold unless threshold.nil?

  results_of(post_json('/search', body))
end
|
|
177
|
+
|
|
178
|
+
# Every memory stored for +user_id+, in mem0's order, fetched with
# +GET /memories?user_id=+. Backs the resident-persona summary and
# any future audit dump.
#
# @param user_id [String] the mem0 namespace.
# @return [Array<Record>] all rows (no +score+, no +event+).
# @raise [ArgumentError] on blank (nil, empty or whitespace-only)
#   +user_id+.
# @raise [RuntimeError] on a non-2xx response or a Faraday error.
def get_all(user_id:)
  raise ArgumentError, 'user_id must be non-empty' if user_id.nil? || user_id.to_s.strip.empty?

  response = @connection.get('/memories', { user_id: user_id })
  # Any 2xx is success: the class contract is "non-2xx raises", so a
  # bodiless 204 must not be reported as a failure.
  unless (200..299).cover?(response.status)
    raise "Memory::Mem0Client: GET /memories returned HTTP #{response.status}: #{response.body.inspect}"
  end

  results_of(response.body)
rescue Faraday::Error => e
  # Re-raise as a plain RuntimeError so callers see one error type.
  raise "Memory::Mem0Client: #{e.class.name.split('::').last} calling GET /memories: #{e.message}"
end
|
|
197
|
+
|
|
198
|
+
# Granular erase: physically remove one memory by id with
# +DELETE /memories/{id}+ (the privacy right-to-forget, user-gated at
# the caller). Idempotent — a 404 is treated as "already gone".
#
# @param id [String] the {Record#id} to delete. It is percent-encoded
#   before being placed in the URL path.
# @return [void]
# @raise [ArgumentError] on blank (nil, empty or whitespace-only) +id+.
# @raise [RuntimeError] on HTTP failure other than 404.
def delete(id:)
  raise ArgumentError, 'id must be non-empty' if id.nil? || id.to_s.strip.empty?

  # Encode the id as a single path segment so characters such as "/",
  # "?" or "#" cannot retarget the request at a different route.
  response = @connection.delete("/memories/#{ERB::Util.url_encode(id.to_s)}")
  # Any 2xx means the row is gone now; 404 means it was gone already.
  return if (200..299).cover?(response.status) || response.status == 404

  raise "Memory::Mem0Client: DELETE /memories/#{id} returned HTTP #{response.status}: #{response.body.inspect}"
rescue Faraday::Error => e
  # Re-raise as a plain RuntimeError so callers see one error type.
  raise "Memory::Mem0Client: #{e.class.name.split('::').last} calling DELETE /memories/#{id}: #{e.message}"
end
|
|
216
|
+
|
|
217
|
+
# Coarse erase: ask the server to drop *all* memories by POSTing an
# empty JSON object to +/reset+. This is the blunt right-to-forget;
# per-fact erase is {#delete}. Used by tooling / tests, not on the
# agent path.
#
# @return [void] always +nil+; the server's response body is discarded.
# @raise [RuntimeError] on HTTP failure.
def reset!
  empty_payload = {}
  post_json('/reset', empty_payload)
  nil
end
|
|
227
|
+
|
|
228
|
+
private
|
|
229
|
+
|
|
230
|
+
# Turn a mem0 response body into {Record}s. Two shapes are accepted:
# a +{ "results" => [...] }+ envelope (add / search) and a bare array
# (some +get_all+ deployments). A Hash without a +"results"+ value, or
# any other body type, yields an empty list.
#
# @param body [Hash, Array, Object] the parsed response body.
# @return [Array<Record>] one {Record} per row, built by +Record.from+.
def results_of(body)
  entries =
    case body
    when Hash then body['results'] || []
    when Array then body
    else []
    end

  entries.map { |entry| Record.from(entry) }
end
|
|
244
|
+
|
|
245
|
+
# JSON-POST helper shared by add / search / reset. Centralizes
# the error shape and +Faraday::Error+ wrapping (same template as
# +Backend::Chroma#post_json+).
#
# @param path [String] request path, e.g. +"/memories"+.
# @param body [Hash] request payload, assigned to the request as-is;
#   serialization is left to the connection's middleware.
# @return [Object] the response body as the connection returns it.
# @raise [RuntimeError] on a non-2xx response or a Faraday error.
def post_json(path, body)
  response = @connection.post(path) do |req|
    req.headers['Content-Type'] = 'application/json'
    req.body = body
  end

  # Any 2xx is success: the class contract is "non-2xx raises", so a
  # 202 or a bodiless 204 must not be reported as a failure.
  unless (200..299).cover?(response.status)
    raise "Memory::Mem0Client: POST #{path} returned HTTP #{response.status}: #{response.body.inspect}"
  end

  response.body
rescue Faraday::Error => e
  # Re-raise as a plain RuntimeError so callers see one error type.
  raise "Memory::Mem0Client: #{e.class.name.split('::').last} calling POST #{path}: #{e.message}"
end
|
|
262
|
+
end
|
|
263
|
+
end
|
|
264
|
+
end
|