memoflow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,308 @@
1
+ # frozen_string_literal: true
2
+
3
module Memoflow
  # Facade that wires a GitContext, a Store and an embedding provider together
  # and exposes commit capture, annotations, task/session tracking and ranked
  # lookup of stored records.
  class Client
    # @param configuration [#resolved_storage_path, #encryption_key] settings used to build the store
    #   (also handed to the default EmbeddingProvider)
    # @param repo_path [String, Pathname] repository this client operates on
    # @param env [Object] environment forwarded to GitContext (defaults to ENV)
    # @param embedding_provider [#embed, #enabled?, nil] optional replacement for the default provider
    def initialize(configuration:, repo_path:, env: ENV, embedding_provider: nil)
      @configuration = configuration
      @repo_path = Pathname.new(repo_path)
      @env = env
      @git = GitContext.new(repo_path: @repo_path, env: @env)
      @embedding_provider = embedding_provider || EmbeddingProvider.new(configuration: configuration)
      @store = Store.new(
        root: configuration.resolved_storage_path(@repo_path),
        encryptor: Encryptor.new(configuration.encryption_key)
      )
    end

    # Delegates to Store#setup!.
    def init!
      @store.setup!
    end

    # Captures whatever commit GitContext reports as HEAD.
    def capture_last_commit
      capture_commit(@git.last_commit_sha)
    end

    # @return [String] SHA reported by GitContext#last_commit_sha
    def last_commit_sha
      @git.last_commit_sha
    end

    # Builds a commit record, tags it with the task id and (re)computes its
    # embedding. The record is written only when the store does not already
    # know the SHA; it is returned either way.
    #
    # @param sha [String] commit to capture
    # @param task_id [String, nil] task to attach; defaults to the current task
    # @return [Hash] the captured record
    def capture_commit(sha, task_id: current_task_id)
      record = @git.capture_commit(sha)
      record[:task_id] = task_id if task_id
      record[:embedding] = embed_record(record)
      return record if @store.commit?(sha)

      @store.write_commit(record)
      record
    end

    # Stores a free-form note linked to the current task.
    #
    # @param text [String] used for both :summary and :body
    # @param tags [Array] stored as given
    # @param related_sha [String, nil] optional commit reference
    # @param source [String] origin label, "manual" by default
    # @return [Hash] the stored annotation record
    def annotate(text, tags: [], related_sha: nil, source: "manual")
      record = {
        id: SecureRandom.uuid,
        type: "annotation",
        source: source,
        tags: tags,
        related_sha: related_sha,
        task_id: current_task_id,
        summary: text,
        body: text,
        timestamp: Time.now.utc.iso8601
      }
      record[:embedding] = embed_record(record)
      @store.write_annotation(record)
      record
    end

    # Creates a new active task, opens a session for it and marks it current.
    #
    # @return [Hash] the task record plus :current_session_id
    def start_task(title, description: nil, tags: [])
      task_id = SecureRandom.uuid
      timestamp = Time.now.utc.iso8601
      task = {
        id: task_id,
        type: "task",
        title: title,
        description: description,
        tags: tags,
        status: "active",
        created_at: timestamp,
        updated_at: timestamp,
        repo_root: @repo_path.to_s
      }
      task[:embedding] = embed_record(task)
      session = start_session(task_id)
      @store.write_task(task)
      @store.write_state("current_task", { task_id: task_id, session_id: session[:id], updated_at: timestamp })
      task.merge(current_session_id: session[:id])
    end

    # Marks a task with the given status, closes its session when it is the
    # current one and clears the current-task pointer if it pointed here.
    #
    # @raise [Error] when there is no task id or the task cannot be found
    # @return [Hash] the updated task record
    def finish_task(task_id = current_task_id, status: "completed")
      raise Error, "no active task" unless task_id

      task = find_task(task_id)
      raise Error, "task not found" unless task

      updated = task.merge(status: status, updated_at: Time.now.utc.iso8601)
      updated[:embedding] = embed_record(updated)
      @store.write_task(updated)
      finish_current_session(task_id)
      clear_current_task if current_task_id == task_id
      updated
    end

    # Re-activates an existing task under a fresh session and makes it current.
    #
    # @raise [Error] when the task cannot be found
    # @return [Hash] the updated task record plus :current_session_id
    def resume_task(task_id)
      task = find_task(task_id)
      raise Error, "task not found" unless task

      session = start_session(task_id)
      now = Time.now.utc.iso8601
      updated = task.merge(status: "active", updated_at: now)
      updated[:embedding] = embed_record(updated)
      @store.write_task(updated)
      @store.write_state("current_task", { task_id: task_id, session_id: session[:id], updated_at: now })
      updated.merge(current_session_id: session[:id])
    end

    # Lists task records, most recently updated first.
    #
    # @param limit [Integer] maximum number of tasks returned
    # @param status [String, nil] when given, only tasks with this status
    def tasks(limit: 20, status: nil)
      items = @store.read_scope("tasks")
      items = items.select { |task| task[:status] == status } if status
      items.sort_by { |task| sortable_time(task, :updated_at) }.reverse.first(limit)
    end

    # @return [Hash, nil] record of the current task, if any
    def current_task
      task_id = current_task_id
      task_id ? find_task(task_id) : nil
    end

    # @return [String, nil] :task_id from the "current_task" state entry
    def current_task_id
      @store.read_state("current_task")&.dig(:task_id)
    end

    # Ranks every stored entry against +term+ and returns the best matches.
    #
    # With a blank term every entry is kept and ranked by recency/task boosts
    # only; otherwise entries with a non-positive score are dropped. The query
    # term is embedded once and the current task id is read once per call
    # rather than once per entry.
    #
    # NOTE(review): ties are ordered oldest-first (ascending time), which is
    # kept as-is here; confirm that is intended.
    #
    # @param term [String, nil] search text
    # @param limit [Integer] maximum number of results
    # @return [Array<Hash>] presented entries (see #present)
    def query(term = nil, limit: 5)
      blank = term.to_s.strip.empty?
      query_vector = blank ? nil : embed_text(term)
      active_task_id = current_task_id

      scored = @store.read_all.map do |entry|
        [score(entry, term, query_vector: query_vector, active_task_id: active_task_id), entry]
      end
      scored = scored.select { |value, _entry| value.positive? } unless blank

      ranked = scored.sort_by { |value, entry| [-value, sortable_time(entry)] }.map(&:last)
      ranked.first(limit).map { |entry| present(entry, query_vector: query_vector) }
    end

    # Renders query results as pipe-separated lines, preceded by a header
    # describing the current task when there is one.
    #
    # @param query [String, nil] search text forwarded to #query
    # @param limit [Integer] maximum number of entries
    # @return [String] newline-joined packet
    def context_packet(query: nil, limit: 5)
      # `query(...)` with an argument list is a method call even though a
      # local of the same name exists.
      entries = query(query, limit: limit)
      task = current_task
      header = task ? "current_task=#{task[:title]} status=#{task[:status]}" : nil
      lines = entries.map do |entry|
        parts = [entry[:type], entry[:timestamp], entry[:summary]]
        files = entry[:changed_files]
        parts << "files=#{files.join(",")}" if files && !files.empty?
        parts.compact.join(" | ")
      end
      ([header] + lines).compact.join("\n")
    end

    # Delegates to Store#export_bundle.
    def export_bundle(path)
      @store.export_bundle(path)
    end

    # Delegates to Store#import_bundle.
    def import_bundle(path)
      @store.import_bundle(path)
    end

    # Delegates to Store#prune! with the same keyword arguments.
    def prune!(keep_days: nil, max_records: nil)
      @store.prune!(keep_days: keep_days, max_records: max_records)
    end

    # @return [String] "provider" when the embedding provider reports enabled, else "local"
    def embedding_mode
      @embedding_provider.enabled? ? "provider" : "local"
    end

    private

    # Projects a stored entry onto the uniform result shape returned by #query.
    # Commit records contribute :sha, :committed_at and :subject where the
    # generic :id, :timestamp and :summary keys are missing.
    def present(entry, query_vector: nil)
      {
        id: entry[:id] || entry[:sha],
        type: entry[:type],
        timestamp: entry[:timestamp] || entry[:committed_at],
        summary: entry[:summary] || entry[:subject],
        body: entry[:body],
        changed_files: entry[:changed_files],
        tags: entry[:tags],
        related_sha: entry[:related_sha],
        task_id: entry[:task_id],
        status: entry[:status],
        title: entry[:title],
        description: entry[:description],
        provider: entry[:provider],
        repository: entry[:repository],
        pull_request: entry[:pull_request],
        semantic_score: query_vector ? Vectorizer.similarity(entry[:embedding], query_vector).round(4) : nil
      }
    end

    # Parses the timestamp stored under +key+ (or :timestamp / :committed_at
    # when no key is given); entries without one sort as the Unix epoch.
    def sortable_time(entry, key = nil)
      source = key ? entry[key] : (entry[:timestamp] || entry[:committed_at])
      Time.parse(source || Time.at(0).utc.iso8601)
    end

    # Total relevance of +entry+ for +term+: lexical field matches plus
    # recency, current-task and semantic boosts. A blank term yields the
    # boosts plus a constant 1 so every entry stays eligible.
    def score(entry, term, query_vector: nil, active_task_id: current_task_id)
      context_score = recency_boost(entry) + task_boost(entry, active_task_id)
      return context_score + 1 if term.to_s.strip.empty?

      # Argument-less split ignores leading whitespace, so a padded term never
      # produces an empty token (which would match every non-empty field).
      tokens = term.to_s.downcase.split
      fields = normalized_fields(entry)
      lexical = tokens.sum { |token| field_score(fields, token) }

      lexical + context_score + semantic_boost(entry, query_vector)
    end

    # Latest stored version of the task with the given id, or nil.
    def find_task(task_id)
      @store.read_scope("tasks").reverse_each.find { |task| task[:id] == task_id }
    end

    # Writes and returns a new active session record for +task_id+.
    def start_session(task_id)
      record = {
        id: SecureRandom.uuid,
        type: "session",
        task_id: task_id,
        started_at: Time.now.utc.iso8601,
        status: "active"
      }
      @store.write_session(record)
      record
    end

    # Closes the session referenced by the current-task state, but only when
    # that state belongs to +task_id+ and the session record exists.
    def finish_current_session(task_id)
      current = @store.read_state("current_task")
      return unless current && current[:task_id] == task_id

      session = @store.read_scope("sessions").reverse_each.find { |item| item[:id] == current[:session_id] }
      return unless session

      @store.write_session(session.merge(status: "closed", ended_at: Time.now.utc.iso8601))
    end

    # Overwrites the current-task state with nil ids.
    def clear_current_task
      @store.write_state("current_task", { task_id: nil, session_id: nil, updated_at: Time.now.utc.iso8601 })
    end

    # Lower-cased string views of every searchable field of +entry+.
    def normalized_fields(entry)
      pr = entry[:pull_request] || {}
      {
        summary: entry[:summary].to_s.downcase,
        subject: entry[:subject].to_s.downcase,
        body: entry[:body].to_s.downcase,
        title: entry[:title].to_s.downcase,
        description: entry[:description].to_s.downcase,
        files: Array(entry[:changed_files]).join(" ").downcase,
        tags: Array(entry[:tags]).join(" ").downcase,
        task_id: entry[:task_id].to_s.downcase,
        repository: entry[:repository].to_s.downcase,
        provider: entry[:provider].to_s.downcase,
        pr_title: pr[:title].to_s.downcase,
        pr_body: pr[:body].to_s.downcase,
        pr_number: pr[:number].to_s.downcase
      }
    end

    # Weighted sum over the fields that contain +token+.
    def field_score(fields, token)
      score = 0
      score += 6 if exact_or_prefix_match?(fields[:title], token)
      score += 5 if exact_or_prefix_match?(fields[:subject], token)
      score += 5 if exact_or_prefix_match?(fields[:summary], token)
      score += 4 if exact_or_prefix_match?(fields[:pr_title], token)
      score += 3 if exact_or_prefix_match?(fields[:files], token)
      score += 2 if exact_or_prefix_match?(fields[:tags], token)
      score += 2 if exact_or_prefix_match?(fields[:repository], token)
      score += 1 if exact_or_prefix_match?(fields[:body], token)
      score += 1 if exact_or_prefix_match?(fields[:description], token)
      score += 1 if exact_or_prefix_match?(fields[:pr_body], token)
      score += 1 if exact_or_prefix_match?(fields[:pr_number], token)
      score += 1 if exact_or_prefix_match?(fields[:provider], token)
      score += 1 if exact_or_prefix_match?(fields[:task_id], token)
      score
    end

    # True when the non-empty +text+ contains +token+. A word that equals or
    # starts with the token necessarily contains it, so the substring test
    # alone covers the exact, prefix and infix cases.
    def exact_or_prefix_match?(text, token)
      return false if text.empty?

      text.include?(token)
    end

    # 2 for entries up to a day old, 1 up to a week old, 0 otherwise
    # (including entries timestamped in the future).
    def recency_boost(entry)
      timestamp = sortable_time(entry)
      age_in_days = ((Time.now.utc - timestamp) / 86_400.0)
      return 0 if age_in_days.negative?
      return 2 if age_in_days <= 1
      return 1 if age_in_days <= 7

      0
    end

    # 4 when the entry belongs to the active task, 0 otherwise.
    def task_boost(entry, active_task_id = current_task_id)
      active_task_id && entry[:task_id] == active_task_id ? 4 : 0
    end

    # Similarity between the entry embedding and the query embedding scaled
    # to 0..10-ish; 0 when either vector is missing.
    def semantic_boost(entry, query_vector)
      vector = entry[:embedding]
      return 0 if vector.nil? || query_vector.nil?

      (Vectorizer.similarity(vector, query_vector) * 10).round(4)
    end

    # Embeds the text Vectorizer.indexable_text derives from +record+.
    def embed_record(record)
      embed_text(Vectorizer.indexable_text(record))
    end

    # Embeds +text+ through the configured provider.
    def embed_text(text)
      @embedding_provider.embed(text)
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Memoflow
  # Mutable settings bag for storage location, encryption key, logging,
  # server binding and the optional external embedding command.
  class Configuration
    attr_accessor :storage_path, :encryption_key, :logger, :storage_policy, :server_host, :server_port,
                  :embedding_command, :embedding_timeout

    # Populates defaults; the encryption key is taken from the MEMOFLOW_KEY
    # environment variable.
    def initialize
      @storage_path = ".memoflow"
      @storage_policy = :repo
      @encryption_key = ENV["MEMOFLOW_KEY"]
      @logger = nil
      @server_host = "127.0.0.1"
      @server_port = 4599
      @embedding_command = nil
      @embedding_timeout = 5
    end

    # Resolves the directory records are stored in.
    #
    # @param repo_path [String, Pathname] base used for relative storage paths
    # @return [Pathname] the configured path when absolute, otherwise that path joined onto +repo_path+
    def resolved_storage_path(repo_path)
      candidate = configured_storage_path(repo_path)
      candidate.absolute? ? candidate : Pathname.new(repo_path).join(candidate)
    end

    private

    # An explicit, non-blank +storage_path+ always wins. Only when it is blank
    # does +storage_policy+ decide: :external places data under
    # ~/.memoflow/<repo basename>, anything else uses ".memoflow".
    #
    # NOTE(review): because +storage_path+ defaults to ".memoflow", setting
    # only +storage_policy+ has no effect until +storage_path+ is blanked —
    # confirm this is intended.
    def configured_storage_path(repo_path)
      explicit = storage_path.to_s
      return Pathname.new(storage_path) unless explicit.empty?

      if storage_policy.to_sym == :external
        Pathname.new(Dir.home).join(".memoflow", Pathname.new(repo_path).basename)
      else
        Pathname.new(".memoflow")
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module Memoflow
  # Produces embedding vectors by piping text to an optional external command
  # and falls back to Vectorizer.encode whenever that command is not
  # configured, fails, times out or returns something unusable.
  class EmbeddingProvider
    # Seconds a timed-out command gets to exit after TERM before it is killed.
    KILL_GRACE_SECONDS = 1

    # @param configuration [#embedding_command, #embedding_timeout] source of the command and timeout
    # @param runner [#call, nil] callable taking (command, text, timeout) and returning
    #   a Hash with :success and :output; defaults to spawning the command
    def initialize(configuration:, runner: nil)
      @configuration = configuration
      @runner = runner || method(:default_runner)
    end

    # @return [Boolean] true when an embedding command is configured
    def enabled?
      command.any?
    end

    # Embeds +text+ with the external command when enabled.
    #
    # The command output must be JSON: either an array of numbers or an object
    # with an "embedding" array. Any other payload, a non-zero exit, a timeout,
    # invalid JSON or an OS-level failure to run the command (missing binary,
    # permissions, broken pipe) results in the local fallback vector.
    #
    # @param text [String] text to embed
    # @return [Array] unit-normalised provider vector, or the fallback encoding
    def embed(text)
      return fallback(text) unless enabled?

      vector = extract_vector(call_provider(text))
      return fallback(text) if vector.nil? || vector.empty?

      normalize(vector.map(&:to_f))
    rescue JSON::ParserError, SystemCallError, Memoflow::Error
      fallback(text)
    end

    private

    # Configured command as an argv-style array (empty when unset).
    def command
      Array(@configuration.embedding_command)
    end

    # Runs the command and parses its output as JSON.
    #
    # @raise [Error] when the runner reports failure
    def call_provider(text)
      result = @runner.call(command, text, @configuration.embedding_timeout)
      raise Error, "embedding command failed" unless result[:success]

      JSON.parse(result[:output])
    end

    # Pulls the vector out of a parsed payload; nil for scalars, nil payloads
    # and objects without an "embedding" array.
    def extract_vector(payload)
      candidate = payload.is_a?(Hash) ? payload["embedding"] : payload
      candidate if candidate.is_a?(Array)
    end

    # Local encoding used whenever the provider is unavailable.
    def fallback(text)
      Vectorizer.encode(text)
    end

    # Scales +vector+ to unit length (rounded to 6 decimals); an all-zero
    # vector is returned untouched.
    def normalize(vector)
      magnitude = Math.sqrt(vector.sum { |value| value * value })
      return vector if magnitude.zero?

      vector.map { |value| (value / magnitude).round(6) }
    end

    # Spawns +command+, sends {"text": ...} as JSON on stdin and waits up to
    # +timeout+ seconds for it to exit.
    #
    # stdout and stderr are drained on background threads while waiting:
    # reading them only after the child exits would block the child as soon as
    # its output exceeds the pipe buffer and turn into a spurious timeout.
    #
    # @return [Hash] :success flag and :output (stdout on success, stderr otherwise)
    # @raise [Error] when the command does not finish in time
    def default_runner(command, text, timeout)
      Open3.popen3(*command) do |stdin, stdout, stderr, wait_thr|
        out_reader = drain(stdout)
        err_reader = drain(stderr)
        begin
          stdin.write(JSON.generate(text: text))
        ensure
          stdin.close
        end

        unless wait_thr.join(timeout)
          terminate(wait_thr)
          raise Error, "embedding command timed out"
        end

        status = wait_thr.value
        output = status.success? ? out_reader.value : err_reader.value
        { success: status.success?, output: output.to_s }
      end
    end

    # Reads +io+ to EOF on its own thread. Errors are not reported because the
    # stream is closed underneath the reader when the command times out.
    def drain(io)
      Thread.new do
        Thread.current.report_on_exception = false
        io.read
      end
    end

    # Asks the child to stop and force-kills it if it ignores TERM, so the
    # caller cannot hang while popen3 reaps the process.
    def terminate(wait_thr)
      Process.kill("TERM", wait_thr.pid)
      return if wait_thr.join(KILL_GRACE_SECONDS)

      Process.kill("KILL", wait_thr.pid)
    rescue Errno::ESRCH
      # The process exited on its own in the meantime.
      nil
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module Memoflow
  # Compresses and encrypts payloads with AES-256-GCM.
  #
  # Blob layout: three length-prefixed parts (4-byte big-endian length followed
  # by the bytes) in the order nonce, authentication tag, ciphertext.
  class Encryptor
    NONCE_BYTES = 12
    # Tag size produced by OpenSSL::Cipher#auth_tag by default.
    TAG_BYTES = 16
    LENGTH_PREFIX_BYTES = 4

    # @param secret [String] passphrase; the AES key is its SHA-256 digest
    # @raise [ConfigurationError] when the secret is nil or empty
    def initialize(secret)
      raise ConfigurationError, "missing encryption key" if secret.to_s.empty?

      # NOTE(review): a single SHA-256 pass is not a password-stretching KDF;
      # kept unchanged so existing stores stay readable.
      @key = OpenSSL::Digest::SHA256.digest(secret)
    end

    # Deflates and encrypts +payload+ under a fresh random nonce.
    #
    # @param payload [String] plaintext
    # @return [String] binary blob in the layout described on the class
    def encrypt(payload)
      cipher = OpenSSL::Cipher.new("aes-256-gcm")
      cipher.encrypt
      cipher.key = @key
      nonce = SecureRandom.random_bytes(NONCE_BYTES)
      cipher.iv = nonce

      compressed = Zlib::Deflate.deflate(payload)
      ciphertext = cipher.update(compressed) + cipher.final
      tag = cipher.auth_tag

      [nonce, tag, ciphertext].map { |part| [part.bytesize].pack("N") + part }.join
    end

    # Authenticates, decrypts and inflates a blob produced by #encrypt.
    #
    # The nonce and tag lengths are verified before use: OpenSSL accepts
    # shortened GCM tags, so an unchecked tag would let a forger get away
    # with matching only a few bytes.
    #
    # @param blob [String] output of #encrypt
    # @return [String] original plaintext
    # @raise [OpenSSL::Cipher::CipherError] when the blob is truncated or
    #   malformed, or when authentication fails
    def decrypt(blob)
      nonce, tag, ciphertext = unpack(blob)
      unless nonce.bytesize == NONCE_BYTES && tag.bytesize == TAG_BYTES
        raise OpenSSL::Cipher::CipherError, "malformed nonce or authentication tag"
      end

      cipher = OpenSSL::Cipher.new("aes-256-gcm")
      cipher.decrypt
      cipher.key = @key
      cipher.iv = nonce
      cipher.auth_tag = tag

      compressed = cipher.update(ciphertext) + cipher.final
      Zlib::Inflate.inflate(compressed)
    end

    private

    # Splits +blob+ into its three length-prefixed parts, refusing blobs that
    # end before a length prefix or before the announced number of bytes.
    def unpack(blob)
      cursor = 0
      Array.new(3) do
        header = blob.byteslice(cursor, LENGTH_PREFIX_BYTES)
        raise OpenSSL::Cipher::CipherError, "truncated blob" unless header && header.bytesize == LENGTH_PREFIX_BYTES

        length = header.unpack1("N")
        cursor += LENGTH_PREFIX_BYTES
        part = blob.byteslice(cursor, length)
        raise OpenSSL::Cipher::CipherError, "truncated blob" unless part && part.bytesize == length

        cursor += length
        part
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Memoflow
  # Base class for every exception raised by this library.
  class Error < StandardError
  end

  # Signals invalid or incomplete configuration.
  class ConfigurationError < Error
  end

  # Signals a failure reported while working with git.
  class GitError < Error
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
module Memoflow
  # Reads commit and repository metadata by shelling out to `git -C <repo>`.
  class GitContext
    # @param repo_path [String, Pathname] directory git commands run against
    # @param env [Object] environment handed to ProviderContext (defaults to ENV)
    def initialize(repo_path:, env: ENV)
      @repo_path = Pathname.new(repo_path)
      @env = env
    end

    # @return [true] when the path is inside a git work tree
    # @raise [GitError] otherwise
    def ensure_repo!
      run_git("rev-parse", "--is-inside-work-tree")
      true
    rescue GitError
      raise GitError, "#{@repo_path} is not a git repository"
    end

    # @return [String] SHA of HEAD
    # @raise [GitError] when git fails (for example in a repository without commits)
    def last_commit_sha
      run_git("rev-parse", "HEAD").strip
    end

    # Builds the record describing commit +sha+: author, commit date, message,
    # changed files, provider metadata from ProviderContext and an embedding
    # of the combined record.
    #
    # @param sha [String] commit to describe
    # @return [Hash] commit record
    # @raise [GitError] when the path is not a repository or git fails
    def capture_commit(sha)
      ensure_repo!

      subject, body = commit_message_parts(sha)
      record = {
        type: "commit",
        sha: sha,
        repo_name: @repo_path.basename.to_s,
        repo_root: repo_root,
        branch: current_branch,
        author_name: show(sha, "%an"),
        author_email: show(sha, "%ae"),
        # %cI is the committer date; %aI would be the author date, which can
        # predate the actual commit after a rebase, amend or cherry-pick.
        committed_at: show(sha, "%cI"),
        subject: subject,
        body: body,
        changed_files: changed_files(sha),
        summary: [subject, body].reject(&:empty?).join(" | ")
      }
      provider_metadata = ProviderContext.new(remote_url: remote_url, repo_path: @repo_path, env: @env).capture
      enriched = record.merge(provider_metadata)
      enriched.merge(embedding: Vectorizer.encode(Vectorizer.indexable_text(enriched)))
    end

    # @return [String] absolute path of the work tree's top-level directory
    def repo_root
      run_git("rev-parse", "--show-toplevel").strip
    end

    private

    # Short name of the checked-out ref as printed by `rev-parse --abbrev-ref HEAD`.
    def current_branch
      run_git("rev-parse", "--abbrev-ref", "HEAD").strip
    end

    # URL of the `origin` remote, or "" when git reports an error
    # (such as the remote not being configured).
    def remote_url
      run_git("config", "--get", "remote.origin.url").strip
    rescue GitError
      ""
    end

    # Splits the raw commit message into its first line and the remainder
    # with leading blank lines removed.
    #
    # @return [Array(String, String)] subject and body
    def commit_message_parts(sha)
      message = run_git("log", "-1", "--pretty=%B", sha)
      lines = message.lines.map(&:rstrip)
      subject = lines.shift.to_s
      body = lines.drop_while(&:empty?).join("\n")
      [subject, body]
    end

    # Paths touched by the commit, one entry per non-blank output line.
    def changed_files(sha)
      run_git("diff-tree", "--root", "--no-commit-id", "--name-only", "-r", sha)
        .lines.map(&:strip).reject(&:empty?)
    end

    # Single pretty-format field of the commit.
    def show(sha, format)
      run_git("show", "-s", "--format=#{format}", sha).strip
    end

    # Runs git with +args+ as a literal argv (no shell, no re-splitting) and
    # returns the combined stdout/stderr.
    #
    # @raise [GitError] when git exits non-zero or cannot be executed at all
    def run_git(*args)
      output, status = Open3.capture2e("git", "-C", @repo_path.to_s, *args.map(&:to_s))
      raise GitError, output.strip unless status.success?

      output
    rescue SystemCallError => e
      raise GitError, "unable to run git: #{e.message}"
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Memoflow
  # Writes the memoflow git hooks into a repository's .git/hooks directory.
  class HookInstaller
    # @param repo_path [String, Pathname] root of the repository
    def initialize(repo_path:)
      @repo_path = Pathname.new(repo_path)
    end

    # Writes the post-commit and prepare-commit-msg hooks (replacing files of
    # the same name) and marks both executable.
    #
    # @return [Pathname] the hooks directory
    # @raise [GitError] when <repo>/.git/hooks is not a directory
    def install!
      hooks_dir = @repo_path.join(".git", "hooks")
      raise GitError, "missing .git/hooks directory" unless hooks_dir.directory?

      write_hook(hooks_dir.join("post-commit"), post_commit_script)
      write_hook(hooks_dir.join("prepare-commit-msg"), prepare_commit_msg_script)

      hooks_dir
    end

    private

    # Hook that captures the new commit, preferring `bundle exec` when bundler
    # is on the PATH; output is discarded and failures never fail the commit.
    def post_commit_script
      <<~SH
        #!/usr/bin/env sh
        if command -v bundle >/dev/null 2>&1; then
          bundle exec memoflow capture --last >/dev/null 2>&1 || true
        else
          memoflow capture --last >/dev/null 2>&1 || true
        fi
      SH
    end

    # Hook that appends a Problem-Statement trailer built from
    # .git/MEMOFLOW_TASK_NOTE when both that file and the message file exist.
    def prepare_commit_msg_script
      <<~SH
        #!/usr/bin/env sh
        NOTE_FILE=".git/MEMOFLOW_TASK_NOTE"
        COMMIT_MSG_FILE="$1"
        if [ -f "$NOTE_FILE" ] && [ -f "$COMMIT_MSG_FILE" ]; then
          printf "\\n\\nProblem-Statement: %s\\n" "$(cat "$NOTE_FILE")" >> "$COMMIT_MSG_FILE"
        fi
      SH
    end

    # Stores +script+ at +path+ with mode 0755.
    def write_hook(path, script)
      path.write(script)
      path.chmod(0o755)
    end
  end
end