memoflow 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +198 -0
- data/bin/memoflow +7 -0
- data/examples/embedder.rb +15 -0
- data/lib/memoflow/cli.rb +171 -0
- data/lib/memoflow/client.rb +308 -0
- data/lib/memoflow/configuration.rb +39 -0
- data/lib/memoflow/embedding_provider.rb +68 -0
- data/lib/memoflow/encryptor.rb +54 -0
- data/lib/memoflow/errors.rb +7 -0
- data/lib/memoflow/git_context.rb +82 -0
- data/lib/memoflow/hook_installer.rb +41 -0
- data/lib/memoflow/provider_context.rb +123 -0
- data/lib/memoflow/server.rb +99 -0
- data/lib/memoflow/store.rb +188 -0
- data/lib/memoflow/vectorizer.rb +57 -0
- data/lib/memoflow/version.rb +6 -0
- data/lib/memoflow.rb +47 -0
- metadata +79 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Memoflow
  # Facade that ties together the git context, the embedding provider and the
  # encrypted store. It records commits, annotations, tasks and sessions, and
  # ranks stored entries for a free-text query.
  class Client
    # Points awarded, per query token, for a match in each normalized field.
    FIELD_WEIGHTS = {
      title: 6,
      subject: 5,
      summary: 5,
      pr_title: 4,
      files: 3,
      tags: 2,
      repository: 2,
      body: 1,
      description: 1,
      pr_body: 1,
      pr_number: 1,
      provider: 1,
      task_id: 1
    }.freeze

    SECONDS_PER_DAY = 86_400.0

    # @param configuration [#resolved_storage_path, #encryption_key] settings used
    #   to locate and encrypt the store
    # @param repo_path [String, Pathname] repository the client operates on
    # @param env [#[]] environment handed to the git context
    # @param embedding_provider [#embed, #enabled?, nil] optional override; an
    #   EmbeddingProvider built from +configuration+ is used when nil
    def initialize(configuration:, repo_path:, env: ENV, embedding_provider: nil)
      @configuration = configuration
      @repo_path = Pathname.new(repo_path)
      @env = env
      @git = GitContext.new(repo_path: @repo_path, env: @env)
      @embedding_provider = embedding_provider || EmbeddingProvider.new(configuration: configuration)
      @store = Store.new(
        root: configuration.resolved_storage_path(@repo_path),
        encryptor: Encryptor.new(configuration.encryption_key)
      )
    end

    # Prepares the underlying store (delegates to Store#setup!).
    def init!
      @store.setup!
    end

    # Captures the commit that HEAD currently points at.
    def capture_last_commit
      capture_commit(@git.last_commit_sha)
    end

    # @return [String] SHA reported by the git context for HEAD
    def last_commit_sha
      @git.last_commit_sha
    end

    # Builds a record for +sha+, tags it with the task id and an embedding, and
    # writes it unless the store already knows the commit.
    #
    # @param sha [String] commit to capture
    # @param task_id [String, nil] task to associate; defaults to the current task
    # @return [Hash] the freshly built record (also returned when it was already stored)
    def capture_commit(sha, task_id: current_task_id)
      record = @git.capture_commit(sha)
      record[:task_id] = task_id if task_id
      record[:embedding] = embed_record(record)
      return record if @store.commit?(sha)

      @store.write_commit(record)
      record
    end

    # Stores a free-text annotation linked to the current task.
    #
    # @param text [String] used for both the summary and the body
    # @param tags [Array<String>]
    # @param related_sha [String, nil] commit the note refers to
    # @param source [String] origin label, "manual" by default
    # @return [Hash] the stored annotation record
    def annotate(text, tags: [], related_sha: nil, source: "manual")
      record = {
        id: SecureRandom.uuid,
        type: "annotation",
        source: source,
        tags: tags,
        related_sha: related_sha,
        task_id: current_task_id,
        summary: text,
        body: text,
        timestamp: Time.now.utc.iso8601
      }
      record[:embedding] = embed_record(record)
      @store.write_annotation(record)
      record
    end

    # Creates an active task, opens a session for it and makes it the current task.
    #
    # @return [Hash] the task record plus :current_session_id
    def start_task(title, description: nil, tags: [])
      task_id = SecureRandom.uuid
      timestamp = Time.now.utc.iso8601
      task = {
        id: task_id,
        type: "task",
        title: title,
        description: description,
        tags: tags,
        status: "active",
        created_at: timestamp,
        updated_at: timestamp,
        repo_root: @repo_path.to_s
      }
      task[:embedding] = embed_record(task)
      session = start_session(task_id)
      @store.write_task(task)
      @store.write_state("current_task", { task_id: task_id, session_id: session[:id], updated_at: timestamp })
      task.merge(current_session_id: session[:id])
    end

    # Writes an updated copy of the task with the given status, closes the
    # session recorded in the current-task state and clears that state when it
    # points at this task.
    #
    # @raise [Error] when there is no task id or the task cannot be found
    # @return [Hash] the updated task record
    def finish_task(task_id = current_task_id, status: "completed")
      raise Error, "no active task" unless task_id

      task = find_task(task_id)
      raise Error, "task not found" unless task

      updated = task.merge(status: status, updated_at: Time.now.utc.iso8601)
      updated[:embedding] = embed_record(updated)
      @store.write_task(updated)
      finish_current_session(task_id)
      clear_current_task if current_task_id == task_id
      updated
    end

    # Re-activates an existing task, opens a new session for it and makes it current.
    #
    # @raise [Error] when the task cannot be found
    # @return [Hash] the updated task record plus :current_session_id
    def resume_task(task_id)
      task = find_task(task_id)
      raise Error, "task not found" unless task

      session = start_session(task_id)
      now = Time.now.utc.iso8601
      updated = task.merge(status: "active", updated_at: now)
      updated[:embedding] = embed_record(updated)
      @store.write_task(updated)
      @store.write_state("current_task", { task_id: task_id, session_id: session[:id], updated_at: now })
      updated.merge(current_session_id: session[:id])
    end

    # Lists task records, most recently updated first.
    #
    # @param limit [Integer] maximum number of records returned
    # @param status [String, nil] when given, only records with this status
    def tasks(limit: 20, status: nil)
      items = @store.read_scope("tasks")
      items = items.select { |task| task[:status] == status } if status
      items.sort_by { |task| sortable_time(task, :updated_at) }.reverse.first(limit)
    end

    # @return [Hash, nil] record of the current task, if one is set
    def current_task
      task_id = current_task_id
      task_id ? find_task(task_id) : nil
    end

    # @return [String, nil] task id stored under the "current_task" state
    def current_task_id
      @store.read_state("current_task")&.dig(:task_id)
    end

    # Ranks every stored entry against +term+ and returns the best matches.
    #
    # With a blank term all entries are kept and ordered by their default score.
    # Otherwise only entries with a positive score are kept. Ties are broken by
    # ascending timestamp.
    #
    # @param term [#to_s, nil] free-text query
    # @param limit [Integer] maximum number of results
    # @return [Array<Hash>] presented entries (see #present)
    def query(term = nil, limit: 5)
      text = term.to_s
      blank = text.strip.empty?
      # Embed the term and look up the current task once per query instead of
      # once per entry: both may hit an external process or the store.
      query_vector = blank ? nil : embed_text(text)
      active_task_id = current_task_id

      scored = @store.read_all.map do |entry|
        [score(entry, text, query_vector: query_vector, active_task_id: active_task_id), entry]
      end
      scored = scored.select { |value, _entry| value.positive? } unless blank

      scored
        .sort_by { |value, entry| [-value, sortable_time(entry)] }
        .first(limit)
        .map { |_value, entry| present(entry, query_vector: query_vector) }
    end

    # Renders query results as newline-separated "type | timestamp | summary"
    # lines (plus "files=..." when present), preceded by a current-task header
    # when a task is active.
    #
    # @return [String]
    def context_packet(query: nil, limit: 5)
      entries = query(query, limit: limit)
      task = current_task # resolved once; every lookup reads the store
      header = task ? "current_task=#{task[:title]} status=#{task[:status]}" : nil
      lines = entries.map do |entry|
        parts = [entry[:type], entry[:timestamp], entry[:summary]]
        files = entry[:changed_files]
        parts << "files=#{files.join(",")}" if files && !files.empty?
        parts.compact.join(" | ")
      end
      ([header] + lines).compact.join("\n")
    end

    # Delegates to Store#export_bundle.
    def export_bundle(path)
      @store.export_bundle(path)
    end

    # Delegates to Store#import_bundle.
    def import_bundle(path)
      @store.import_bundle(path)
    end

    # Delegates to Store#prune! with the same keyword arguments.
    def prune!(keep_days: nil, max_records: nil)
      @store.prune!(keep_days: keep_days, max_records: max_records)
    end

    # @return [String] "provider" when the embedding provider is enabled, else "local"
    def embedding_mode
      @embedding_provider.enabled? ? "provider" : "local"
    end

    private

    # Normalizes a stored entry into the hash returned by #query.
    # NOTE(review): entry[:embedding] is passed to Vectorizer.similarity without a
    # nil check, unlike #semantic_boost; session records are written without an
    # embedding - confirm Vectorizer.similarity tolerates nil.
    def present(entry, query_vector: nil)
      {
        id: entry[:id] || entry[:sha],
        type: entry[:type],
        timestamp: entry[:timestamp] || entry[:committed_at],
        summary: entry[:summary] || entry[:subject],
        body: entry[:body],
        changed_files: entry[:changed_files],
        tags: entry[:tags],
        related_sha: entry[:related_sha],
        task_id: entry[:task_id],
        status: entry[:status],
        title: entry[:title],
        description: entry[:description],
        provider: entry[:provider],
        repository: entry[:repository],
        pull_request: entry[:pull_request],
        semantic_score: query_vector ? Vectorizer.similarity(entry[:embedding], query_vector).round(4) : nil
      }
    end

    # Parses the entry's time; falls back to the Unix epoch when it has none.
    def sortable_time(entry, key = nil)
      source = key ? entry[key] : (entry[:timestamp] || entry[:committed_at])
      Time.parse(source || Time.at(0).utc.iso8601)
    end

    # Total relevance of +entry+ for +term+: weighted token matches plus the
    # recency, task and semantic boosts. A blank term yields the default score.
    #
    # +query_vector+ and +active_task_id+ let #query pass in values it has
    # already computed; both are looked up on demand when omitted.
    def score(entry, term, query_vector: nil, active_task_id: current_task_id)
      text = term.to_s
      return default_score(entry, active_task_id) if text.strip.empty?

      fields = normalized_fields(entry)
      # Leading whitespace would otherwise produce an empty token, which is a
      # substring of every non-empty field.
      tokens = text.downcase.split(/\s+/).reject(&:empty?)
      lexical = tokens.sum { |token| field_score(fields, token) }

      lexical + recency_boost(entry) + task_boost(entry, active_task_id) +
        semantic_boost(entry, text, query_vector: query_vector)
    end

    # Latest stored version of the task with the given id, or nil.
    def find_task(task_id)
      @store.read_scope("tasks").reverse.find { |task| task[:id] == task_id }
    end

    # Writes and returns a new active session record for +task_id+.
    def start_session(task_id)
      record = {
        id: SecureRandom.uuid,
        type: "session",
        task_id: task_id,
        started_at: Time.now.utc.iso8601,
        status: "active"
      }
      @store.write_session(record)
      record
    end

    # Closes the session referenced by the current-task state, but only when
    # that state belongs to +task_id+ and the session record exists.
    def finish_current_session(task_id)
      current = @store.read_state("current_task")
      return unless current && current[:task_id] == task_id

      session = @store.read_scope("sessions").reverse.find { |item| item[:id] == current[:session_id] }
      return unless session

      @store.write_session(session.merge(status: "closed", ended_at: Time.now.utc.iso8601))
    end

    # Overwrites the current-task state with empty ids.
    def clear_current_task
      @store.write_state("current_task", { task_id: nil, session_id: nil, updated_at: Time.now.utc.iso8601 })
    end

    # Lower-cased string view of the searchable fields of +entry+.
    def normalized_fields(entry)
      pr = entry[:pull_request] || {}
      {
        summary: entry[:summary].to_s.downcase,
        subject: entry[:subject].to_s.downcase,
        body: entry[:body].to_s.downcase,
        title: entry[:title].to_s.downcase,
        description: entry[:description].to_s.downcase,
        files: Array(entry[:changed_files]).join(" ").downcase,
        tags: Array(entry[:tags]).join(" ").downcase,
        task_id: entry[:task_id].to_s.downcase,
        repository: entry[:repository].to_s.downcase,
        provider: entry[:provider].to_s.downcase,
        pr_title: pr[:title].to_s.downcase,
        pr_body: pr[:body].to_s.downcase,
        pr_number: pr[:number].to_s.downcase
      }
    end

    # Sum of the FIELD_WEIGHTS of every field that matches +token+.
    def field_score(fields, token)
      FIELD_WEIGHTS.sum { |name, weight| exact_or_prefix_match?(fields[name], token) ? weight : 0 }
    end

    # True when the non-empty +text+ contains +token+. A whole-word or
    # word-prefix hit is always a substring hit too, so one substring test
    # covers all three cases.
    def exact_or_prefix_match?(text, token)
      return false if text.empty?

      text.include?(token)
    end

    # 2 for entries up to a day old, 1 up to a week old, 0 otherwise
    # (including entries dated in the future).
    def recency_boost(entry)
      age_in_days = (Time.now.utc - sortable_time(entry)) / SECONDS_PER_DAY
      return 0 if age_in_days.negative?
      return 2 if age_in_days <= 1
      return 1 if age_in_days <= 7

      0
    end

    # 4 when the entry belongs to the active task, else 0.
    def task_boost(entry, active_task_id = current_task_id)
      active_task_id && entry[:task_id] == active_task_id ? 4 : 0
    end

    # Score used when no search term is given.
    def default_score(entry, active_task_id = current_task_id)
      recency_boost(entry) + task_boost(entry, active_task_id) + 1
    end

    # Ten times the similarity between the entry embedding and the query
    # embedding; 0 when the entry has no embedding or the term is blank.
    # The term is embedded here only when no +query_vector+ is supplied.
    def semantic_boost(entry, term, query_vector: nil)
      vector = entry[:embedding]
      return 0 if vector.nil? || term.to_s.strip.empty?

      query_vector ||= embed_text(term)
      (Vectorizer.similarity(vector, query_vector) * 10).round(4)
    end

    # Embeds the indexable text that Vectorizer derives from +record+.
    def embed_record(record)
      embed_text(Vectorizer.indexable_text(record))
    end

    # Embeds +text+ through the configured provider.
    def embed_text(text)
      @embedding_provider.embed(text)
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Memoflow
  # Mutable settings object: storage location, encryption key, logger, server
  # address and the optional external embedding command.
  class Configuration
    # Directory name used when no storage path has been set explicitly.
    DEFAULT_STORAGE_PATH = ".memoflow"

    attr_accessor :encryption_key, :logger, :storage_policy, :server_host, :server_port,
                  :embedding_command, :embedding_timeout
    attr_writer :storage_path

    def initialize
      # Left unset so that #storage_policy can pick the location. Defaulting this
      # to ".memoflow" made the policy branch unreachable, because an explicit
      # path always takes precedence.
      @storage_path = nil
      @encryption_key = ENV["MEMOFLOW_KEY"]
      @logger = nil
      @storage_policy = :repo
      @server_host = "127.0.0.1"
      @server_port = 4599
      @embedding_command = nil
      @embedding_timeout = 5
    end

    # @return [String, Pathname] the explicitly assigned storage path, or
    #   DEFAULT_STORAGE_PATH when none (or a blank one) has been assigned
    def storage_path
      explicit_storage_path? ? @storage_path : DEFAULT_STORAGE_PATH
    end

    # Resolves the storage directory for +repo_path+.
    #
    # An explicitly assigned storage path wins; otherwise the location follows
    # #storage_policy. Relative results are anchored at +repo_path+.
    #
    # @param repo_path [String, Pathname]
    # @return [Pathname]
    def resolved_storage_path(repo_path)
      path = configured_storage_path(repo_path)
      path.absolute? ? path : Pathname.new(repo_path).join(path)
    end

    private

    def explicit_storage_path?
      !@storage_path.to_s.empty?
    end

    # Storage path before anchoring: the explicit path if any, else
    # ~/.memoflow/<repo basename> for the :external policy, else ".memoflow".
    def configured_storage_path(repo_path)
      return Pathname.new(@storage_path) if explicit_storage_path?

      # to_s first so that a nil policy falls through to the default branch.
      case storage_policy.to_s.to_sym
      when :external
        Pathname.new(Dir.home).join(DEFAULT_STORAGE_PATH, Pathname.new(repo_path).basename)
      else
        Pathname.new(DEFAULT_STORAGE_PATH)
      end
    end
  end
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Memoflow
  # Obtains embeddings from an external command and falls back to the local
  # Vectorizer whenever the command is not configured, fails, times out or
  # returns something that is not a usable vector.
  #
  # The command receives {"text": ...} as JSON on stdin and must print either a
  # JSON array of numbers or a JSON object with an "embedding" array.
  class EmbeddingProvider
    # Seconds a timed-out command gets to exit after TERM before it is killed.
    KILL_GRACE_SECONDS = 1

    # @param configuration [#embedding_command, #embedding_timeout]
    # @param runner [#call, nil] callable taking (command, text, timeout) and
    #   returning a hash with :success and :output; defaults to spawning the command
    def initialize(configuration:, runner: nil)
      @configuration = configuration
      @runner = runner || method(:default_runner)
    end

    # @return [Boolean] true when an embedding command is configured
    def enabled?
      command.any?
    end

    # @param text [String]
    # @return [Array] unit-length provider vector, or the Vectorizer encoding
    #   when the provider is disabled or unusable
    def embed(text)
      return fallback(text) unless enabled?

      vector = extract_vector(call_provider(text))
      return fallback(text) unless vector

      normalize(vector.map(&:to_f))
    rescue JSON::ParserError, Errno::ENOENT, Errno::EACCES, Errno::EPIPE, Memoflow::Error
      fallback(text)
    end

    private

    # Configured command as an argv array, ignoring blank parts so that an
    # empty string does not count as a command.
    def command
      Array(@configuration.embedding_command).reject { |part| part.to_s.strip.empty? }
    end

    # Runs the command and parses its output as JSON.
    # @raise [Error] when the runner reports failure
    def call_provider(text)
      result = @runner.call(command, text, @configuration.embedding_timeout)
      raise Error, "embedding command failed" unless result[:success]

      JSON.parse(result[:output])
    end

    # Pulls the vector out of the parsed payload. Returns nil for anything that
    # is not a non-empty array of values convertible with #to_f (for example
    # JSON scalars, null, or nested arrays).
    def extract_vector(payload)
      vector =
        case payload
        when Array then payload
        when Hash then payload["embedding"]
        end
      return nil unless vector.is_a?(Array) && !vector.empty?

      vector.all? { |value| value.respond_to?(:to_f) } ? vector : nil
    end

    def fallback(text)
      Vectorizer.encode(text)
    end

    # Scales +vector+ to unit length (components rounded to 6 places); a zero
    # vector is returned unchanged.
    def normalize(vector)
      magnitude = Math.sqrt(vector.sum { |value| value * value })
      return vector if magnitude.zero?

      vector.map { |value| (value / magnitude).round(6) }
    end

    # Spawns +command+, writes the JSON request to its stdin and waits up to
    # +timeout+ seconds for it to exit.
    #
    # All three pipes are serviced on background threads while waiting. Reading
    # only after the child has exited deadlocks as soon as the child writes
    # more than the OS pipe buffer holds.
    #
    # @return [Hash] :success and :output (stdout on success, stderr otherwise)
    # @raise [Error] when the command does not finish in time
    def default_runner(command, text, timeout)
      Open3.popen3(*command) do |stdin, stdout, stderr, wait_thr|
        writer = background { feed(stdin, JSON.generate(text: text)) }
        readers = [stdout, stderr].map { |io| background { io.read } }

        unless wait_thr.join(timeout)
          terminate(wait_thr)
          [writer, *readers].each(&:kill)
          raise Error, "embedding command timed out"
        end

        status = wait_thr.value
        out, err = readers.map(&:value)
        { success: status.success?, output: (status.success? ? out : err).to_s }
      end
    end

    # Runs the block on a thread whose failures are not echoed to stderr.
    def background
      Thread.new do
        Thread.current.report_on_exception = false
        yield
      end
    end

    # Writes the request and closes stdin. A child that exits without reading
    # is not an error here: its exit status decides the outcome.
    def feed(stdin, data)
      stdin.write(data)
    rescue Errno::EPIPE, IOError
      nil
    ensure
      stdin.close unless stdin.closed?
    end

    # Asks the child to stop and kills it if it ignores the request.
    def terminate(wait_thr)
      Process.kill("TERM", wait_thr.pid)
      return if wait_thr.join(KILL_GRACE_SECONDS)

      Process.kill("KILL", wait_thr.pid)
    rescue Errno::ESRCH
      nil # already gone
    end
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Memoflow
  # Compresses and encrypts payloads with AES-256-GCM.
  #
  # Blob layout: three length-prefixed parts (4-byte big-endian length followed
  # by the bytes) in the order nonce, auth tag, ciphertext. The key is the
  # SHA-256 digest of the secret.
  class Encryptor
    NONCE_BYTES = 12
    # Length of the tag produced by #encrypt (the cipher's default auth tag length).
    TAG_BYTES = 16
    LENGTH_PREFIX_BYTES = 4
    CIPHER = "aes-256-gcm"

    # @param secret [String] passphrase the key is derived from
    # @raise [ConfigurationError] when the secret is nil or empty
    def initialize(secret)
      raise ConfigurationError, "missing encryption key" if secret.to_s.empty?

      @key = OpenSSL::Digest::SHA256.digest(secret)
    end

    # @param payload [String] plaintext
    # @return [String] blob holding a fresh random nonce, the tag and the ciphertext
    def encrypt(payload)
      cipher = OpenSSL::Cipher.new(CIPHER)
      cipher.encrypt
      cipher.key = @key
      nonce = SecureRandom.random_bytes(NONCE_BYTES)
      cipher.iv = nonce

      ciphertext = cipher.update(Zlib::Deflate.deflate(payload)) + cipher.final
      tag = cipher.auth_tag

      [nonce, tag, ciphertext].map { |part| [part.bytesize].pack("N") + part }.join
    end

    # @param blob [String] value produced by #encrypt
    # @return [String] the original payload bytes
    # @raise [OpenSSL::Cipher::CipherError] when the blob is malformed, was
    #   tampered with, or was encrypted with a different key
    def decrypt(blob)
      nonce, tag, ciphertext = unpack(blob)

      cipher = OpenSSL::Cipher.new(CIPHER)
      cipher.decrypt
      cipher.key = @key
      cipher.iv = nonce
      cipher.auth_tag = tag

      Zlib::Inflate.inflate(cipher.update(ciphertext) + cipher.final)
    end

    private

    # Splits +blob+ into [nonce, tag, ciphertext] and validates the framing.
    #
    # The tag length is checked here because OpenSSL accepts shorter tags, which
    # would let a forged blob get by with only a few matching tag bytes.
    def unpack(blob)
      data = blob.to_s.b
      cursor = 0
      parts = Array.new(3) do
        header = data.byteslice(cursor, LENGTH_PREFIX_BYTES)
        malformed! unless header && header.bytesize == LENGTH_PREFIX_BYTES

        length = header.unpack1("N")
        cursor += LENGTH_PREFIX_BYTES
        part = data.byteslice(cursor, length)
        malformed! unless part && part.bytesize == length

        cursor += length
        part
      end

      nonce, tag, = parts
      malformed! unless nonce.bytesize == NONCE_BYTES && tag.bytesize == TAG_BYTES
      parts
    end

    # Raised with the same class as an authentication failure, so callers need
    # a single rescue for every kind of undecryptable blob.
    def malformed!
      raise OpenSSL::Cipher::CipherError, "malformed encrypted payload"
    end
  end
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Memoflow
  # Reads commit metadata from a git repository by shelling out to `git -C <repo>`.
  class GitContext
    # @param repo_path [String, Pathname] directory passed to `git -C`
    # @param env [#[]] environment handed to ProviderContext
    def initialize(repo_path:, env: ENV)
      @repo_path = Pathname.new(repo_path)
      @env = env
    end

    # @return [true]
    # @raise [GitError] when the path is not inside a git work tree, or git
    #   itself cannot be executed
    def ensure_repo!
      _output, status = execute_git("rev-parse", "--is-inside-work-tree")
      raise GitError, "#{@repo_path} is not a git repository" unless status.success?

      true
    end

    # @return [String] SHA of HEAD
    # @raise [GitError] when git fails
    def last_commit_sha
      run_git("rev-parse", "HEAD").strip
    end

    # Collects message, author, branch and changed files for +sha+, merges in
    # the provider metadata and adds a locally computed embedding.
    #
    # @param sha [String] revision to describe
    # @return [Hash] commit record
    # @raise [GitError] when the revision is blank or looks like a command-line
    #   option, the path is not a repository, or git fails
    def capture_commit(sha)
      # A revision is passed to git as a positional argument; one starting with
      # "-" would be parsed as an option instead.
      revision = sha.to_s
      raise GitError, "invalid revision: #{sha.inspect}" if revision.empty? || revision.start_with?("-")

      ensure_repo!

      subject, body = commit_message_parts(sha)
      record = {
        type: "commit",
        sha: sha,
        repo_name: @repo_path.basename.to_s,
        repo_root: repo_root,
        branch: current_branch,
        author_name: show(sha, "%an"),
        author_email: show(sha, "%ae"),
        committed_at: show(sha, "%aI"),
        subject: subject,
        body: body,
        changed_files: changed_files(sha),
        summary: [subject, body].reject(&:empty?).join(" | ")
      }
      provider_metadata = ProviderContext.new(remote_url: remote_url, repo_path: @repo_path, env: @env).capture
      enriched = record.merge(provider_metadata)
      enriched.merge(embedding: Vectorizer.encode(Vectorizer.indexable_text(enriched)))
    end

    # @return [String] top-level directory of the work tree
    def repo_root
      run_git("rev-parse", "--show-toplevel").strip
    end

    private

    def current_branch
      run_git("rev-parse", "--abbrev-ref", "HEAD").strip
    end

    # URL of the "origin" remote, or "" when git cannot provide one.
    def remote_url
      run_git("config", "--get", "remote.origin.url").strip
    rescue GitError
      ""
    end

    # Splits the commit message into [subject, body]; blank lines between the
    # two are dropped.
    def commit_message_parts(sha)
      lines = run_git("log", "-1", "--pretty=%B", sha).lines.map(&:rstrip)
      subject = lines.shift.to_s
      body = lines.drop_while(&:empty?).join("\n")
      [subject, body]
    end

    def changed_files(sha)
      run_git("diff-tree", "--root", "--no-commit-id", "--name-only", "-r", sha)
        .lines.map(&:strip).reject(&:empty?)
    end

    # Output of `git show -s` for +sha+ rendered with the given pretty format.
    def show(sha, format)
      run_git("show", "-s", "--format=#{format}", sha).strip
    end

    # Runs git and returns its combined stdout/stderr.
    #
    # Arguments are passed as separate words. A single string argument is still
    # split with Shellwords, so the older one-string calling form keeps working.
    #
    # @raise [GitError] with git's output when the command exits unsuccessfully
    def run_git(*args)
      argv = args.size == 1 ? Shellwords.split(args.first.to_s) : args.map(&:to_s)
      output, status = execute_git(*argv)
      raise GitError, output.strip unless status.success?

      output
    end

    # Spawns git without a shell and returns [output, status].
    # A missing git binary is reported as GitError, like every other git failure.
    def execute_git(*argv)
      Open3.capture2e("git", "-C", @repo_path.to_s, *argv)
    rescue Errno::ENOENT
      raise GitError, "git executable not found"
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Memoflow
  # Writes the memoflow git hooks into a repository's .git/hooks directory.
  class HookInstaller
    # Captures the new commit after every `git commit`. Never fails the commit.
    POST_COMMIT_HOOK = <<~SH
      #!/usr/bin/env sh
      # Prefer the bundled executable, but fall back to the one on PATH when
      # bundler is missing or "bundle exec" fails (for example, no Gemfile).
      if command -v bundle >/dev/null 2>&1 && bundle exec memoflow capture --last >/dev/null 2>&1; then
        exit 0
      fi
      memoflow capture --last >/dev/null 2>&1 || true
    SH

    # Appends the contents of .git/MEMOFLOW_TASK_NOTE to the commit message as
    # a "Problem-Statement:" trailer when that file exists.
    PREPARE_COMMIT_MSG_HOOK = <<~SH
      #!/usr/bin/env sh
      NOTE_FILE=".git/MEMOFLOW_TASK_NOTE"
      COMMIT_MSG_FILE="$1"
      if [ -f "$NOTE_FILE" ] && [ -f "$COMMIT_MSG_FILE" ]; then
        printf "\\n\\nProblem-Statement: %s\\n" "$(cat "$NOTE_FILE")" >> "$COMMIT_MSG_FILE"
      fi
    SH

    HOOKS = {
      "post-commit" => POST_COMMIT_HOOK,
      "prepare-commit-msg" => PREPARE_COMMIT_MSG_HOOK
    }.freeze

    # @param repo_path [String, Pathname] root of the repository
    def initialize(repo_path:)
      @repo_path = Pathname.new(repo_path)
    end

    # Writes both hooks (replacing any existing file of the same name) and
    # marks them executable.
    #
    # @return [Pathname] the hooks directory
    # @raise [GitError] when <repo>/.git/hooks is not a directory
    def install!
      hooks_dir = @repo_path.join(".git", "hooks")
      raise GitError, "missing .git/hooks directory" unless hooks_dir.directory?

      HOOKS.each { |name, script| write_hook(hooks_dir.join(name), script) }
      hooks_dir
    end

    private

    def write_hook(path, script)
      File.write(path, script)
      File.chmod(0o755, path)
    end
  end
end
|