brainiac-fizzy 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,389 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CardIndex — duplicate card detection via trigram string similarity + qmd semantic search.
4
+ #
5
+ # Two detection layers run in parallel:
6
+ # 1. Trigram similarity — fast, catches near-identical titles ("Fix login bug" ≈ "Fix login bug on mobile")
7
+ # 2. qmd vsearch — semantic embeddings, catches same-meaning different-words
8
+ # ("Login page broken on mobile" ≈ "Users can't sign in from phones")
9
+ #
10
+ # Results are merged by card number, keeping the higher score from either method.
11
+ #
12
+ # Only the local machine's creator cards are checked for duplicates (creator-based
13
+ # routing prevents multi-machine races).
14
+
15
+ require "json"
16
+ require "open3"
17
+ require "fileutils"
18
+ require "yaml"
19
+
# Set is only autoloaded from Ruby 3.2 on, and Time#iso8601 comes from the
# "time" stdlib extension on older Rubies, so both are required explicitly.
require "set"
require "time"

# Thread-safe, JSON-persisted index of cards keyed by card number (as a String).
#
# Each entry is a Hash with the keys "title", "creator_name", "creator_id",
# "tags", "closed" and "indexed_at". Every open card is also mirrored to
# "<titles_dir>/<number>.md" so the external `qmd` tool can index the titles.
#
# Relies on names defined outside this class: LOG (logger), PROJECTS (project
# configuration), and the helpers run_cmd / default_fizzy_env used by backfill.
class CardIndex
  SIMILARITY_THRESHOLD = 0.65
  SEMANTIC_THRESHOLD = 0.65
  SEMANTIC_COLLECTION = "card-titles"
  QMD_DEBOUNCE = 30 # seconds

  attr_reader :index_file, :titles_dir

  # @param index_file [String] path of the JSON file the index is persisted to
  # @param titles_dir [String] directory holding one "<number>.md" file per open card
  # @param qmd_debounce [Numeric] minimum number of seconds between two qmd reindex runs
  def initialize(index_file:, titles_dir:, qmd_debounce: QMD_DEBOUNCE)
    @index_file = index_file
    @titles_dir = titles_dir
    @qmd_debounce = qmd_debounce
    @data = {}
    @mutex = Mutex.new
    @qmd_mutex = Mutex.new
    @qmd_last_run = nil    # monotonic timestamp of the start of the last reindex
    @qmd_pending = false   # a reindex was requested and has not started yet
    @qmd_running = false   # a worker thread is currently draining requests
    @scope_map = nil
    @scope_map_built = false
  end

  # --- Hash-like access (thread-safe) ---

  def [](key)
    @mutex.synchronize { @data[key] }
  end

  def []=(key, value)
    @mutex.synchronize { @data[key] = value }
  end

  def delete(key)
    @mutex.synchronize { @data.delete(key) }
  end

  def size
    @mutex.synchronize { @data.size }
  end

  def key?(key)
    @mutex.synchronize { @data.key?(key) }
  end

  # Iterates over a snapshot of the index. The lock is NOT held while the
  # block runs, so the block may call back into the index (Mutex is not
  # reentrant) and may do slow I/O without stalling other threads.
  #
  # @yieldparam number [String]
  # @yieldparam entry [Hash]
  # @return [Hash, Enumerator] the snapshot, or an Enumerator over it when no block is given
  def each(&block)
    to_h.each(&block)
  end

  def dig(*keys)
    @mutex.synchronize { @data.dig(*keys) }
  end

  def to_json(*args)
    @mutex.synchronize { @data.to_json(*args) }
  end

  # @return [Hash] shallow copy of the index (entry hashes are shared, not copied)
  def to_h
    @mutex.synchronize { @data.dup }
  end

  # --- Trigram similarity ---

  # Splits a string into its set of 3-character substrings after lowercasing
  # and dropping every character that is not a-z, 0-9 or whitespace.
  #
  # @param str [#to_s, nil] nil is treated as the empty string
  # @return [Set<String>] empty when fewer than 3 characters survive normalization
  def trigrams(str)
    normalized = str.to_s.downcase.gsub(/[^a-z0-9\s]/, "").strip
    return Set.new if normalized.length < 3

    Set.new(normalized.each_char.each_cons(3).map(&:join))
  end

  # Jaccard similarity of the two strings' trigram sets.
  #
  # @return [Float] in 0.0..1.0; 0.0 when either string yields no trigrams
  def trigram_similarity(str_a, str_b)
    jaccard_similarity(trigrams(str_a), trigrams(str_b))
  end

  # --- Card title files for qmd collection ---

  # Writes the card's title file, or deletes it when the card is closed.
  def sync_card_title_file(number, title, closed: false)
    FileUtils.mkdir_p(@titles_dir)
    return remove_card_title_file(number) if closed

    File.write(title_file_path(number), title)
  end

  def remove_card_title_file(number)
    FileUtils.rm_f(title_file_path(number))
  end

  # Ensure the qmd collection exists, create if not.
  # Failures (including qmd not being installed) are logged, never raised.
  def ensure_card_titles_collection
    FileUtils.mkdir_p(@titles_dir)
    output, _, status = Open3.capture3("qmd", "collection", "list")
    return if status.success? && output.include?(SEMANTIC_COLLECTION)

    LOG.info "[Fizzy:CardIndex] Creating qmd collection '#{SEMANTIC_COLLECTION}'"
    _, stderr, created = Open3.capture3("qmd", "collection", "add", @titles_dir,
                                        "--name", SEMANTIC_COLLECTION, "--mask", "*.md")
    LOG.warn "[Fizzy:CardIndex] Failed to create qmd collection: #{stderr}" unless created.success?
  rescue SystemCallError => e
    LOG.warn "[Fizzy:CardIndex] Failed to create qmd collection: #{e.message}"
  end

  # Debounced qmd update + embed. Runs in a background thread.
  #
  # The first request runs immediately. Requests that arrive while a run is in
  # progress, or less than the debounce interval after the previous run started,
  # are coalesced into ONE trailing run that starts once the interval elapsed.
  #
  # @return [Thread, nil] the worker thread when this call started one, else nil
  def schedule_qmd_reindex
    @qmd_mutex.synchronize do
      @qmd_pending = true
      return if @qmd_running # the live worker will pick the request up

      @qmd_running = true
    end

    Thread.new { drain_qmd_reindex_requests }
  end

  # --- Index operations ---

  # Replaces the in-memory index with the contents of index_file.
  # A missing, unparsable or non-object file results in an empty index.
  def load
    parsed = File.exist?(@index_file) ? JSON.parse(File.read(@index_file)) : {}
    unless parsed.is_a?(Hash)
      LOG.error "Failed to parse card index: expected a JSON object, got #{parsed.class}"
      parsed = {}
    end
    @mutex.synchronize { @data.replace(parsed) }
    LOG.info "[Fizzy:CardIndex] Loaded #{size} cards from disk"
  rescue JSON::ParserError => e
    LOG.error "Failed to parse card index: #{e.message}"
    @mutex.synchronize { @data.replace({}) }
  end

  # Persists the index as JSON. The data is written to a temporary sibling
  # file and renamed into place, so a crash mid-write cannot leave a truncated
  # index behind.
  #
  # @return [Integer] number of bytes written
  def save
    @mutex.synchronize do
      FileUtils.mkdir_p(File.dirname(@index_file))
      tmp_path = "#{@index_file}.#{Process.pid}.tmp"
      begin
        bytes = File.write(tmp_path, JSON.generate(@data))
        File.rename(tmp_path, @index_file)
        bytes
      ensure
        FileUtils.rm_f(tmp_path) # no-op after a successful rename
      end
    end
  end

  # Adds or overwrites the entry for a card and syncs its title file.
  #
  # @param tags [Array<Hash, #to_s>, nil] tag hashes (their "name" is stored) or plain names
  def index_card(number:, title:, creator_name: nil, creator_id: nil, tags: [], closed: false)
    tag_names = (tags || []).map { |tag| tag_label(tag) }
    @mutex.synchronize do
      @data[number.to_s] = {
        "title" => title,
        "creator_name" => creator_name,
        "creator_id" => creator_id,
        "tags" => tag_names,
        "closed" => closed,
        "indexed_at" => Time.now.iso8601
      }
    end
    sync_card_title_file(number, title, closed: closed)
  end

  # Removes a card from the index and deletes its title file.
  def evict_card(number)
    delete(number.to_s)
    remove_card_title_file(number)
  end

  # --- Scope extraction for cross-project duplicate filtering ---

  # Builds (once) the lowercase-tag => scope lookup from PROJECTS:
  # every "fizzy_tags" entry maps to its project key, every "scope_tags"
  # pair maps the tag to the configured scope.
  def build_scope_map
    return if @scope_map_built

    map = {}
    PROJECTS.each do |key, cfg|
      (cfg["fizzy_tags"] || []).each { |tag| map[tag.to_s.downcase] = key }
      (cfg["scope_tags"] || {}).each { |tag, scope| map[tag.to_s.downcase] = scope }
    end
    # Publish only the fully built map so readers never see a partial one.
    @scope_map = map
    @scope_map_built = true
  end

  # @param tags [Array<Hash, #to_s>, nil]
  # @return [Set] scopes the given tags map to (empty when none is known)
  def card_scopes(tags)
    return Set.new if tags.nil? || tags.empty?

    build_scope_map
    tags.each_with_object(Set.new) do |tag, scopes|
      scope = @scope_map[tag_label(tag).to_s.downcase]
      scopes << scope if scope
    end
  end

  # True only when BOTH tag lists resolve to at least one scope and the two
  # scope sets share nothing. Cards without a known scope never count as different.
  def different_scopes?(tags_a, tags_b)
    scopes_a = card_scopes(tags_a)
    scopes_b = card_scopes(tags_b)
    scopes_a.any? && scopes_b.any? && !scopes_a.intersect?(scopes_b)
  end

  # --- Trigram search ---

  # @return [Array<Hash>] one { number:, title:, score:, method: :trigram } per open
  #   card whose title similarity reaches SIMILARITY_THRESHOLD
  def find_trigram_similar_cards(title, exclude_number: nil)
    excluded = exclude_number.to_s
    query = trigrams(title) # computed once instead of once per indexed card

    to_h.filter_map do |num, entry|
      next if num == excluded || entry["closed"]

      score = jaccard_similarity(query, trigrams(entry["title"]))
      next if score < SIMILARITY_THRESHOLD

      { number: num.to_i, title: entry["title"], score: score, method: :trigram }
    end
  end

  # --- Semantic search via qmd vsearch ---

  # Runs `qmd vsearch` against the card-titles collection.
  #
  # The semantic layer is best-effort: a missing qmd binary, a non-zero exit or
  # unparsable output is logged and yields [] so trigram results still get used.
  #
  # @return [Array<Hash>] one { number:, title:, score:, method: :semantic } per hit
  def find_semantic_similar_cards(title, exclude_number: nil)
    output, stderr, status = Open3.capture3("qmd", "vsearch", title.to_s, "-c", SEMANTIC_COLLECTION,
                                            "--json", "--min-score", SEMANTIC_THRESHOLD.to_s, "--all")
    unless status.success?
      LOG.warn "[Fizzy:CardIndex] qmd vsearch failed: #{stderr.lines.last&.strip}"
      return []
    end

    # Drop node-llama-cpp log lines that precede the JSON payload on stdout.
    clean = output.lines.reject { |line| line.start_with?("[node-llama-cpp]") }.join
    json_start = clean.index("[")
    return [] unless json_start

    excluded = exclude_number.to_s
    JSON.parse(clean[json_start..]).filter_map do |result|
      next unless result.is_a?(Hash)

      # The card number is the basename of the matched title file.
      num = result["file"].to_s[%r{/(\d+)\.md$}, 1]
      next if num.nil? || num == excluded

      # A hit without a numeric score cannot be ranked or merged.
      score = result["score"]
      next unless score.is_a?(Numeric)

      entry = self[num]
      next if entry&.dig("closed")

      {
        number: num.to_i,
        title: entry&.dig("title") || result["snippet"]&.strip || "",
        score: score,
        method: :semantic
      }
    end
  rescue JSON::ParserError => e
    LOG.warn "[Fizzy:CardIndex] Failed to parse qmd vsearch output: #{e.message}"
    []
  rescue SystemCallError => e
    LOG.warn "[Fizzy:CardIndex] qmd vsearch failed: #{e.message}"
    []
  end

  # --- Merged search: trigram + semantic in parallel ---

  # Runs both detection layers and merges their hits by card number, keeping
  # the higher-scoring hit (method becomes :both when the scores tie exactly).
  # When `tags` resolve to a scope, hits from a disjoint scope are dropped.
  #
  # @return [Array<Hash>] hits sorted by descending score
  def find_similar_cards(title, exclude_number: nil, tags: nil)
    # Only the qmd subprocess benefits from a thread; trigram work runs here meanwhile.
    semantic_thread = Thread.new { find_semantic_similar_cards(title, exclude_number: exclude_number) }
    trigram_results = find_trigram_similar_cards(title, exclude_number: exclude_number)
    semantic_results = semantic_thread.value

    merged = {}
    (trigram_results + semantic_results).each do |match|
      key = match[:number]
      existing = merged[key]
      if existing.nil? || match[:score] > existing[:score]
        merged[key] = match
      elsif match[:score] == existing[:score] && existing[:method] != match[:method]
        merged[key] = existing.merge(method: :both)
      end
    end

    wanted_scopes = tags ? card_scopes(tags) : Set.new
    if wanted_scopes.any?
      merged.reject! do |num, _match|
        match_scopes = card_scopes(dig(num.to_s, "tags"))
        match_scopes.any? && !wanted_scopes.intersect?(match_scopes)
      end
    end

    merged.values.sort_by { |m| -m[:score] }
  end

  # --- Backfill from Fizzy API on startup ---

  # Indexes the cards of every configured project in a background thread,
  # saves the index and then triggers a qmd reindex.
  #
  # @return [Thread]
  def backfill
    Thread.new do
      LOG.info "[Fizzy:CardIndex] Starting backfill from Fizzy API..."
      backfilled = 0
      seen_boards = Set.new

      PROJECTS.each do |project_key, config|
        result = backfill_project(project_key, config, seen_boards)
        backfilled += result if result
      end

      save
      LOG.info "[Fizzy:CardIndex] Backfill complete: #{backfilled} new cards indexed (#{size} total)"

      ensure_card_titles_collection
      schedule_qmd_reindex
    rescue StandardError => e
      # Without this the thread would die with nothing in the application log.
      LOG.error "[Fizzy:CardIndex] Backfill aborted: #{e.class}: #{e.message}"
    end
  end

  # Backfill cards for a single project. Returns count of new cards indexed, or nil if skipped.
  # Returns 0 when fetching or parsing the card list fails.
  #
  # @param seen_boards [Set] board ids already fetched during this backfill; mutated
  def backfill_project(project_key, config, seen_boards)
    repo_path = config["repo_path"]
    return nil unless repo_path && File.directory?(repo_path)

    fizzy_yaml = File.join(repo_path, ".fizzy.yaml")
    unless File.exist?(fizzy_yaml)
      LOG.debug "[Fizzy:CardIndex] Skipping '#{project_key}' — no .fizzy.yaml"
      return nil
    end

    begin
      board_id = YAML.safe_load_file(fizzy_yaml)["board"]
    rescue StandardError => e
      LOG.warn "[Fizzy:CardIndex] Could not read .fizzy.yaml for '#{project_key}': #{e.message}"
      return nil
    end

    # Several projects can share one board; fetch each board only once.
    if seen_boards.include?(board_id)
      LOG.debug "[Fizzy:CardIndex] Skipping '#{project_key}' — board #{board_id} already fetched"
      return nil
    end
    seen_boards << board_id

    output = run_cmd("fizzy", "card", "list", "--all", chdir: repo_path, env: default_fizzy_env)
    cards = JSON.parse(output)["data"] || []
    cards.count do |card|
      num = card["number"]
      next false if num.nil? || key?(num.to_s) # already indexed cards are left untouched

      index_card(
        number: num,
        title: card["title"] || card["description"]&.slice(0, 80) || "untitled",
        creator_name: card.dig("creator", "name"),
        creator_id: card.dig("creator", "id"),
        tags: card["tags"] || [],
        closed: card["closed"] || false
      )
      true
    end
  rescue StandardError => e
    LOG.warn "[Fizzy:CardIndex] Backfill failed for project '#{project_key}': #{e.message}"
    0
  end

  # --- Startup ---

  # Brings the title files in line with the in-memory index: writes a file for
  # every open card and removes the file of every closed one.
  def sync_title_files
    FileUtils.mkdir_p(@titles_dir)
    each do |num, entry|
      sync_card_title_file(num, entry["title"], closed: entry["closed"])
    end
  end

  private

  # Name of a tag given either as a Hash (its "name" value, possibly nil) or as a plain value.
  def tag_label(tag)
    tag.is_a?(Hash) ? tag["name"] : tag.to_s
  end

  # Path of the title file mirrored for the given card number.
  def title_file_path(number)
    File.join(@titles_dir, "#{number}.md")
  end

  # |A ∩ B| / |A ∪ B|, or 0.0 when either set is empty.
  def jaccard_similarity(set_a, set_b)
    return 0.0 if set_a.empty? || set_b.empty?

    (set_a & set_b).size.to_f / (set_a | set_b).size
  end

  # Monotonic clock reading in seconds; immune to wall-clock adjustments.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Worker loop: runs pending reindex requests, sleeping out the debounce
  # interval when needed, and exits as soon as nothing is pending.
  def drain_qmd_reindex_requests
    loop do
      delay = claim_qmd_reindex_slot
      break if delay.nil?

      delay.positive? ? sleep(delay) : run_qmd_reindex
    end
  rescue StandardError => e
    # Release the worker slot so a later request can start a fresh worker.
    @qmd_mutex.synchronize { @qmd_running = false }
    LOG.warn "[Fizzy:CardIndex] qmd reindex failed: #{e.message}"
  end

  # Decides the worker's next step under the qmd lock.
  #
  # @return [nil] nothing pending; the worker slot has been released
  # @return [Numeric] seconds still to wait (> 0), or 0 when a run was claimed and must start now
  def claim_qmd_reindex_slot
    @qmd_mutex.synchronize do
      unless @qmd_pending
        @qmd_running = false
        next nil
      end

      now = monotonic_now
      remaining = @qmd_last_run ? @qmd_debounce - (now - @qmd_last_run) : 0
      next remaining if remaining.positive?

      @qmd_pending = false
      @qmd_last_run = now
      0
    end
  end

  # Runs `qmd update` followed by `qmd embed`; failures are logged, never raised.
  def run_qmd_reindex
    %w[update embed].each do |step|
      LOG.info "[Fizzy:CardIndex] Running qmd #{step} for card titles..."
      _, stderr, status = Open3.capture3("qmd", step)
      LOG.warn "[Fizzy:CardIndex] qmd #{step} failed: #{stderr}" unless status.success?
    end
    LOG.info "[Fizzy:CardIndex] qmd reindex complete"
  rescue StandardError => e
    LOG.warn "[Fizzy:CardIndex] qmd reindex failed: #{e.message}"
  end
end
380
+
381
+ # --- Create singleton instance ---
382
+
# Both artefacts live directly under BRAINIAC_DIR: the persisted JSON index
# and the directory of per-card title files.
card_index_json = File.join(BRAINIAC_DIR, "card_index.json")
card_titles_dir = File.join(BRAINIAC_DIR, "card_titles")

CARD_INDEX = CardIndex.new(index_file: card_index_json, titles_dir: card_titles_dir)

# Startup order matters: read the saved index first, then bring the title
# files in line with what was loaded.
CARD_INDEX.load
CARD_INDEX.sync_title_files