swarm_sdk 2.7.14 → 3.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. checksums.yaml +4 -4
  2. data/lib/swarm_sdk/ruby_llm_patches/chat_callbacks_patch.rb +16 -0
  3. data/lib/swarm_sdk/ruby_llm_patches/init.rb +4 -1
  4. data/lib/swarm_sdk/v3/agent.rb +1165 -0
  5. data/lib/swarm_sdk/v3/agent_builder.rb +533 -0
  6. data/lib/swarm_sdk/v3/agent_definition.rb +330 -0
  7. data/lib/swarm_sdk/v3/configuration.rb +490 -0
  8. data/lib/swarm_sdk/v3/debug_log.rb +86 -0
  9. data/lib/swarm_sdk/v3/event_stream.rb +130 -0
  10. data/lib/swarm_sdk/v3/hooks/context.rb +112 -0
  11. data/lib/swarm_sdk/v3/hooks/result.rb +115 -0
  12. data/lib/swarm_sdk/v3/hooks/runner.rb +128 -0
  13. data/lib/swarm_sdk/v3/mcp/connector.rb +183 -0
  14. data/lib/swarm_sdk/v3/mcp/mcp_error.rb +15 -0
  15. data/lib/swarm_sdk/v3/mcp/server_definition.rb +125 -0
  16. data/lib/swarm_sdk/v3/mcp/ssl_http_transport.rb +103 -0
  17. data/lib/swarm_sdk/v3/mcp/stdio_transport.rb +135 -0
  18. data/lib/swarm_sdk/v3/mcp/tool_proxy.rb +53 -0
  19. data/lib/swarm_sdk/v3/memory/adapters/base.rb +297 -0
  20. data/lib/swarm_sdk/v3/memory/adapters/faiss_support.rb +194 -0
  21. data/lib/swarm_sdk/v3/memory/adapters/filesystem_adapter.rb +212 -0
  22. data/lib/swarm_sdk/v3/memory/adapters/sqlite_adapter.rb +507 -0
  23. data/lib/swarm_sdk/v3/memory/adapters/vector_utils.rb +88 -0
  24. data/lib/swarm_sdk/v3/memory/card.rb +206 -0
  25. data/lib/swarm_sdk/v3/memory/cluster.rb +146 -0
  26. data/lib/swarm_sdk/v3/memory/compressor.rb +496 -0
  27. data/lib/swarm_sdk/v3/memory/consolidator.rb +427 -0
  28. data/lib/swarm_sdk/v3/memory/context_builder.rb +339 -0
  29. data/lib/swarm_sdk/v3/memory/edge.rb +105 -0
  30. data/lib/swarm_sdk/v3/memory/embedder.rb +185 -0
  31. data/lib/swarm_sdk/v3/memory/exposure_tracker.rb +104 -0
  32. data/lib/swarm_sdk/v3/memory/ingestion_pipeline.rb +394 -0
  33. data/lib/swarm_sdk/v3/memory/retriever.rb +289 -0
  34. data/lib/swarm_sdk/v3/memory/store.rb +489 -0
  35. data/lib/swarm_sdk/v3/skills/loader.rb +147 -0
  36. data/lib/swarm_sdk/v3/skills/manifest.rb +45 -0
  37. data/lib/swarm_sdk/v3/sub_task_agent.rb +248 -0
  38. data/lib/swarm_sdk/v3/tools/base.rb +80 -0
  39. data/lib/swarm_sdk/v3/tools/bash.rb +174 -0
  40. data/lib/swarm_sdk/v3/tools/clock.rb +32 -0
  41. data/lib/swarm_sdk/v3/tools/edit.rb +111 -0
  42. data/lib/swarm_sdk/v3/tools/glob.rb +96 -0
  43. data/lib/swarm_sdk/v3/tools/grep.rb +200 -0
  44. data/lib/swarm_sdk/v3/tools/message_teammate.rb +15 -0
  45. data/lib/swarm_sdk/v3/tools/message_user.rb +15 -0
  46. data/lib/swarm_sdk/v3/tools/read.rb +181 -0
  47. data/lib/swarm_sdk/v3/tools/read_tracker.rb +40 -0
  48. data/lib/swarm_sdk/v3/tools/registry.rb +208 -0
  49. data/lib/swarm_sdk/v3/tools/sub_task.rb +183 -0
  50. data/lib/swarm_sdk/v3/tools/think.rb +88 -0
  51. data/lib/swarm_sdk/v3/tools/write.rb +87 -0
  52. data/lib/swarm_sdk/v3.rb +145 -0
  53. metadata +83 -148
  54. data/lib/swarm_sdk/agent/RETRY_LOGIC.md +0 -175
  55. data/lib/swarm_sdk/agent/builder.rb +0 -705
  56. data/lib/swarm_sdk/agent/chat.rb +0 -1438
  57. data/lib/swarm_sdk/agent/chat_helpers/context_tracker.rb +0 -375
  58. data/lib/swarm_sdk/agent/chat_helpers/event_emitter.rb +0 -204
  59. data/lib/swarm_sdk/agent/chat_helpers/hook_integration.rb +0 -480
  60. data/lib/swarm_sdk/agent/chat_helpers/instrumentation.rb +0 -85
  61. data/lib/swarm_sdk/agent/chat_helpers/llm_configuration.rb +0 -290
  62. data/lib/swarm_sdk/agent/chat_helpers/logging_helpers.rb +0 -116
  63. data/lib/swarm_sdk/agent/chat_helpers/serialization.rb +0 -83
  64. data/lib/swarm_sdk/agent/chat_helpers/system_reminder_injector.rb +0 -134
  65. data/lib/swarm_sdk/agent/chat_helpers/system_reminders.rb +0 -79
  66. data/lib/swarm_sdk/agent/chat_helpers/token_tracking.rb +0 -146
  67. data/lib/swarm_sdk/agent/context.rb +0 -115
  68. data/lib/swarm_sdk/agent/context_manager.rb +0 -315
  69. data/lib/swarm_sdk/agent/definition.rb +0 -588
  70. data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +0 -226
  71. data/lib/swarm_sdk/agent/system_prompt_builder.rb +0 -173
  72. data/lib/swarm_sdk/agent/tool_registry.rb +0 -189
  73. data/lib/swarm_sdk/agent_registry.rb +0 -146
  74. data/lib/swarm_sdk/builders/base_builder.rb +0 -558
  75. data/lib/swarm_sdk/claude_code_agent_adapter.rb +0 -205
  76. data/lib/swarm_sdk/concerns/cleanupable.rb +0 -42
  77. data/lib/swarm_sdk/concerns/snapshotable.rb +0 -67
  78. data/lib/swarm_sdk/concerns/validatable.rb +0 -55
  79. data/lib/swarm_sdk/config.rb +0 -368
  80. data/lib/swarm_sdk/configuration/parser.rb +0 -397
  81. data/lib/swarm_sdk/configuration/translator.rb +0 -285
  82. data/lib/swarm_sdk/configuration.rb +0 -165
  83. data/lib/swarm_sdk/context_compactor/metrics.rb +0 -147
  84. data/lib/swarm_sdk/context_compactor/token_counter.rb +0 -102
  85. data/lib/swarm_sdk/context_compactor.rb +0 -335
  86. data/lib/swarm_sdk/context_management/builder.rb +0 -128
  87. data/lib/swarm_sdk/context_management/context.rb +0 -328
  88. data/lib/swarm_sdk/custom_tool_registry.rb +0 -226
  89. data/lib/swarm_sdk/defaults.rb +0 -251
  90. data/lib/swarm_sdk/events_to_messages.rb +0 -199
  91. data/lib/swarm_sdk/hooks/adapter.rb +0 -359
  92. data/lib/swarm_sdk/hooks/context.rb +0 -197
  93. data/lib/swarm_sdk/hooks/definition.rb +0 -80
  94. data/lib/swarm_sdk/hooks/error.rb +0 -29
  95. data/lib/swarm_sdk/hooks/executor.rb +0 -146
  96. data/lib/swarm_sdk/hooks/registry.rb +0 -147
  97. data/lib/swarm_sdk/hooks/result.rb +0 -150
  98. data/lib/swarm_sdk/hooks/shell_executor.rb +0 -256
  99. data/lib/swarm_sdk/hooks/tool_call.rb +0 -35
  100. data/lib/swarm_sdk/hooks/tool_result.rb +0 -62
  101. data/lib/swarm_sdk/log_collector.rb +0 -227
  102. data/lib/swarm_sdk/log_stream.rb +0 -127
  103. data/lib/swarm_sdk/markdown_parser.rb +0 -75
  104. data/lib/swarm_sdk/model_aliases.json +0 -8
  105. data/lib/swarm_sdk/models.json +0 -44002
  106. data/lib/swarm_sdk/models.rb +0 -161
  107. data/lib/swarm_sdk/node_context.rb +0 -245
  108. data/lib/swarm_sdk/observer/builder.rb +0 -81
  109. data/lib/swarm_sdk/observer/config.rb +0 -45
  110. data/lib/swarm_sdk/observer/manager.rb +0 -248
  111. data/lib/swarm_sdk/patterns/agent_observer.rb +0 -160
  112. data/lib/swarm_sdk/permissions/config.rb +0 -239
  113. data/lib/swarm_sdk/permissions/error_formatter.rb +0 -121
  114. data/lib/swarm_sdk/permissions/path_matcher.rb +0 -35
  115. data/lib/swarm_sdk/permissions/validator.rb +0 -173
  116. data/lib/swarm_sdk/permissions_builder.rb +0 -122
  117. data/lib/swarm_sdk/plugin.rb +0 -309
  118. data/lib/swarm_sdk/plugin_registry.rb +0 -101
  119. data/lib/swarm_sdk/proc_helpers.rb +0 -53
  120. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +0 -119
  121. data/lib/swarm_sdk/restore_result.rb +0 -65
  122. data/lib/swarm_sdk/result.rb +0 -241
  123. data/lib/swarm_sdk/snapshot.rb +0 -156
  124. data/lib/swarm_sdk/snapshot_from_events.rb +0 -397
  125. data/lib/swarm_sdk/state_restorer.rb +0 -476
  126. data/lib/swarm_sdk/state_snapshot.rb +0 -334
  127. data/lib/swarm_sdk/swarm/agent_initializer.rb +0 -648
  128. data/lib/swarm_sdk/swarm/all_agents_builder.rb +0 -204
  129. data/lib/swarm_sdk/swarm/builder.rb +0 -256
  130. data/lib/swarm_sdk/swarm/executor.rb +0 -446
  131. data/lib/swarm_sdk/swarm/hook_triggers.rb +0 -162
  132. data/lib/swarm_sdk/swarm/lazy_delegate_chat.rb +0 -372
  133. data/lib/swarm_sdk/swarm/logging_callbacks.rb +0 -361
  134. data/lib/swarm_sdk/swarm/mcp_configurator.rb +0 -290
  135. data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +0 -67
  136. data/lib/swarm_sdk/swarm/tool_configurator.rb +0 -392
  137. data/lib/swarm_sdk/swarm.rb +0 -973
  138. data/lib/swarm_sdk/swarm_loader.rb +0 -145
  139. data/lib/swarm_sdk/swarm_registry.rb +0 -136
  140. data/lib/swarm_sdk/tools/base.rb +0 -63
  141. data/lib/swarm_sdk/tools/bash.rb +0 -280
  142. data/lib/swarm_sdk/tools/clock.rb +0 -46
  143. data/lib/swarm_sdk/tools/delegate.rb +0 -389
  144. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +0 -83
  145. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +0 -99
  146. data/lib/swarm_sdk/tools/document_converters/html_converter.rb +0 -101
  147. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +0 -78
  148. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +0 -194
  149. data/lib/swarm_sdk/tools/edit.rb +0 -145
  150. data/lib/swarm_sdk/tools/glob.rb +0 -166
  151. data/lib/swarm_sdk/tools/grep.rb +0 -235
  152. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +0 -43
  153. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +0 -167
  154. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +0 -65
  155. data/lib/swarm_sdk/tools/mcp_tool_stub.rb +0 -198
  156. data/lib/swarm_sdk/tools/multi_edit.rb +0 -236
  157. data/lib/swarm_sdk/tools/path_resolver.rb +0 -92
  158. data/lib/swarm_sdk/tools/read.rb +0 -261
  159. data/lib/swarm_sdk/tools/registry.rb +0 -205
  160. data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +0 -117
  161. data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +0 -97
  162. data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +0 -108
  163. data/lib/swarm_sdk/tools/stores/read_tracker.rb +0 -96
  164. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +0 -273
  165. data/lib/swarm_sdk/tools/stores/storage.rb +0 -142
  166. data/lib/swarm_sdk/tools/stores/todo_manager.rb +0 -65
  167. data/lib/swarm_sdk/tools/think.rb +0 -100
  168. data/lib/swarm_sdk/tools/todo_write.rb +0 -237
  169. data/lib/swarm_sdk/tools/web_fetch.rb +0 -264
  170. data/lib/swarm_sdk/tools/write.rb +0 -112
  171. data/lib/swarm_sdk/transcript_builder.rb +0 -278
  172. data/lib/swarm_sdk/utils.rb +0 -68
  173. data/lib/swarm_sdk/validation_result.rb +0 -33
  174. data/lib/swarm_sdk/version.rb +0 -5
  175. data/lib/swarm_sdk/workflow/agent_config.rb +0 -95
  176. data/lib/swarm_sdk/workflow/builder.rb +0 -227
  177. data/lib/swarm_sdk/workflow/executor.rb +0 -497
  178. data/lib/swarm_sdk/workflow/node_builder.rb +0 -593
  179. data/lib/swarm_sdk/workflow/transformer_executor.rb +0 -250
  180. data/lib/swarm_sdk/workflow.rb +0 -589
  181. data/lib/swarm_sdk.rb +0 -721
@@ -0,0 +1,104 @@
# frozen_string_literal: true

module SwarmSDK
  module V3
    module Memory
      # Calculates exposure scores for memory cards
      #
      # Exposure score = α·frequency + β·recency + γ·dwell (weighted sum)
      #
      # Uses an additive formula so each signal contributes independently.
      # A multiplicative formula would zero out the score whenever any single
      # factor is zero (e.g., a fresh card with access_count=0), which would
      # make every new card an immediate compression candidate.
      #
      # Used by the compressor to decide which cards get lossy compression first.
      # Low-exposure cards are candidates for compression or eviction.
      #
      # @example
      #   tracker = ExposureTracker.new(adapter)
      #   score = tracker.exposure_score(card)
      class ExposureTracker
        # Default weights for the additive exposure formula
        DEFAULT_FREQUENCY_WEIGHT = 0.4
        DEFAULT_RECENCY_WEIGHT = 0.4
        DEFAULT_DWELL_WEIGHT = 0.2

        # Default half-life for recency decay in seconds (7 days)
        DEFAULT_RECENCY_HALF_LIFE = 7 * 24 * 3600

        # Any weight or half-life left as nil is read from Configuration.instance.
        #
        # @param adapter [Adapters::Base] Storage adapter
        # @param frequency_weight [Float] Weight for frequency component (α)
        # @param recency_weight [Float] Weight for recency component (β)
        # @param dwell_weight [Float] Weight for dwell component (γ)
        # @param recency_half_life [Integer] Half-life for recency decay in seconds
        def initialize(adapter, frequency_weight: nil, recency_weight: nil, dwell_weight: nil, recency_half_life: nil)
          @adapter = adapter
          config = Configuration.instance
          @frequency_weight = frequency_weight || config.exposure_frequency_weight
          @recency_weight = recency_weight || config.exposure_recency_weight
          @dwell_weight = dwell_weight || config.exposure_dwell_weight
          @recency_half_life = recency_half_life || config.exposure_recency_half_life
        end

        # Calculate exposure score for a card
        #
        # Combines three signals additively:
        # - Frequency: log(1 + access_count) — dampens high-access cards
        # - Recency: exponential decay from last access time
        # - Dwell: the card's dwell value, used as-is
        #
        # E = α·log(1 + access_count) + β·recency(last_accessed) + γ·dwell
        #
        # @param card [Card] Card to score
        # @return [Float] Exposure score (higher = more exposed)
        #
        # @example
        #   score = tracker.exposure_score(card)
        #   # High score: frequently accessed, recently used, high dwell
        #   # Low score: rarely accessed, long ago, low dwell
        def exposure_score(card)
          frequency = Math.log(1 + card.access_count)
          recency = recency_factor(card.last_accessed)
          dwell = card.dwell

          (@frequency_weight * frequency) +
            (@recency_weight * recency) +
            (@dwell_weight * dwell)
        end

        # Rank all cards by exposure score (ascending = least exposed first)
        #
        # @return [Array<Hash>] Array of { card:, score: } sorted ascending
        def rank_by_exposure
          @adapter.list_cards
            .map { |card| { card: card, score: exposure_score(card) } }
            .sort_by { |entry| entry[:score] }
        end

        # Find cards with low exposure (candidates for compression)
        #
        # Cards are returned in ascending score order; the threshold is exclusive.
        #
        # @param threshold [Float] Exposure score a card must stay below
        # @return [Array<Card>] Low-exposure cards
        def low_exposure_cards(threshold: 1.0)
          rank_by_exposure
            .select { |entry| entry[:score] < threshold }
            .map { |entry| entry[:card] }
        end

        private

        # Calculate recency factor using exponential decay
        #
        # The factor halves every @recency_half_life seconds of age.
        #
        # @param last_accessed [Time, nil] Last access time
        # @return [Float] Recency factor (0.0-1.0)
        def recency_factor(last_accessed)
          return 0.0 if last_accessed.nil?
          # A zero or negative half-life would divide by zero or make the factor
          # grow with age; treat it as "nothing is recent" instead.
          return 0.0 unless @recency_half_life.positive?

          # Clamp at zero: a timestamp in the future (clock skew, imported data)
          # would otherwise give a negative age and a factor above 1.0.
          age_seconds = [Time.now - last_accessed, 0.0].max
          Math.exp(-age_seconds * Math.log(2) / @recency_half_life)
        end
      end
    end
  end
end
@@ -0,0 +1,394 @@
# frozen_string_literal: true

require "set"

module SwarmSDK
  module V3
    module Memory
      # Async post-turn processing pipeline
      #
      # After each conversation turn, this pipeline:
      # 1. Segments the turn into atomic ideas (LLM when available, heuristic fallback)
      # 2. Creates memory cards (<=250 words each)
      # 3. Extracts entities
      # 4. Creates graph edges between related cards (within turn and cross-turn)
      # 5. Assigns cards to clusters (updating decision logs for decision cards)
      # 6. Generates embeddings
      #
      # @example
      #   pipeline = IngestionPipeline.new(adapter: adapter, embedder: embedder)
      #   pipeline.ingest(turn_text: "The API uses JWT...", turn_id: "turn_001")
      class IngestionPipeline
        # Maximum number of words allowed in a single memory card
        MAX_CARD_WORDS = 250

        # Keyword substrings that mark a card type, checked in this order;
        # the first type with any matching keyword wins.
        TYPE_KEYWORDS = {
          decision: ["decided", "decision", "chose"].freeze,
          constraint: ["must", "always", "never", "constraint"].freeze,
          preference: ["prefer", "like", "want"].freeze,
          incident: ["error", "bug", "crash", "incident"].freeze,
          concept: ["concept", "pattern", "architecture"].freeze,
        }.freeze
        private_constant :TYPE_KEYWORDS

        # The first alternative consumes the word that follows sentence
        # punctuation so it cannot be picked up as an entity; only the second
        # alternative (a run of Capitalized words) is captured.
        # Words with further capitals (e.g. "JWT", "PostgreSQL") do not match.
        ENTITY_PATTERN = /(?<=[.!?]\s)\w+|\b([A-Z][a-z]*(?:\s+[A-Z][a-z]*)*)\b/
        private_constant :ENTITY_PATTERN

        # @param adapter [Adapters::Base] Storage adapter
        # @param embedder [Embedder] Text embedder
        # @param chat [RubyLLM::Chat, nil] LLM for segmentation (nil = simple splitting)
        def initialize(adapter:, embedder:, chat: nil)
          @adapter = adapter
          @embedder = embedder
          @chat = chat
          @config = Configuration.instance
        end

        # Ingest a conversation turn into memory cards
        #
        # A nil or blank turn produces no cards.
        #
        # @param turn_text [String, nil] Full turn text (user + assistant + tool calls)
        # @param turn_id [String] Unique turn identifier
        # @return [Array<Card>] Created cards
        def ingest(turn_text:, turn_id:)
          # to_s keeps the log line from raising on a nil turn, which
          # segment_turn already treats as "nothing to ingest".
          DebugLog.log("ingestion", "ingest: turn_id=#{turn_id}, text_len=#{turn_text.to_s.size}")

          segments = DebugLog.time("ingestion", "segment_turn") do
            segment_turn(turn_text)
          end
          DebugLog.log("ingestion", "segments=#{segments.size}")

          cards = DebugLog.time("ingestion", "create_cards(#{segments.size})") do
            segments.map { |segment| create_card(text: segment, turn_id: turn_id) }
          end

          DebugLog.time("ingestion", "create_turn_edges") { create_turn_edges(cards) }
          DebugLog.time("ingestion", "cross_turn_entity_edges") { create_cross_turn_entity_edges(cards) }
          DebugLog.time("ingestion", "assign_to_clusters") { assign_to_clusters(cards) }

          DebugLog.log("ingestion", "ingest complete: #{cards.size} cards created")
          cards
        end

        private

        # Segment a turn into atomic ideas
        #
        # Uses LLM for semantic segmentation when available. The LLM produces
        # better idea-boundary splits and self-contained cards without dangling
        # pronouns. Falls back to paragraph-based splitting when no LLM is
        # configured.
        #
        # @param text [String, nil] Turn text
        # @return [Array<String>] Segments
        def segment_turn(text)
          return [] if text.nil? || text.strip.empty?

          if @chat
            llm_segment(text)
          else
            heuristic_segment(text)
          end
        end

        # Use LLM to segment text into atomic, self-contained ideas
        #
        # Resets the background chat before each call to prevent message
        # accumulation across multiple ingestion operations. Any StandardError
        # is reported on the EventStream and the heuristic splitter is used.
        #
        # @param text [String] Turn text
        # @return [Array<String>] Self-contained segments
        def llm_segment(text)
          @chat.reset_messages!
          response = @chat.ask(<<~PROMPT)
            Segment the following conversation turn into atomic, self-contained memory cards.
            Each card should:
            - Capture exactly ONE idea, fact, decision, or concept
            - Be <=#{MAX_CARD_WORDS} words
            - Be fully self-contained (no dangling "this/that/it" without referent)
            - Include the subject/entity and claim/decision explicitly

            Separate each card with "---" on its own line.
            Output ONLY the cards, no explanations.

            Text to segment:
            #{text}
          PROMPT

          segments = response.content.strip.split(/^---+\s*$/).map(&:strip).reject(&:empty?)
          # Validate segments are reasonable
          return heuristic_segment(text) if segments.empty?

          # Enforce word limit on LLM output
          enforce_word_limit(segments)
        rescue StandardError => e
          EventStream.emit(
            type: "memory_segmentation_llm_error",
            error: "#{e.class}: #{e.message}",
          )
          # Fall back to heuristic on any LLM error
          heuristic_segment(text)
        end

        # Heuristic segmentation by paragraphs with merging/splitting
        #
        # Consecutive paragraphs are merged while the combined size stays
        # within MAX_CARD_WORDS; anything still too long is chunked.
        #
        # @param text [String] Turn text
        # @return [Array<String>] Segments
        def heuristic_segment(text)
          paragraphs = text.split(/\n\n+/).map(&:strip).reject(&:empty?)

          segments = []
          buffer = +""

          paragraphs.each do |para|
            if buffer.empty?
              buffer << para
            elsif word_count(buffer) + word_count(para) <= MAX_CARD_WORDS
              buffer << "\n\n#{para}"
            else
              # dup: the buffer is reused for the next segment
              segments << buffer.dup
              buffer.replace(para)
            end
          end

          segments << buffer unless buffer.empty?

          # Split any segments that are still too long
          enforce_word_limit(segments)
        end

        # Replace every over-long segment with its word-limited chunks
        #
        # @param segments [Array<String>] Candidate segments
        # @return [Array<String>] Segments of at most MAX_CARD_WORDS words
        def enforce_word_limit(segments)
          segments.flat_map { |s| word_count(s) > MAX_CARD_WORDS ? split_long_segment(s) : s }
        end

        # Create a memory card from a text segment
        #
        # Sets importance based on inferred card type:
        # - Constraints and decisions get high importance (0.8)
        # - Preferences and incidents get medium-high importance (0.7)
        # - Concepts get medium importance (0.6)
        # - Facts get default importance (0.5)
        #
        # @param text [String] Card text
        # @param turn_id [String] Source turn ID
        # @return [Card] Created and persisted card
        def create_card(text:, turn_id:)
          entities = extract_entities(text)
          type = infer_type(text)
          embedding = @embedder.embed(text)
          importance = importance_for_type(type)

          card = Card.new(
            text: text,
            type: type,
            entities: entities,
            source_turn_ids: [turn_id],
            embedding: embedding,
            importance: importance,
            dwell: 0.0,
          )

          @adapter.write_card(card)
          card
        end

        # Determine importance score based on card type
        #
        # Constraints and decisions are high-importance because violating
        # or forgetting them has serious consequences. Preferences and
        # incidents are medium-high because they inform future decisions.
        #
        # @param type [Symbol] Card type
        # @return [Float] Importance score (0.0-1.0)
        def importance_for_type(type)
          case type
          when :constraint, :decision then 0.8
          when :preference, :incident then 0.7
          when :concept then 0.6
          else 0.5 # :fact
          end
        end

        # Extract named entities from text
        #
        # Simple heuristic: runs of Capitalized words (e.g. "New York") that
        # do not directly follow sentence-ending punctuation. The very first
        # word of the text is still eligible. At most 10 unique entities of
        # two or more characters are returned.
        #
        # @param text [String] Text to extract from
        # @return [Array<String>] Extracted entities
        def extract_entities(text)
          # scan yields one-element arrays (the capture group); sentence-start
          # matches have a nil capture and are dropped by compact.
          words = text.scan(ENTITY_PATTERN).flatten.compact
          words.uniq.reject { |w| w.length < 2 }.take(10)
        end

        # Infer card type from text content
        #
        # Case-insensitive substring match against TYPE_KEYWORDS; falls back
        # to :fact when nothing matches.
        #
        # @param text [String] Card text
        # @return [Symbol] Card type
        def infer_type(text)
          lower = text.downcase
          match = TYPE_KEYWORDS.find do |_type, keywords|
            keywords.any? { |keyword| lower.include?(keyword) }
          end
          match ? match.first : :fact
        end

        # Create edges between cards from the same turn
        #
        # Each adjacent pair gets a same_episode edge, plus a same_entity edge
        # when the two cards share entities.
        #
        # @param cards [Array<Card>] Cards from the same turn
        # @return [void]
        def create_turn_edges(cards)
          cards.each_cons(2) do |a, b|
            edge = Edge.new(
              from_id: a.id,
              to_id: b.id,
              type: :same_episode,
              weight: 0.8,
            )
            @adapter.write_edge(edge)

            # Also create same_entity edges for shared entities
            shared = a.entities & b.entities
            next if shared.empty?

            entity_edge = Edge.new(
              from_id: a.id,
              to_id: b.id,
              type: :same_entity,
              weight: [0.5 + (shared.size * 0.1), 1.0].min,
            )
            @adapter.write_edge(entity_edge)
          end
        end

        # Create edges between new cards and existing cards that share entities
        #
        # Scans recent existing cards for shared entity names and creates
        # same_entity edges when matches are found. This connects new knowledge
        # to the existing memory graph across turns. Entity names are compared
        # case-insensitively.
        #
        # @param new_cards [Array<Card>] Newly created cards
        # @return [void]
        def create_cross_turn_entity_edges(new_cards)
          return if new_cards.empty?

          new_card_ids = Set.new(new_cards.map(&:id))
          existing_cards = @adapter.list_cards.reject { |c| new_card_ids.include?(c.id) }
          return if existing_cards.empty?

          # Limit scan to avoid O(n*m) explosion on large memory stores
          candidates = existing_cards.last(@config.cross_turn_edge_scan_limit)

          new_cards.each do |new_card|
            next if new_card.entities.empty?

            new_entities_down = new_card.entities.map(&:downcase).to_set

            candidates.each do |existing_card|
              next if existing_card.entities.empty?

              shared = existing_card.entities.select { |e| new_entities_down.include?(e.downcase) }
              next if shared.empty?

              edge = Edge.new(
                from_id: new_card.id,
                to_id: existing_card.id,
                type: :same_entity,
                weight: [0.4 + (shared.size * 0.1), 1.0].min,
              )
              @adapter.write_edge(edge)
            end
          end
        end

        # Assign cards to existing clusters or create new ones
        #
        # When a decision-type card is added to a cluster, the cluster's
        # decision_log is updated with the decision text.
        #
        # @param cards [Array<Card>] Cards to assign
        # @return [void]
        def assign_to_clusters(cards)
          @adapter.transaction do
            clusters = @adapter.list_clusters

            cards.each do |card|
              best_cluster = find_best_cluster(card, clusters)

              if best_cluster
                best_cluster.add_card(card.id)
                update_cluster_for_card(best_cluster, card)
                @adapter.write_cluster(best_cluster)
              else
                # Create a new cluster from this card
                cluster = Cluster.new(
                  title: card.entities.first || card.type.to_s.capitalize,
                  card_ids: [card.id],
                  key_entities: card.entities.take(5),
                  embedding: card.embedding,
                )
                update_cluster_for_card(cluster, card)
                @adapter.write_cluster(cluster)
                # Make the new cluster a candidate for the remaining cards
                clusters << cluster
              end
            end
          end
        end

        # Update a cluster when a new card is added
        #
        # Appends to the cluster's decision_log if the card is a decision type.
        # Updates key_entities with any new entities from the card.
        #
        # @param cluster [Cluster] Cluster to update
        # @param card [Card] Card being added
        # @return [void]
        def update_cluster_for_card(cluster, card)
          # Update decision log for decision-type cards (first 200 characters)
          if card.type == :decision
            decision_entry = card.text.slice(0, 200)
            cluster.decision_log << decision_entry unless cluster.decision_log.include?(decision_entry)
          end

          # Merge new entities into cluster (at most 5 per card)
          new_entities = card.entities - cluster.key_entities
          cluster.key_entities.concat(new_entities.take(5)) unless new_entities.empty?
        end

        # Find the best matching cluster for a card
        #
        # Uses the adapter's similarity method so that storage backends
        # like pgvector can compute similarity server-side.
        #
        # @param card [Card] Card to match
        # @param clusters [Array<Cluster>] Available clusters
        # @return [Cluster, nil] Best cluster or nil
        def find_best_cluster(card, clusters)
          return if clusters.empty? || card.embedding.nil?

          best = nil
          best_score = 0.3 # Minimum similarity threshold

          clusters.each do |cluster|
            next unless cluster.embedding

            score = @adapter.similarity(card.embedding, cluster.embedding)
            if score > best_score
              best_score = score
              best = cluster
            end
          end

          best
        end

        # Count whitespace-separated words
        #
        # @param text [String]
        # @return [Integer]
        def word_count(text)
          # Plain split ignores leading whitespace; split(/\s+/) would count
          # an empty leading token.
          text.split.size
        end

        # Split a long segment into chunks of at most MAX_CARD_WORDS words
        #
        # Words are re-joined with single spaces, so original line breaks
        # inside the segment are not preserved.
        #
        # @param text [String] Long text
        # @return [Array<String>] Chunks
        def split_long_segment(text)
          text.split.each_slice(MAX_CARD_WORDS).map { |slice| slice.join(" ") }
        end
      end
    end
  end
end