swarm_sdk 2.7.14 → 3.0.0.alpha2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/lib/swarm_sdk/ruby_llm_patches/chat_callbacks_patch.rb +16 -0
  3. data/lib/swarm_sdk/ruby_llm_patches/init.rb +4 -1
  4. data/lib/swarm_sdk/v3/agent.rb +1165 -0
  5. data/lib/swarm_sdk/v3/agent_builder.rb +533 -0
  6. data/lib/swarm_sdk/v3/agent_definition.rb +330 -0
  7. data/lib/swarm_sdk/v3/configuration.rb +490 -0
  8. data/lib/swarm_sdk/v3/debug_log.rb +86 -0
  9. data/lib/swarm_sdk/v3/event_stream.rb +130 -0
  10. data/lib/swarm_sdk/v3/hooks/context.rb +112 -0
  11. data/lib/swarm_sdk/v3/hooks/result.rb +115 -0
  12. data/lib/swarm_sdk/v3/hooks/runner.rb +128 -0
  13. data/lib/swarm_sdk/v3/mcp/connector.rb +183 -0
  14. data/lib/swarm_sdk/v3/mcp/mcp_error.rb +15 -0
  15. data/lib/swarm_sdk/v3/mcp/server_definition.rb +125 -0
  16. data/lib/swarm_sdk/v3/mcp/ssl_http_transport.rb +103 -0
  17. data/lib/swarm_sdk/v3/mcp/stdio_transport.rb +135 -0
  18. data/lib/swarm_sdk/v3/mcp/tool_proxy.rb +53 -0
  19. data/lib/swarm_sdk/v3/memory/adapters/base.rb +297 -0
  20. data/lib/swarm_sdk/v3/memory/adapters/faiss_support.rb +194 -0
  21. data/lib/swarm_sdk/v3/memory/adapters/filesystem_adapter.rb +212 -0
  22. data/lib/swarm_sdk/v3/memory/adapters/sqlite_adapter.rb +507 -0
  23. data/lib/swarm_sdk/v3/memory/adapters/vector_utils.rb +88 -0
  24. data/lib/swarm_sdk/v3/memory/card.rb +206 -0
  25. data/lib/swarm_sdk/v3/memory/cluster.rb +146 -0
  26. data/lib/swarm_sdk/v3/memory/compressor.rb +496 -0
  27. data/lib/swarm_sdk/v3/memory/consolidator.rb +427 -0
  28. data/lib/swarm_sdk/v3/memory/context_builder.rb +339 -0
  29. data/lib/swarm_sdk/v3/memory/edge.rb +105 -0
  30. data/lib/swarm_sdk/v3/memory/embedder.rb +185 -0
  31. data/lib/swarm_sdk/v3/memory/exposure_tracker.rb +104 -0
  32. data/lib/swarm_sdk/v3/memory/ingestion_pipeline.rb +394 -0
  33. data/lib/swarm_sdk/v3/memory/retriever.rb +289 -0
  34. data/lib/swarm_sdk/v3/memory/store.rb +489 -0
  35. data/lib/swarm_sdk/v3/skills/loader.rb +147 -0
  36. data/lib/swarm_sdk/v3/skills/manifest.rb +45 -0
  37. data/lib/swarm_sdk/v3/sub_task_agent.rb +248 -0
  38. data/lib/swarm_sdk/v3/tools/base.rb +80 -0
  39. data/lib/swarm_sdk/v3/tools/bash.rb +174 -0
  40. data/lib/swarm_sdk/v3/tools/clock.rb +32 -0
  41. data/lib/swarm_sdk/v3/tools/document_converters/base.rb +84 -0
  42. data/lib/swarm_sdk/v3/tools/document_converters/docx_converter.rb +120 -0
  43. data/lib/swarm_sdk/v3/tools/document_converters/pdf_converter.rb +111 -0
  44. data/lib/swarm_sdk/v3/tools/document_converters/xlsx_converter.rb +128 -0
  45. data/lib/swarm_sdk/v3/tools/edit.rb +111 -0
  46. data/lib/swarm_sdk/v3/tools/glob.rb +96 -0
  47. data/lib/swarm_sdk/v3/tools/grep.rb +200 -0
  48. data/lib/swarm_sdk/v3/tools/message_teammate.rb +15 -0
  49. data/lib/swarm_sdk/v3/tools/message_user.rb +15 -0
  50. data/lib/swarm_sdk/v3/tools/read.rb +213 -0
  51. data/lib/swarm_sdk/v3/tools/read_tracker.rb +40 -0
  52. data/lib/swarm_sdk/v3/tools/registry.rb +208 -0
  53. data/lib/swarm_sdk/v3/tools/sub_task.rb +183 -0
  54. data/lib/swarm_sdk/v3/tools/think.rb +88 -0
  55. data/lib/swarm_sdk/v3/tools/write.rb +87 -0
  56. data/lib/swarm_sdk/v3.rb +145 -0
  57. metadata +88 -149
  58. data/lib/swarm_sdk/agent/RETRY_LOGIC.md +0 -175
  59. data/lib/swarm_sdk/agent/builder.rb +0 -705
  60. data/lib/swarm_sdk/agent/chat.rb +0 -1438
  61. data/lib/swarm_sdk/agent/chat_helpers/context_tracker.rb +0 -375
  62. data/lib/swarm_sdk/agent/chat_helpers/event_emitter.rb +0 -204
  63. data/lib/swarm_sdk/agent/chat_helpers/hook_integration.rb +0 -480
  64. data/lib/swarm_sdk/agent/chat_helpers/instrumentation.rb +0 -85
  65. data/lib/swarm_sdk/agent/chat_helpers/llm_configuration.rb +0 -290
  66. data/lib/swarm_sdk/agent/chat_helpers/logging_helpers.rb +0 -116
  67. data/lib/swarm_sdk/agent/chat_helpers/serialization.rb +0 -83
  68. data/lib/swarm_sdk/agent/chat_helpers/system_reminder_injector.rb +0 -134
  69. data/lib/swarm_sdk/agent/chat_helpers/system_reminders.rb +0 -79
  70. data/lib/swarm_sdk/agent/chat_helpers/token_tracking.rb +0 -146
  71. data/lib/swarm_sdk/agent/context.rb +0 -115
  72. data/lib/swarm_sdk/agent/context_manager.rb +0 -315
  73. data/lib/swarm_sdk/agent/definition.rb +0 -588
  74. data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +0 -226
  75. data/lib/swarm_sdk/agent/system_prompt_builder.rb +0 -173
  76. data/lib/swarm_sdk/agent/tool_registry.rb +0 -189
  77. data/lib/swarm_sdk/agent_registry.rb +0 -146
  78. data/lib/swarm_sdk/builders/base_builder.rb +0 -558
  79. data/lib/swarm_sdk/claude_code_agent_adapter.rb +0 -205
  80. data/lib/swarm_sdk/concerns/cleanupable.rb +0 -42
  81. data/lib/swarm_sdk/concerns/snapshotable.rb +0 -67
  82. data/lib/swarm_sdk/concerns/validatable.rb +0 -55
  83. data/lib/swarm_sdk/config.rb +0 -368
  84. data/lib/swarm_sdk/configuration/parser.rb +0 -397
  85. data/lib/swarm_sdk/configuration/translator.rb +0 -285
  86. data/lib/swarm_sdk/configuration.rb +0 -165
  87. data/lib/swarm_sdk/context_compactor/metrics.rb +0 -147
  88. data/lib/swarm_sdk/context_compactor/token_counter.rb +0 -102
  89. data/lib/swarm_sdk/context_compactor.rb +0 -335
  90. data/lib/swarm_sdk/context_management/builder.rb +0 -128
  91. data/lib/swarm_sdk/context_management/context.rb +0 -328
  92. data/lib/swarm_sdk/custom_tool_registry.rb +0 -226
  93. data/lib/swarm_sdk/defaults.rb +0 -251
  94. data/lib/swarm_sdk/events_to_messages.rb +0 -199
  95. data/lib/swarm_sdk/hooks/adapter.rb +0 -359
  96. data/lib/swarm_sdk/hooks/context.rb +0 -197
  97. data/lib/swarm_sdk/hooks/definition.rb +0 -80
  98. data/lib/swarm_sdk/hooks/error.rb +0 -29
  99. data/lib/swarm_sdk/hooks/executor.rb +0 -146
  100. data/lib/swarm_sdk/hooks/registry.rb +0 -147
  101. data/lib/swarm_sdk/hooks/result.rb +0 -150
  102. data/lib/swarm_sdk/hooks/shell_executor.rb +0 -256
  103. data/lib/swarm_sdk/hooks/tool_call.rb +0 -35
  104. data/lib/swarm_sdk/hooks/tool_result.rb +0 -62
  105. data/lib/swarm_sdk/log_collector.rb +0 -227
  106. data/lib/swarm_sdk/log_stream.rb +0 -127
  107. data/lib/swarm_sdk/markdown_parser.rb +0 -75
  108. data/lib/swarm_sdk/model_aliases.json +0 -8
  109. data/lib/swarm_sdk/models.json +0 -44002
  110. data/lib/swarm_sdk/models.rb +0 -161
  111. data/lib/swarm_sdk/node_context.rb +0 -245
  112. data/lib/swarm_sdk/observer/builder.rb +0 -81
  113. data/lib/swarm_sdk/observer/config.rb +0 -45
  114. data/lib/swarm_sdk/observer/manager.rb +0 -248
  115. data/lib/swarm_sdk/patterns/agent_observer.rb +0 -160
  116. data/lib/swarm_sdk/permissions/config.rb +0 -239
  117. data/lib/swarm_sdk/permissions/error_formatter.rb +0 -121
  118. data/lib/swarm_sdk/permissions/path_matcher.rb +0 -35
  119. data/lib/swarm_sdk/permissions/validator.rb +0 -173
  120. data/lib/swarm_sdk/permissions_builder.rb +0 -122
  121. data/lib/swarm_sdk/plugin.rb +0 -309
  122. data/lib/swarm_sdk/plugin_registry.rb +0 -101
  123. data/lib/swarm_sdk/proc_helpers.rb +0 -53
  124. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +0 -119
  125. data/lib/swarm_sdk/restore_result.rb +0 -65
  126. data/lib/swarm_sdk/result.rb +0 -241
  127. data/lib/swarm_sdk/snapshot.rb +0 -156
  128. data/lib/swarm_sdk/snapshot_from_events.rb +0 -397
  129. data/lib/swarm_sdk/state_restorer.rb +0 -476
  130. data/lib/swarm_sdk/state_snapshot.rb +0 -334
  131. data/lib/swarm_sdk/swarm/agent_initializer.rb +0 -648
  132. data/lib/swarm_sdk/swarm/all_agents_builder.rb +0 -204
  133. data/lib/swarm_sdk/swarm/builder.rb +0 -256
  134. data/lib/swarm_sdk/swarm/executor.rb +0 -446
  135. data/lib/swarm_sdk/swarm/hook_triggers.rb +0 -162
  136. data/lib/swarm_sdk/swarm/lazy_delegate_chat.rb +0 -372
  137. data/lib/swarm_sdk/swarm/logging_callbacks.rb +0 -361
  138. data/lib/swarm_sdk/swarm/mcp_configurator.rb +0 -290
  139. data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +0 -67
  140. data/lib/swarm_sdk/swarm/tool_configurator.rb +0 -392
  141. data/lib/swarm_sdk/swarm.rb +0 -973
  142. data/lib/swarm_sdk/swarm_loader.rb +0 -145
  143. data/lib/swarm_sdk/swarm_registry.rb +0 -136
  144. data/lib/swarm_sdk/tools/base.rb +0 -63
  145. data/lib/swarm_sdk/tools/bash.rb +0 -280
  146. data/lib/swarm_sdk/tools/clock.rb +0 -46
  147. data/lib/swarm_sdk/tools/delegate.rb +0 -389
  148. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +0 -83
  149. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +0 -99
  150. data/lib/swarm_sdk/tools/document_converters/html_converter.rb +0 -101
  151. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +0 -78
  152. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +0 -194
  153. data/lib/swarm_sdk/tools/edit.rb +0 -145
  154. data/lib/swarm_sdk/tools/glob.rb +0 -166
  155. data/lib/swarm_sdk/tools/grep.rb +0 -235
  156. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +0 -43
  157. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +0 -167
  158. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +0 -65
  159. data/lib/swarm_sdk/tools/mcp_tool_stub.rb +0 -198
  160. data/lib/swarm_sdk/tools/multi_edit.rb +0 -236
  161. data/lib/swarm_sdk/tools/path_resolver.rb +0 -92
  162. data/lib/swarm_sdk/tools/read.rb +0 -261
  163. data/lib/swarm_sdk/tools/registry.rb +0 -205
  164. data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +0 -117
  165. data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +0 -97
  166. data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +0 -108
  167. data/lib/swarm_sdk/tools/stores/read_tracker.rb +0 -96
  168. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +0 -273
  169. data/lib/swarm_sdk/tools/stores/storage.rb +0 -142
  170. data/lib/swarm_sdk/tools/stores/todo_manager.rb +0 -65
  171. data/lib/swarm_sdk/tools/think.rb +0 -100
  172. data/lib/swarm_sdk/tools/todo_write.rb +0 -237
  173. data/lib/swarm_sdk/tools/web_fetch.rb +0 -264
  174. data/lib/swarm_sdk/tools/write.rb +0 -112
  175. data/lib/swarm_sdk/transcript_builder.rb +0 -278
  176. data/lib/swarm_sdk/utils.rb +0 -68
  177. data/lib/swarm_sdk/validation_result.rb +0 -33
  178. data/lib/swarm_sdk/version.rb +0 -5
  179. data/lib/swarm_sdk/workflow/agent_config.rb +0 -95
  180. data/lib/swarm_sdk/workflow/builder.rb +0 -227
  181. data/lib/swarm_sdk/workflow/executor.rb +0 -497
  182. data/lib/swarm_sdk/workflow/node_builder.rb +0 -593
  183. data/lib/swarm_sdk/workflow/transformer_executor.rb +0 -250
  184. data/lib/swarm_sdk/workflow.rb +0 -589
  185. data/lib/swarm_sdk.rb +0 -721
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ module Memory
6
# Calculates exposure scores for memory cards.
#
# Exposure score = α·frequency + β·recency + γ·dwell (weighted sum)
#
# The formula is additive so each signal contributes independently.
# A multiplicative formula would zero out the score whenever any single
# factor is zero (e.g., a fresh card with access_count=0), which would
# make every new card an immediate compression candidate.
#
# Used by the compressor to decide which cards get lossy compression first.
# Low-exposure cards are candidates for compression or eviction.
#
# @example
#   tracker = ExposureTracker.new(adapter)
#   score = tracker.exposure_score(card)
class ExposureTracker
  # Default weights for the additive exposure formula.
  # NOTE(review): not referenced inside this class; presumably consumed by
  # Configuration as its defaults — confirm.
  DEFAULT_FREQUENCY_WEIGHT = 0.4
  DEFAULT_RECENCY_WEIGHT = 0.4
  DEFAULT_DWELL_WEIGHT = 0.2

  # Default half-life for recency decay in seconds (7 days)
  DEFAULT_RECENCY_HALF_LIFE = 7 * 24 * 3600

  # Any weight or half-life left as nil falls back to the matching
  # Configuration.instance setting.
  #
  # @param adapter [Adapters::Base] Storage adapter (must respond to #list_cards)
  # @param frequency_weight [Float, nil] Weight for frequency component (α)
  # @param recency_weight [Float, nil] Weight for recency component (β)
  # @param dwell_weight [Float, nil] Weight for dwell component (γ)
  # @param recency_half_life [Integer, nil] Half-life for recency decay in seconds
  def initialize(adapter, frequency_weight: nil, recency_weight: nil, dwell_weight: nil, recency_half_life: nil)
    @adapter = adapter
    config = Configuration.instance
    @frequency_weight = frequency_weight || config.exposure_frequency_weight
    @recency_weight = recency_weight || config.exposure_recency_weight
    @dwell_weight = dwell_weight || config.exposure_dwell_weight
    @recency_half_life = recency_half_life || config.exposure_recency_half_life
  end

  # Calculate exposure score for a card.
  #
  # Combines three signals additively:
  # - Frequency: log(1 + access_count) — dampens high-access cards
  # - Recency: exponential decay from last access time
  # - Dwell: the card's dwell value, used as-is
  #
  #   E = α·log(1 + access_count) + β·recency(last_accessed) + γ·dwell
  #
  # @param card [Card] Card to score (reads access_count, last_accessed, dwell)
  # @return [Float] Exposure score (higher = more exposed)
  def exposure_score(card)
    frequency = Math.log(1 + card.access_count)
    recency = recency_factor(card.last_accessed)

    (@frequency_weight * frequency) +
      (@recency_weight * recency) +
      (@dwell_weight * card.dwell)
  end

  # Rank all cards by exposure score (ascending = least exposed first).
  #
  # @return [Array<Hash>] Array of { card:, score: } sorted ascending
  def rank_by_exposure
    scored = @adapter.list_cards.map { |card| { card: card, score: exposure_score(card) } }
    scored.sort_by { |entry| entry[:score] }
  end

  # Find cards with low exposure (candidates for compression).
  #
  # @param threshold [Float] Exclusive upper bound on the exposure score
  # @return [Array<Card>] Low-exposure cards, least exposed first
  def low_exposure_cards(threshold: 1.0)
    rank_by_exposure
      .select { |entry| entry[:score] < threshold }
      .map { |entry| entry[:card] }
  end

  private

  # Calculate recency factor using exponential decay.
  #
  # @param last_accessed [Time, nil] Last access time
  # @return [Float] Recency factor (0.0-1.0); 0.0 when never accessed
  def recency_factor(last_accessed)
    return 0.0 if last_accessed.nil?

    # Clamp at zero: a timestamp in the future (clock skew, imported data)
    # would otherwise give a negative age and a factor above 1.0.
    age_seconds = [Time.now - last_accessed, 0.0].max
    Math.exp(-age_seconds * Math.log(2) / @recency_half_life)
  end
end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,394 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ module Memory
6
+ # Async post-turn processing pipeline
7
+ #
8
+ # After each conversation turn, this pipeline:
9
+ # 1. Segments the turn into atomic ideas (LLM when available, heuristic fallback)
10
+ # 2. Creates memory cards (<=250 words each)
11
+ # 3. Extracts entities
12
+ # 4. Creates graph edges between related cards (within turn and cross-turn)
13
+ # 5. Assigns cards to clusters (updating decision logs for decision cards)
14
+ # 6. Generates embeddings
15
+ #
16
+ # @example
17
+ # pipeline = IngestionPipeline.new(adapter: adapter, embedder: embedder)
18
+ # pipeline.ingest(turn_text: "The API uses JWT...", turn_id: "turn_001")
19
+ class IngestionPipeline
20
# Wire the pipeline to its collaborators and capture the shared configuration.
#
# @param adapter [Adapters::Base] Storage adapter
# @param embedder [Embedder] Text embedder
# @param chat [RubyLLM::Chat, nil] LLM for segmentation (nil = simple splitting)
def initialize(adapter:, embedder:, chat: nil)
  @adapter, @embedder, @chat = adapter, embedder, chat
  @config = Configuration.instance
end
29
+
30
# Ingest a conversation turn into memory cards.
#
# Runs the stages in order: segment the text, create one card per segment,
# link cards within the turn, link them to existing cards that share
# entities, then assign them to clusters. Each stage is timed via DebugLog.
#
# @param turn_text [String, nil] Full turn text (user + assistant + tool calls);
#   nil or blank text produces no cards
# @param turn_id [String] Unique turn identifier
# @return [Array<Card>] Created cards
def ingest(turn_text:, turn_id:)
  # to_s keeps the log line nil-safe: segment_turn already treats nil text as
  # "no segments", so logging must not raise before that guard is reached.
  DebugLog.log("ingestion", "ingest: turn_id=#{turn_id}, text_len=#{turn_text.to_s.size}")

  segments = DebugLog.time("ingestion", "segment_turn") do
    segment_turn(turn_text)
  end
  DebugLog.log("ingestion", "segments=#{segments.size}")

  cards = DebugLog.time("ingestion", "create_cards(#{segments.size})") do
    segments.map { |segment| create_card(text: segment, turn_id: turn_id) }
  end

  DebugLog.time("ingestion", "create_turn_edges") { create_turn_edges(cards) }
  DebugLog.time("ingestion", "cross_turn_entity_edges") { create_cross_turn_entity_edges(cards) }
  DebugLog.time("ingestion", "assign_to_clusters") { assign_to_clusters(cards) }

  DebugLog.log("ingestion", "ingest complete: #{cards.size} cards created")
  cards
end
54
+
55
+ private
56
+
57
# Break a turn into atomic ideas.
#
# Delegates to the LLM segmenter when a chat is configured and to the
# paragraph-based heuristic otherwise. Nil or whitespace-only text yields
# no segments.
#
# @param text [String, nil] Turn text
# @return [Array<String>] Segments
def segment_turn(text)
  blank = text.nil? || text.strip.empty?
  return [] if blank

  @chat ? llm_segment(text) : heuristic_segment(text)
end
75
+
76
# Ask the LLM to split text into atomic, self-contained ideas.
#
# The chat's messages are reset before every request so history does not
# accumulate across ingestion operations. The reply is split on "---"
# separator lines; any piece longer than 250 words is chunked further.
# An empty reply, or any StandardError raised along the way, falls back to
# heuristic segmentation (errors are also reported on the EventStream).
#
# @param text [String] Turn text
# @return [Array<String>] Self-contained segments
def llm_segment(text)
  prompt = <<~PROMPT
    Segment the following conversation turn into atomic, self-contained memory cards.
    Each card should:
    - Capture exactly ONE idea, fact, decision, or concept
    - Be <=250 words
    - Be fully self-contained (no dangling "this/that/it" without referent)
    - Include the subject/entity and claim/decision explicitly

    Separate each card with "---" on its own line.
    Output ONLY the cards, no explanations.

    Text to segment:
    #{text}
  PROMPT

  @chat.reset_messages!
  reply = @chat.ask(prompt)
  pieces = reply.content.strip.split(/^---+\s*$/).map(&:strip).reject(&:empty?)

  if pieces.empty?
    # Nothing usable came back from the model
    heuristic_segment(text)
  else
    # Enforce the word limit on whatever the model produced
    pieces.flat_map { |piece| word_count(piece) > 250 ? split_long_segment(piece) : piece }
  end
rescue StandardError => error
  EventStream.emit(
    type: "memory_segmentation_llm_error",
    error: "#{error.class}: #{error.message}",
  )
  heuristic_segment(text)
end
114
+
115
# Paragraph-based segmentation with merging and splitting.
#
# Paragraphs (separated by blank lines) are greedily merged into the
# current segment while the combined length stays within 250 words; a
# paragraph that does not fit starts a new segment. Segments that still
# exceed 250 words are chunked by split_long_segment.
#
# @param text [String] Turn text
# @return [Array<String>] Segments
def heuristic_segment(text)
  paragraphs = text.split(/\n\n+/).map(&:strip).reject(&:empty?)

  merged = paragraphs.each_with_object([]) do |paragraph, acc|
    if acc.any? && word_count(acc.last) + word_count(paragraph) <= 250
      # Still fits: extend the segment being built
      acc.last << "\n\n" << paragraph
    else
      # First paragraph, or the current segment is full: start a new one
      acc << paragraph.dup
    end
  end

  merged.flat_map { |segment| word_count(segment) > 250 ? split_long_segment(segment) : segment }
end
142
+
143
# Build a memory card from one text segment and persist it.
#
# The card's type is inferred from the text and its importance is derived
# from that type (see importance_for_type). Entities and the embedding are
# computed from the same text. New cards start with a dwell of 0.0.
#
# @param text [String] Card text
# @param turn_id [String] Source turn ID
# @return [Card] Created and persisted card
def create_card(text:, turn_id:)
  inferred_type = infer_type(text)

  Card.new(
    text: text,
    type: inferred_type,
    entities: extract_entities(text),
    source_turn_ids: [turn_id],
    embedding: @embedder.embed(text),
    importance: importance_for_type(inferred_type),
    dwell: 0.0,
  ).tap { |card| @adapter.write_card(card) }
end
173
+
174
# Map a card type to its importance score.
#
# Constraints and decisions rank highest (0.8) because violating or
# forgetting them has serious consequences. Preferences and incidents are
# medium-high (0.7) because they inform future decisions. Concepts score
# 0.6; every other type, including :fact, gets the 0.5 default.
#
# @param type [Symbol] Card type
# @return [Float] Importance score (0.0-1.0)
def importance_for_type(type)
  scores = {
    constraint: 0.8,
    decision: 0.8,
    preference: 0.7,
    incident: 0.7,
    concept: 0.6,
  }
  scores.fetch(type, 0.5)
end
192
+
193
# Extract candidate entity names from text.
#
# Simple heuristic with two kinds of match:
# - the single word that directly follows sentence-ending punctuation
#   (".", "!" or "?") and one whitespace character
# - runs of whitespace-separated capitalized words, e.g. "Ruby On Rails"
#
# A capitalized word may continue with any word characters, so acronyms and
# mixed-case technical terms ("JWT", "OAuth", "PostgreSQL") are captured;
# requiring lowercase-only tails would silently drop them.
#
# Duplicates and single-character matches are discarded, and at most the
# first 10 entities are kept.
#
# @param text [String] Text to extract from
# @return [Array<String>] Extracted entities (at most 10, in order of appearance)
def extract_entities(text)
  candidates = text.scan(/(?<=[.!?]\s)\w+|\b[A-Z]\w*(?:\s+[A-Z]\w*)*\b/)
  candidates.uniq.reject { |candidate| candidate.length < 2 }.take(10)
end
204
+
205
# Classify a card by keyword spotting.
#
# The text is lowercased and checked against keyword groups in priority
# order; the first group with any keyword appearing as a substring decides
# the type. Text matching no group is a :fact.
#
# @param text [String] Card text
# @return [Symbol] Card type
def infer_type(text)
  haystack = text.downcase

  # Ordered by priority: earlier rules win when several match
  rules = [
    [:decision, ["decided", "decision", "chose"]],
    [:constraint, ["must", "always", "never", "constraint"]],
    [:preference, ["prefer", "like", "want"]],
    [:incident, ["error", "bug", "crash", "incident"]],
    [:concept, ["concept", "pattern", "architecture"]],
  ]

  hit = rules.find { |_type, keywords| keywords.any? { |keyword| haystack.include?(keyword) } }
  hit ? hit.first : :fact
end
225
+
226
# Link consecutive cards that came from the same turn.
#
# Every adjacent pair gets a :same_episode edge (weight 0.8). Pairs that
# also share entities (exact string match) get an additional :same_entity
# edge weighted 0.5 plus 0.1 per shared entity, capped at 1.0.
#
# @param cards [Array<Card>] Cards from the same turn
# @return [void]
def create_turn_edges(cards)
  cards.each_cons(2) do |earlier, later|
    # Writes one edge from the earlier card to the later one
    link = lambda do |kind, strength|
      @adapter.write_edge(
        Edge.new(from_id: earlier.id, to_id: later.id, type: kind, weight: strength),
      )
    end

    link.call(:same_episode, 0.8)

    overlap = earlier.entities & later.entities
    link.call(:same_entity, [0.5 + (overlap.size * 0.1), 1.0].min) unless overlap.empty?
  end
end
253
+
254
# Connect newly created cards to existing cards that mention the same entities.
#
# Only the most recent existing cards (up to the configured
# cross_turn_edge_scan_limit) are scanned, which bounds the pairwise
# comparison on large stores. Entity names are compared case-insensitively.
# Each match produces a :same_entity edge from the new card to the existing
# one, weighted 0.4 plus 0.1 per shared entity, capped at 1.0.
#
# @param new_cards [Array<Card>] Newly created cards
# @return [void]
def create_cross_turn_entity_edges(new_cards)
  return if new_cards.empty?

  fresh_ids = new_cards.map(&:id).to_set
  older_cards = @adapter.list_cards.reject { |card| fresh_ids.include?(card.id) }
  return if older_cards.empty?

  scan_pool = older_cards.last(@config.cross_turn_edge_scan_limit)

  new_cards.each do |fresh|
    next if fresh.entities.empty?

    wanted = fresh.entities.map(&:downcase).to_set

    scan_pool.each do |known|
      # A card without entities simply counts zero overlaps
      overlap = known.entities.count { |entity| wanted.include?(entity.downcase) }
      next if overlap.zero?

      @adapter.write_edge(
        Edge.new(
          from_id: fresh.id,
          to_id: known.id,
          type: :same_entity,
          weight: [0.4 + (overlap * 0.1), 1.0].min,
        ),
      )
    end
  end
end
293
+
294
# Place each card into the best matching cluster, creating clusters as needed.
#
# Runs inside a single adapter transaction. A card with no sufficiently
# similar cluster seeds a new one (titled after its first entity, or its
# capitalized type when it has none). Clusters created here are visible to
# the cards processed after them. Either way, update_cluster_for_card is
# applied before the cluster is written, which is where decision cards get
# recorded in the cluster's decision_log.
#
# @param cards [Array<Card>] Cards to assign
# @return [void]
def assign_to_clusters(cards)
  @adapter.transaction do
    known_clusters = @adapter.list_clusters

    cards.each do |card|
      target = find_best_cluster(card, known_clusters)

      if target
        target.add_card(card.id)
      else
        # No match: seed a new cluster from this card
        target = Cluster.new(
          title: card.entities.first || card.type.to_s.capitalize,
          card_ids: [card.id],
          key_entities: card.entities.take(5),
          embedding: card.embedding,
        )
        known_clusters << target
      end

      update_cluster_for_card(target, card)
      @adapter.write_cluster(target)
    end
  end
end
327
+
328
# Fold a newly added card's information into its cluster.
#
# For decision cards, the first 200 characters of the text are appended to
# the cluster's decision_log unless that exact entry is already present.
# Up to five of the card's entities not yet in key_entities are appended.
#
# @param cluster [Cluster] Cluster to update (mutated in place)
# @param card [Card] Card being added
# @return [void]
def update_cluster_for_card(cluster, card)
  if card.type == :decision
    entry = card.text[0, 200]
    log = cluster.decision_log
    log.push(entry) unless log.include?(entry)
  end

  unseen = card.entities - cluster.key_entities
  cluster.key_entities.concat(unseen.first(5)) unless unseen.empty?
end
347
+
348
# Pick the cluster whose embedding is most similar to the card's.
#
# Similarity is delegated to the adapter so storage backends can compute
# it themselves. A cluster must score strictly above 0.3 to qualify;
# clusters without an embedding are skipped, and on ties the earliest
# cluster wins.
#
# @param card [Card] Card to match
# @param clusters [Array<Cluster>] Available clusters
# @return [Cluster, nil] Best cluster, or nil when none qualifies
def find_best_cluster(card, clusters)
  query = card.embedding
  return nil if clusters.empty? || query.nil?

  # Fold over the clusters carrying [current winner, score to beat]
  winner, _score = clusters.reduce([nil, 0.3]) do |(leader, bar), candidate|
    vector = candidate.embedding
    next [leader, bar] unless vector

    similarity = @adapter.similarity(query, vector)
    similarity > bar ? [candidate, similarity] : [leader, bar]
  end

  winner
end
374
+
375
# Count whitespace-separated words.
#
# Uses String#split without a pattern so leading whitespace does not
# produce an empty first field (which a /\s+/ split would count as a word).
#
# @param text [String]
# @return [Integer] Number of words; 0 for empty or whitespace-only text
def word_count(text)
  text.split.size
end
380
+
381
# Split a long segment into chunks of at most max_words words.
#
# Words are whitespace-separated and re-joined with single spaces, so the
# original line breaks and spacing are not preserved. String#split without
# a pattern is used so leading whitespace does not create an empty "word"
# that would take a slot in the first chunk and leave it with a leading space.
#
# @param text [String] Long text
# @param max_words [Integer] Maximum words per chunk (must be positive;
#   Enumerable#each_slice raises ArgumentError otherwise)
# @return [Array<String>] Chunks; empty when the text has no words
def split_long_segment(text, max_words: 250)
  text.split.each_slice(max_words).map { |words| words.join(" ") }
end
391
+ end
392
+ end
393
+ end
394
+ end