lex-coldstart 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0fb6a1e08b7bb5ba42d3ef2c62f2e4c4d90f8f9d3c2cf4891fdffda7f55af3cd
4
+ data.tar.gz: cb3d573d7657a27333bbed0b5850b429b20c9f8f8a62a64318f585e311a2917a
5
+ SHA512:
6
+ metadata.gz: cb3d44fa4365e62518583c7e0ac42848d999e7ea40419a378b3372029da2835bfae7a048c0925b3363f16941488163d21ad736815362e9b6ed46239acbaca2ff
7
+ data.tar.gz: 9ff4f0933b47c8111502cf0c7450cdf25b9ca91c69dd4da2fd6a997e79e662d49ef502202e8a0c243bcb8223ce86b611ee36ef5c296cc56fbddb7195f786babb
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ gemspec
6
+
7
+ gem 'rspec', '~> 3.13'
8
+ gem 'rubocop', '~> 1.75', require: false
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/legion/extensions/coldstart/version'
4
+
5
# Gem specification for lex-coldstart: identity, registry metadata, the file
# list that gets packaged, and development-only dependencies.
Gem::Specification.new do |spec|
  # --- identity ---------------------------------------------------------
  spec.name                  = 'lex-coldstart'
  spec.version               = Legion::Extensions::Coldstart::VERSION
  spec.authors               = ['Esity']
  spec.email                 = ['matthewdiverson@gmail.com']

  # --- description ------------------------------------------------------
  spec.summary               = 'LEX Coldstart'
  spec.description           = 'Imprint window and bootstrap calibration for brain-modeled agentic AI'
  spec.homepage              = 'https://github.com/LegionIO/lex-coldstart'
  spec.license               = 'MIT'
  spec.required_ruby_version = '>= 3.4'

  # --- registry metadata ------------------------------------------------
  spec.metadata['homepage_uri']          = spec.homepage
  spec.metadata['source_code_uri']       = 'https://github.com/LegionIO/lex-coldstart'
  spec.metadata['documentation_uri']     = 'https://github.com/LegionIO/lex-coldstart'
  spec.metadata['changelog_uri']         = 'https://github.com/LegionIO/lex-coldstart'
  spec.metadata['bug_tracker_uri']       = 'https://github.com/LegionIO/lex-coldstart/issues'
  spec.metadata['rubygems_mfa_required'] = 'true'

  # --- packaged files ---------------------------------------------------
  # Globbed relative to the directory holding this gemspec so the list does
  # not depend on the caller's working directory.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    packaged = Dir.glob('{lib,spec}/**/*')
    packaged + %w[lex-coldstart.gemspec Gemfile]
  end
  spec.require_paths = ['lib']

  # --- development dependencies -----------------------------------------
  spec.add_development_dependency 'sequel', '~> 5.70'
  spec.add_development_dependency 'sqlite3', '~> 2.0'
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/actors/once'
4
+
5
module Legion
  module Extensions
    module Coldstart
      module Actor
        # Actor built on Legion::Extensions::Actors::Once that points at the
        # coldstart runner's +begin_imprint+ function. The three predicate
        # hooks below all answer +false+; how the base class interprets them
        # is defined by the actor framework, not in this file.
        class Imprint < Legion::Extensions::Actors::Once
          # @return [Module] the runner module that holds the target function
          def runner_class = Legion::Extensions::Coldstart::Runners::Coldstart

          # @return [String] name of the runner method to invoke
          def runner_function = 'begin_imprint'

          # @return [false]
          def use_runner? = false

          # @return [false]
          def check_subtask? = false

          # @return [false]
          def generate_task? = false
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/coldstart/helpers/imprint'
4
+ require 'legion/extensions/coldstart/helpers/bootstrap'
5
+ require 'legion/extensions/coldstart/runners/coldstart'
6
+
7
module Legion
  module Extensions
    module Coldstart
      # Plain-object entry point: mixes in the coldstart runner methods and
      # supplies them with a Bootstrap instance created at construction time.
      class Client
        include Runners::Coldstart

        # Builds the client. Any keyword arguments are accepted and ignored.
        def initialize(**)
          @bootstrap = Helpers::Bootstrap.new
        end

        # The runner methods reach state through this private reader.
        private attr_reader :bootstrap
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Coldstart
      module Helpers
        # Holds the cold-start bootstrap state (imprint start time, observation
        # count, firmware flag, calibration state) and mirrors it to row id 1
        # of the +bootstrap_state+ table when Legion::Data::Local is defined
        # and connected. Without that store the state lives in memory only.
        class Bootstrap
          attr_reader :started_at, :observation_count, :firmware_loaded, :calibration_state

          # Starts from defaults, then overlays any persisted state.
          def initialize
            @started_at = nil
            @observation_count = 0
            @firmware_loaded = false
            @calibration_state = :not_started
            load_from_local
          end

          # Opens the imprint window and marks the state as +:imprinting+.
          #
          # Idempotent: when a start time is already present (for example one
          # restored by load_from_local) nothing is changed. Previously every
          # call overwrote the start time and calibration state, so a restored
          # window was restarted and calibration progress regressed.
          #
          # @return [Object, nil] result of the persistence attempt, or nil
          #   when the window had already been started
          def begin_imprint
            return if @started_at

            @started_at = Time.now.utc
            @calibration_state = :imprinting
            save_to_local
          end

          # Flags firmware as loaded and persists the change.
          def load_firmware
            @firmware_loaded = true
            save_to_local
          end

          # Counts one observation, re-evaluates calibration, and persists.
          def record_observation
            @observation_count += 1
            check_calibration_progress
            save_to_local
          end

          # @return [Boolean] whether the imprint window is currently open
          def imprint_active?
            Imprint.imprint_active?(@started_at)
          end

          # @return [Symbol] +:firmware+ until firmware has been loaded,
          #   otherwise whatever Imprint.current_layer reports
          def current_layer
            return :firmware unless @firmware_loaded

            Imprint.current_layer(@started_at, observations: @observation_count)
          end

          # @return [Hash] snapshot with keys :firmware_loaded, :imprint_active,
          #   :imprint_progress, :observation_count, :calibration_state and
          #   :current_layer
          def progress
            {
              firmware_loaded: @firmware_loaded,
              imprint_active: imprint_active?,
              imprint_progress: Imprint.imprint_progress(@started_at),
              observation_count: @observation_count,
              calibration_state: @calibration_state,
              current_layer: current_layer
            }
          end

          private

          # Once the observation baseline is reached the state becomes
          # +:baseline_established+ while the window is open and +:calibrated+
          # once it is not. Below the baseline the state is left untouched.
          def check_calibration_progress
            return if @observation_count < Imprint::IMPRINT_ENTROPY_BASELINE

            @calibration_state = imprint_active? ? :baseline_established : :calibrated
          end

          # Best-effort upsert of the current state into row id 1. Failures are
          # logged as warnings and never raised to the caller.
          def save_to_local
            return unless defined?(Legion::Data::Local) && Legion::Data::Local.connected?

            ds = Legion::Data::Local.connection[:bootstrap_state]
            row = {
              started_at_i: @started_at&.to_i,
              observation_count: @observation_count,
              firmware_loaded: @firmware_loaded,
              calibration_state: @calibration_state.to_s
            }
            if ds.where(id: 1).any?
              ds.where(id: 1).update(row)
            else
              ds.insert(row.merge(id: 1))
            end
          rescue StandardError => e
            Legion::Logging.warn "lex-coldstart: save_to_local failed: #{e.message}"
          end

          # Best-effort restore from row id 1. All values are derived before
          # any instance variable is assigned, so a malformed row cannot leave
          # the object half-loaded. A NULL or blank calibration_state falls
          # back to +:not_started+ instead of raising.
          def load_from_local
            return unless defined?(Legion::Data::Local) && Legion::Data::Local.connected?

            row = Legion::Data::Local.connection[:bootstrap_state].where(id: 1).first
            return unless row

            started = row[:started_at_i] ? Time.at(row[:started_at_i]).utc : nil
            count = row[:observation_count].to_i
            # Accept both a real boolean and the integer 1 for the flag.
            firmware = [true, 1].include?(row[:firmware_loaded])
            state_name = row[:calibration_state].to_s
            state = state_name.empty? ? :not_started : state_name.to_sym

            @started_at = started
            @observation_count = count
            @firmware_loaded = firmware
            @calibration_state = state
          rescue StandardError => e
            Legion::Logging.warn "lex-coldstart: load_from_local failed: #{e.message}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,231 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Coldstart
      module Helpers
        # Converts CLAUDE.md / MEMORY.md style markdown into trace candidate
        # hashes. Every method is a stateless module function.
        module ClaudeParser
          module_function

          # Heading pattern => trace type. classify_section walks this in
          # insertion order and returns on the first match, so earlier
          # entries take precedence over later ones.
          SECTION_TYPE_MAP = {
            /\bhard rules\b/i => :firmware,
            /\bidentity auth\b/i => :identity,
            /\barchitecture\b/i => :semantic,
            /\bkey concepts?\b|terminology/i => :semantic,
            /\bproject structure\b/i => :semantic,
            /\bdigital worker\b/i => :semantic,
            /\bgotcha|caveat|pitfall|known issue/i => :procedural,
            /\bcli\b|command|usage/i => :procedural,
            /\bapi\b|routes?\b|endpoint/i => :procedural,
            /\bmcp\b/i => :procedural,
            /\bconfig|settings?\b|scaffold/i => :procedural,
            /\bskills?\b/i => :procedural,
            /\bdevelopment|conventions?\b|workflow/i => :procedural,
            /\brubocop|lint/i => :procedural,
            /\bdependenc/i => :semantic,
            /\bfile (map|structure)\b/i => :semantic,
            /\bwhat is\b/i => :semantic,
            /\bpurpose\b/i => :semantic,
            /\bstatus|stub|todo\b/i => :semantic,
            /\bagentic\b/i => :semantic,
            /\bjwt\b|auth|crypt|secur/i => :procedural,
            /\bsinatra\b|rest\b/i => :procedural,
            /\btransport|rabbitmq|amqp/i => :semantic
          }.freeze

          # Trace type used when no SECTION_TYPE_MAP pattern matches.
          DEFAULT_TRACE_TYPE = :semantic

          # Heading pattern => { valence:, intensity: }. First match wins,
          # same as SECTION_TYPE_MAP.
          SECTION_VALENCE_MAP = {
            /\bgotcha|caveat|pitfall|known issue|error|stub|todo\b/i => { valence: -0.4, intensity: 0.5 },
            /\bhard rules?\b/i => { valence: 0.0, intensity: 0.8 },
            /\barchitecture\b|design|key concepts?\b/i => { valence: 0.3, intensity: 0.4 },
            /\bdependenc|integration|requirements?\b/i => { valence: 0.1, intensity: 0.3 }
          }.freeze

          # Valence used when no SECTION_VALENCE_MAP pattern matches.
          DEFAULT_VALENCE = { valence: 0.1, intensity: 0.3 }.freeze

          # Parse a markdown file into an array of trace candidates.
          # Each candidate is a Hash ready for lex-memory's store_trace.
          #
          # The section-level values (trace type, valence, base tags) are
          # computed once per section rather than once per item.
          #
          # @param file_path [String] path of the markdown file to read
          # @return [Array<Hash>] hashes with keys :trace_type,
          #   :content_payload, :domain_tags, :origin, :confidence,
          #   :emotional_valence, :emotional_intensity, :source_file
          # @raise [SystemCallError] when the file cannot be read
          def parse_file(file_path)
            content = File.read(file_path)
            file_type = detect_file_type(file_path)
            origin = file_type == :memory ? :firmware : :direct_experience
            file_tags = [file_type.to_s, File.basename(file_path), extract_dir_context(file_path)].compact

            traces = split_sections(content).flat_map do |section|
              trace_type = classify_section(section[:heading])
              firmware = trace_type == :firmware
              section_valence = classify_valence(section[:heading])
              base_tags = file_tags.dup
              base_tags << section[:heading_slug] if section[:heading_slug]

              extract_items(section[:body]).map do |item|
                {
                  trace_type: trace_type,
                  content_payload: item.strip,
                  domain_tags: (base_tags + extract_inline_tags(item)).uniq,
                  origin: origin,
                  # Firmware traces are fully trusted and emotionally neutral.
                  confidence: firmware ? 1.0 : 0.7,
                  emotional_valence: firmware ? 0.0 : section_valence[:valence],
                  emotional_intensity: section_valence[:intensity],
                  source_file: file_path
                }
              end
            end

            traces.reject { |t| t[:content_payload].empty? }
          end

          # Parse all matching markdown files under a directory, skipping
          # any path that skip_path? rejects.
          #
          # @param dir_path [String] root directory for the glob
          # @param pattern [String] glob relative to +dir_path+
          # @return [Array<Hash>] trace candidates from every parsed file
          def parse_directory(dir_path, pattern: '**/{CLAUDE,MEMORY}.md')
            Dir.glob(File.join(dir_path, pattern)).flat_map do |path|
              next [] if skip_path?(path)

              parse_file(path)
            end
          end

          # Detect whether a file is a MEMORY.md or CLAUDE.md by its
          # (case-insensitive) basename. "memory" is checked first.
          #
          # @return [Symbol] :memory, :claude_md or :markdown
          def detect_file_type(file_path)
            basename = File.basename(file_path).downcase
            if basename.include?('memory') && basename.end_with?('.md')
              :memory
            elsif basename.include?('claude') && basename.end_with?('.md')
              :claude_md
            else
              :markdown
            end
          end

          # Split markdown content into sections by ## headers. Only lines
          # that start with exactly "##" followed by whitespace open a new
          # section. Text before the first such header is collected under
          # the "preamble" heading. Sections with a blank body are dropped.
          #
          # @return [Array<Hash>] hashes with keys :heading, :heading_slug, :body
          def split_sections(content)
            sections = []
            current = { heading: 'preamble', heading_slug: 'preamble', body: String.new }

            content.each_line do |line|
              if line.match?(/\A##\s+/)
                sections << current unless current[:body].strip.empty?
                heading = line.sub(/\A##\s+/, '').strip
                current = {
                  heading: heading,
                  heading_slug: slugify(heading),
                  body: String.new
                }
              else
                current[:body] << line
              end
            end
            sections << current unless current[:body].strip.empty?
            sections
          end

          # Extract individual items from a section body.
          #
          # * A bullet ("-" or "*") starts a new item.
          # * A bullet indented by two or more characters AND deeper than the
          #   bullet that opened the current item is a sub-bullet; it is
          #   appended to its parent (marker included). The previous
          #   implementation tested the generic bullet pattern first, which
          #   also matches indented bullets, so this merge could never happen.
          # * A non-bullet line indented by two or more characters continues
          #   the current item.
          # * Consecutive plain lines form one paragraph item; a blank line
          #   ends the current item.
          # * A fenced code block (``` at column 0) is kept as a single item,
          #   fences included.
          #
          # @param body [String] section body text
          # @return [Array<String>] stripped, non-empty items in document order
          def extract_items(body) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
            items = []
            current_item = nil
            bullet_indent = nil # indent width of the bullet that opened current_item
            in_code_block = false

            body.each_line do |line|
              if line.match?(/\A```/)
                if in_code_block
                  current_item << line
                  items << current_item
                  current_item = nil
                  in_code_block = false
                else
                  items << current_item if current_item && !current_item.strip.empty?
                  current_item = line.dup
                  in_code_block = true
                end
                bullet_indent = nil
                next
              end

              if in_code_block
                current_item << line
                next
              end

              bullet = line.match(/\A(\s*)[-*]\s+/)
              if bullet
                indent = bullet[1].length
                nested = current_item && !current_item.empty? && bullet_indent &&
                         indent >= 2 && indent > bullet_indent
                if nested
                  current_item = "#{current_item} #{line.strip}"
                else
                  items << current_item if current_item && !current_item.strip.empty?
                  current_item = bullet.post_match.strip
                  bullet_indent = indent
                end
              elsif current_item && line.match?(/\A\s{2,}\S/)
                # Indented continuation of the current bullet or paragraph
                current_item = "#{current_item} #{line.strip}"
              elsif line.strip.empty?
                items << current_item if current_item && !current_item.strip.empty?
                current_item = nil
                bullet_indent = nil
              elsif current_item
                current_item += " #{line.strip}"
              else
                current_item = line.strip
                bullet_indent = nil
              end
            end
            items << current_item if current_item && !current_item.strip.empty?

            items.compact.map(&:strip).reject(&:empty?)
          end

          # Extract backtick-quoted terms and bold terms as domain tags.
          # Characters outside [a-zA-Z0-9_.-] become "_", tags are
          # lower-cased, and tags shorter than 2 or longer than 60
          # characters are discarded.
          #
          # @return [Array<String>] unique tags, backtick terms first
          def extract_inline_tags(text)
            tags = []
            text.scan(/`([^`]+)`/) { |m| tags << m[0] }
            text.scan(/\*\*([^*]+)\*\*/) { |m| tags << m[0] }
            tags.map { |t| t.gsub(/[^a-zA-Z0-9_\-.]/, '_').downcase }
                .reject { |t| t.length > 60 || t.length < 2 }
                .uniq
          end

          # Classify a section heading into a trace type.
          #
          # @return [Symbol] type of the first matching SECTION_TYPE_MAP
          #   pattern, or DEFAULT_TRACE_TYPE
          def classify_section(heading)
            SECTION_TYPE_MAP.each do |pattern, type|
              return type if heading.match?(pattern)
            end
            DEFAULT_TRACE_TYPE
          end

          # Classify a section heading into an emotional valence hash.
          #
          # @return [Hash] { valence:, intensity: } of the first matching
          #   SECTION_VALENCE_MAP pattern, or DEFAULT_VALENCE
          def classify_valence(heading)
            SECTION_VALENCE_MAP.each do |pattern, vals|
              return vals if heading.match?(pattern)
            end
            DEFAULT_VALENCE
          end

          # Extract directory context for domain tagging (e.g., "lex-coldstart", "LegionIO").
          # Picks the deepest path component starting with "lex-", "legion-",
          # "Legion" or "extensions" (the file's own basename excluded) and
          # falls back to the immediate parent directory.
          #
          # @return [String, nil] nil when the path has no parent component
          def extract_dir_context(file_path)
            parts = file_path.split('/')
            # Find the most meaningful directory name
            meaningful = parts.reverse.find do |p|
              p.match?(/\A(lex-|legion-|Legion|extensions)/) && p != File.basename(file_path)
            end
            meaningful || parts[-2]
          end

          # Lower-case +text+, collapse every run of non [a-z0-9] characters
          # into a single "-", and trim a leading/trailing "-".
          def slugify(text)
            text.downcase.gsub(/[^a-z0-9]+/, '-').gsub(/\A-|-\z/, '')
          end

          # @return [Boolean] true when +path+ contains one of the excluded
          #   directory fragments
          def skip_path?(path)
            path.include?('/_deprecated/') ||
              path.include?('/_ignored/') ||
              path.include?('/z_other/') ||
              path.include?('_working/') ||
              path.include?('/test/') ||
              path.include?('/.terraform/') ||
              path.include?('/references/')
          end
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Coldstart
      module Helpers
        # Constants and stateless helpers describing the imprint window.
        module Imprint
          # Three learning layers (spec: cold-start-spec.md)
          LAYERS = %i[firmware imprint_window continuous_learning].freeze

          # Imprint window parameters
          IMPRINT_DURATION = 7 * 86_400 # 7 days
          IMPRINT_MULTIPLIER = 3.0 # consolidation rate multiplier during imprint
          IMPRINT_CONSENT_TIER = :consult # conservative consent during imprint
          IMPRINT_ENTROPY_BASELINE = 50 # minimum observations before entropy is meaningful

          # Self-play bootstrap parameters
          SELF_PLAY_ITERATIONS = 100
          BOOTSTRAP_TRACE_TYPES = %i[identity semantic procedural].freeze

          module_function

          # @param started_at [Time, nil] when the imprint window opened
          # @return [Boolean] true while fewer than IMPRINT_DURATION seconds
          #   have elapsed since +started_at+; false when it is nil
          def imprint_active?(started_at)
            return false unless started_at

            (Time.now.utc - started_at) < IMPRINT_DURATION
          end

          # Fraction of the imprint window that has elapsed.
          #
          # @param started_at [Time, nil] when the imprint window opened
          # @return [Float] value in 0.0..1.0; 1.0 when +started_at+ is nil.
          #   A start time in the future (e.g. clock skew) reports 0.0
          #   rather than a negative fraction.
          def imprint_progress(started_at)
            return 1.0 unless started_at

            elapsed = Time.now.utc - started_at
            (elapsed / IMPRINT_DURATION.to_f).clamp(0.0, 1.0)
          end

          # Layer in effect for a given start time.
          #
          # The earlier condition had the form "(below_baseline && active) ||
          # active", which is logically just "active" and read the clock
          # twice. +observations+ therefore never influenced the result; it
          # is still accepted so existing callers keep working.
          # NOTE(review): if the observation baseline was meant to extend the
          # window, that rule needs to be specified -- confirm against the spec.
          #
          # @param started_at [Time, nil] when the imprint window opened
          # @param observations [Integer] observation count (currently unused)
          # @return [Symbol] :imprint_window while the window is active,
          #   otherwise :continuous_learning
          def current_layer(started_at, observations:) # rubocop:disable Lint/UnusedMethodArgument
            imprint_active?(started_at) ? :imprint_window : :continuous_learning
          end
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
# Creates the single-table store for persisted bootstrap state.
# Written as a reversible +change+ migration.
Sequel.migration do
  change do
    create_table(:bootstrap_state) do
      primary_key :id
      # Imprint start time as an integer; nullable (no default given).
      column :started_at_i, Integer
      column :observation_count, Integer, default: 0
      column :firmware_loaded, TrueClass, default: false
      column :calibration_state, String, default: 'not_started'
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Coldstart
      module Runners
        # Runner functions for the cold-start lifecycle. Each public method
        # accepts and ignores arbitrary keyword arguments, logs what it did
        # through Legion::Logging, and returns a Hash.
        module Coldstart
          # Pull in the shared Lex helpers only when the host has them loaded.
          if Legion::Extensions.const_defined?(:Helpers) && Legion::Extensions::Helpers.const_defined?(:Lex)
            include Legion::Extensions::Helpers::Lex
          end

          # Marks firmware as loaded, then begins the imprint window.
          #
          # @return [Hash] :started (always true), :imprint_duration,
          #   :multiplier, :consent_tier
          def begin_imprint(**)
            bootstrap.load_firmware
            bootstrap.begin_imprint

            dur = Helpers::Imprint::IMPRINT_DURATION
            mul = Helpers::Imprint::IMPRINT_MULTIPLIER
            tier = Helpers::Imprint::IMPRINT_CONSENT_TIER
            Legion::Logging.info "[coldstart] imprint begun: duration=#{dur}s multiplier=#{mul}x consent=#{tier}"

            { started: true, imprint_duration: dur, multiplier: mul, consent_tier: tier }
          end

          # Records one observation on the bootstrap state.
          #
          # @return [Hash] :observation_count, :calibration_state, :current_layer
          def record_observation(**)
            bootstrap.record_observation
            Legion::Logging.debug "[coldstart] observation: count=#{bootstrap.observation_count} " \
                                  "calibration=#{bootstrap.calibration_state} layer=#{bootstrap.current_layer}"

            result = {}
            result[:observation_count] = bootstrap.observation_count
            result[:calibration_state] = bootstrap.calibration_state
            result[:current_layer] = bootstrap.current_layer
            result
          end

          # @return [Hash] the bootstrap progress snapshot
          def coldstart_progress(**)
            bootstrap.progress.tap do |progress|
              Legion::Logging.debug "[coldstart] progress: #{progress.inspect}"
            end
          end

          # @return [Hash] { active: Boolean }
          def imprint_active?(**) # rubocop:disable Naming/PredicateMethod
            active = bootstrap.imprint_active?
            Legion::Logging.debug "[coldstart] imprint_active?=#{active}"
            { active: active }
          end

          # Reports the imprint multiplier while the window is active and
          # 1.0 otherwise.
          #
          # @return [Hash] { multiplier: Float, imprint_active: Boolean }
          def current_multiplier(**)
            active = bootstrap.imprint_active?
            multiplier =
              if active
                Helpers::Imprint::IMPRINT_MULTIPLIER
              else
                1.0
              end
            Legion::Logging.debug "[coldstart] multiplier=#{multiplier} imprint_active=#{active}"
            { multiplier: multiplier, imprint_active: active }
          end

          private

          # Lazily created state holder; a host class may supply its own
          # by setting @bootstrap or overriding this reader.
          def bootstrap
            @bootstrap ||= Helpers::Bootstrap.new
          end
        end
      end
    end
  end
end