woods 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +169 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +15 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +3 -4
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +737 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +1 -1
  102. data/lib/woods/unblocked/document_builder.rb +35 -10
  103. data/lib/woods/unblocked/exporter.rb +1 -1
  104. data/lib/woods/util/host_guard.rb +61 -0
  105. data/lib/woods/version.rb +1 -1
  106. data/lib/woods.rb +126 -6
  107. metadata +69 -4
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+ require 'tempfile'
5
+ require 'time'
6
+ require 'msgpack'
7
+ require 'woods/version'
8
+ require 'woods/storage/metadata_store'
9
+ require 'woods/mcp/errors'
10
+
11
module Woods
  module Storage
    module Snapshotter
      # Reads and writes the metadata store snapshot (+metadata.msgpack+).
      #
      # MessagePack is chosen over +pack("e*")+ because metadata is heterogeneous
      # hash-shaped data — type tags matter here. The vector format uses packed
      # float32 for dense numeric data; metadata uses MessagePack for everything else.
      #
      # == On-disk format
      #
      # A stream of MessagePack-packed objects in a single file:
      #
      # 1. Header hash (one MessagePack object):
      #    { "magic" => "WMD1", "schema_version" => 1, "record_count" => N,
      #      "gem_version" => "1.2.0", "created_at" => "2026-04-23T03:42:17Z" }
      #
      # 2. One hash per record, streamed directly after the header:
      #    { "id" => "PostsController", "metadata" => { ... } }
      #
      # Stream-written via +MessagePack::Packer+ to avoid loading all records into
      # memory at once. Stream-read via +MessagePack::Unpacker+ on load. Written
      # atomically via +Tempfile+ + +File.rename+.
      #
      # @see Snapshotter::Vector companion class for vector stores
      module Metadata
        # Magic string identifying a valid Woods Metadata Dump file.
        MAGIC = 'WMD1'

        # Current schema version written by this implementation.
        SCHEMA_VERSION = 1

        # Maximum schema version this code can read. A dump with a higher version
        # raises {Woods::MCP::UnsupportedArtifact} rather than silently misreading data.
        MAX_SUPPORTED_SCHEMA_VERSION = 1

        # Filename written inside the dump directory.
        FILENAME = 'metadata.msgpack'

        # Load a metadata store from the latest dump in +artifact+, or return an
        # empty store if no dump exists yet.
        #
        # Never raises for a missing dump — callers that need an empty store on
        # first run get one without special-casing.
        #
        # @param artifact [Woods::IndexArtifact] the artifact layout object
        # @param resolved_config [Object, nil] reserved for future validation
        # @return [Woods::Storage::MetadataStore::InMemory]
        # @raise [Woods::MCP::UnsupportedArtifact] if magic is wrong, schema_version
        #   is missing/malformed, or schema_version exceeds {MAX_SUPPORTED_SCHEMA_VERSION}
        def self.load_or_empty(artifact, resolved_config: nil) # rubocop:disable Lint/UnusedMethodArgument
          dump_path = dump_file_path(artifact)
          return MetadataStore::InMemory.new unless dump_path&.exist?

          store = MetadataStore::InMemory.new
          File.open(dump_path.to_s, 'rb') do |io|
            unpacker = MessagePack::Unpacker.new(io)
            header = unpacker.read
            validate_header!(header, dump_path)
            header['record_count'].times do
              record = unpacker.read
              store.store(record['id'], record['metadata'])
            end
          end
          store
        end

        # Write the metadata store to +dump_dir/metadata.msgpack+ atomically.
        #
        # Streams header then one packed hash per record — no full in-memory copy
        # of the record set. Uses +Tempfile+ + +File.rename+ for atomicity.
        #
        # @param store [#each_entry, #bulk_load] an in-memory MetadataStore adapter
        # @param artifact [Woods::IndexArtifact] the artifact layout object
        # @param dump_dir [Pathname, String] target directory; must be under +artifact.dumps_root+
        # @param resolved_config [Object, nil] reserved for future use
        # @return [void]
        # @raise [Woods::Storage::InapplicableBackend] if +store+ lacks +#each_entry+ or +#bulk_load+
        # @raise [ArgumentError] if +dump_dir+ is not under +artifact.dumps_root+
        def self.dump(store, artifact, dump_dir, resolved_config: nil) # rubocop:disable Lint/UnusedMethodArgument
          validate_store!(store)
          validate_dump_dir!(artifact, dump_dir)
          target = Pathname.new(dump_dir.to_s).join(FILENAME)
          target.dirname.mkpath
          write_atomic(target, store)
          nil
        end

        class << self
          private

          # Path to the metadata file inside the latest dump directory, or nil
          # when the artifact has no dump yet.
          def dump_file_path(artifact)
            latest = artifact.latest_dump_path
            return nil unless latest

            latest.join(FILENAME)
          end

          # Validate the unpacked header hash. All failure modes raise the typed
          # {Woods::MCP::UnsupportedArtifact} so callers never see raw unpack noise.
          def validate_header!(header, path)
            unless header.is_a?(Hash) && header['magic'] == MAGIC
              raise Woods::MCP::UnsupportedArtifact,
                    "metadata.msgpack at #{path} has invalid magic " \
                    "(got #{header.is_a?(Hash) ? header['magic'].inspect : 'non-hash'}; " \
                    "expected #{MAGIC.inspect}). The file may be corrupt or from an incompatible tool."
            end

            version = header['schema_version']
            # A missing or non-integer schema_version previously crashed with
            # NoMethodError on nil (`version <= MAX...`); surface it as the same
            # typed error the other corrupt-header paths use.
            unless version.is_a?(Integer)
              raise Woods::MCP::UnsupportedArtifact,
                    "metadata.msgpack at #{path} has a missing or malformed schema_version " \
                    "(#{version.inspect}). The file may be corrupt or from an incompatible tool."
            end
            return if version <= MAX_SUPPORTED_SCHEMA_VERSION

            raise Woods::MCP::UnsupportedArtifact,
                  "metadata.msgpack at #{path} has schema_version #{version}; " \
                  "this gem supports up to #{MAX_SUPPORTED_SCHEMA_VERSION}. " \
                  'Upgrade the woods gem or re-run woods:embed to regenerate.'
          end

          # Write +store+ contents to +target+ atomically via a sibling Tempfile + rename.
          # Streams header then one record hash per entry.
          #
          # @param target [Pathname] final destination
          # @param store [#count, #each_entry] populated metadata store
          def write_atomic(target, store)
            tmp = Tempfile.new([FILENAME, '.tmp'], target.dirname.to_s)
            begin
              tmp.binmode
              packer = MessagePack::Packer.new(tmp)
              packer.write('magic' => MAGIC, 'schema_version' => SCHEMA_VERSION,
                           'record_count' => store.count, 'gem_version' => Woods::VERSION,
                           'created_at' => Time.now.utc.iso8601)
              store.each_entry { |id, metadata| packer.write('id' => id, 'metadata' => metadata) }
              packer.flush
              tmp.flush
              tmp.fsync # flush to disk before rename so a crash never exposes a partial file
              tmp.close
              File.rename(tmp.path, target.to_s)
            rescue StandardError
              tmp.close
              tmp.unlink
              raise
            end
          end

          # Only in-memory adapters (exposing #each_entry / #bulk_load) are dumpable;
          # durable backends must never be routed through the Snapshotter.
          def validate_store!(store)
            return if store.respond_to?(:each_entry) && store.respond_to?(:bulk_load)

            raise InapplicableBackend,
                  "backend #{store.class} is already durable — Snapshotter should not have been invoked"
          end

          # Reject dump targets outside the artifact's dumps root (path-traversal guard).
          def validate_dump_dir!(artifact, dump_dir)
            dump_path = Pathname.new(dump_dir.to_s).expand_path
            root = artifact.dumps_root.expand_path

            return if dump_path.to_s.start_with?("#{root}/") || dump_path == root

            raise ArgumentError,
                  "dump_dir #{dump_path} is not under artifact.dumps_root #{root}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,238 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+ require 'tempfile'
5
+ require 'woods/storage/vector_store'
6
+ require 'woods/mcp/errors'
7
+ require 'woods/version'
8
+
9
module Woods
  module Storage
    module Snapshotter
      # Reads and writes the +vectors.bin+ / +vectors.idx+ on-disk format.
      #
      # Binary layout of +vectors.bin+ (all integers little-endian):
      #
      #   offset  length   field
      #   0       4 bytes  magic "WVF1"
      #   4       4 bytes  schema_version (u32 LE)
      #   8       4 bytes  dimension (u32 LE)
      #   12      8 bytes  vector_count (u64 LE)
      #   20      4 bytes  gem_version_length (u32 LE)
      #   24      N bytes  gem_version (UTF-8)
      #   24+N    4 bytes  model_name_length (u32 LE)
      #   28+N    M bytes  model_name (UTF-8)
      #   ...     —        packed float32 data (vector_count × dimension × 4 bytes)
      #
      # +vectors.idx+ (one record per vector):
      #   4 bytes id_length (u32 LE) + N bytes id (UTF-8) + 8 bytes offset (u64 LE)
      #
      # Atomic writes use +Tempfile+ + +File.rename+ for crash safety.
      #
      # @see Snapshotter::Metadata companion for metadata stores
      module Vector # rubocop:disable Metrics/ModuleLength
        MAGIC = 'WVF1'
        SCHEMA_VERSION_SUPPORTED = 1

        # Returns a populated in-memory vector store loaded from the latest dump,
        # or an empty store when no dump exists yet.
        #
        # @param artifact [Woods::IndexArtifact] artifact layout object
        # @param resolved_config [#dimension, nil] used for dimension validation
        # @return [Woods::Storage::VectorStore::InMemory]
        # @raise [Woods::MCP::UnsupportedArtifact] if magic or schema_version is invalid,
        #   or either file is truncated
        # @raise [Woods::MCP::DimensionMismatch] if stored dimension ≠ +resolved_config.dimension+
        def self.load_or_empty(artifact, resolved_config: nil)
          dump_dir = artifact.latest_dump_path
          return VectorStore::InMemory.new if dump_dir.nil?

          bin_path = dump_dir.join('vectors.bin')
          idx_path = dump_dir.join('vectors.idx')
          return VectorStore::InMemory.new unless bin_path.exist? && idx_path.exist?

          load_from(bin_path, idx_path, resolved_config)
        end

        # Writes +vectors.bin+ and +vectors.idx+ into +dump_dir+ atomically.
        #
        # @param store [#each_entry, #bulk_load] in-memory vector store adapter
        # @param artifact [Woods::IndexArtifact] artifact layout object
        # @param dump_dir [Pathname, String] target directory; must be under +artifact.dumps_root+
        # @param resolved_config [#model_name, nil] model name written to header
        # @return [void]
        # @raise [Woods::Storage::InapplicableBackend] if +store+ lacks +#each_entry+ / +#bulk_load+
        # @raise [ArgumentError] if +dump_dir+ is not under +artifact.dumps_root+
        def self.dump(store, artifact, dump_dir, resolved_config: nil)
          validate_store!(store)
          validate_dump_dir!(artifact, Pathname.new(dump_dir.to_s))
          model_name = resolved_config.respond_to?(:model_name) ? resolved_config.model_name.to_s : ''
          entries = store.each_entry.to_a
          write_bin_and_idx(Pathname.new(dump_dir.to_s), entries, Woods::VERSION, model_name)
          nil
        end

        class << self # rubocop:disable Metrics/ClassLength
          private

          def load_from(bin_path, idx_path, resolved_config)
            bin_data = File.binread(bin_path.to_s)
            header, data_offset = parse_header(bin_data, bin_path)
            validate_magic!(header[:magic], bin_path)
            validate_schema_version!(header[:schema_version], bin_path)
            dim = resolved_config.respond_to?(:dimension) ? resolved_config.dimension : nil
            validate_dimension!(header[:dimension], dim, bin_path) if dim
            float_bytes = header[:vector_count] * header[:dimension] * 4
            # Guard the data section too: a partial write truncated after the
            # header would otherwise unpack short and hydrate nil vector slots.
            raise_truncated(bin_path, bin_data.bytesize, data_offset + float_bytes) if bin_data.bytesize < data_offset + float_bytes
            floats = bin_data.byteslice(data_offset, float_bytes)
                             .unpack("e#{header[:vector_count] * header[:dimension]}")
            hydrate_store(parse_idx(idx_path), floats, header[:dimension])
          end

          def parse_header(bin_data, bin_path) # rubocop:disable Metrics/AbcSize
            # Minimum header is 28 bytes (magic + schema_version + dimension
            # + vector_count + gem_version_length + model_name_length) plus
            # the variable-length gem_version and model_name strings. A
            # truncated header past the u32 guard below would produce a
            # confusing NoMethodError on nil.unpack; raise a typed error
            # with the file path instead.
            raise_truncated(bin_path, bin_data.bytesize, 28) if bin_data.bytesize < 28

            magic = bin_data.byteslice(0, 4)
            schema_version, dimension = bin_data.byteslice(4, 8).unpack('L<L<')
            vector_count = bin_data.byteslice(12, 8).unpack1('Q<')
            gv_len = bin_data.byteslice(20, 4).unpack1('L<')
            raise_truncated(bin_path, bin_data.bytesize, 24 + gv_len + 4) if bin_data.bytesize < 24 + gv_len + 4

            off = 24 + gv_len
            mn_len = bin_data.byteslice(off, 4).unpack1('L<')
            raise_truncated(bin_path, bin_data.bytesize, off + 4 + mn_len) if bin_data.bytesize < off + 4 + mn_len

            off += 4 + mn_len
            [{ magic: magic, schema_version: schema_version,
               dimension: dimension, vector_count: vector_count }, off]
          end

          def raise_truncated(path, actual, expected)
            raise Woods::MCP::UnsupportedArtifact.new(
              "#{path}: file truncated (got #{actual} bytes, need at least #{expected}) — " \
              'dump may have been interrupted mid-write; re-run woods:embed',
              details: { path: path.to_s, actual_bytes: actual, needed_bytes: expected }
            )
          end

          # Read back the id list from +vectors.idx+. The per-record u64 float
          # offset is skipped — ids are matched to vectors positionally on load.
          def parse_idx(idx_path)
            idx_data = File.binread(idx_path.to_s)
            ids = []
            pos = 0
            while pos < idx_data.bytesize
              # Truncation guards: a partial trailing record previously surfaced
              # as NoMethodError on nil from byteslice/unpack1.
              raise_truncated(idx_path, idx_data.bytesize, pos + 4) if idx_data.bytesize < pos + 4
              id_len = idx_data.byteslice(pos, 4).unpack1('L<')
              record_end = pos + 4 + id_len + 8
              raise_truncated(idx_path, idx_data.bytesize, record_end) if idx_data.bytesize < record_end
              ids << idx_data.byteslice(pos + 4, id_len)
              pos = record_end
            end
            ids
          end

          def hydrate_store(ids, floats, dim)
            store = VectorStore::InMemory.new
            entries = ids.each_with_index.map do |id, idx|
              { id: id, vector: floats[(idx * dim), dim], metadata: {} }
            end
            store.bulk_load(entries)
            store
          end

          def validate_magic!(found, path)
            return if found == MAGIC

            raise Woods::MCP::UnsupportedArtifact.new(
              "#{path}: invalid magic bytes (expected #{MAGIC.inspect}, found #{found.inspect})",
              details: { path: path.to_s, expected: MAGIC, found: found }
            )
          end

          def validate_schema_version!(version, path)
            return if version <= SCHEMA_VERSION_SUPPORTED

            raise Woods::MCP::UnsupportedArtifact.new(
              "#{path}: schema_version #{version} > supported max #{SCHEMA_VERSION_SUPPORTED}; " \
              'upgrade the woods gem to read this artifact',
              details: { path: path.to_s, artifact_version: version, max_supported: SCHEMA_VERSION_SUPPORTED }
            )
          end

          def validate_dimension!(stored, expected, path)
            return if stored == expected

            raise Woods::MCP::DimensionMismatch.new(
              "#{path}: stored dimension #{stored} ≠ provider dimension #{expected}",
              details: { path: path.to_s, stored_dimension: stored, provider_dimension: expected }
            )
          end

          def write_bin_and_idx(dump_dir, entries, gem_version, model_name)
            header = build_header(entries, gem_version, model_name)
            float_blob = entries.flat_map { |(_id, vector, _meta)| vector }.pack('e*')
            idx_data = build_idx(entries, header.bytesize)
            atomic_write(dump_dir.join('vectors.bin'), header + float_blob, binary: true)
            atomic_write(dump_dir.join('vectors.idx'), idx_data, binary: true)
          end

          def build_header(entries, gem_version, model_name)
            dim = entries.empty? ? 0 : entries.first[1].size
            gv = gem_version.encode('UTF-8').b
            mn = model_name.encode('UTF-8').b
            buf = String.new(encoding: 'BINARY')
            buf << MAGIC << [SCHEMA_VERSION_SUPPORTED, dim].pack('L<L<')
            buf << [entries.size].pack('Q<')
            buf << [gv.bytesize].pack('L<') << gv
            buf << [mn.bytesize].pack('L<') << mn
            buf
          end

          def build_idx(entries, header_size)
            buf = String.new(encoding: 'BINARY')
            float_offset = header_size
            entries.each do |id, vector, _meta|
              id_bytes = id.encode('UTF-8').b
              buf << [id_bytes.bytesize].pack('L<') << id_bytes
              buf << [float_offset].pack('Q<')
              float_offset += vector.size * 4
            end
            buf
          end

          def atomic_write(path, content, binary: false)
            # Pathname#mkpath instead of FileUtils.mkdir_p: this file never
            # requires 'fileutils', so FileUtils raised NameError on the first
            # dump into a missing directory. Pathname is already required and
            # matches the Metadata companion's approach.
            path.dirname.mkpath unless path.dirname.exist?
            tmp = Tempfile.new('.woods-vec-', path.dirname.to_s)
            tmp.binmode if binary
            tmp.write(content)
            tmp.flush
            tmp.fsync # flush to disk before rename so a crash never exposes a partial file
            tmp.close
            File.rename(tmp.path, path.to_s)
          rescue StandardError
            tmp&.close
            tmp&.unlink
            raise
          end

          # Only in-memory adapters (exposing #each_entry / #bulk_load) are dumpable;
          # durable backends must never be routed through the Snapshotter.
          def validate_store!(store)
            return if store.respond_to?(:each_entry) && store.respond_to?(:bulk_load)

            raise InapplicableBackend,
                  "backend #{store.class} is already durable — Snapshotter should not have been invoked"
          end

          # Reject dump targets outside the artifact's dumps root (path-traversal guard).
          def validate_dump_dir!(artifact, dump_path)
            expanded = dump_path.expand_path
            root = artifact.dumps_root.expand_path
            return if expanded.to_s.start_with?("#{root}/") || expanded == root

            raise ArgumentError,
                  "dump_dir #{expanded} is not under artifact.dumps_root #{root}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'woods/storage/snapshotter/vector'
4
+ require 'woods/storage/snapshotter/metadata'
5
+
6
module Woods
  module Storage
    # Namespace for the Snapshotter pair that persists and hydrates in-memory
    # storage adapters to/from disk.
    #
    # Two adapters live here:
    # - {Snapshotter::Vector} — handles {VectorStore::InMemory} round-trips via +pack("e*")+.
    # - {Snapshotter::Metadata} — handles {MetadataStore::InMemory} round-trips via MessagePack.
    #
    # Persistent backends (pgvector, Qdrant, SQLite) never touch the Snapshotter.
    # Passing one to {Snapshotter::Vector.dump} or {Snapshotter::Metadata.dump} raises
    # {InapplicableBackend} immediately.
    #
    # NOTE: earlier drafts shipped stub implementations here (+load_or_empty+
    # returning an empty store, +dump+ as a validated no-op); the real
    # serialization paths now live in the two adapter files required above.
    module Snapshotter
    end
  end
end