woods 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/lib/woods/unblocked/exporter.rb +55 -5
- data/lib/woods/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b2da9b3b863eb794ca880de7a8b327c7edd22f5ea0b027bd705191af85ea755a
|
|
4
|
+
data.tar.gz: 31c23f340816f84d3c1acc8e2cf09daa9bb7009179d1bd51cca51f7833c376e5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 69f9bc1e0e83a7894ab0618b1044608f7eb3b869c7a881b04820d033a1a4c66bae7ce56be4c7bd858915679e941354af3f2907c2faa2decac1de8d0a4511913c
|
|
7
|
+
data.tar.gz: 17360ebaf41923cb074d0b829b8940e24fbd3b7724243a0738fe73ee5a4fbcaf43108194d025a955073088dcf2ea0cd19380304191c41d9b325f02dcee43badd
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.4.1] - 2026-06-10
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- **Unblocked sync: multiple units sharing one file no longer collide on a single
|
|
15
|
+
URI** (#130). A document's URI derives from `file_path`, so a file defining
|
|
16
|
+
several extracted units (nested/namespaced classes, STI subclasses, multiple
|
|
17
|
+
classes in one `.rb`) mapped every unit to the same URI — the remote document
|
|
18
|
+
was overwritten per unit (only the last survived) and, under the content-hash
|
|
19
|
+
manifest, those units re-pushed on every run. The exporter now detects files
|
|
20
|
+
shared by more than one synced unit and disambiguates: the lexically-first
|
|
21
|
+
identifier keeps the bare blob URL, siblings get a `?unit=<identifier>` suffix.
|
|
22
|
+
Solo files (the overwhelming majority) are untouched. Sibling of the
|
|
23
|
+
no-`file_path` guard shipped in 1.4.0.
|
|
24
|
+
|
|
10
25
|
## [1.4.0] - 2026-06-10
|
|
11
26
|
|
|
12
27
|
### Added — Incremental Unblocked sync (PR #128)
|
data/lib/woods/unblocked/exporter.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'set'
|
|
4
4
|
require 'digest'
|
|
5
|
+
require 'uri'
|
|
5
6
|
require 'woods'
|
|
6
7
|
require_relative 'client'
|
|
7
8
|
require_relative 'rate_limiter'
|
|
@@ -83,6 +84,9 @@ module Woods
|
|
|
83
84
|
# sync_type_partial methods work standalone (track_uri needs them).
|
|
84
85
|
@current_uris = Set.new
|
|
85
86
|
@budget_exhausted = false
|
|
87
|
+
# base URI => identifier that keeps the bare URI (only populated for
|
|
88
|
+
# URIs shared by >1 unit). Rebuilt per sync_all run.
|
|
89
|
+
@uri_primary = {}
|
|
86
90
|
end
|
|
87
91
|
|
|
88
92
|
# Sync all configured unit types to the Unblocked collection.
|
|
@@ -91,6 +95,7 @@ module Woods
|
|
|
91
95
|
def sync_all
|
|
92
96
|
@current_uris = Set.new
|
|
93
97
|
@budget_exhausted = false
|
|
98
|
+
build_uri_index
|
|
94
99
|
reconcile_from_remote if @manifest.empty?
|
|
95
100
|
|
|
96
101
|
synced = 0
|
|
@@ -232,6 +237,11 @@ module Woods
|
|
|
232
237
|
# ping-pong the manifest hash forever. Skip them.
|
|
233
238
|
return :skipped unless unit_data['file_path']
|
|
234
239
|
|
|
240
|
+
# When several units share one file they share one base URI; only one
|
|
241
|
+
# keeps it, the rest get a `?unit=` suffix so each is a distinct remote
|
|
242
|
+
# document (and a distinct manifest key).
|
|
243
|
+
uri = effective_uri(unit_data)
|
|
244
|
+
|
|
235
245
|
doc = @builder.build(unit_data)
|
|
236
246
|
# An empty body means the credential scrub failed closed (the builders
|
|
237
247
|
# always emit at least a header). Upserting it would overwrite a good
|
|
@@ -241,16 +251,16 @@ module Woods
|
|
|
241
251
|
end
|
|
242
252
|
|
|
243
253
|
hash = fingerprint(doc)
|
|
244
|
-
return :skipped if !@force_full && @manifest.unchanged?(
|
|
254
|
+
return :skipped if !@force_full && @manifest.unchanged?(uri, hash)
|
|
245
255
|
|
|
246
256
|
response = @client.put_document(
|
|
247
257
|
collection_id: @collection_id,
|
|
248
258
|
title: doc[:title],
|
|
249
259
|
body: doc[:body],
|
|
250
|
-
uri:
|
|
260
|
+
uri: uri
|
|
251
261
|
)
|
|
252
|
-
document_id = (response['id'] if response.is_a?(Hash)) || @manifest.document_id_for(
|
|
253
|
-
@manifest.record(uri:
|
|
262
|
+
document_id = (response['id'] if response.is_a?(Hash)) || @manifest.document_id_for(uri)
|
|
263
|
+
@manifest.record(uri: uri, hash: hash, document_id: document_id)
|
|
254
264
|
:synced
|
|
255
265
|
end
|
|
256
266
|
|
|
@@ -360,7 +370,47 @@ module Woods
|
|
|
360
370
|
# stale repo-root document from before this guard should purge.
|
|
361
371
|
return unless unit_data['file_path']
|
|
362
372
|
|
|
363
|
-
|
|
373
|
+
# Must match the URI push_document actually uses, or a colliding unit's
|
|
374
|
+
# disambiguated document would look stale and be purged.
|
|
375
|
+
@current_uris << effective_uri(unit_data)
|
|
376
|
+
end
|
|
377
|
+
|
|
378
|
+
# The URI a unit's document is stored under. Normally the file's blob URL;
|
|
379
|
+
# when several units share that file, all but the lexically-first
|
|
380
|
+
# identifier get a `?unit=` suffix so each keeps a distinct document
|
|
381
|
+
# rather than overwriting the others (see #build_uri_index).
|
|
382
|
+
def effective_uri(unit_data)
|
|
383
|
+
base = @builder.uri_for(unit_data)
|
|
384
|
+
primary = @uri_primary[base]
|
|
385
|
+
return base if primary.nil? || primary == unit_data['identifier']
|
|
386
|
+
|
|
387
|
+
"#{base}?unit=#{URI.encode_www_form_component(unit_data['identifier'])}"
|
|
388
|
+
end
|
|
389
|
+
|
|
390
|
+
# One cheap pass over the type indexes (entries already carry file_path,
|
|
391
|
+
# and read_index is cached) to find files that define more than one synced
|
|
392
|
+
# unit. For each such base URI, the lexically-smallest identifier — the
|
|
393
|
+
# outer/top-level class — keeps the bare URI; siblings are suffixed. Solo
|
|
394
|
+
# files (the overwhelming majority) are absent from the map and unchanged,
|
|
395
|
+
# so this introduces no churn for them.
|
|
396
|
+
def build_uri_index
|
|
397
|
+
groups = Hash.new { |h, k| h[k] = [] }
|
|
398
|
+
synced_types.each do |type|
|
|
399
|
+
@reader.list_units(type: type).each do |entry|
|
|
400
|
+
next unless entry['file_path']
|
|
401
|
+
|
|
402
|
+
groups[@builder.uri_for(entry)] << entry['identifier']
|
|
403
|
+
end
|
|
404
|
+
end
|
|
405
|
+
|
|
406
|
+
@uri_primary = groups.each_with_object({}) do |(uri, identifiers), primary|
|
|
407
|
+
unique = identifiers.uniq
|
|
408
|
+
primary[uri] = unique.min if unique.size > 1
|
|
409
|
+
end
|
|
410
|
+
end
|
|
411
|
+
|
|
412
|
+
def synced_types
|
|
413
|
+
FULL_SYNC_TYPES + PARTIAL_SYNC_TYPES.map(&:first)
|
|
364
414
|
end
|
|
365
415
|
|
|
366
416
|
def fingerprint(doc)
|
data/lib/woods/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: woods
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.4.0
|
|
4
|
+
version: 1.4.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Leah Armstrong
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: mcp
|