woods 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f8991ef048ac039aafe41403a11c320333107ece6ad89da376765107820d27b9
4
- data.tar.gz: 3fdc6630de8946e1d2ba074ca0b115588934ff965712f1d18d6d3abeb8c5c50b
3
+ metadata.gz: b2da9b3b863eb794ca880de7a8b327c7edd22f5ea0b027bd705191af85ea755a
4
+ data.tar.gz: 31c23f340816f84d3c1acc8e2cf09daa9bb7009179d1bd51cca51f7833c376e5
5
5
  SHA512:
6
- metadata.gz: fed6f9a4a26f68dcd5304bc5ab6ba8a99ead3e5ca872a4c466c8ceb9faa23345e16a8405c14397bccb5fb0290970503b6b5ccb62ffe4d041d583593a64bd619a
7
- data.tar.gz: 10451e14fce82dd983d43b60875fac444dbb3eb08092c85f2085c08de79ebeeca7be80c3278de8145b88c841dae3f2cd909035899460dffd243cc85711523975
6
+ metadata.gz: 69f9bc1e0e83a7894ab0618b1044608f7eb3b869c7a881b04820d033a1a4c66bae7ce56be4c7bd858915679e941354af3f2907c2faa2decac1de8d0a4511913c
7
+ data.tar.gz: 17360ebaf41923cb074d0b829b8940e24fbd3b7724243a0738fe73ee5a4fbcaf43108194d025a955073088dcf2ea0cd19380304191c41d9b325f02dcee43badd
data/CHANGELOG.md CHANGED
@@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [1.4.1] - 2026-06-10
11
+
12
+ ### Fixed
13
+
14
+ - **Unblocked sync: multiple units sharing one file no longer collide on a single
15
+ URI** (#130). A document's URI derives from `file_path`, so a file defining
16
+ several extracted units (nested/namespaced classes, STI subclasses, multiple
17
+ classes in one `.rb`) mapped every unit to the same URI — the remote document
18
+ was overwritten per unit (only the last survived) and, under the content-hash
19
+ manifest, those units re-pushed on every run. The exporter now detects files
20
+ shared by more than one synced unit and disambiguates: the lexically-first
21
+ identifier keeps the bare blob URL, siblings get a `?unit=<identifier>` suffix.
22
+ Solo files (the overwhelming majority) are untouched. This is a sibling of the
23
+ no-`file_path` guard shipped in 1.4.0.
24
+
10
25
  ## [1.4.0] - 2026-06-10
11
26
 
12
27
  ### Added — Incremental Unblocked sync (PR #128)
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'set'
4
4
  require 'digest'
5
+ require 'uri'
5
6
  require 'woods'
6
7
  require_relative 'client'
7
8
  require_relative 'rate_limiter'
@@ -83,6 +84,9 @@ module Woods
83
84
  # sync_type_partial methods work standalone (track_uri needs them).
84
85
  @current_uris = Set.new
85
86
  @budget_exhausted = false
87
+ # base URI => identifier that keeps the bare URI (only populated for
88
+ # URIs shared by >1 unit). Rebuilt per sync_all run.
89
+ @uri_primary = {}
86
90
  end
87
91
 
88
92
  # Sync all configured unit types to the Unblocked collection.
@@ -91,6 +95,7 @@ module Woods
91
95
  def sync_all
92
96
  @current_uris = Set.new
93
97
  @budget_exhausted = false
98
+ build_uri_index
94
99
  reconcile_from_remote if @manifest.empty?
95
100
 
96
101
  synced = 0
@@ -232,6 +237,11 @@ module Woods
232
237
  # ping-pong the manifest hash forever. Skip them.
233
238
  return :skipped unless unit_data['file_path']
234
239
 
240
+ # When several units share one file they share one base URI; only one
241
+ # keeps it, the rest get a `?unit=` suffix so each is a distinct remote
242
+ # document (and a distinct manifest key).
243
+ uri = effective_uri(unit_data)
244
+
235
245
  doc = @builder.build(unit_data)
236
246
  # An empty body means the credential scrub failed closed (the builders
237
247
  # always emit at least a header). Upserting it would overwrite a good
@@ -241,16 +251,16 @@ module Woods
241
251
  end
242
252
 
243
253
  hash = fingerprint(doc)
244
- return :skipped if !@force_full && @manifest.unchanged?(doc[:uri], hash)
254
+ return :skipped if !@force_full && @manifest.unchanged?(uri, hash)
245
255
 
246
256
  response = @client.put_document(
247
257
  collection_id: @collection_id,
248
258
  title: doc[:title],
249
259
  body: doc[:body],
250
- uri: doc[:uri]
260
+ uri: uri
251
261
  )
252
- document_id = (response['id'] if response.is_a?(Hash)) || @manifest.document_id_for(doc[:uri])
253
- @manifest.record(uri: doc[:uri], hash: hash, document_id: document_id)
262
+ document_id = (response['id'] if response.is_a?(Hash)) || @manifest.document_id_for(uri)
263
+ @manifest.record(uri: uri, hash: hash, document_id: document_id)
254
264
  :synced
255
265
  end
256
266
 
@@ -360,7 +370,47 @@ module Woods
360
370
  # stale repo-root document from before this guard should purge.
361
371
  return unless unit_data['file_path']
362
372
 
363
- @current_uris << @builder.uri_for(unit_data)
373
+ # Must match the URI push_document actually uses, or a colliding unit's
374
+ # disambiguated document would look stale and be purged.
375
+ @current_uris << effective_uri(unit_data)
376
+ end
377
+
378
+ # The URI a unit's document is stored under. Normally the file's blob URL;
379
+ # when several units share that file, all but the lexically-first
380
+ # identifier get a `?unit=` suffix so each keeps a distinct document
381
+ # rather than overwriting the others (see #build_uri_index).
382
+ def effective_uri(unit_data)
383
+ base = @builder.uri_for(unit_data)
384
+ primary = @uri_primary[base]
385
+ return base if primary.nil? || primary == unit_data['identifier']
386
+
387
+ "#{base}?unit=#{URI.encode_www_form_component(unit_data['identifier'])}"
388
+ end
389
+
390
+ # One cheap pass over the type indexes (entries already carry file_path,
391
+ # and read_index is cached) to find files that define more than one synced
392
+ # unit. For each such base URI, the lexically-smallest identifier — usually the
393
+ # outer/top-level class — keeps the bare URI; siblings are suffixed. Solo
394
+ # files (the overwhelming majority) are absent from the map and unchanged,
395
+ # so this introduces no churn for them.
396
+ def build_uri_index
397
+ groups = Hash.new { |h, k| h[k] = [] }
398
+ synced_types.each do |type|
399
+ @reader.list_units(type: type).each do |entry|
400
+ next unless entry['file_path']
401
+
402
+ groups[@builder.uri_for(entry)] << entry['identifier']
403
+ end
404
+ end
405
+
406
+ @uri_primary = groups.each_with_object({}) do |(uri, identifiers), primary|
407
+ unique = identifiers.uniq
408
+ primary[uri] = unique.min if unique.size > 1
409
+ end
410
+ end
411
+
412
+ def synced_types
413
+ FULL_SYNC_TYPES + PARTIAL_SYNC_TYPES.map(&:first)
364
414
  end
365
415
 
366
416
  def fingerprint(doc)
data/lib/woods/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Woods
4
- VERSION = '1.4.0'
4
+ VERSION = '1.4.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: woods
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leah Armstrong
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-06-10 00:00:00.000000000 Z
11
+ date: 2026-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mcp