metanorma-release 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +1 -0
  4. data/.rubocop_todo.yml +504 -0
  5. data/CHANGELOG.md +15 -0
  6. data/PROMPT.md +282 -0
  7. data/README.adoc +430 -0
  8. data/Rakefile +8 -0
  9. data/exe/mn-release +6 -0
  10. data/lib/metanorma/release/aggregation_interfaces.rb +33 -0
  11. data/lib/metanorma/release/aggregation_pipeline.rb +155 -0
  12. data/lib/metanorma/release/asset_processor.rb +58 -0
  13. data/lib/metanorma/release/cache_store.rb +86 -0
  14. data/lib/metanorma/release/change_detector.rb +20 -0
  15. data/lib/metanorma/release/channel.rb +64 -0
  16. data/lib/metanorma/release/channel_audience.rb +24 -0
  17. data/lib/metanorma/release/channel_config.rb +55 -0
  18. data/lib/metanorma/release/channel_filter.rb +26 -0
  19. data/lib/metanorma/release/channel_manifest.rb +192 -0
  20. data/lib/metanorma/release/channel_registry.rb +60 -0
  21. data/lib/metanorma/release/cli.rb +129 -0
  22. data/lib/metanorma/release/commands/aggregate.rb +126 -0
  23. data/lib/metanorma/release/commands/package.rb +46 -0
  24. data/lib/metanorma/release/commands/publish.rb +51 -0
  25. data/lib/metanorma/release/config_fetcher.rb +11 -0
  26. data/lib/metanorma/release/config_locator.rb +37 -0
  27. data/lib/metanorma/release/config_resolver.rb +37 -0
  28. data/lib/metanorma/release/content_hash.rb +51 -0
  29. data/lib/metanorma/release/delta_state.rb +108 -0
  30. data/lib/metanorma/release/document_id.rb +45 -0
  31. data/lib/metanorma/release/document_index.rb +183 -0
  32. data/lib/metanorma/release/document_metadata.rb +39 -0
  33. data/lib/metanorma/release/document_stage.rb +86 -0
  34. data/lib/metanorma/release/document_type.rb +55 -0
  35. data/lib/metanorma/release/document_version.rb +50 -0
  36. data/lib/metanorma/release/file_routing.rb +51 -0
  37. data/lib/metanorma/release/interfaces.rb +47 -0
  38. data/lib/metanorma/release/naming_strategy.rb +158 -0
  39. data/lib/metanorma/release/platform/github/config_fetcher.rb +40 -0
  40. data/lib/metanorma/release/platform/github/manifest_reader.rb +32 -0
  41. data/lib/metanorma/release/platform/github/publisher.rb +73 -0
  42. data/lib/metanorma/release/platform/github/release_fetcher.rb +52 -0
  43. data/lib/metanorma/release/platform/github/topic_discoverer.rb +29 -0
  44. data/lib/metanorma/release/platform/github.rb +25 -0
  45. data/lib/metanorma/release/platform/local/config_fetcher.rb +20 -0
  46. data/lib/metanorma/release/platform/local/directory_discoverer.rb +26 -0
  47. data/lib/metanorma/release/platform/local/fetcher.rb +76 -0
  48. data/lib/metanorma/release/platform/local/publisher.rb +44 -0
  49. data/lib/metanorma/release/platform/local.rb +14 -0
  50. data/lib/metanorma/release/platform/null/publisher.rb +17 -0
  51. data/lib/metanorma/release/platform/null.rb +11 -0
  52. data/lib/metanorma/release/platform.rb +11 -0
  53. data/lib/metanorma/release/platform_factory.rb +78 -0
  54. data/lib/metanorma/release/rake_tasks.rb +71 -0
  55. data/lib/metanorma/release/relaton_enricher.rb +138 -0
  56. data/lib/metanorma/release/release_metadata.rb +79 -0
  57. data/lib/metanorma/release/release_pipeline.rb +115 -0
  58. data/lib/metanorma/release/release_tag.rb +49 -0
  59. data/lib/metanorma/release/repo_ref.rb +34 -0
  60. data/lib/metanorma/release/rxl_extractor.rb +115 -0
  61. data/lib/metanorma/release/stage_filter.rb +18 -0
  62. data/lib/metanorma/release/version.rb +7 -0
  63. data/lib/metanorma/release/zip_packager.rb +37 -0
  64. data/lib/metanorma/release.rb +116 -0
  65. metadata +156 -0
data/README.adoc ADDED
@@ -0,0 +1,430 @@
1
+ = metanorma-release
2
+ :toc: macro
3
+ :toclevels: 3
4
+
5
+ Release lifecycle management for Metanorma documents.
6
+
7
+ toc::[]
8
+
9
+ == Overview
10
+
11
+ `metanorma-release` manages the full release lifecycle of Metanorma documents:
12
+
13
+ **Release** (producer side)::
14
+ Discover compiled documents -> extract metadata from RXL -> detect changes -> package as zip -> publish to a platform (GitHub Releases, local filesystem).
15
+
16
+ **Aggregate** (consumer side)::
17
+ Discover repositories -> fetch published releases -> filter by channel and stage -> extract zip assets -> generate `index.json` with a file tree for any site generator.
18
+
19
+ The output is platform-agnostic: a directory containing `index.json` and a tree of document files. Any site generator (Jekyll, Hugo, Vite) consumes that output independently.
20
+
21
+ == Installation
22
+
23
+ Add to your Gemfile:
24
+
25
+ [source,ruby]
26
+ ----
27
+ gem "metanorma-release"
28
+ ----
29
+
30
+ Or install directly:
31
+
32
+ [source,sh]
33
+ ----
34
+ gem install metanorma-release
35
+ ----
36
+
37
+ Requires Ruby >= 3.2.
38
+
39
+ == Quick start
40
+
41
+ === CLI
42
+
43
+ The gem ships three commands:
44
+
45
+ [source,sh]
46
+ ----
47
+ # Package compiled documents as zip archives
48
+ mn-release package --output-dir _site --manifest metanorma.release.yml
49
+
50
+ # Package and publish to a platform
51
+ mn-release publish --platform github --output-dir _site --token $GITHUB_TOKEN
52
+
53
+ # Aggregate published releases into a file tree + index.json
54
+ mn-release aggregate --source github --organizations my-org --output-dir _site/cc
55
+ ----
56
+
57
+ === Rake tasks
58
+
59
+ Register tasks in your `Rakefile`:
60
+
61
+ [source,ruby]
62
+ ----
63
+ require "metanorma/release/rake_tasks"
64
+
65
+ Metanorma::Release::RakeTasks.install do |config|
66
+ config.output_dir = "_site"
67
+ config.manifest = "metanorma.release.yml"
68
+ config.platform = "github"
69
+ end
70
+ ----
71
+
72
+ This provides:
73
+
74
+ * `rake mn:package` -- package compiled documents
75
+ * `rake mn:publish` -- package and publish documents
76
+ * `rake mn:aggregate` -- aggregate published releases
77
+
78
+ === Ruby API
79
+
80
+ Use the pipelines directly for fine-grained control:
81
+
82
+ [source,ruby]
83
+ ----
84
+ deps = Metanorma::Release::ReleasePipeline::Dependencies.new(
85
+ extractor: Metanorma::Release::RxlExtractor.new,
86
+ filters: [],
87
+ change_detector: Metanorma::Release::ContentHashChangeDetector.new(previous_releases: {}),
88
+ packager: Metanorma::Release::ZipPackager.new,
89
+ publisher: Metanorma::Release::PlatformFactory.build_publisher("null", {}),
90
+ naming_registry: Metanorma::Release::NamingRegistry.default_registry,
91
+ manifest: nil,
92
+ channel_override: nil,
93
+ channel_config: nil
94
+ )
95
+
96
+ config = Metanorma::Release::ReleasePipeline::Config.new(
97
+ output_dir: "_site",
98
+ manifest_path: nil,
99
+ force: false,
100
+ force_replace_patterns: nil,
101
+ concurrency: 4,
102
+ default_visibility: "public"
103
+ )
104
+
105
+ result = Metanorma::Release::ReleasePipeline.new(deps).run(config)
106
+ result.released # => [#<DocumentMetadata ...>]
107
+ result.skipped # => [#<DocumentMetadata ...>]
108
+ result.failed # => [{ document: ..., error: "..." }]
109
+ ----
110
+
111
+ == CLI reference
112
+
113
+ === `mn-release package`
114
+
115
+ Package compiled documents into zip archives without publishing.
116
+
117
+ [source,sh]
118
+ ----
119
+ mn-release package [options]
120
+ ----
121
+
122
+ [cols="1m,3",options="header"]
123
+ |===
124
+ |Option |Description
125
+ |`--output-dir DIR` |Directory containing compiled documents (default: `_site`)
126
+ |`--dest DIR` |Destination for zip packages (default: `dist`)
127
+ |`--manifest FILE` |Release manifest file (default: `metanorma.release.yml`)
128
+ |`--config SOURCE` |Channel config file or platform ref
129
+ |===
130
+
131
+ === `mn-release publish`
132
+
133
+ Package and publish documents to a platform.
134
+
135
+ [source,sh]
136
+ ----
137
+ mn-release publish [options]
138
+ ----
139
+
140
+ [cols="1m,3",options="header"]
141
+ |===
142
+ |Option |Description
143
+ |`--platform NAME` |Target platform: `github`, `local` (default: `github`)
144
+ |`--output-dir DIR` |Compiled docs directory (default: `_site`)
145
+ |`--manifest FILE` |Release manifest file (default: `metanorma.release.yml`)
146
+ |`--force` |Force release even if unchanged
147
+ |`--force-replace PAT` |Glob pattern for forced replacement (repeatable)
148
+ |`--channels CHANS` |Override channels (comma-separated)
149
+ |`--concurrency N` |Parallel workers (default: 4)
150
+ |`--token TOKEN` |Platform auth token
151
+ |`--config SOURCE` |Channel config file or platform ref
152
+ |===
153
+
154
+ === `mn-release aggregate`
155
+
156
+ Aggregate published releases from multiple repositories into a unified file tree.
157
+
158
+ [source,sh]
159
+ ----
160
+ mn-release aggregate [options]
161
+ ----
162
+
163
+ [cols="1m,3",options="header"]
164
+ |===
165
+ |Option |Description
166
+ |`--source SOURCE` |Discovery source: `github`, `local:PATH` (default: `github`)
167
+ |`--organizations ORGS` |Comma-separated organization list
168
+ |`--topic TOPIC` |Repository topic filter (default: `metanorma-release`)
169
+ |`--repos REPOS` |Explicit repo list (comma-separated)
170
+ |`--channels CHANS` |Filter channels (comma-separated)
171
+ |`--stages STAGES` |Filter stages (comma-separated)
172
+ |`--output-dir DIR` |Output directory (default: `_site/cc`)
173
+ |`--file-routing MODE` |File layout: `by-document`, `flat`, `by-format` (default: `by-document`)
174
+ |`--cache-dir DIR` |Cache directory for delta state
175
+ |`--[no-]include-drafts` |Include draft releases
176
+ |`--concurrency N` |Parallel repos (default: 4)
177
+ |`--min-documents N` |Fail if fewer than N documents are found (default: 0)
178
+ |`--token TOKEN` |Platform auth token
179
+ |===
180
+
181
+ == Concepts
182
+
183
+ === Channels
184
+
185
+ A channel is an `audience/category` pair that controls who can access a document:
186
+
187
+ [source,ruby]
188
+ ----
189
+ channel = Metanorma::Release::Channel.parse("public/standards")
190
+ channel.public? # => true
191
+ channel.audience # => "public"
192
+ channel.category # => "standards"
193
+ ----
194
+
195
+ Audiences: `public`, `members`, `internal`. When omitted, audience defaults to `public`.
196
+
197
+ === Channel configuration
198
+
199
+ A channel config defines the set of allowed channels for a project or organization, along with default visibility. This lets you enforce a channel taxonomy across all documents.
200
+
201
+ .Config resolution order
202
+ [arabic]
203
+ . `--config` CLI flag (highest priority)
204
+ . `config:` key in the release manifest
205
+ . Directory walk: `.metanorma.yml`, `.metanorma.yaml`, or `.metanorma/channels.yml`
206
+ . No config -- all channels allowed
207
+
208
+ ==== Config file format
209
+
210
+ [source,yaml]
211
+ ----
212
+ # .metanorma.yml
213
+ channels:
214
+ - public/standards
215
+ - public/reports
216
+ - members/early-access
217
+ - internal/working-drafts
218
+ defaults:
219
+ visibility: public
220
+ channels:
221
+ - public/standards
222
+ ----
223
+
224
+ The `channels` list defines the taxonomy -- only these channels are valid. The `defaults` section sets fallback visibility and channels when a document doesn't match any manifest entry.
225
+
226
+ ==== Specifying config in the manifest
227
+
228
+ Add a `config` key to `metanorma.release.yml`:
229
+
230
+ [source,yaml]
231
+ ----
232
+ config: local:/path/to/config.yml
233
+ defaults:
234
+ visibility: public
235
+ documents:
236
+ - source: sources/cc-18011.adoc
237
+ channels:
238
+ - public/standards
239
+ ----
240
+
241
+ The config source can be:
242
+
243
+ * `local:/path/to/config.yml` -- local file path
244
+ * `myorg/myrepo` -- GitHub repo (reads `channels.yml` from root)
245
+ * `myorg/myrepo#path/to/config.yml` -- GitHub repo with explicit path
246
+
247
+ ==== Ruby API
248
+
249
+ [source,ruby]
250
+ ----
251
+ # Parse from YAML
252
+ config = Metanorma::Release::ChannelConfig.from_yaml(File.read(".metanorma.yml"))
253
+
254
+ # Permissive config (all channels allowed)
255
+ config = Metanorma::Release::ChannelConfig.empty
256
+
257
+ # Validate a channel
258
+ config.registry.valid?(Metanorma::Release::Channel.parse("public/standards")) # => true
258
+ config.registry.valid?(Metanorma::Release::Channel.parse("public/secret")) # => false
260
+
261
+ # Locate config by walking up from a directory
262
+ config = Metanorma::Release::ConfigLocator.find("/path/to/project")
263
+ ----
264
+
265
+ === Naming strategies
266
+
267
+ Tag and file naming varies by document type. Strategies are resolved via a registry:
268
+
269
+ [cols="1m,1,2",options="header"]
270
+ |===
271
+ |Document type |Strategy |Tag format
272
+ |standard (default) |`EditionNaming` |`cc-18011/ed1`
273
+ |IETF draft |`InternetDraftNaming` |`id-ietf-foo/1`
274
+ |IETF RFC |`RfcNaming` |`rfc-1234/ed1`
275
+ |IEEE |`DraftSuffixNaming` |`ieee-8021/d1`
276
+ |IHO, OGC |`VersionNaming` |`iho-s44/v1`
277
+ |===
278
+
279
+ Register custom strategies:
280
+
281
+ [source,ruby]
282
+ ----
283
+ registry = Metanorma::Release::NamingRegistry.default_registry
284
+ registry.register("my-type", MyCustomNaming.new)
285
+ ----
286
+
287
+ === File routing
288
+
289
+ The aggregation pipeline supports three file layout modes:
290
+
291
+ [cols="1m,3",options="header"]
292
+ |===
293
+ |Mode |Example path
294
+ |`by-document` (default) |`cc-18011/cc-18011.html`
295
+ |`flat` |`cc-18011.html`
296
+ |`by-format` |`html/cc-18011.html`
297
+ |===
298
+
299
+ === Release manifest
300
+
301
+ A `metanorma.release.yml` file controls which documents are published and to which channels:
302
+
303
+ [source,yaml]
304
+ ----
305
+ config: myorg/.metanorma
306
+ defaults:
307
+ visibility: public
308
+ channels:
309
+ - public/standards
310
+ documents:
311
+ - source: sources/cc-18011.adoc
312
+ channels:
313
+ - public/standards
314
+ - source: sources/cc-19060.adoc
315
+ visibility: members
316
+ channels:
317
+ - members/early-access
318
+ - pattern: "sources/draft-*.adoc"
319
+ channels:
320
+ - internal/working-drafts
321
+ stages:
322
+ - working-draft
323
+ - committee-draft
324
+ ----
325
+
326
+ Documents not listed in the manifest use the `defaults` section. If no manifest exists, all documents are released as `public/standards`.
327
+
328
+ Key fields:
329
+
330
+ [cols="1m,3",options="header"]
331
+ |===
332
+ |Field |Description
333
+ |`source` |Exact path match (highest priority)
334
+ |`pattern` |Glob pattern match
335
+ |`visibility` |`public`, `members`, or `private`
336
+ |`channels` |List of target channels
337
+ |`stages` |Allow-list of document stages
338
+ |`config` |Channel config source (see <<channel-configuration>>)
339
+ |===
340
+
341
+ === Value objects
342
+
343
+ All domain types are immutable, frozen, and use value-based equality:
344
+
345
+ * `DocumentId` -- normalized document identifier (`CC 18011` -> `cc-18011`)
346
+ * `DocumentVersion` -- edition + stage + pre-release flag
347
+ * `DocumentStage` -- published, draft, working-draft, committee-draft, etc.
348
+ * `Channel` -- audience/category pair
349
+ * `ReleaseTag` -- tag string with pre-release flag
350
+ * `ContentHash` -- SHA-256 content fingerprint
351
+ * `RepoRef` -- owner/repo reference
352
+
353
+ === Bibliography enrichment
354
+
355
+ `RelatonEnricher` generates `index.json` and `index.yaml` from RXL (Relaton XML) files found in aggregated documents. It auto-detects the Relaton flavor from document metadata:
356
+
357
+ [source,ruby]
358
+ ----
359
+ enricher = Metanorma::Release::RelatonEnricher.new(flavor: "calconnect")
360
+ result = enricher.enrich(document_index, output_dir)
361
+ # writes: output_dir/relaton/index.json
362
+ # output_dir/relaton/index.yaml
363
+ ----
364
+
365
+ Flavor detection tries these gems in order: `relaton-calconnect`, `relaton-iso`, `relaton-iec`, `relaton-ogc`, `relaton-ietf`, and others. If a flavor gem is not installed, it falls back to `Relaton::Bib::Item` from the `relaton-bib` runtime dependency.
366
+
367
+ == Architecture
368
+
369
+ === Dependency flow
370
+
371
+ Unidirectional, no cycles:
372
+
373
+ ----
374
+ domain/ -> release/ -> platform/
375
+ -> aggregation/ -> platform/
376
+ -> cli/
377
+ ----
378
+
379
+ * `domain/` has zero knowledge of pipelines, platforms, or CLI
380
+ * Pipelines depend on domain + interfaces, not platform implementations
381
+ * Platform adapters depend on interfaces + domain, not pipelines
382
+ * CLI delegates to command classes; commands depend on pipelines + platform factory
383
+ * Commands use `ConfigResolver` mixin for channel config resolution
384
+
385
+ === Patterns
386
+
387
+ Value Objects:: Immutable, frozen, value-based equality via `eql?`/`hash`. All fields are included in the equality comparison.
388
+
389
+ Strategy Pattern:: Pluggable algorithms resolved via registry. Adding a new document type or platform requires zero changes to existing code.
390
+
391
+ Pipeline with DI:: Pipelines receive all dependencies through constructors. No global state, no service locators.
392
+
393
+ Null Object:: Disabled features inject null implementations (`NullDeltaState`, `NullPublisher`, `NullCacheStore`) instead of adding conditional checks.
394
+
395
+ Result Types:: Pipelines return frozen Structs. Errors are collected, not raised. The caller decides whether to abort.
396
+
397
+ Command Pattern:: CLI delegates to `PackageCommand`, `PublishCommand`, and `AggregateCommand` classes. Each command encapsulates pipeline construction and configuration resolution via the `ConfigResolver` mixin.
398
+
399
+ === Extending
400
+
401
+ |===
402
+ |To add... |Do this
403
+
404
+ |A new platform
405
+ |Create a directory under `platform/` with `Publisher`, `Discoverer`, `Fetcher`, `ManifestReader` classes; register in `PlatformFactory`
406
+
407
+ |A new naming strategy
408
+ |Create a class that includes `NamingStrategy`; register via `NamingRegistry#register`
409
+
410
+ |A new file routing mode
411
+ |Create a class with `#compute_path(file_name, metadata)`; register in `FileRoutingFactory`
412
+
413
+ |A new filter
414
+ |Create a class that includes `Filter`; pass to the pipeline's `filters` array
415
+
416
+ |A new channel config source
417
+ |Create a class that includes `ConfigFetcher` with a `#fetch(source)` method
418
+ |===
419
+
420
+ == Development
421
+
422
+ [source,sh]
423
+ ----
424
+ bundle install
425
+ bundle exec rspec
426
+ ----
427
+
428
+ == License
429
+
430
+ BSD-2-Clause. See `LICENSE` for details.
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
data/exe/mn-release ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'metanorma/release/cli'
5
+
6
+ Metanorma::Release::CLI.run(ARGV)
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module Metanorma
  module Release
    # Mixin describing an object that can enumerate repositories.
    # Including classes are expected to override {#discover}.
    module RepoDiscoverer
      # @raise [NotImplementedError] unless the including class overrides it
      def discover
        raise(NotImplementedError, "#{self.class} must implement #discover")
      end
    end

    # Mixin describing an object that can fetch the releases of a repository.
    module ReleaseFetcher
      # @param repo [Object] repository to fetch releases for
      # @param etag [Object, nil] optional ETag supplied by the caller
      # @raise [NotImplementedError] unless the including class overrides it
      def fetch(repo, etag: nil)
        raise(NotImplementedError, "#{self.class} must implement #fetch")
      end
    end

    # Mixin describing an object that can read a repository's manifest.
    module ManifestReader
      # @param repo [Object] repository whose manifest should be read
      # @raise [NotImplementedError] unless the including class overrides it
      def read(repo)
        raise(NotImplementedError, "#{self.class} must implement #read")
      end
    end

    # Mixin describing an object that can write an index for a document set.
    module IndexGenerator
      # @param documents [Object] documents to index
      # @param output_dir [Object] destination of the generated index
      # @param format [Object] requested index format
      # @param parameters [Object] additional generator parameters
      # @raise [NotImplementedError] unless the including class overrides it
      def generate(documents, output_dir, format:, parameters:)
        raise(NotImplementedError, "#{self.class} must implement #generate")
      end
    end

    # Outcome of a fetch: the releases, the ETag, and an `unchanged?` flag.
    FetchResult = Struct.new(
      :releases,
      :etag,
      :unchanged?,
      keyword_init: true
    )

    # Per-repository summary: counters, an optional skip reason and errors.
    RepoReport = Struct.new(
      :releases,
      :included,
      :skipped,
      :reason,
      :errors,
      keyword_init: true
    )

    # A single failure, identified by a tag and carrying a message.
    RepoError = Struct.new(
      :tag,
      :message,
      keyword_init: true
    )
  end
end
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
3
module Metanorma
  module Release
    # Consumer-side pipeline: discovers repositories, fetches their releases,
    # filters them, unpacks the zip asset of each new release and returns the
    # resulting documents together with per-repository reports.
    #
    # All collaborators are injected through a {Dependencies} struct.
    class AggregationPipeline
      # Collaborators used by the pipeline.
      Dependencies = Struct.new(
        :discoverer, :fetcher, :manifest_reader,
        :channel_filter, :stage_filter,
        :asset_processor, :delta_state,
        keyword_init: true
      )

      # Run-time options. Only `include_drafts` and `fail_on_error` are read
      # by this class; the other members are carried for the caller.
      Config = Struct.new(
        :organizations, :channels, :topic,
        :concurrency, :include_drafts, :fail_on_error,
        keyword_init: true
      )

      # Outcome of {#run}.
      Result = Struct.new(
        :documents, :repo_count, :channels_found,
        :report, :failed_repos,
        keyword_init: true
      )

      # @param deps [Dependencies] injected collaborators
      def initialize(deps)
        @deps = deps
      end

      # Processes every discovered repository.
      #
      # A repository that raises is recorded in `failed_repos`; the exception
      # is re-raised only when `config.fail_on_error` is set.
      #
      # @param config [Config] run-time options
      # @param output_dir [String] passed through to the per-repository step
      # @return [Result]
      def run(config, output_dir)
        @deps.delta_state.load
        repos = @deps.discoverer.discover
        documents = []
        reports = []
        failed_repos = []

        repos.each do |repo|
          repo_docs, report = process_repo(repo, output_dir, config)
          documents.concat(repo_docs)
          reports << report
        rescue StandardError => e
          failed_repos << RepoError.new(tag: repo.to_s, message: e.message)
          raise if config.fail_on_error
        end

        @deps.delta_state.save

        Result.new(
          documents: documents,
          repo_count: repos.length,
          channels_found: documents.flat_map { |d| d.channels || [] }.uniq.sort,
          report: reports,
          failed_repos: failed_repos
        )
      end

      private

      # Handles one repository and returns `[documents, RepoReport]`.
      #
      # Returns early with an empty document list when the manifest channels
      # do not overlap the channel filter, or when the fetcher reports the
      # repository as unchanged.
      def process_repo(repo, _output_dir, config)
        repo_key = repo.to_s

        manifest_channels = @deps.manifest_reader.read(repo)
        if manifest_channels && !@deps.channel_filter.overlaps?(manifest_channels)
          return [], RepoReport.new(releases: 0, included: 0, skipped: 0,
                                    reason: 'channel manifest', errors: [])
        end

        etag = @deps.delta_state.etag(repo_key)
        fetch_result = @deps.fetcher.fetch(repo, etag: etag)

        if fetch_result.unchanged?
          return [], RepoReport.new(releases: 0, included: 0, skipped: 0,
                                    reason: 'etag unchanged', errors: [])
        end

        current_tags = []
        documents = []
        errors = []

        fetch_result.releases.each do |release|
          metadata = ReleaseMetadata.from_release_body(release.body)
          next if metadata.nil?

          next unless @deps.channel_filter.matches?(metadata.to_h)
          next unless @deps.stage_filter.matches?(metadata.to_h)
          next if release.prerelease && !config.include_drafts

          tag = release.tag_name
          current_tags << tag

          content_hash = extract_content_hash(release.body)

          # Already unpacked on a previous run: reuse the recorded file list.
          if @deps.delta_state.processed?(repo_key, tag, content_hash)
            files = @deps.delta_state.release_files(repo_key, tag)
            documents << build_document(metadata, files, content_hash, release, repo)
            next
          end

          zip_asset = find_zip_asset(release)
          next unless zip_asset

          result = @deps.asset_processor.process(zip_asset.data, metadata.to_h)
          paths = result.files.map(&:path)
          @deps.delta_state.mark_processed(repo_key, tag, content_hash, paths)
          documents << build_document(metadata, paths, content_hash, release, repo)
        rescue StandardError => e
          # One broken release must not abort the rest of the repository.
          errors << RepoError.new(tag: release.tag_name, message: e.message)
        end

        @deps.delta_state.cleanup_stale(repo_key, current_tags)
        # Remember the ETag only after a fully successful pass. Storing it
        # when a release failed would make the next run take the
        # 'etag unchanged' early return above, so the failed release would
        # never be retried.
        @deps.delta_state.set_etag(repo_key, fetch_result.etag) if errors.empty?

        [documents, RepoReport.new(
          releases: fetch_result.releases.length,
          included: documents.length,
          skipped: fetch_result.releases.length - documents.length,
          reason: nil, errors: errors
        )]
      end

      # Assembles an AggregatedDocument from release metadata, the list of
      # file paths and the release/repository it came from.
      def build_document(metadata, files, content_hash, release, repo)
        source = DocumentSource.new(
          owner: repo.owner, repo: repo.repo,
          tag: release.tag_name,
          release_url: release.html_url,
          release_date: release.published_at
        )

        file_structs = files.map { |f| DocumentFile.new(name: File.basename(f), path: f) }

        AggregatedDocument.new(
          id: metadata.id, title: metadata.title,
          edition: metadata.edition, stage: metadata.stage,
          doctype: metadata.doctype,
          channels: metadata.channels,
          formats: metadata.formats,
          flavor: metadata.flavor,
          content_hash: content_hash.to_s,
          source: source, files: file_structs
        )
      end

      # Returns the lowercase hex digits following a `content-hash:` line
      # prefix in the release body, or nil when the body is nil or has no
      # such line. `^` is intentional: the marker may sit on any line.
      def extract_content_hash(body)
        return nil if body.nil?

        match = body.match(/^content-hash:([a-f0-9]+)/)
        match ? match[1] : nil
      end

      # Returns the first asset whose name ends in `.zip`, or nil.
      def find_zip_asset(release)
        return nil unless release.assets

        release.assets.find { |a| a.name.end_with?('.zip') }
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
# Standard-library pieces used directly by AssetProcessor#process
# (FileUtils.mkdir_p and Dir.mktmpdir). Required explicitly so this file does
# not depend on another library having loaded them first.
require 'fileutils'
require 'tmpdir'

# rubyzip is loaded lazily here; a missing gem is reported with an
# actionable message instead of the bare "cannot load such file" error.
begin
  require 'zip'
rescue LoadError
  raise LoadError, "The rubyzip gem is required for AssetProcessor. Add `gem 'rubyzip'` to your Gemfile."
end
8
+
9
module Metanorma
  module Release
    # Unpacks the entries of a release zip archive below an output directory,
    # asking a routing object where each file belongs.
    class AssetProcessor
      # Files written by {#process} plus the `channels` metadata entry.
      ProcessResult = Struct.new(:files, :channels, keyword_init: true)

      CANONICALIZE_PATTERN = /-ed\d+(\.\d+)?-/

      # @param output_dir [String] root directory files are extracted into
      # @param routing [#compute_path] maps (file name, metadata) to a
      #   path relative to `output_dir`
      # @param canonicalize [Boolean] strip edition markers from file names
      def initialize(output_dir:, routing:, canonicalize: true)
        @output_dir = output_dir
        @routing = routing
        @canonicalize = canonicalize
      end

      # Writes `zip_data` to a temporary archive and extracts every
      # non-directory entry.
      #
      # @param zip_data [String] raw bytes of the zip archive
      # @param metadata [Hash] passed to the routing object; its
      #   'channels' entry is copied into the result
      # @return [ProcessResult]
      def process(zip_data, metadata)
        extracted = []

        Dir.mktmpdir do |workdir|
          archive_path = File.join(workdir, 'archive.zip')
          File.binwrite(archive_path, zip_data)

          Zip::File.open(archive_path) do |archive|
            archive.each do |entry|
              extracted << extract_entry(entry, metadata) unless entry.directory?
            end
          end
        end

        ProcessResult.new(files: extracted, channels: metadata['channels'])
      end

      private

      # Extracts one archive entry and returns the DocumentFile describing it.
      # Only the entry's base name is kept; directories inside the archive
      # are dropped before routing.
      def extract_entry(entry, metadata)
        base_name = File.basename(entry.name)
        target_name = @canonicalize ? canonicalize_name(base_name) : base_name
        routed_path = @routing.compute_path(target_name, metadata)
        destination = File.join(@output_dir, routed_path)

        FileUtils.mkdir_p(File.dirname(destination))
        # NOTE(review): the block returning true presumably tells rubyzip to
        # overwrite an existing file - confirm against the rubyzip version in use.
        entry.extract(destination) { true }

        DocumentFile.new(name: target_name, path: routed_path)
      end

      # Removes the edition marker from a file name:
      # "-ed1." becomes "." and "-ed1-wd." becomes "-wd.".
      def canonicalize_name(name)
        infix_removed = name.sub(/-ed\d+(\.\d+)?-(?=[a-z0-9])/, '-')
        infix_removed.sub(/-ed\d+(\.\d+)?\./, '.')
      end
    end
  end
end