lutaml-model 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +9 -0
  3. data/.github/workflows/downstream-performance.yml +0 -3
  4. data/.rubocop_todo.yml +18 -186
  5. data/README.adoc +212 -15
  6. data/bench/bench_xmi.rb +6 -6
  7. data/bench/gate_config.rb +2 -9
  8. data/docs/_pages/configuration.adoc +155 -41
  9. data/docs/_pages/serialization_adapters.adoc +65 -14
  10. data/docs/index.adoc +3 -1
  11. data/docs/yamls_sequence.adoc +335 -0
  12. data/lib/lutaml/hash_format.rb +4 -0
  13. data/lib/lutaml/json/adapter/multi_json_adapter.rb +4 -2
  14. data/lib/lutaml/json/adapter/oj_adapter.rb +4 -2
  15. data/lib/lutaml/json.rb +4 -0
  16. data/lib/lutaml/key_value/adapter/json/multi_json_adapter.rb +4 -2
  17. data/lib/lutaml/key_value/adapter/json/oj_adapter.rb +4 -2
  18. data/lib/lutaml/model/adapter_resolver.rb +410 -0
  19. data/lib/lutaml/model/adapter_scope.rb +64 -0
  20. data/lib/lutaml/model/config.rb +84 -21
  21. data/lib/lutaml/model/configuration.rb +17 -249
  22. data/lib/lutaml/model/format_registry.rb +44 -117
  23. data/lib/lutaml/model/mapping/listener.rb +4 -2
  24. data/lib/lutaml/model/serialize/format_conversion.rb +42 -3
  25. data/lib/lutaml/model/serialize.rb +4 -2
  26. data/lib/lutaml/model/services/base.rb +4 -2
  27. data/lib/lutaml/model/version.rb +1 -1
  28. data/lib/lutaml/model.rb +2 -0
  29. data/lib/lutaml/toml.rb +10 -3
  30. data/lib/lutaml/xml/serialization/instance_methods.rb +6 -0
  31. data/lib/lutaml/xml.rb +3 -4
  32. data/lib/lutaml/yaml.rb +4 -0
  33. data/lib/lutaml/yamls/adapter/mapping.rb +7 -0
  34. data/lib/lutaml/yamls/adapter/standard_adapter.rb +23 -2
  35. data/lib/lutaml/yamls/adapter/transform.rb +105 -7
  36. data/lib/lutaml/yamls/adapter/yamls_sequence.rb +20 -0
  37. data/lib/lutaml/yamls/adapter/yamls_sequence_rule.rb +48 -0
  38. data/lib/lutaml/yamls/adapter.rb +2 -0
  39. data/spec/fixtures/geolexica_v2_concept.rb +136 -0
  40. data/spec/fixtures/geolexica_v2_sample.yaml +36 -0
  41. data/spec/fixtures/geolexica_v2_sample2.yaml +38 -0
  42. data/spec/fixtures/yamls_range_concept.rb +139 -0
  43. data/spec/lutaml/model/xml_decoupling_spec.rb +5 -4
  44. data/spec/lutaml/model/yamls_range_spec.rb +393 -0
  45. data/spec/lutaml/model/yamls_sequence_spec.rb +245 -0
  46. data/spec/spec_helper.rb +5 -0
  47. metadata +13 -3
  48. data/bench/bench_uniword.rb +0 -69
@@ -0,0 +1,245 @@
1
+ require "spec_helper"
2
+ require_relative "../../fixtures/geolexica_v2_concept"
3
+
4
+ RSpec.describe "YAMLS sequence (heterogeneous YAML stream)" do
5
+ let(:v2_yaml) do
6
+ <<~YAMLS
7
+ ---
8
+ data:
9
+ identifier: 3.5.8.8
10
+ localized_concepts:
11
+ eng: fbe1444a-7c11-555e-bb1b-680a4e6f2502
12
+ id: 0171b198-d068-53d9-8741-fb87e6755d62
13
+
14
+ ---
15
+ data:
16
+ definition:
17
+ - content: characteristic of a financial model that requires users to enter into an agreement prior to receiving services
18
+ examples: []
19
+ notes:
20
+ - content: The agreement can be associated with fees.
21
+ - content: The agreement can be minimal.
22
+ sources:
23
+ - origin:
24
+ ref: ISO/TS 14812:2022
25
+ locality:
26
+ type: clause
27
+ reference_from: 3.5.8.8
28
+ link: https://www.iso.org/standard/79779.html
29
+ type: authoritative
30
+ terms:
31
+ - type: expression
32
+ normative_status: preferred
33
+ designation: membership-based
34
+ language_code: eng
35
+ entry_status: valid
36
+ id: fbe1444a-7c11-555e-bb1b-680a4e6f2502
37
+ YAMLS
38
+ end
39
+
40
+ describe "parsing heterogeneous YAML stream" do
41
+ subject(:managed) { GeolexicaV2::ManagedConcept.from_yamls(v2_yaml) }
42
+
43
+ it "parses document 0 as ConceptIndex" do
44
+ expect(managed.index).to be_a(GeolexicaV2::ConceptIndex)
45
+ expect(managed.index.id).to eq("0171b198-d068-53d9-8741-fb87e6755d62")
46
+ end
47
+
48
+ it "parses ConceptIndex data fields" do
49
+ expect(managed.index.data.identifier).to eq("3.5.8.8")
50
+ expect(managed.index.data.localized_concepts).to eq(
51
+ "eng" => "fbe1444a-7c11-555e-bb1b-680a4e6f2502",
52
+ )
53
+ end
54
+
55
+ it "parses document 1+ as LocalizedConcept collection" do
56
+ expect(managed.localized).to be_an(Array)
57
+ expect(managed.localized.length).to eq(1)
58
+ end
59
+
60
+ it "parses LocalizedConcept fields" do
61
+ lc = managed.localized.first
62
+ expect(lc).to be_a(GeolexicaV2::LocalizedConcept)
63
+ expect(lc.id).to eq("fbe1444a-7c11-555e-bb1b-680a4e6f2502")
64
+ expect(lc.data.language_code).to eq("eng")
65
+ expect(lc.data.entry_status).to eq("valid")
66
+ end
67
+
68
+ it "parses LocalizedConcept definition sequence" do
69
+ lc = managed.localized.first
70
+ expect(lc.data.definition).to be_an(Array)
71
+ expect(lc.data.definition.first.content).to include("characteristic of a financial model")
72
+ end
73
+
74
+ it "parses LocalizedConcept terms sequence" do
75
+ lc = managed.localized.first
76
+ expect(lc.data.terms.first.designation).to eq("membership-based")
77
+ expect(lc.data.terms.first.type).to eq("expression")
78
+ end
79
+
80
+ it "parses LocalizedConcept notes sequence" do
81
+ lc = managed.localized.first
82
+ expect(lc.data.notes.length).to eq(2)
83
+ expect(lc.data.notes.first.content).to eq("The agreement can be associated with fees.")
84
+ end
85
+
86
+ it "parses LocalizedConcept sources with nested origin" do
87
+ lc = managed.localized.first
88
+ source = lc.data.sources.first
89
+ expect(source.type).to eq("authoritative")
90
+ expect(source.origin.ref).to eq("ISO/TS 14812:2022")
91
+ expect(source.origin.locality.type).to eq("clause")
92
+ expect(source.origin.locality.reference_from).to eq("3.5.8.8")
93
+ end
94
+
95
+ it "parses empty examples array" do
96
+ lc = managed.localized.first
97
+ expect(lc.data.examples).to eq([])
98
+ end
99
+ end
100
+
101
+ describe "serialization" do
102
+ subject(:managed) { GeolexicaV2::ManagedConcept.from_yamls(v2_yaml) }
103
+
104
+ it "serializes back to a YAML stream with 2 documents" do
105
+ output = managed.to_yamls
106
+ docs = output.split(/^---\s*$/).reject do |d|
107
+ d.strip.empty?
108
+ end.map { |d| YAML.safe_load("---\n#{d}") }
109
+ expect(docs.length).to eq(2)
110
+ end
111
+
112
+ it "round-trips index data" do
113
+ output = managed.to_yamls
114
+ managed2 = GeolexicaV2::ManagedConcept.from_yamls(output)
115
+
116
+ expect(managed2.index.id).to eq(managed.index.id)
117
+ expect(managed2.index.data.identifier).to eq(managed.index.data.identifier)
118
+ expect(managed2.index.data.localized_concepts).to eq(managed.index.data.localized_concepts)
119
+ end
120
+
121
+ it "round-trips localized concept data" do
122
+ output = managed.to_yamls
123
+ managed2 = GeolexicaV2::ManagedConcept.from_yamls(output)
124
+
125
+ lc = managed2.localized.first
126
+ expect(lc.id).to eq(managed.localized.first.id)
127
+ expect(lc.data.language_code).to eq("eng")
128
+ expect(lc.data.definition.first.content).to include("characteristic of a financial model")
129
+ expect(lc.data.terms.first.designation).to eq("membership-based")
130
+ expect(lc.data.notes.length).to eq(2)
131
+ expect(lc.data.sources.first.origin.ref).to eq("ISO/TS 14812:2022")
132
+ end
133
+
134
+ it "round-trips empty arrays" do
135
+ output = managed.to_yamls
136
+ managed2 = GeolexicaV2::ManagedConcept.from_yamls(output)
137
+ expect(managed2.localized.first.data.examples).to eq([])
138
+ end
139
+ end
140
+
141
+ describe "parsing actual geolexica v2 file" do
142
+ let(:v2_file) do
143
+ File.read(File.expand_path("../../fixtures/geolexica_v2_sample.yaml",
144
+ __dir__))
145
+ end
146
+
147
+ it "parses the real geolexica v2 file" do
148
+ managed = GeolexicaV2::ManagedConcept.from_yamls(v2_file)
149
+ expect(managed.index.data.identifier).to eq("3.5.8.8")
150
+ expect(managed.localized.first.data.language_code).to eq("eng")
151
+ expect(managed.localized.first.data.terms.first.designation).to eq("membership-based")
152
+ end
153
+ end
154
+
155
+ describe "YAMLS sequence with 3 documents" do
156
+ let(:three_doc_yaml) do
157
+ <<~YAMLS
158
+ ---
159
+ data:
160
+ identifier: 3.7.1.5
161
+ localized_concepts:
162
+ eng: doc1-eng-id
163
+ id: doc0-id
164
+
165
+ ---
166
+ data:
167
+ definition:
168
+ - content: First localized concept
169
+ examples: []
170
+ notes: []
171
+ sources: []
172
+ terms:
173
+ - type: expression
174
+ normative_status: preferred
175
+ designation: term one
176
+ language_code: eng
177
+ entry_status: valid
178
+ id: doc1-eng-id
179
+
180
+ ---
181
+ data:
182
+ definition:
183
+ - content: Second localized concept (French)
184
+ examples: []
185
+ notes: []
186
+ sources: []
187
+ terms:
188
+ - type: expression
189
+ normative_status: preferred
190
+ designation: terme un
191
+ language_code: fra
192
+ entry_status: valid
193
+ id: doc1-fra-id
194
+ YAMLS
195
+ end
196
+
197
+ it "parses 1 index + 2 localized concepts" do
198
+ managed = GeolexicaV2::ManagedConcept.from_yamls(three_doc_yaml)
199
+ expect(managed.index.data.identifier).to eq("3.7.1.5")
200
+ expect(managed.localized.length).to eq(2)
201
+ expect(managed.localized[0].data.language_code).to eq("eng")
202
+ expect(managed.localized[1].data.language_code).to eq("fra")
203
+ end
204
+
205
+ it "round-trips 3 documents" do
206
+ managed = GeolexicaV2::ManagedConcept.from_yamls(three_doc_yaml)
207
+ output = managed.to_yamls
208
+ managed2 = GeolexicaV2::ManagedConcept.from_yamls(output)
209
+
210
+ expect(managed2.localized.length).to eq(2)
211
+ expect(managed2.localized[0].data.terms.first.designation).to eq("term one")
212
+ expect(managed2.localized[1].data.terms.first.designation).to eq("terme un")
213
+ end
214
+ end
215
+
216
+ describe "ManagedConceptCollection (directory of v2 files)" do
217
+ let(:fixture_dir) { File.expand_path("../../fixtures", __dir__) }
218
+ let(:v2_files) do
219
+ %w[geolexica_v2_sample.yaml geolexica_v2_sample2.yaml].map do |f|
220
+ File.join(fixture_dir, f)
221
+ end
222
+ end
223
+
224
+ it "loads each v2 file as a separate ManagedConcept" do
225
+ concepts = v2_files.map do |f|
226
+ GeolexicaV2::ManagedConcept.from_yamls(File.read(f))
227
+ end
228
+ expect(concepts.length).to eq(2)
229
+ concepts.each do |concept|
230
+ expect(concept.index).to be_a(GeolexicaV2::ConceptIndex)
231
+ expect(concept.localized).to be_an(Array)
232
+ expect(concept.localized).not_to be_empty
233
+ end
234
+ end
235
+
236
+ it "can be assembled into a collection manually" do
237
+ concepts = v2_files.map do |f|
238
+ GeolexicaV2::ManagedConcept.from_yamls(File.read(f))
239
+ end
240
+ collection = GeolexicaV2::ManagedConceptCollection.new(concepts)
241
+ expect(collection.size).to eq(2)
242
+ expect(collection.first.index).to be_a(GeolexicaV2::ConceptIndex)
243
+ end
244
+ end
245
+ end
data/spec/spec_helper.rb CHANGED
@@ -33,6 +33,11 @@ RSpec.configure do |config|
33
33
  Lutaml::Model::GlobalRegister.instance.reset if defined?(Lutaml::Model::GlobalRegister)
34
34
  end
35
35
 
36
+ # Reset adapter state between adapter-switching tests
37
+ config.after(:each, :adapter_test) do
38
+ Lutaml::Model::AdapterScope.reset! if defined?(Lutaml::Model::AdapterScope)
39
+ end
40
+
36
41
  # After each test, ensure :xsd context exists
37
42
  # Some specs call GlobalContext.reset! which removes :xsd context
38
43
  # but :xsd register (from lutaml-xsd) needs its context to exist
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lutaml-model
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-01 00:00:00.000000000 Z
11
+ date: 2026-05-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -189,7 +189,6 @@ files:
189
189
  - bench/bench_niso.rb
190
190
  - bench/bench_sts.rb
191
191
  - bench/bench_unitsml.rb
192
- - bench/bench_uniword.rb
193
192
  - bench/bench_xmi.rb
194
193
  - bench/gate_config.rb
195
194
  - benchmark/quick_benchmark.rb
@@ -290,6 +289,7 @@ files:
290
289
  - docs/model_transforms.adoc
291
290
  - docs/namespace-management.adoc
292
291
  - docs/xml-schema-qualification.md
292
+ - docs/yamls_sequence.adoc
293
293
  - exe/lutaml-model
294
294
  - flake.lock
295
295
  - flake.nix
@@ -373,6 +373,8 @@ files:
373
373
  - lib/lutaml/key_value/transformation/value_serializer.rb
374
374
  - lib/lutaml/key_value/transformation_builder.rb
375
375
  - lib/lutaml/model.rb
376
+ - lib/lutaml/model/adapter_resolver.rb
377
+ - lib/lutaml/model/adapter_scope.rb
376
378
  - lib/lutaml/model/attribute.rb
377
379
  - lib/lutaml/model/attribute_validator.rb
378
380
  - lib/lutaml/model/cached_type_resolver.rb
@@ -809,6 +811,8 @@ files:
809
811
  - lib/lutaml/yamls/adapter/mapping_rule.rb
810
812
  - lib/lutaml/yamls/adapter/standard_adapter.rb
811
813
  - lib/lutaml/yamls/adapter/transform.rb
814
+ - lib/lutaml/yamls/adapter/yamls_sequence.rb
815
+ - lib/lutaml/yamls/adapter/yamls_sequence_rule.rb
812
816
  - lib/tasks/benchmark_runner.rb
813
817
  - lib/tasks/memory_profile.rb
814
818
  - lib/tasks/performance.rake
@@ -827,6 +831,9 @@ files:
827
831
  - spec/ceramic_spec.rb
828
832
  - spec/fixtures/address.rb
829
833
  - spec/fixtures/ceramic.rb
834
+ - spec/fixtures/geolexica_v2_concept.rb
835
+ - spec/fixtures/geolexica_v2_sample.yaml
836
+ - spec/fixtures/geolexica_v2_sample2.yaml
830
837
  - spec/fixtures/liquid_templates/_ceramic.liquid
831
838
  - spec/fixtures/liquid_templates/_ceramics.liquid
832
839
  - spec/fixtures/liquid_templates/_ceramics_in_one.liquid
@@ -1500,6 +1507,7 @@ files:
1500
1507
  - spec/fixtures/xml/test_schema.xsd
1501
1508
  - spec/fixtures/xml/user.xsd
1502
1509
  - spec/fixtures/xml/valid_math_document.xml
1510
+ - spec/fixtures/yamls_range_concept.rb
1503
1511
  - spec/lutaml/key_value/transformation/collection_serializer_spec.rb
1504
1512
  - spec/lutaml/key_value/transformation/rule_compiler_spec.rb
1505
1513
  - spec/lutaml/key_value/transformation/value_serializer_spec.rb
@@ -1622,6 +1630,8 @@ files:
1622
1630
  - spec/lutaml/model/xsd_type_validation_spec.rb
1623
1631
  - spec/lutaml/model/yaml_adapter_spec.rb
1624
1632
  - spec/lutaml/model/yamls/standard_adapter_spec.rb
1633
+ - spec/lutaml/model/yamls_range_spec.rb
1634
+ - spec/lutaml/model/yamls_sequence_spec.rb
1625
1635
  - spec/lutaml/model/yamls_spec.rb
1626
1636
  - spec/lutaml/model_spec.rb
1627
1637
  - spec/lutaml/xml/adapter/nokogiri_adapter_spec.rb
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # Benchmark: Uniword (OOXML/DOCX document parsing)
5
- # Gate: parse time < 30s for ISO-690 (4.8MB document.xml)
6
- #
7
- # Usage:
8
- # UNIWORD_DIR=/path/to/uniword ITERATIONS=5 bundle exec ruby tmp/bench/bench_uniword.rb
9
- # BENCH_JSON=/tmp/results.json bundle exec ruby tmp/bench/bench_uniword.rb
10
-
11
- require_relative "bench_common"
12
- include BenchCommon
13
-
14
- print_header("Uniword Benchmark — OOXML/DOCX document parsing")
15
-
16
- uniword_root = ENV["UNIWORD_DIR"] || "/Users/mulgogi/src/mn/uniword"
17
- $LOAD_PATH.unshift("#{uniword_root}/lib")
18
- require "uniword"
19
- require "zip"
20
-
21
- iso_file = "#{uniword_root}/spec/fixtures/uniword-private/fixtures/iso/ISO_690_2021-Word_document(en).docx"
22
-
23
- unless File.exist?(iso_file)
24
- puts " ISO 690 fixture not found: #{iso_file}"
25
- puts " Ensure uniword-private submodule is initialized."
26
- exit 1
27
- end
28
-
29
- # Extract document.xml from DOCX
30
- xml_content = nil
31
- Zip::File.open(iso_file) do |zip|
32
- entry = zip.find_entry("word/document.xml")
33
- xml_content = entry.get_input_stream.read if entry
34
- end
35
-
36
- file_size_kb = File.size(iso_file) / 1024.0
37
- xml_size_kb = xml_content.bytesize / 1024.0
38
- puts " File: #{File.basename(iso_file)} (#{file_size_kb.round(0)}KB)"
39
- puts " document.xml: #{xml_size_kb.round(0)}KB"
40
- puts
41
-
42
- results = {}
43
- results[:iso690] = measure("Uniword ISO 690 (#{xml_size_kb.round(0)}KB)") do
44
- Uniword::Wordprocessingml::DocumentRoot.from_xml(xml_content)
45
- end
46
-
47
- # Also test with a simpler document if available
48
- demo_file = "#{uniword_root}/examples/demo_formal_integral_roundtrip_spec.docx"
49
- if File.exist?(demo_file)
50
- demo_xml = nil
51
- Zip::File.open(demo_file) do |zip|
52
- entry = zip.find_entry("word/document.xml")
53
- demo_xml = entry.get_input_stream.read if entry
54
- end
55
- if demo_xml
56
- demo_size_kb = demo_xml.bytesize / 1024.0
57
- results[:demo] = measure("Uniword demo_formal (#{demo_size_kb.round(0)}KB)") do
58
- Uniword::Wordprocessingml::DocumentRoot.from_xml(demo_xml)
59
- end
60
- end
61
- end
62
-
63
- puts "\n Gate checks:"
64
- if results[:iso690]
65
- status = results[:iso690][:avg_time] < 30.0 ? "PASS" : "FAIL"
66
- printf " ISO-690 < 30s: %s (%.3fs)\n", status, results[:iso690][:avg_time]
67
- end
68
-
69
- write_results_json(json_output_path, results) if json_output_path