rng 0.1.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +63 -0
  3. data/.github/workflows/release.yml +8 -3
  4. data/.gitignore +11 -0
  5. data/.rubocop.yml +10 -7
  6. data/.rubocop_todo.yml +229 -23
  7. data/CHANGELOG.md +317 -0
  8. data/CLAUDE.md +139 -0
  9. data/Gemfile +11 -12
  10. data/README.adoc +1538 -11
  11. data/Rakefile +11 -3
  12. data/docs/Gemfile +8 -0
  13. data/docs/_config.yml +23 -0
  14. data/docs/getting-started/index.adoc +75 -0
  15. data/docs/guides/error-handling.adoc +137 -0
  16. data/docs/guides/external-references.adoc +128 -0
  17. data/docs/guides/index.adoc +24 -0
  18. data/docs/guides/parsing-rnc.adoc +141 -0
  19. data/docs/guides/parsing-rng-xml.adoc +81 -0
  20. data/docs/guides/rng-to-rnc.adoc +101 -0
  21. data/docs/guides/validation.adoc +85 -0
  22. data/docs/index.adoc +52 -0
  23. data/docs/reference/api.adoc +126 -0
  24. data/docs/reference/cli.adoc +182 -0
  25. data/docs/understanding/architecture.adoc +58 -0
  26. data/docs/understanding/rng-vs-rnc.adoc +118 -0
  27. data/exe/rng +5 -0
  28. data/lib/rng/any_name.rb +10 -8
  29. data/lib/rng/attribute.rb +28 -26
  30. data/lib/rng/choice.rb +24 -24
  31. data/lib/rng/cli.rb +607 -0
  32. data/lib/rng/data.rb +10 -10
  33. data/lib/rng/datatype_declaration.rb +26 -0
  34. data/lib/rng/define.rb +44 -41
  35. data/lib/rng/div.rb +36 -0
  36. data/lib/rng/documentation.rb +9 -0
  37. data/lib/rng/element.rb +39 -37
  38. data/lib/rng/empty.rb +7 -7
  39. data/lib/rng/except.rb +25 -25
  40. data/lib/rng/external_ref.rb +8 -8
  41. data/lib/rng/external_ref_resolver.rb +582 -0
  42. data/lib/rng/foreign_attribute.rb +26 -0
  43. data/lib/rng/foreign_element.rb +33 -0
  44. data/lib/rng/grammar.rb +14 -12
  45. data/lib/rng/group.rb +26 -24
  46. data/lib/rng/include.rb +5 -6
  47. data/lib/rng/include_processor.rb +461 -0
  48. data/lib/rng/interleave.rb +23 -23
  49. data/lib/rng/list.rb +22 -22
  50. data/lib/rng/mixed.rb +23 -23
  51. data/lib/rng/name.rb +7 -7
  52. data/lib/rng/namespace_declaration.rb +47 -0
  53. data/lib/rng/namespaces.rb +15 -0
  54. data/lib/rng/not_allowed.rb +7 -7
  55. data/lib/rng/ns_name.rb +9 -9
  56. data/lib/rng/one_or_more.rb +23 -23
  57. data/lib/rng/optional.rb +23 -23
  58. data/lib/rng/param.rb +8 -8
  59. data/lib/rng/parent_ref.rb +8 -8
  60. data/lib/rng/parse_tree_processor.rb +695 -0
  61. data/lib/rng/pattern.rb +7 -7
  62. data/lib/rng/ref.rb +8 -8
  63. data/lib/rng/rnc_builder.rb +927 -0
  64. data/lib/rng/rnc_parser.rb +605 -305
  65. data/lib/rng/rnc_to_rng_converter.rb +1408 -0
  66. data/lib/rng/schema_preamble.rb +73 -0
  67. data/lib/rng/schema_validator.rb +1622 -0
  68. data/lib/rng/start.rb +27 -25
  69. data/lib/rng/test_suite_parser.rb +168 -0
  70. data/lib/rng/text.rb +11 -8
  71. data/lib/rng/to_rnc.rb +4 -35
  72. data/lib/rng/value.rb +6 -7
  73. data/lib/rng/version.rb +1 -1
  74. data/lib/rng/zero_or_more.rb +23 -23
  75. data/lib/rng.rb +68 -17
  76. data/rng.gemspec +18 -19
  77. data/scripts/extract_spectest_resources.rb +96 -0
  78. data/spec/fixtures/compacttest.xml +2511 -0
  79. data/spec/fixtures/external/circular_a.rng +7 -0
  80. data/spec/fixtures/external/circular_b.rng +7 -0
  81. data/spec/fixtures/external/circular_main.rng +7 -0
  82. data/spec/fixtures/external/external_ref_lib.rng +7 -0
  83. data/spec/fixtures/external/external_ref_main.rng +7 -0
  84. data/spec/fixtures/external/include_lib.rng +7 -0
  85. data/spec/fixtures/external/include_main.rng +3 -0
  86. data/spec/fixtures/external/nested_chain.rng +6 -0
  87. data/spec/fixtures/external/nested_leaf.rng +7 -0
  88. data/spec/fixtures/external/nested_mid.rng +8 -0
  89. data/spec/fixtures/metanorma/3gpp.rnc +35 -0
  90. data/spec/fixtures/metanorma/3gpp.rng +105 -0
  91. data/spec/fixtures/metanorma/basicdoc.rnc +11 -0
  92. data/spec/fixtures/metanorma/bipm.rnc +148 -0
  93. data/spec/fixtures/metanorma/bipm.rng +376 -0
  94. data/spec/fixtures/metanorma/bsi.rnc +104 -0
  95. data/spec/fixtures/metanorma/bsi.rng +332 -0
  96. data/spec/fixtures/metanorma/csa.rnc +45 -0
  97. data/spec/fixtures/metanorma/csa.rng +131 -0
  98. data/spec/fixtures/metanorma/csd.rnc +43 -0
  99. data/spec/fixtures/metanorma/csd.rng +132 -0
  100. data/spec/fixtures/metanorma/gbstandard.rnc +99 -0
  101. data/spec/fixtures/metanorma/gbstandard.rng +316 -0
  102. data/spec/fixtures/metanorma/iec.rnc +49 -0
  103. data/spec/fixtures/metanorma/iec.rng +193 -0
  104. data/spec/fixtures/metanorma/ietf.rnc +275 -0
  105. data/spec/fixtures/metanorma/ietf.rng +925 -0
  106. data/spec/fixtures/metanorma/iho.rnc +58 -0
  107. data/spec/fixtures/metanorma/iho.rng +179 -0
  108. data/spec/fixtures/metanorma/isodoc.rnc +873 -0
  109. data/spec/fixtures/metanorma/isodoc.rng +2704 -0
  110. data/spec/fixtures/metanorma/isostandard-amd.rnc +43 -0
  111. data/spec/fixtures/metanorma/isostandard-amd.rng +108 -0
  112. data/spec/fixtures/metanorma/isostandard.rnc +166 -0
  113. data/spec/fixtures/metanorma/isostandard.rng +494 -0
  114. data/spec/fixtures/metanorma/itu.rnc +122 -0
  115. data/spec/fixtures/metanorma/itu.rng +377 -0
  116. data/spec/fixtures/metanorma/m3d.rnc +41 -0
  117. data/spec/fixtures/metanorma/m3d.rng +122 -0
  118. data/spec/fixtures/metanorma/mpfd.rnc +36 -0
  119. data/spec/fixtures/metanorma/mpfd.rng +95 -0
  120. data/spec/fixtures/metanorma/nist.rnc +77 -0
  121. data/spec/fixtures/metanorma/nist.rng +216 -0
  122. data/spec/fixtures/metanorma/ogc.rnc +51 -0
  123. data/spec/fixtures/metanorma/ogc.rng +151 -0
  124. data/spec/fixtures/metanorma/reqt.rnc +6 -0
  125. data/spec/fixtures/metanorma/rsd.rnc +36 -0
  126. data/spec/fixtures/metanorma/rsd.rng +95 -0
  127. data/spec/fixtures/metanorma/un.rnc +103 -0
  128. data/spec/fixtures/metanorma/un.rng +367 -0
  129. data/spec/fixtures/rnc/base.rnc +4 -0
  130. data/spec/fixtures/rnc/grammar_with_trailing.rnc +8 -0
  131. data/spec/fixtures/rnc/main_include_trailing.rnc +3 -0
  132. data/spec/fixtures/rnc/main_with_include.rnc +5 -0
  133. data/spec/fixtures/rnc/test_augment.rnc +10 -0
  134. data/spec/fixtures/rnc/test_isodoc_simple.rnc +9 -0
  135. data/spec/fixtures/rnc/top_level_include.rnc +8 -0
  136. data/spec/fixtures/spectest_external/case_10_4.7/x +3 -0
  137. data/spec/fixtures/spectest_external/case_10_4.7/y +7 -0
  138. data/spec/fixtures/spectest_external/case_11_4.7/x +3 -0
  139. data/spec/fixtures/spectest_external/case_12_4.7/x +3 -0
  140. data/spec/fixtures/spectest_external/case_13_4.7/x +3 -0
  141. data/spec/fixtures/spectest_external/case_13_4.7/y +3 -0
  142. data/spec/fixtures/spectest_external/case_14_4.7/x +7 -0
  143. data/spec/fixtures/spectest_external/case_15_4.7/x +7 -0
  144. data/spec/fixtures/spectest_external/case_16_4.7/x +5 -0
  145. data/spec/fixtures/spectest_external/case_17_4.7/x +5 -0
  146. data/spec/fixtures/spectest_external/case_18_4.7/x +7 -0
  147. data/spec/fixtures/spectest_external/case_19_4.7/level1.rng +9 -0
  148. data/spec/fixtures/spectest_external/case_19_4.7/level2.rng +7 -0
  149. data/spec/fixtures/spectest_external/case_1_4.5/sub1/x +3 -0
  150. data/spec/fixtures/spectest_external/case_1_4.5/sub3/x +3 -0
  151. data/spec/fixtures/spectest_external/case_1_4.5/x +3 -0
  152. data/spec/fixtures/spectest_external/case_20_4.6/x +3 -0
  153. data/spec/fixtures/spectest_external/case_2_4.5/x +3 -0
  154. data/spec/fixtures/spectest_external/case_3_4.6/x +3 -0
  155. data/spec/fixtures/spectest_external/case_4_4.6/x +3 -0
  156. data/spec/fixtures/spectest_external/case_5_4.6/x +1 -0
  157. data/spec/fixtures/spectest_external/case_6_4.6/x +5 -0
  158. data/spec/fixtures/spectest_external/case_7_4.6/x +1 -0
  159. data/spec/fixtures/spectest_external/case_7_4.6/y +1 -0
  160. data/spec/fixtures/spectest_external/case_8_4.7/x +7 -0
  161. data/spec/fixtures/spectest_external/case_9_4.7/x +7 -0
  162. data/spec/fixtures/spectest_external/resources.json +149 -0
  163. data/spec/rng/advanced_rnc_spec.rb +101 -0
  164. data/spec/rng/compacttest_spec.rb +197 -0
  165. data/spec/rng/datatype_declaration_spec.rb +28 -0
  166. data/spec/rng/div_spec.rb +207 -0
  167. data/spec/rng/external_ref_resolver_spec.rb +122 -0
  168. data/spec/rng/metanorma_conversion_spec.rb +159 -0
  169. data/spec/rng/namespace_declaration_spec.rb +60 -0
  170. data/spec/rng/namespace_support_spec.rb +199 -0
  171. data/spec/rng/rnc_parser_spec.rb +498 -22
  172. data/spec/rng/rnc_roundtrip_spec.rb +96 -82
  173. data/spec/rng/rng_generation_spec.rb +288 -0
  174. data/spec/rng/roundtrip_spec.rb +342 -0
  175. data/spec/rng/schema_preamble_spec.rb +145 -0
  176. data/spec/rng/schema_spec.rb +68 -64
  177. data/spec/rng/spectest_spec.rb +168 -90
  178. data/spec/rng_spec.rb +2 -2
  179. data/spec/spec_helper.rb +7 -42
  180. metadata +141 -8
data/CHANGELOG.md ADDED
@@ -0,0 +1,317 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Fixed
11
+ - **Critical**: Parser baseline bug preventing simple RNC patterns from parsing
12
+ - Added `standalone_pattern` rule to allow simple patterns at grammar level
13
+ - Fixed `grammar` rule to accept references, text, empty, and other standalone patterns
14
+ - Resolved fundamental parser limitation where `foo` and similar simple patterns failed
15
+ - Improved valid RNC parsing from 17/53 (32.1%) to 23/53 (43.4%): +6 tests (a 35% relative increase)
16
+ - Maintained invalid rejection at 29/31 (93.5%) and Metanorma at 21/21 (100%)
17
+ - **Critical**: Include processor string literal extraction bug
18
+ - Fixed `extract_string_literal()` to properly handle `[:string_parts]` structure
19
+ - Resolved 18 of 21 Metanorma schema parsing failures
20
+ - Improved test suite compliance from 34% to 49% (+8 tests)
21
+ - Parser whitespace handling to allow files starting with documentation comments
22
+ - Parser choice ordering to prioritize element_def over named_pattern
23
+
24
+ ### Added
25
+ - Documentation comments support (`##` syntax)
26
+ - Model classes now have `documentation` attribute (Element, Attribute, Define, Start)
27
+ - RncBuilder generates `##` comments from documentation
28
+ - RncParser properly handles leading documentation comments
29
+ - Full round-trip preservation (RNC → RNG → RNC)
30
+ - RNG XML uses `<a:documentation>` elements in annotations namespace
31
+ - Annotation support infrastructure (ForeignAttribute, ForeignElement model classes)
32
+ - Parser rules for annotation blocks `[ns:attr = "val"]`
33
+ - Processor extracts annotations from parse tree
34
+ - Converter generates XML foreign attributes/elements
35
+
36
+ ### Added
37
+
38
+ - **Namespace Support (Complete)**: Enhanced namespace handling with multiple declaration types
39
+ - Full support for prefixed namespace declarations (`namespace prefix = "uri"`)
40
+ - Support for default namespace with prefix (`default namespace prefix = "uri"`)
41
+ - Support for multiple namespace and datatype declarations in schema preamble
42
+ - New model classes for structured namespace handling:
43
+ - `Rng::NamespaceDeclaration` - Represents namespace declarations with OOP API
44
+ - `Rng::DatatypeDeclaration` - Represents datatype library declarations
45
+ - `Rng::SchemaPreamble` - Container for preamble declarations
46
+ - Clean separation of preamble parsing from grammar tree building
47
+ - Namespace prefix resolution in RNG XML output (prefixes resolved to URIs)
48
+
49
+ ### Changed
50
+
51
+ - **Parser architecture enhanced** for namespace support
52
+ - Parse tree structure now includes `:preamble_data` for namespace/datatype declarations
53
+ - ParseTreeProcessor extracts preamble into `SchemaPreamble` objects
54
+ - Grammar tree receives structured metadata (`:default_namespace`, `:namespace_map`, `:datatype_map`)
55
+ - RncToRngConverter resolves namespace prefixes to URIs in element and attribute names
56
+ - **Backward compatibility maintained** - Both legacy and new namespace formats work seamlessly
57
+ - Legacy format: `default namespace = "uri"` (still fully supported)
58
+ - New formats: `namespace prefix = "uri"` and `default namespace prefix = "uri"`
59
+ - Converter handles both old and new metadata formats transparently
60
+
61
+ ### Fixed
62
+
63
+ - Fixed syntax error in ParseTreeProcessor (extraneous `end` statement removed)
64
+ - Fixed namespace prefix resolution in RNG XML generation (prefixes now map to URIs)
65
+
66
+ ### Architecture
67
+
68
+ - Implemented model-driven approach for namespace declarations (OOP classes instead of Hash objects)
69
+ - Applied separation of concerns: preamble metadata stored separately from grammar tree
70
+ - Followed Open/Closed Principle: new functionality added without modifying existing code paths
71
+ - Maintained MECE (mutually exclusive, collectively exhaustive) structure: each component has one clear responsibility
72
+
73
+ ### Testing
74
+
75
+ - Created comprehensive test suite for namespace support (18 examples, all passing)
76
+ - Legacy compatibility tests (3 tests)
77
+ - New namespace declaration tests (6 tests)
78
+ - Datatype library tests (3 tests)
79
+ - Combined declarations tests (3 tests)
80
+ - Edge case tests (3 tests)
81
+ - All unit tests pass for new model classes (21 examples)
82
+ - Zero regressions in existing test suite
83
+ - Namespace prefixes correctly resolve to URIs in generated RNG XML
84
+
85
+ ## [0.3.0] - 2025-11-28
86
+
87
+ ### Added
88
+
89
+ - **Documentation Comments Support**: Full support for `##` syntax with round-trip conversion (RNC ↔ RNG ↔ RNC)
90
+ - Parse documentation comments from RNC files
91
+ - Generate `<a:documentation>` elements in RNG XML
92
+ - Regenerate `##` comments when converting back to RNC
93
+ - Supported contexts: element, attribute, define, start patterns
94
+ - Preserves multi-line documentation through all transformations
95
+
96
+ - **String Concatenation Support**: Parse-time string joining with `~` operator
97
+ - Concatenate string literals in all contexts: namespaces, URIs, values, parameters
98
+ - Multi-part concatenation: `"a" ~ "b" ~ "c"` joins to `"abc"`
99
+ - Supports whitespace around operator for readability
100
+ - Transparent concatenation - final schema contains joined strings
101
+
102
+ - **Escape Sequence Support**: RELAX NG Compact Syntax escape sequences
103
+ - Unicode code points in identifiers: `\x{HHHHHH}` syntax (1-6 hex digits)
104
+ - Unicode code points in string literals: `\x{HHHHHH}` syntax
105
+ - Character escapes in strings: `\"`, `\\`, `\n`, `\r`, `\t`
106
+ - Escaped backslash: `\\x{...}` stays literal (not converted)
107
+ - Backward compatible: regular identifiers work unchanged
108
+
109
+ ### Changed
110
+
111
+ - **Parse tree structure** for identifiers and strings (backward compatible)
112
+ - Identifiers and strings now parsed as character arrays to support escapes
113
+ - Old format: `{identifier: "foo"}`, `{string: "hello"}`
114
+ - New format: `{identifier_parts: [{char: "f"}, ...]}`, `{string_parts: [...]}`
115
+ - Converter transparently handles both old and new formats
116
+ - ParseTreeProcessor now normalizes `:patterns` to `:definitions` in flat grammars
117
+
118
+ ### Fixed
119
+
120
+ - **Critical**: Restored documentation comment parsing rules that were lost in previous versions
121
+ - Recovered 5 regressed tests (tests #47-51)
122
+ - Baseline restored from 13/53 to 18/53 (24.5% → 34.0%)
123
+
124
+ ### Technical Details
125
+
126
+ - **Test Results**: 17/53 valid RNC parsing (32.1%), 27/31 invalid rejection (87.1%)
127
+ - **Production Support**: 100% Metanorma schema support maintained (21/21 passing)
128
+ - **Files Modified**:
129
+ - `lib/rng/rnc_parser.rb` - Added escape sequence grammar rules
130
+ - `lib/rng/rnc_to_rng_converter.rb` - Added escape sequence processing
131
+ - `lib/rng/parse_tree_processor.rb` - Fixed flat grammar normalization
132
+ - **Minor Regression**: One test regression (18→17) due to parse tree structure changes
133
+ - **Escape Sequences**: Core functionality fully working (Unicode, character escapes)
134
+
135
+ ### Documentation
136
+
137
+ - Updated README.adoc with comprehensive documentation for both features
138
+ - Added syntax examples, usage patterns, and API documentation
139
+ - Created `IMPLEMENTATION_STATUS_PHASE4B_COMPLETE.md` with technical details
140
+ - Created `CONTINUATION_PLAN_PHASE4_COMPLETE.md` for future roadmap
141
+
142
+ ## [0.2.0] - 2025-11-24
143
+
144
+ ### Added
145
+
146
+ #### Major Features
147
+
148
+ - **RNC Compact Syntax Parser** - Complete implementation of RELAX NG Compact syntax parser
149
+ - Parses RNC schemas to internal object model
150
+ - Converts RNC to RNG XML format
151
+ - Supports all basic RNC patterns and constructs
152
+
153
+ - **RNC Generator** - Generate RNC compact syntax from object model
154
+ - `Rng.to_rnc()` method for converting schemas to compact syntax
155
+ - Clean, readable output formatting
156
+ - Round-trip conversion support (RNC → RNG → RNC)
157
+
158
+ - **Augmentation Operators** - Support for RELAX NG pattern augmentation
159
+ - Choice augmentation with `|=` operator
160
+ - Interleave augmentation with `&=` operator
161
+ - Generates proper `combine="choice"` and `combine="interleave"` attributes
162
+ - Works both inside and outside grammar blocks
163
+
164
+ - **Datatype Parameters** - Full support for XML Schema datatype constraints
165
+ - Pattern constraints: `xsd:string { pattern = "..." }`
166
+ - Range constraints: `xsd:int { minInclusive = "0" maxInclusive = "120" }`
167
+ - Length constraints: `xsd:string { length = "4" }`
168
+ - Multiple parameters per datatype
169
+ - All standard XML Schema parameters supported
170
+
171
+ #### RNC Parser Features
172
+
173
+ - Comment support (`#` line comments)
174
+ - Element and attribute definitions
175
+ - Named pattern definitions and references
176
+ - Start pattern declarations
177
+ - Occurrence markers (`?`, `*`, `+`)
178
+ - Choice operator (`|`)
179
+ - Sequence operator (`,`)
180
+ - Group patterns with parentheses
181
+ - Text and empty patterns
182
+ - Value literals
183
+ - Namespace declarations
184
+ - Datatype library support
185
+ - Mixed content patterns
186
+
187
+ #### API Enhancements
188
+
189
+ - `Rng.parse_rnc(rnc_string)` - Parse RNC compact syntax
190
+ - `Rng.to_rnc(schema)` - Convert schema to RNC format
191
+ - `Rng::RncParser.parse()` - Low-level RNC parsing
192
+ - `Rng::RncParser.to_rnc()` - Low-level RNC generation
193
+
194
+ ### Fixed
195
+
196
+ - Fixed gem loading with autoload to resolve circular dependencies
197
+ - Fixed Text element rendering in nested XML structures (upgraded lutaml-model to 0.7.7)
198
+ - Fixed attribute special value handling (empty, omitted, nil)
199
+ - Fixed element ordering in round-trip conversions
200
+ - Fixed pattern reference handling in choice and sequence contexts
201
+ - Fixed empty array detection in RNC builder
202
+ - Fixed occurrence marker duplication in content generation
203
+ - Fixed group definition parsing by correcting a typo
204
+ - **Fixed Nokogiri adapter auto-configuration** - The XML adapter is now automatically configured when the gem is loaded, eliminating the need for manual setup
205
+
206
+ ### Changed
207
+
208
+ - Upgraded lutaml-model dependency from 0.7.3 to 0.7.7
209
+ - Improved RNG to RNC conversion logic
210
+ - Enhanced error handling in parser
211
+ - Updated test suite with 78 additional tests
212
+ - Reorganized parser grammar rules for clarity
213
+
214
+ ### Documentation
215
+
216
+ - Added comprehensive README.adoc sections for new features
217
+ - Added augmentation operators documentation with examples
218
+ - Added datatype parameters documentation with examples
219
+ - Created TEST_RESULTS_PHASE7.md with detailed test analysis
220
+ - Updated IMPLEMENTATION_STATUS.md with phase completion details
221
+
222
+ ### Testing
223
+
224
+ - Added Metanorma schema test suite (63 tests)
225
+ - Added complex pattern tests (11 tests)
226
+ - Added error handling tests (7 tests)
227
+ - Added performance benchmarks (2 tests)
228
+ - All basic functionality tests passing (100%)
229
+ - Round-trip conversion tests passing
230
+ - **Verified 100% success rate parsing all 19 Metanorma RNG files**
231
+ - **2 out of 21 Metanorma RNC files parse successfully** (standalone schemas only)
232
+
233
+ ### Known Limitations
234
+
235
+ The following RNC features are not yet implemented (planned for future releases):
236
+
237
+ **CRITICAL (Blocks real-world usage):**
238
+
239
+ - **`include` directive** - External file inclusion (blocks ~90% of production RNC schemas)
240
+ - Affects: 19 out of 21 Metanorma RNC schemas fail due to this
241
+ - Workaround: Use RNG XML format or manually inline included content
242
+ - Status: **Planned for v0.3.0 with HIGH priority**
243
+
244
+ **MEDIUM Priority:**
245
+
246
+ - `div` elements - Organizational sections
247
+ - `externalRef` - External grammar references
248
+
249
+ **LOW Priority:**
250
+
251
+ - `parentRef` - Parent grammar references
252
+ - Annotations - `[ ... ]` metadata blocks
253
+ - Advanced pattern combinations (interleave, list, etc. from RNC source)
254
+
255
+ **Round-trip notes:**
256
+
257
+ - XML comments are not preserved (Lutaml::Model limitation)
258
+ - Attribute ordering may change (not semantically significant)
259
+ - Namespace prefixes may be reassigned (URIs preserved)
260
+
261
+ These limitations affect parsing of complex real-world RNC schemas, but:
262
+ - ✅ All RNG XML schemas parse perfectly (100% Metanorma compatibility)
263
+ - ✅ Basic to moderate RNC schemas work correctly
264
+ - ✅ RNC generation from object model works for all supported patterns
265
+
266
+ ## [0.1.2] - 2025-11-23
267
+
268
+ ### Initial Release
269
+
270
+ Basic RELAX NG XML (RNG) support:
271
+ - Parse RNG XML schemas
272
+ - Object model for all RELAX NG patterns
273
+ - Round-trip RNG XML conversion
274
+ - Integration with Lutaml ecosystem
275
+
276
+ ---
277
+
278
+ ## Release Notes
279
+
280
+ ### v0.2.0 - RNC Compact Syntax Support
281
+
282
+ This release adds comprehensive support for RELAX NG Compact syntax (RNC), making it much easier to work with RELAX NG schemas in Ruby. You can now:
283
+
284
+ 1. **Parse RNC schemas** directly with `Rng.parse_rnc()`
285
+ 2. **Generate RNC syntax** from object model with `Rng.to_rnc()`
286
+ 3. **Use augmentation operators** to extend pattern definitions
287
+ 4. **Constrain datatypes** with parameters like pattern, range, and length
288
+ 5. **Convert between formats** seamlessly (RNC ↔ RNG)
289
+
290
+ The implementation is production-ready for schemas of basic to moderate complexity. Complex schemas using advanced features like `include`, `div`, `externalRef`, and `parentRef` are not yet supported but are planned for v0.3.0.
291
+
292
+ ### Migration Guide from 0.1.x
293
+
294
+ No breaking changes. All existing code continues to work. New features are purely additive:
295
+
296
+ ```ruby
297
+ # New in 0.2.0: Parse RNC
298
+ schema = Rng.parse_rnc(File.read('schema.rnc'))
299
+
300
+ # New in 0.2.0: Generate RNC
301
+ rnc = Rng.to_rnc(schema)
302
+
303
+ # Existing: Parse RNG (still works)
304
+ schema = Rng.parse(File.read('schema.rng'))
305
+ ```
306
+
307
+ ### Performance
308
+
309
+ - RNC parsing: ~2.27ms average for moderate schemas
310
+ - Round-trip conversion: <5ms for most schemas
311
+ - No performance regressions in existing RNG parsing
312
+
313
+ ### Credits
314
+
315
+ - Development: Rng team
316
+ - Testing: Metanorma schema test suite
317
+ - Dependencies: lutaml-model 0.7.7+, parslet
data/CLAUDE.md ADDED
@@ -0,0 +1,139 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ RNG is a Ruby gem for parsing, manipulating, and converting RELAX NG schemas (both RNG XML and RNC compact syntax). It uses Lutaml::Model for the object model, Parslet for RNC parsing, and Nokogiri for XML processing.
8
+
9
+ ## Commands
10
+
11
+ ```bash
12
+ # Install dependencies
13
+ bundle install
14
+
15
+ # Run all tests
16
+ bundle exec rspec
17
+
18
+ # Run a specific test file
19
+ bundle exec rspec spec/rng/rnc_parser_spec.rb
20
+
21
+ # Run a specific test by line number
22
+ bundle exec rspec spec/rng/rnc_parser_spec.rb:42
23
+
24
+ # Run the official test suite (Jing-Trang compacttest.xml)
25
+ bundle exec rspec spec/rng/compacttest_spec.rb
26
+
27
+ # Run linter with auto-fix
28
+ bundle exec rubocop -A
29
+
30
+ # Run default task (tests + rubocop)
31
+ bundle exec rake
32
+ ```
33
+
34
+ ### CLI Commands
35
+
36
+ ```bash
37
+ # Validate a schema
38
+ rng validate schema.rng
39
+
40
+ # Validate an XML document against a schema
41
+ rng validate schema.rng document.xml
42
+
43
+ # Convert between RNG and RNC formats
44
+ rng convert schema.rng -o schema.rnc
45
+ rng convert schema.rnc -o schema.rng
46
+
47
+ # Parse and display schema structure
48
+ rng parse schema.rng
49
+
50
+ # Show schema information
51
+ rng info --statistics schema.rng
52
+ ```
53
+
54
+ ## Architecture
55
+
56
+ The library has two parsing paths and one generation path:
57
+
58
+ ### RNG XML Parsing
59
+ `Rng.parse()` → `Grammar.from_xml()` → Lutaml::Model with Nokogiri adapter
60
+
61
+ All RNG model classes inherit from `Lutaml::Model::Serializable` and define XML mappings via the `xml do` block.
62
+
63
+ ### RNC Compact Parsing
64
+ `Rng.parse_rnc()` → `RncParser.parse()` → `ParseTreeProcessor.normalize()` → `RncToRngConverter.convert()` → `Grammar.from_xml()`
65
+
66
+ The RNC parser is a Parslet-based PEG parser in `lib/rng/rnc_parser.rb`. Key supporting classes:
67
+ - `ParseTreeProcessor` (`lib/rng/parse_tree_processor.rb`) - Normalizes parse trees into consistent grammar structures
68
+ - `RncToRngConverter` (`lib/rng/rnc_to_rng_converter.rb`) - Converts parse trees to RNG XML using Nokogiri builder
69
+ - `IncludeProcessor` (`lib/rng/include_processor.rb`) - Handles file I/O and include directive resolution
70
+
71
+ ### RNG to RNC Generation
72
+ `Rng.to_rnc()` → `ToRnc.convert()` → `RncParser.to_rnc()` → `RncBuilder.build()`
73
+
74
+ `RncBuilder` (`lib/rng/rnc_builder.rb`) traverses the object model and generates RNC text.
75
+
76
+ ## Key Dependencies
77
+
78
+ - **lutaml-model** - Object model and XML serialization
79
+ - **nokogiri** - XML parsing and building
80
+ - **parslet** - RNC compact syntax parser
81
+ - **canon** - XML comparison matchers for tests (`be_xml_equivalent_with`)
82
+
83
+ ## Object Model Structure
84
+
85
+ The object model mirrors RELAX NG concepts:
86
+ - `Grammar` - Root container (can have start, define, element, include)
87
+ - `Start` - Entry point definition
88
+ - `Define` - Named pattern definitions
89
+ - `Element` / `Attribute` - XML structures
90
+ - Pattern classes: `Choice`, `Group`, `Interleave`, `Mixed`, `Optional`, `ZeroOrMore`, `OneOrMore`, `Text`, `Empty`, `Value`, `Data`, `List`
91
+ - Reference classes: `Ref`, `ParentRef`, `ExternalRef`
92
+ - Name classes: `Name`, `AnyName`, `NsName`, `Except`
93
+ - `Div` - Documentation and grouping container
94
+
95
+ ## Design Decisions
96
+
97
+ - **Foreign elements/attributes are NOT supported**: The RELAX NG spec allows elements and attributes from non-RNG namespaces as annotations. This library does not preserve or round-trip them. They are silently dropped during XML parsing and not stored in the object model. Tests containing foreign elements/attributes are skipped with an explicit message. Do not add `foreign_elements` or `foreign_attributes` attributes to model classes.
98
+
99
+ ## External Href Resolution
100
+
101
+ The library supports resolving external references via `Rng.parse(rng_xml, location: path, resolve_external: true)`:
102
+
103
+ - **`<include href="uri"/>`** at grammar level - merges definitions from external grammar
104
+ - **`<externalRef href="uri"/>`** at pattern level - replaces ref with content from external grammar's start pattern
105
+
106
+ ### Setting Up Test Fixtures from Jing-Trang
107
+
108
+ `spec/rng/spectest_spec.rb` has 22 pending tests that require external resources from Jing-Trang's test suite. To enable these tests:
109
+
110
+ 1. Ensure Jing-Trang is checked out at `~/src/external/jing-trang`
111
+
112
+ 2. Extract test fixtures:
113
+ ```bash
114
+ bundle exec rake fixtures:extract_spectest
115
+ # or
116
+ ruby scripts/extract_spectest_resources.rb
117
+ ```
118
+
119
+ This creates `spec/fixtures/spectest_external/` with 20 test cases, each in its own `case_N_<section>/` subdirectory (for example, `case_10_4.7/`).
120
+
121
+ 3. Note: Each spectest.xml test case has isolated resources (virtual file system). The Jing-Trang framework runs each test with its own set of resources. To fully enable these tests, spectest_spec.rb would need to be updated to copy resources for each test case before running.
122
+
123
+ ## Important Notes
124
+
125
+ - The Nokogiri adapter must be configured at load time: `Lutaml::Model::Config.configure { |c| c.xml_adapter = Lutaml::Model::Xml::NokogiriAdapter }`
126
+ - Ruby 3.0.0+ required
127
+ - The `RNG_VERBOSE=1` environment variable enables parser warnings
128
+ - The official test suite (`spec/rng/compacttest_spec.rb`) uses Jing-Trang's `compacttest.xml` with 87 test cases
129
+
130
+ ## Documentation Site
131
+
132
+ The gem has a Jekyll-based documentation site in `docs/`:
133
+ - `docs/index.adoc` - Home page
134
+ - `docs/getting-started/` - Installation and quick start
135
+ - `docs/guides/` - Parsing, conversion, validation guides
136
+ - `docs/reference/` - API and CLI reference
137
+ - `docs/understanding/` - Architecture and format comparison
138
+
139
+ Build docs locally: `cd docs && bundle exec jekyll serve`
data/Gemfile CHANGED
@@ -1,18 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- source "https://rubygems.org"
3
+ source 'https://rubygems.org'
4
4
 
5
5
  # Specify your gem's dependencies in rng.gemspec
6
6
  gemspec
7
7
 
8
- gem "diffy" # For generating human-readable diffs
9
- gem "equivalent-xml"
10
- gem "lutaml-model"
11
- gem "nokogiri"
12
- gem "rake", "~> 13.0"
13
- gem "rspec", "~> 3.0"
14
- gem "rubocop", "~> 1.21"
15
- gem "rubocop-performance", require: false
16
- gem "rubocop-rake", require: false
17
- gem "rubocop-rspec", require: false
18
- gem "xml-c14n"
8
+ gem 'benchmark'
9
+ gem 'canon'
10
+ gem 'lutaml-model', github: 'lutaml/lutaml-model', branch: 'main'
11
+ gem 'nokogiri'
12
+ gem 'rake'
13
+ gem 'rspec'
14
+ gem 'rubocop'
15
+ gem 'rubocop-performance'
16
+ gem 'rubocop-rake'
17
+ gem 'rubocop-rspec'