canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -0,0 +1,327 @@
1
+ ---
2
+ title: Environment Configuration
3
+ parent: Features
4
+ nav_order: 5
5
+ has_children: true
6
+ ---
7
+ = Environment configuration
8
+ :toc:
9
+ :toclevels: 3
10
+
11
+ == Purpose
12
+
13
+ Canon supports configuration through environment variables, allowing you to override default settings without modifying code. This is essential for CI/CD pipelines, containerized environments, and different deployment scenarios.
14
+
15
+ **Critical**: Environment variables work uniformly across **all interfaces** - CLI, Ruby API, and RSpec.
16
+
17
+ == Priority chain
18
+
19
+ Configuration values are resolved using the following priority (highest to lowest):
20
+
21
+ . **Environment Variables** (highest priority)
22
+ . **Programmatic Configuration** (via `Canon::Config`)
23
+ . **Default Values** (lowest priority)
24
+
25
+ Environment variables always override programmatic settings, which in turn override defaults.
26
+
27
+ == Naming convention
28
+
29
+ Environment variables follow a consistent naming pattern:
30
+
31
+ [source]
32
+ ----
33
+ CANON_{FORMAT}_{CONFIG_TYPE}_{ATTRIBUTE}
34
+ ----
35
+
36
+ Where:
37
+
38
+ * `FORMAT`: `XML`, `HTML`, `JSON`, `YAML`, or `STRING`
39
+ * `CONFIG_TYPE`: `DIFF` or `MATCH`
40
+ * `ATTRIBUTE`: The configuration attribute name (e.g., `ALGORITHM`, `MODE`, `PROFILE`)
41
+
42
+ === Global variables
43
+
44
+ You can also use global environment variables that apply to all formats and config types by omitting both the `{FORMAT}` and `{CONFIG_TYPE}` segments:
45
+
46
+ [source]
47
+ ----
48
+ CANON_{ATTRIBUTE}
49
+ ----
50
+
51
+ Global variables are overridden by format-specific variables.
52
+
53
+ == Core environment variables
54
+
55
+ === Diff algorithm
56
+
57
+ Canon supports two diff algorithms:
58
+
59
+ * **`dom`**: DOM-based tree diff (default, stable)
60
+ * **`semantic`**: Semantic tree diff (experimental, more sophisticated)
61
+
62
+ [source,bash]
63
+ ----
64
+ # Set algorithm for XML diff
65
+ export CANON_XML_DIFF_ALGORITHM=semantic
66
+
67
+ # Set algorithm for HTML diff
68
+ export CANON_HTML_DIFF_ALGORITHM=dom
69
+
70
+ # Set globally for all formats
71
+ export CANON_ALGORITHM=semantic
72
+ ----
73
+
74
+ Valid values: `dom`, `semantic`
75
+
76
+ === Diff mode
77
+
78
+ [source,bash]
79
+ ----
80
+ # Set diff mode for JSON
81
+ export CANON_JSON_DIFF_MODE=by_object
82
+
83
+ # Set diff mode for YAML
84
+ export CANON_YAML_DIFF_MODE=by_line
85
+ ----
86
+
87
+ Valid values: `by_line`, `by_object`
88
+
89
+ === Color output
90
+
91
+ [source,bash]
92
+ ----
93
+ # Disable color output for XML
94
+ export CANON_XML_DIFF_USE_COLOR=false
95
+
96
+ # Enable color output for HTML
97
+ export CANON_HTML_DIFF_USE_COLOR=true
98
+ ----
99
+
100
+ Valid values: `true`, `false`, `1`, `0`, `yes`, `no`
101
+
102
+ === Context and grouping
103
+
104
+ [source,bash]
105
+ ----
106
+ # Set context lines for XML diff
107
+ export CANON_XML_DIFF_CONTEXT_LINES=5
108
+
109
+ # Set grouping lines for XML diff
110
+ export CANON_XML_DIFF_GROUPING_LINES=20
111
+ ----
112
+
113
+ Valid values: Any positive integer
114
+
115
+ === Show diffs filter
116
+
117
+ [source,bash]
118
+ ----
119
+ # Show only informative diffs
120
+ export CANON_XML_DIFF_SHOW_DIFFS=informative
121
+
122
+ # Show all diffs
123
+ export CANON_XML_DIFF_SHOW_DIFFS=all
124
+ ----
125
+
126
+ Valid values: `all`, `informative`, `normative`
127
+
128
+ === Verbose output
129
+
130
+ [source,bash]
131
+ ----
132
+ # Enable verbose diff output
133
+ export CANON_XML_DIFF_VERBOSE_DIFF=true
134
+ ----
135
+
136
+ Valid values: `true`, `false`, `1`, `0`, `yes`, `no`
137
+
138
+ == Size limits
139
+
140
+ Canon provides configurable size limits to prevent hangs or excessive resource usage when processing very large files.
141
+
142
+ See link:size-limits.adoc[Size Limits] for detailed configuration.
143
+
144
+ Key variables:
145
+
146
+ * `CANON_MAX_FILE_SIZE` - Maximum file size in bytes (default: 5,242,880 bytes = 5 MiB)
147
+ * `CANON_MAX_NODE_COUNT` - Maximum tree node count (default: 10,000)
148
+ * `CANON_MAX_DIFF_LINES` - Maximum diff output lines (default: 10,000)
149
+
150
+ == Override system
151
+
152
+ See link:override-system.adoc[Override System] for how environment variables interact with programmatic configuration.
153
+
154
+ == Usage across interfaces
155
+
156
+ === CLI
157
+
158
+ Environment variables automatically affect CLI commands:
159
+
160
+ [source,bash]
161
+ ----
162
+ # Set algorithm via ENV
163
+ export CANON_ALGORITHM=semantic
164
+
165
+ # CLI uses the ENV setting
166
+ canon diff file1.xml file2.xml --verbose
167
+ ----
168
+
169
+ === Ruby API
170
+
171
+ Environment variables are applied when creating `Canon::Config`:
172
+
173
+ [source,ruby]
174
+ ----
175
+ # Environment variable is set
176
+ ENV['CANON_XML_DIFF_ALGORITHM'] = 'semantic'
177
+
178
+ # Config respects ENV variable
179
+ config = Canon::Config.new
180
+ puts config.xml.diff.algorithm # => :semantic
181
+
182
+ # Programmatic setting is ignored when ENV is set
183
+ config.xml.diff.algorithm = :dom
184
+ puts config.xml.diff.algorithm # => :semantic (ENV wins)
185
+ ----
186
+
187
+ === RSpec
188
+
189
+ Environment variables work with RSpec matchers:
190
+
191
+ [source,ruby]
192
+ ----
193
+ # In spec_helper.rb, set defaults
194
+ Canon::RSpecMatchers.configure do |config|
195
+ config.xml.diff.algorithm = :dom
196
+ end
197
+
198
+ # In shell, override for specific test run
199
+ # CANON_ALGORITHM=semantic bundle exec rspec
200
+ ----
201
+
202
+ == Type conversion
203
+
204
+ Environment variable values are automatically converted to the appropriate Ruby types:
205
+
206
+ === Boolean values
207
+
208
+ Accepted values for boolean attributes:
209
+
210
+ * **True**: `true`, `1`, `yes`
211
+ * **False**: `false`, `0`, `no`
212
+
213
+ Boolean values are case-insensitive (for example, `TRUE` and `true` are equivalent).
214
+
215
+ === Integer values
216
+
217
+ Any valid integer string is converted to an integer:
218
+
219
+ [source,bash]
220
+ ----
221
+ export CANON_CONTEXT_LINES=15 # Converted to Integer 15
222
+ ----
223
+
224
+ === Symbol values
225
+
226
+ String values are converted to symbols:
227
+
228
+ [source,bash]
229
+ ----
230
+ export CANON_ALGORITHM=semantic # Converted to Symbol :semantic
231
+ ----
232
+
233
+ == Common scenarios
234
+
235
+ === CI/CD environment
236
+
237
+ [source,bash]
238
+ ----
239
+ # In a shell step of your CI workflow (e.g. a `run:` block in .github/workflows/test.yml)
240
+ export CANON_USE_COLOR=false
241
+ export CANON_ALGORITHM=semantic
242
+ export CANON_SHOW_COMPARE=true
243
+
244
+ bundle exec rspec
245
+ ----
246
+
247
+ === Docker container
248
+
249
+ [source,dockerfile]
250
+ ----
251
+ # Dockerfile
252
+ ENV CANON_XML_DIFF_ALGORITHM=semantic
253
+ ENV CANON_USE_COLOR=false
254
+ ENV CANON_CONTEXT_LINES=5
255
+ ----
256
+
257
+ === Different environments
258
+
259
+ [source,bash]
260
+ ----
261
+ # Development
262
+ export CANON_VERBOSE_DIFF=true
263
+ export CANON_USE_COLOR=true
264
+
265
+ # Production
266
+ export CANON_VERBOSE_DIFF=false
267
+ export CANON_USE_COLOR=false
268
+ export CANON_XML_MATCH_PROFILE=strict
269
+ ----
270
+
271
+ === Format-specific configuration
272
+
273
+ [source,bash]
274
+ ----
275
+ # XML uses semantic diff
276
+ export CANON_XML_DIFF_ALGORITHM=semantic
277
+
278
+ # HTML uses DOM diff
279
+ export CANON_HTML_DIFF_ALGORITHM=dom
280
+
281
+ # All formats disable color
282
+ export CANON_USE_COLOR=false
283
+ ----
284
+
285
+ == Complete variable reference
286
+
287
+ See link:../../reference/environment-variables.adoc[Environment Variables Reference] for a complete table of all environment variables.
288
+
289
+ == Troubleshooting
290
+
291
+ === ENV variable not taking effect
292
+
293
+ Check the priority chain. If a programmatic value seems to override ENV, verify:
294
+
295
+ . The ENV variable is set before creating the Config instance
296
+ . The variable name follows the correct naming convention
297
+ . The value is valid for the attribute type
298
+
299
+ === Type conversion errors
300
+
301
+ If you encounter type conversion errors:
302
+
303
+ . Check that boolean values use accepted strings (`true`, `false`, `1`, `0`, `yes`, `no`)
304
+ . Ensure integer values are valid integers
305
+ . Verify symbol values don't contain special characters
306
+
307
+ === Debugging
308
+
309
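+ You can inspect the resolver to see which values come from ENV. This reaches into internal state via `instance_variable_get`, so treat it as a debugging aid only: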
310
+
311
+ [source,ruby]
312
+ ----
313
+ config = Canon::Config.new
314
+ resolver = config.xml.diff.instance_variable_get(:@resolver)
315
+
316
+ puts "ENV values: #{resolver.env.inspect}"
317
+ puts "Programmatic values: #{resolver.programmatic.inspect}"
318
+ puts "Defaults: #{resolver.defaults.inspect}"
319
+ puts "Source of algorithm: #{resolver.source_for(:algorithm)}"
320
+ ----
321
+
322
+ == See also
323
+
324
+ * link:size-limits.adoc[Size Limits] - File size and node count limits
325
+ * link:override-system.adoc[Override System] - How ENV vars override programmatic configuration and defaults
326
+ * link:../../reference/environment-variables.adoc[Environment Variables Reference] - Complete variable listing
327
+ * link:../../reference/options-across-interfaces.adoc[Options Across Interfaces] - How options map across CLI, Ruby, and RSpec