canon 0.1.16 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +6 -6
- data/docs/features/match-options/index.adoc +55 -11
- data/docs/lychee.toml +1 -10
- data/lib/canon/comparison/markup_comparator.rb +11 -6
- data/lib/canon/comparison/match_options/base_resolver.rb +2 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +9 -1
- data/lib/canon/comparison/whitespace_sensitivity.rb +73 -0
- data/lib/canon/comparison/xml_node_comparison.rb +11 -8
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +2 -2
- data/lib/canon/version.rb +1 -1
- data/lib/tasks/benchmark_runner.rb +83 -75
- data/lib/tasks/performance_helpers.rb +47 -3
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4bf32847db2d5c418daebb9ad5221646edecaf4b67b4f25c4e2a9e8a68167a8e
|
|
4
|
+
data.tar.gz: 6e595f08701e61f73ad62dc5aec3ec3b95da8f41fc75d579e70721f2d9af42e5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 42a21e5e1badd2c1b96b1b86dce89551ee5b0794150fd2844b345fcabeb3d9bb484ca3beb423209e0bd455887d3597aa7d5973aaa0985ee77c450f20ff755866
|
|
7
|
+
data.tar.gz: 8799d74f6a3738317387336308a3f95ffabaa5779d96dbde0ee9bccc424d360131230752031b0a0ee907af5907134b1ca8dec75e8cd0024fb600e090d3b681b7
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-03-
|
|
3
|
+
# on 2026-03-24 03:04:40 UTC using RuboCop version 1.85.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -11,7 +11,7 @@ Gemspec/RequiredRubyVersion:
|
|
|
11
11
|
Exclude:
|
|
12
12
|
- 'canon.gemspec'
|
|
13
13
|
|
|
14
|
-
# Offense count:
|
|
14
|
+
# Offense count: 802
|
|
15
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
16
16
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
17
17
|
# URISchemes: http, https
|
|
@@ -58,7 +58,7 @@ Lint/UnusedMethodArgument:
|
|
|
58
58
|
- 'lib/canon/diff_formatter/by_line/xml_formatter.rb'
|
|
59
59
|
- 'lib/canon/diff_formatter/by_object/base_formatter.rb'
|
|
60
60
|
|
|
61
|
-
# Offense count:
|
|
61
|
+
# Offense count: 235
|
|
62
62
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
63
63
|
Metrics/AbcSize:
|
|
64
64
|
Enabled: false
|
|
@@ -69,12 +69,12 @@ Metrics/AbcSize:
|
|
|
69
69
|
Metrics/BlockLength:
|
|
70
70
|
Max: 84
|
|
71
71
|
|
|
72
|
-
# Offense count:
|
|
72
|
+
# Offense count: 192
|
|
73
73
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
74
74
|
Metrics/CyclomaticComplexity:
|
|
75
75
|
Enabled: false
|
|
76
76
|
|
|
77
|
-
# Offense count:
|
|
77
|
+
# Offense count: 401
|
|
78
78
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
79
79
|
Metrics/MethodLength:
|
|
80
80
|
Max: 95
|
|
@@ -84,7 +84,7 @@ Metrics/MethodLength:
|
|
|
84
84
|
Metrics/ParameterLists:
|
|
85
85
|
Max: 9
|
|
86
86
|
|
|
87
|
-
# Offense count:
|
|
87
|
+
# Offense count: 158
|
|
88
88
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
89
89
|
Metrics/PerceivedComplexity:
|
|
90
90
|
Enabled: false
|
data/docs/features/match-options/index.adoc
CHANGED
|
@@ -153,20 +153,33 @@ sensitivity in XML instance documents:
|
|
|
153
153
|
|
|
154
154
|
==== Whitelist and blacklist options
|
|
155
155
|
|
|
156
|
-
You can explicitly specify which elements are whitespace-sensitive:
|
|
156
|
+
You can explicitly specify which elements are whitespace-sensitive using either short or long option names:
|
|
157
157
|
|
|
158
158
|
[source,ruby]
|
|
159
159
|
----
|
|
160
|
-
#
|
|
160
|
+
# Short names (preferred)
|
|
161
161
|
Canon::Comparison.equivalent?(xml1, xml2,
|
|
162
162
|
match: {
|
|
163
163
|
structural_whitespace: :strict,
|
|
164
|
-
|
|
165
|
-
|
|
164
|
+
sensitive_elements: ["pre", "code", "sample"],
|
|
165
|
+
insensitive_elements: ["div", "span"]
|
|
166
|
+
}
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
# Long names (backward-compatible)
|
|
170
|
+
Canon::Comparison.equivalent?(xml1, xml2,
|
|
171
|
+
match: {
|
|
172
|
+
structural_whitespace: :strict,
|
|
173
|
+
whitespace_sensitive_elements: ["pre", "code", "sample"],
|
|
174
|
+
whitespace_insensitive_elements: ["div", "span"]
|
|
166
175
|
}
|
|
167
176
|
)
|
|
168
177
|
----
|
|
169
178
|
|
|
179
|
+
**Element names are strings** (not symbols) for consistency with XML/HTML conventions.
|
|
180
|
+
|
|
181
|
+
**Blacklist takes precedence over whitelist** — if an element appears in both lists, whitespace is stripped.
|
|
182
|
+
|
|
170
183
|
==== respect_xml_space option
|
|
171
184
|
|
|
172
185
|
Control whether xml:space attributes in the document are honored:
|
|
@@ -211,9 +224,40 @@ When determining if an element is whitespace-sensitive, Canon uses this priority
|
|
|
211
224
|
|
|
212
225
|
==== Format-specific defaults
|
|
213
226
|
|
|
214
|
-
**HTML**:: `[
|
|
227
|
+
**HTML**:: `["pre", "textarea", "script", "style"]` - These elements preserve whitespace by HTML specification
|
|
215
228
|
**XML**:: `[]` - No default whitespace-sensitive elements, purely user-controlled
|
|
216
229
|
|
|
230
|
+
==== Two types of whitespace sensitivity
|
|
231
|
+
|
|
232
|
+
Canon handles two distinct whitespace concerns:
|
|
233
|
+
|
|
234
|
+
**1. Structural whitespace stripping** — whitespace-only text nodes between sibling elements (indentation, newlines). These are never semantically meaningful and are stripped by default for XML to enable ElementMatcher to work correctly.
|
|
235
|
+
|
|
236
|
+
**2. Text content comparison** — how non-whitespace text content is compared. Controlled by `structural_whitespace` and `text_content` dimension behaviors (`:strict`, `:normalize`, `:ignore`).
|
|
237
|
+
|
|
238
|
+
The `sensitive_elements` / `insensitive_elements` options control both concerns:
|
|
239
|
+
|
|
240
|
+
[source,ruby]
|
|
241
|
+
----
|
|
242
|
+
# For XML: structural whitespace is stripped by default
|
|
243
|
+
# Use sensitive_elements to preserve whitespace in specific elements
|
|
244
|
+
xml1 = "<root><item>Test</item></root>"
|
|
245
|
+
xml2 = "<root>\n <item>Test</item>\n</root>"
|
|
246
|
+
|
|
247
|
+
# With sensitive_elements, whitespace inside <item> is preserved
|
|
248
|
+
Canon::Comparison.equivalent?(xml1, xml2,
|
|
249
|
+
match: {
|
|
250
|
+
structural_whitespace: :strict,
|
|
251
|
+
sensitive_elements: ["item"]
|
|
252
|
+
}
|
|
253
|
+
)
|
|
254
|
+
# => true
|
|
255
|
+
----
|
|
256
|
+
|
|
257
|
+
**Precedence**: blacklist (`insensitive_elements`) > whitelist (`sensitive_elements`) > format defaults
|
|
258
|
+
|
|
259
|
+
**No inheritance**: Only the immediate parent element's name is checked — not ancestor elements.
|
|
260
|
+
|
|
217
261
|
==== Examples
|
|
218
262
|
|
|
219
263
|
.Using xml:space attribute
|
|
@@ -232,11 +276,11 @@ Canon::Comparison.equivalent?(xml1, xml2,
|
|
|
232
276
|
.Using whitelist
|
|
233
277
|
[source,ruby]
|
|
234
278
|
----
|
|
235
|
-
# Make <p> elements whitespace-sensitive
|
|
279
|
+
# Make <p> elements whitespace-sensitive (strings, not symbols)
|
|
236
280
|
Canon::Comparison.equivalent?(xml1, xml2,
|
|
237
281
|
match: {
|
|
238
282
|
structural_whitespace: :strict,
|
|
239
|
-
|
|
283
|
+
sensitive_elements: ["p", "pre"]
|
|
240
284
|
}
|
|
241
285
|
)
|
|
242
286
|
----
|
|
@@ -249,7 +293,7 @@ Canon::Comparison.equivalent?(html1, html2,
|
|
|
249
293
|
format: :html,
|
|
250
294
|
match: {
|
|
251
295
|
structural_whitespace: :strict,
|
|
252
|
-
|
|
296
|
+
insensitive_elements: ["script"]
|
|
253
297
|
}
|
|
254
298
|
)
|
|
255
299
|
----
|
|
@@ -636,12 +680,12 @@ expect(actual).to be_xml_equivalent_to(expected,
|
|
|
636
680
|
element_hierarchy: :ignore
|
|
637
681
|
)
|
|
638
682
|
|
|
639
|
-
# Element-level whitespace sensitivity
|
|
683
|
+
# Element-level whitespace sensitivity (strings, not symbols)
|
|
640
684
|
expect(actual).to be_xml_equivalent_to(expected,
|
|
641
685
|
match: { structural_whitespace: :strict }
|
|
642
686
|
)
|
|
643
687
|
.with_options(
|
|
644
|
-
|
|
688
|
+
sensitive_elements: ["pre", "code", "sample"],
|
|
645
689
|
respect_xml_space: true
|
|
646
690
|
)
|
|
647
691
|
|
|
@@ -650,7 +694,7 @@ expect(html).to be_html_equivalent_to(expected,
|
|
|
650
694
|
match: { structural_whitespace: :strict }
|
|
651
695
|
)
|
|
652
696
|
.with_options(
|
|
653
|
-
|
|
697
|
+
insensitive_elements: ["script", "style"]
|
|
654
698
|
)
|
|
655
699
|
====
|
|
656
700
|
|
data/docs/lychee.toml
CHANGED
|
@@ -9,9 +9,6 @@ max_cache_age = "1d"
|
|
|
9
9
|
# Check both source files and built site
|
|
10
10
|
include_verbatim = true
|
|
11
11
|
|
|
12
|
-
# Recursively check all files
|
|
13
|
-
recursive = true
|
|
14
|
-
|
|
15
12
|
# File types to check (regex patterns)
|
|
16
13
|
include = [
|
|
17
14
|
"_site/**/*.html",
|
|
@@ -50,9 +47,6 @@ user_agent = "lychee/canon-docs-link-checker"
|
|
|
50
47
|
# Check HTTP, HTTPS, and file:// schemes
|
|
51
48
|
scheme = ["https", "http", "file"]
|
|
52
49
|
|
|
53
|
-
# Include file:// URLs for local link checking
|
|
54
|
-
include_file = true
|
|
55
|
-
|
|
56
50
|
# Handle different link types
|
|
57
51
|
include_mail = false # Don't check mailto: links
|
|
58
52
|
|
|
@@ -66,7 +60,4 @@ verbose = "warn"
|
|
|
66
60
|
require_https = false # Don't enforce
|
|
67
61
|
|
|
68
62
|
# Index files for directory URLs
|
|
69
|
-
index_files = ["index.html"]
|
|
70
|
-
|
|
71
|
-
# Ignore patterns file
|
|
72
|
-
ignore_file = ".lycheeignore"
|
|
63
|
+
index_files = ["index.html"]
|
data/lib/canon/comparison/markup_comparator.rb
CHANGED
|
@@ -174,12 +174,17 @@ module Canon
|
|
|
174
174
|
end
|
|
175
175
|
end
|
|
176
176
|
|
|
177
|
-
#
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
177
|
+
# Strip whitespace-only text nodes based on parent element configuration.
|
|
178
|
+
# Use sensitive_elements / insensitive_elements to control.
|
|
179
|
+
# Blacklist (insensitive) > whitelist (sensitive) > format defaults.
|
|
180
|
+
return false unless text_node?(node) && node.parent
|
|
181
|
+
return false unless MatchOptions.normalize_text(node_text(node)).empty?
|
|
182
|
+
|
|
183
|
+
return true unless WhitespaceSensitivity.whitespace_preserved?(
|
|
184
|
+
node.parent, match_opts
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
false
|
|
183
188
|
|
|
184
189
|
false
|
|
185
190
|
end
|
data/lib/canon/comparison/match_options/xml_resolver.rb
CHANGED
|
@@ -8,6 +8,12 @@ module Canon
|
|
|
8
8
|
# XML/HTML-specific match options resolver
|
|
9
9
|
class XmlResolver < BaseResolver
|
|
10
10
|
# Format-specific defaults for XML/HTML
|
|
11
|
+
#
|
|
12
|
+
# Sensitive elements (preserve structural whitespace):
|
|
13
|
+
# - XML: none by default — all structural whitespace stripped
|
|
14
|
+
# - HTML: pre, code, textarea, script, style by default
|
|
15
|
+
# Use sensitive_elements option to add elements that preserve whitespace.
|
|
16
|
+
#
|
|
11
17
|
FORMAT_DEFAULTS = {
|
|
12
18
|
html: {
|
|
13
19
|
preprocessing: :rendered,
|
|
@@ -33,7 +39,9 @@ module Canon
|
|
|
33
39
|
|
|
34
40
|
# Predefined match profiles for XML/HTML
|
|
35
41
|
MATCH_PROFILES = {
|
|
36
|
-
# Strict: Match exactly as written in source (XML default)
|
|
42
|
+
# Strict: Match exactly as written in source (XML default).
|
|
43
|
+
# Structural whitespace is stripped by default for XML.
|
|
44
|
+
# Use sensitive_elements to preserve structural whitespace in specific elements.
|
|
37
45
|
strict: {
|
|
38
46
|
preprocessing: :none,
|
|
39
47
|
text_content: :strict,
|
data/lib/canon/comparison/whitespace_sensitivity.rb
CHANGED
|
@@ -66,6 +66,79 @@ module Canon
|
|
|
66
66
|
element_sensitive?(node, opts)
|
|
67
67
|
end
|
|
68
68
|
|
|
69
|
+
# Check if structural whitespace is preserved (not stripped) for an element.
|
|
70
|
+
#
|
|
71
|
+
# Uses sensitive_elements (whitelist) and insensitive_elements (blacklist)
|
|
72
|
+
# from match_opts. Blacklist takes precedence over whitelist.
|
|
73
|
+
# Format defaults apply when neither is configured.
|
|
74
|
+
#
|
|
75
|
+
# No inheritance from ancestors — checks only the immediate parent element name.
|
|
76
|
+
#
|
|
77
|
+
# @param element [Object] Element node to check
|
|
78
|
+
# @param match_opts [Hash] Resolved match options
|
|
79
|
+
# @return [Boolean] true if whitespace is preserved (not stripped)
|
|
80
|
+
def whitespace_preserved?(element, match_opts)
|
|
81
|
+
return false unless element
|
|
82
|
+
return false unless element.respond_to?(:name)
|
|
83
|
+
|
|
84
|
+
elem_name = element.name.to_s
|
|
85
|
+
|
|
86
|
+
# Blacklist: always strip (highest priority)
|
|
87
|
+
insensitive_raw = match_opts[:insensitive_elements]
|
|
88
|
+
insensitive_raw ||= match_opts[:whitespace_insensitive_elements]
|
|
89
|
+
insensitive = (insensitive_raw || []).map(&:to_s)
|
|
90
|
+
return false if insensitive.include?(elem_name)
|
|
91
|
+
|
|
92
|
+
# Whitelist: preserve whitespace
|
|
93
|
+
sensitive = resolved_sensitive_elements(match_opts)
|
|
94
|
+
return true if sensitive.include?(elem_name)
|
|
95
|
+
|
|
96
|
+
# Default: preserve for HTML, strip for XML
|
|
97
|
+
format = match_opts[:format] || :xml
|
|
98
|
+
case format
|
|
99
|
+
when :html, :html4, :html5
|
|
100
|
+
true
|
|
101
|
+
else
|
|
102
|
+
false
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# Get resolved list of whitespace-sensitive element names (strings).
|
|
107
|
+
#
|
|
108
|
+
# Combines format defaults + user whitelist, minus user blacklist.
|
|
109
|
+
# Supports both short names (sensitive_elements) and long names
|
|
110
|
+
# (whitespace_sensitive_elements) for backward compatibility.
|
|
111
|
+
#
|
|
112
|
+
# @param match_opts [Hash] Resolved match options
|
|
113
|
+
# @return [Array<String>] Sensitive element names
|
|
114
|
+
def resolved_sensitive_elements(match_opts)
|
|
115
|
+
sensitive = []
|
|
116
|
+
|
|
117
|
+
# 1. Format defaults
|
|
118
|
+
format = match_opts[:format] || :xml
|
|
119
|
+
case format
|
|
120
|
+
when :html, :html4, :html5
|
|
121
|
+
sensitive += %w[pre code textarea script style]
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# 2. User whitelist (additive to format defaults)
|
|
125
|
+
whitelist = match_opts[:sensitive_elements]
|
|
126
|
+
whitelist ||= match_opts[:whitespace_sensitive_elements]
|
|
127
|
+
if whitelist
|
|
128
|
+
sensitive += whitelist.map(&:to_s)
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# 3. User blacklist removes from combined set
|
|
132
|
+
blacklist_raw = match_opts[:insensitive_elements]
|
|
133
|
+
blacklist_raw ||= match_opts[:whitespace_insensitive_elements]
|
|
134
|
+
if blacklist_raw
|
|
135
|
+
blacklist = blacklist_raw.to_set(&:to_s)
|
|
136
|
+
sensitive.reject! { |e| blacklist.include?(e) }
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
sensitive.uniq
|
|
140
|
+
end
|
|
141
|
+
|
|
69
142
|
# Get format-specific default sensitive elements
|
|
70
143
|
#
|
|
71
144
|
# This is the SINGLE SOURCE OF TRUTH for default whitespace-sensitive
|
data/lib/canon/comparison/xml_node_comparison.rb
CHANGED
|
@@ -190,14 +190,17 @@ diff_children, differences)
|
|
|
190
190
|
end
|
|
191
191
|
end
|
|
192
192
|
|
|
193
|
-
#
|
|
194
|
-
#
|
|
195
|
-
#
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
193
|
+
# Strip whitespace-only text nodes based on parent element configuration.
|
|
194
|
+
# Use sensitive_elements / insensitive_elements to control.
|
|
195
|
+
# Blacklist (insensitive) > whitelist (sensitive) > format defaults.
|
|
196
|
+
return false unless text_node?(node) && node.parent
|
|
197
|
+
return false unless MatchOptions.normalize_text(node_text(node)).empty?
|
|
198
|
+
|
|
199
|
+
return true unless WhitespaceSensitivity.whitespace_preserved?(
|
|
200
|
+
node.parent, match_opts
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
false
|
|
201
204
|
|
|
202
205
|
false
|
|
203
206
|
end
|
data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb
CHANGED
|
@@ -326,9 +326,9 @@ module Canon
|
|
|
326
326
|
# Handle cases where one node is missing (e.g. text added or removed)
|
|
327
327
|
if node1.nil? || node2.nil?
|
|
328
328
|
if node1.nil?
|
|
329
|
-
text2 = node2
|
|
329
|
+
text2 = NodeUtils.get_node_text(node2)
|
|
330
330
|
else
|
|
331
|
-
text1 = node1
|
|
331
|
+
text1 = NodeUtils.get_node_text(node1)
|
|
332
332
|
end
|
|
333
333
|
end
|
|
334
334
|
|
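data/lib/canon/version.rb
CHANGED
|
(diff body for this file is missing from this rendering)
|
data/lib/tasks/benchmark_runner.rb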
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "benchmark/ips"
|
|
4
|
+
require "table_tennis"
|
|
4
5
|
|
|
5
6
|
# Ensure lib/ is on the load path regardless of tmp location
|
|
6
7
|
lib_path = File.expand_path(File.join(__dir__, "..", "..", "lib"))
|
|
@@ -101,7 +102,8 @@ class BenchmarkRunner
|
|
|
101
102
|
end
|
|
102
103
|
|
|
103
104
|
# Category section with description
|
|
104
|
-
def self.category(title, icon:, description:, failure_means:,
|
|
105
|
+
def self.category(title, icon:, description:, failure_means:,
|
|
106
|
+
compare_against: nil)
|
|
105
107
|
puts
|
|
106
108
|
puts "#{CYAN}#{VL}#{CLEAR} #{BOLD}#{MAGENTA}#{icon} #{title}#{CLEAR}"
|
|
107
109
|
puts
|
|
@@ -124,26 +126,21 @@ class BenchmarkRunner
|
|
|
124
126
|
puts
|
|
125
127
|
end
|
|
126
128
|
|
|
127
|
-
# Results table for a category
|
|
128
|
-
def self.
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
129
|
+
# Results table for a category using TableTennis
|
|
130
|
+
def self.table(results)
|
|
131
|
+
rows = results.map do |r|
|
|
132
|
+
{
|
|
133
|
+
test: r[:name],
|
|
134
|
+
ips: r[:ips],
|
|
135
|
+
deviation: "#{r[:deviation].round(1)}%",
|
|
136
|
+
status: r[:is_best] ? "BEST" : "",
|
|
137
|
+
}
|
|
138
|
+
end
|
|
133
139
|
|
|
134
|
-
|
|
135
|
-
speedup_str = speedup ? " ⚡#{speedup.round(2)}x" : ""
|
|
136
|
-
label_str = is_best ? "#{GREEN}#{label}#{CLEAR}" : label
|
|
137
|
-
bar = render_bar(ips)
|
|
140
|
+
return if rows.empty?
|
|
138
141
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
puts
|
|
142
|
-
end
|
|
143
|
-
|
|
144
|
-
def self.table_footer
|
|
145
|
-
sep(char: "─", width: 76)
|
|
146
|
-
puts
|
|
142
|
+
table = TableTennis.new(rows, theme: :dark)
|
|
143
|
+
table.render
|
|
147
144
|
end
|
|
148
145
|
|
|
149
146
|
def self.speedup_badge(factor, label)
|
|
@@ -151,24 +148,7 @@ class BenchmarkRunner
|
|
|
151
148
|
puts " #{GREEN} #{factor.round(2)}x faster#{CLEAR}"
|
|
152
149
|
end
|
|
153
150
|
|
|
154
|
-
|
|
155
|
-
@max_ips = nil
|
|
156
|
-
end
|
|
157
|
-
|
|
158
|
-
def self.set_max_ips(val)
|
|
159
|
-
@max_ips = val
|
|
160
|
-
end
|
|
161
|
-
|
|
162
|
-
def self.render_bar(ips, max_width: 20)
|
|
163
|
-
@max_ips ||= ips
|
|
164
|
-
ratio = ips / @max_ips.to_f
|
|
165
|
-
width = [(ratio * max_width).round, 1].max
|
|
166
|
-
filled = [width, max_width].min
|
|
167
|
-
empty = max_width - filled
|
|
168
|
-
("█" * filled) + ("░" * empty)
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
# Summary card
|
|
151
|
+
# Summary card using TableTennis
|
|
172
152
|
def self.summary_card(results)
|
|
173
153
|
puts
|
|
174
154
|
sep(width: 78)
|
|
@@ -176,16 +156,23 @@ class BenchmarkRunner
|
|
|
176
156
|
puts " #{BOLD}#{MAGENTA}SUMMARY#{CLEAR}"
|
|
177
157
|
puts
|
|
178
158
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
puts " #{DIM}◆#{CLEAR} #{format('%-35s', r[:label])} #{ips_str}"
|
|
159
|
+
rows = results.map do |r|
|
|
160
|
+
{
|
|
161
|
+
benchmark: r[:label],
|
|
162
|
+
ips: r[:ips]&.round(1),
|
|
163
|
+
}
|
|
185
164
|
end
|
|
186
165
|
|
|
166
|
+
return if rows.empty?
|
|
167
|
+
|
|
168
|
+
table = TableTennis.new(rows,
|
|
169
|
+
title: "Performance Results",
|
|
170
|
+
theme: :dark,
|
|
171
|
+
headers: { benchmark: "Benchmark", ips: "IPS" })
|
|
172
|
+
table.render
|
|
173
|
+
|
|
187
174
|
puts
|
|
188
|
-
puts " #{DIM}#{
|
|
175
|
+
puts " #{DIM}#{results.length} benchmarks completed#{CLEAR}"
|
|
189
176
|
puts
|
|
190
177
|
end
|
|
191
178
|
end
|
|
@@ -239,24 +226,35 @@ class BenchmarkRunner
|
|
|
239
226
|
# Test definitions
|
|
240
227
|
BENCHMARKS = {
|
|
241
228
|
xml_parsing: [
|
|
242
|
-
{ name: "DOM (simple)", method: :xml_parse_dom_simple,
|
|
243
|
-
|
|
244
|
-
{ name: "
|
|
245
|
-
|
|
229
|
+
{ name: "DOM (simple)", method: :xml_parse_dom_simple,
|
|
230
|
+
desc: "Standard DOM parsing" },
|
|
231
|
+
{ name: "SAX (simple)", method: :xml_parse_sax_simple,
|
|
232
|
+
desc: "Streaming SAX parsing" },
|
|
233
|
+
{ name: "DOM (large)", method: :xml_parse_dom_large,
|
|
234
|
+
desc: "Large document DOM" },
|
|
235
|
+
{ name: "SAX (large)", method: :xml_parse_sax_large,
|
|
236
|
+
desc: "Large document SAX" },
|
|
246
237
|
],
|
|
247
238
|
html_parsing: [
|
|
248
239
|
{ name: "Simple HTML", method: :html_parse_simple, desc: "Basic HTML" },
|
|
249
|
-
{ name: "Complex HTML", method: :html_parse_complex,
|
|
240
|
+
{ name: "Complex HTML", method: :html_parse_complex,
|
|
241
|
+
desc: "HTML with scripts/tables" },
|
|
250
242
|
],
|
|
251
243
|
xml_comparison: [
|
|
252
|
-
{ name: "Identical XML", method: :xml_compare_identical,
|
|
253
|
-
|
|
254
|
-
{ name: "
|
|
244
|
+
{ name: "Identical XML", method: :xml_compare_identical,
|
|
245
|
+
desc: "Same documents" },
|
|
246
|
+
{ name: "Similar XML", method: :xml_compare_similar,
|
|
247
|
+
desc: "Slightly different" },
|
|
248
|
+
{ name: "Different XML", method: :xml_compare_different,
|
|
249
|
+
desc: "Different namespaces" },
|
|
255
250
|
],
|
|
256
251
|
html_comparison: [
|
|
257
|
-
{ name: "Identical HTML", method: :html_compare_identical,
|
|
258
|
-
|
|
259
|
-
{ name: "
|
|
252
|
+
{ name: "Identical HTML", method: :html_compare_identical,
|
|
253
|
+
desc: "Same HTML" },
|
|
254
|
+
{ name: "Similar HTML", method: :html_compare_similar,
|
|
255
|
+
desc: "Slightly different" },
|
|
256
|
+
{ name: "Different HTML", method: :html_compare_different,
|
|
257
|
+
desc: "Different structure" },
|
|
260
258
|
],
|
|
261
259
|
formatting: [
|
|
262
260
|
{ name: "XML C14N", method: :xml_c14n_format, desc: "Canonical XML" },
|
|
@@ -287,7 +285,8 @@ class BenchmarkRunner
|
|
|
287
285
|
end.join
|
|
288
286
|
"<#{prefix}root#{ns_attr}#{attrs}>#{children}</#{prefix}root>"
|
|
289
287
|
else
|
|
290
|
-
child = build_xml_element(items / 2, depth - 1, prefix, with_attrs,
|
|
288
|
+
child = build_xml_element(items / 2, depth - 1, prefix, with_attrs,
|
|
289
|
+
"")
|
|
291
290
|
"<#{prefix}root#{ns_attr}#{attrs}>#{child}</#{prefix}root>"
|
|
292
291
|
end
|
|
293
292
|
end
|
|
@@ -401,8 +400,6 @@ class BenchmarkRunner
|
|
|
401
400
|
end
|
|
402
401
|
|
|
403
402
|
def run_benchmarks
|
|
404
|
-
Term.reset_max_ips
|
|
405
|
-
|
|
406
403
|
# Header
|
|
407
404
|
Term.header("Canon Performance Benchmarks", color: Term::CYAN)
|
|
408
405
|
|
|
@@ -434,8 +431,6 @@ class BenchmarkRunner
|
|
|
434
431
|
compare_against: config[:compare_against],
|
|
435
432
|
)
|
|
436
433
|
|
|
437
|
-
Term.table_header
|
|
438
|
-
|
|
439
434
|
# Run each test in category
|
|
440
435
|
category_results = []
|
|
441
436
|
max_ips = 0
|
|
@@ -457,23 +452,32 @@ class BenchmarkRunner
|
|
|
457
452
|
$stdout = original_stdout
|
|
458
453
|
end
|
|
459
454
|
|
|
460
|
-
#
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
# Print results with relative bars
|
|
464
|
-
category_results.each do |r|
|
|
455
|
+
# Build results for TableTennis table
|
|
456
|
+
table_rows = category_results.map do |r|
|
|
465
457
|
is_best = r[:result][:upper] >= max_ips
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
458
|
+
label = "#{config[:name]}: #{r[:name]}"
|
|
459
|
+
@all_results << { label: label,
|
|
460
|
+
ips: (r[:result][:lower] + r[:result][:upper]) / 2.0 }
|
|
461
|
+
@results[label] = r[:result] # Populate @results for comparison
|
|
462
|
+
{
|
|
463
|
+
name: r[:name],
|
|
464
|
+
ips: (r[:result][:lower] + r[:result][:upper]) / 2.0,
|
|
465
|
+
deviation: calculate_deviation(r[:result]),
|
|
466
|
+
is_best: is_best,
|
|
467
|
+
}
|
|
469
468
|
end
|
|
470
469
|
|
|
471
|
-
|
|
470
|
+
# Render TableTennis table
|
|
471
|
+
Term.table(table_rows)
|
|
472
472
|
|
|
473
473
|
# SAX vs DOM comparison for XML parsing
|
|
474
474
|
if category == :xml_parsing && SAX_AVAILABLE
|
|
475
|
-
sax = category_results.find
|
|
476
|
-
|
|
475
|
+
sax = category_results.find do |r|
|
|
476
|
+
r[:name].include?("SAX") && r[:name].include?("large")
|
|
477
|
+
end
|
|
478
|
+
dom = category_results.find do |r|
|
|
479
|
+
r[:name].include?("DOM") && r[:name].include?("large")
|
|
480
|
+
end
|
|
477
481
|
|
|
478
482
|
if sax && dom
|
|
479
483
|
sax_ips = (sax[:result][:lower] + sax[:result][:upper]) / 2.0
|
|
@@ -481,9 +485,11 @@ class BenchmarkRunner
|
|
|
481
485
|
speedup = sax_ips / dom_ips
|
|
482
486
|
|
|
483
487
|
if speedup > 1.0
|
|
484
|
-
Term.speedup_badge(speedup,
|
|
488
|
+
Term.speedup_badge(speedup,
|
|
489
|
+
"SAX is faster than DOM for large documents")
|
|
485
490
|
else
|
|
486
|
-
Term.hint("DOM is #{format('%.2f',
|
|
491
|
+
Term.hint("DOM is #{format('%.2f',
|
|
492
|
+
1 / speedup)}x faster than SAX for large documents")
|
|
487
493
|
end
|
|
488
494
|
end
|
|
489
495
|
end
|
|
@@ -509,7 +515,8 @@ class BenchmarkRunner
|
|
|
509
515
|
html = DataGenerator.generate_html(items: @items)
|
|
510
516
|
measure { Canon.parse_html(html) }
|
|
511
517
|
when :html_parse_complex
|
|
512
|
-
html = DataGenerator.generate_html(items: @items, with_scripts: true,
|
|
518
|
+
html = DataGenerator.generate_html(items: @items, with_scripts: true,
|
|
519
|
+
with_tables: true)
|
|
513
520
|
measure { Canon.parse_html(html) }
|
|
514
521
|
when :xml_compare_identical
|
|
515
522
|
xml = DataGenerator.generate_xml(items: @items)
|
|
@@ -566,7 +573,8 @@ class BenchmarkRunner
|
|
|
566
573
|
error_margin = std_dev / mean
|
|
567
574
|
error_pct = error_margin.round(4)
|
|
568
575
|
|
|
569
|
-
{ lower: mean.round(4) * (1 - error_pct),
|
|
576
|
+
{ lower: mean.round(4) * (1 - error_pct),
|
|
577
|
+
upper: mean.round(4) * (1 + error_pct) }
|
|
570
578
|
end
|
|
571
579
|
|
|
572
580
|
def measure_time
|
data/lib/tasks/performance_helpers.rb
CHANGED
|
@@ -4,6 +4,7 @@ require "json"
|
|
|
4
4
|
require "open3"
|
|
5
5
|
require "tmpdir"
|
|
6
6
|
require "fileutils"
|
|
7
|
+
require "table_tennis"
|
|
7
8
|
|
|
8
9
|
module PerformanceHelpers
|
|
9
10
|
# ANSI color codes for terminal output
|
|
@@ -97,9 +98,49 @@ module PerformanceHelpers
|
|
|
97
98
|
all_base.merge!(base_results)
|
|
98
99
|
all_current.merge!(curr_results)
|
|
99
100
|
|
|
101
|
+
# Collect comparison results for TableTennis table
|
|
102
|
+
comparison_rows = []
|
|
103
|
+
|
|
100
104
|
curr_results.each do |label, result|
|
|
101
|
-
|
|
105
|
+
base_result = base_results[label]
|
|
106
|
+
cmp = compare_metrics(label, result, base_result, threshold)
|
|
107
|
+
comparison_rows << cmp
|
|
102
108
|
end
|
|
109
|
+
|
|
110
|
+
print_comparison_table(comparison_rows, threshold)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def print_comparison_table(comparison_rows, threshold)
|
|
114
|
+
rows = comparison_rows.map do |cmp|
|
|
115
|
+
{
|
|
116
|
+
benchmark: cmp[:label],
|
|
117
|
+
base_ips: cmp[:base_ips]&.round(1),
|
|
118
|
+
curr_ips: cmp[:curr_ips]&.round(1),
|
|
119
|
+
change: cmp[:change] ? "#{(cmp[:change] * 100).round(1)}%" : "N/A",
|
|
120
|
+
status: if cmp[:base_ips].nil?
|
|
121
|
+
"NEW"
|
|
122
|
+
elsif cmp[:change] < -threshold
|
|
123
|
+
"REGRESSED"
|
|
124
|
+
else
|
|
125
|
+
"OK"
|
|
126
|
+
end,
|
|
127
|
+
}
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
return if rows.empty?
|
|
131
|
+
|
|
132
|
+
table = TableTennis.new(rows,
|
|
133
|
+
title: "Performance Comparison",
|
|
134
|
+
theme: :dark,
|
|
135
|
+
headers: {
|
|
136
|
+
benchmark: "Benchmark",
|
|
137
|
+
base_ips: "Base IPS",
|
|
138
|
+
curr_ips: "Curr IPS",
|
|
139
|
+
change: "Change",
|
|
140
|
+
status: "Status",
|
|
141
|
+
})
|
|
142
|
+
table.render
|
|
143
|
+
puts
|
|
103
144
|
end
|
|
104
145
|
|
|
105
146
|
def compare_metrics(label, curr, base, threshold)
|
|
@@ -197,7 +238,9 @@ module PerformanceHelpers
|
|
|
197
238
|
# Handle new benchmarks that don't exist in base
|
|
198
239
|
if base_metrics.nil?
|
|
199
240
|
curr_ips = (curr_metrics[:lower] + curr_metrics[:upper]) / 2.0
|
|
200
|
-
puts "#{format('%-30s',
|
|
241
|
+
puts "#{format('%-30s',
|
|
242
|
+
label)}: #{GREEN}NEW#{CLEAR} (current: #{format('%.2f',
|
|
243
|
+
curr_ips)} IPS) [N/A]\n\n"
|
|
201
244
|
return
|
|
202
245
|
end
|
|
203
246
|
|
|
@@ -212,7 +255,8 @@ module PerformanceHelpers
|
|
|
212
255
|
base_str = format("%.2f", base_ips)
|
|
213
256
|
curr_str = format("%.2f", curr_ips)
|
|
214
257
|
|
|
215
|
-
puts "#{format('%-30s',
|
|
258
|
+
puts "#{format('%-30s',
|
|
259
|
+
label)}: #{GRAY}#{base_str}#{CLEAR} → #{color}#{curr_str}#{CLEAR} IPS " \
|
|
216
260
|
"(change: #{color}#{delta_str}#{CLEAR}) [#{color}#{status}#{CLEAR}]\n\n"
|
|
217
261
|
end
|
|
218
262
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: canon
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.16
|
|
4
|
+
version: 0.1.17
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: diff-lcs
|