svg_conform 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +182 -21
  3. data/README.adoc +391 -989
  4. data/config/profiles/metanorma.yml +5 -0
  5. data/docs/api_reference.adoc +1355 -0
  6. data/docs/cli_guide.adoc +846 -0
  7. data/docs/reference_manifest.adoc +370 -0
  8. data/docs/requirements.adoc +68 -1
  9. data/examples/document_input_demo.rb +102 -0
  10. data/lib/svg_conform/document.rb +40 -1
  11. data/lib/svg_conform/profile.rb +15 -9
  12. data/lib/svg_conform/references/base_reference.rb +130 -0
  13. data/lib/svg_conform/references/id_definition.rb +38 -0
  14. data/lib/svg_conform/references/reference_classifier.rb +45 -0
  15. data/lib/svg_conform/references/reference_manifest.rb +129 -0
  16. data/lib/svg_conform/references.rb +11 -0
  17. data/lib/svg_conform/remediations/namespace_attribute_remediation.rb +34 -43
  18. data/lib/svg_conform/requirements/id_collection_requirement.rb +38 -0
  19. data/lib/svg_conform/requirements/id_reference_requirement.rb +11 -0
  20. data/lib/svg_conform/requirements/invalid_id_references_requirement.rb +3 -0
  21. data/lib/svg_conform/requirements/link_validation_requirement.rb +114 -31
  22. data/lib/svg_conform/requirements/no_external_css_requirement.rb +5 -2
  23. data/lib/svg_conform/requirements.rb +11 -9
  24. data/lib/svg_conform/sax_validation_handler.rb +16 -1
  25. data/lib/svg_conform/validation_context.rb +67 -1
  26. data/lib/svg_conform/validation_result.rb +43 -2
  27. data/lib/svg_conform/validator.rb +56 -16
  28. data/lib/svg_conform/version.rb +1 -1
  29. data/lib/svg_conform.rb +11 -2
  30. data/spec/svg_conform/commands/svgcheck_compare_command_spec.rb +1 -0
  31. data/spec/svg_conform/commands/svgcheck_compatibility_command_spec.rb +1 -0
  32. data/spec/svg_conform/commands/svgcheck_generate_command_spec.rb +1 -0
  33. data/spec/svg_conform/references/integration_spec.rb +206 -0
  34. data/spec/svg_conform/references/reference_classifier_spec.rb +142 -0
  35. data/spec/svg_conform/references/reference_manifest_spec.rb +307 -0
  36. data/spec/svg_conform/requirements/id_reference_state_spec.rb +93 -0
  37. data/spec/svg_conform/validator_input_types_spec.rb +172 -0
  38. metadata +17 -2
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module SvgConform
6
+ module References
7
# Shared parent for every kind of reference found in a document.
#
# Holds the raw reference value together with where it was found
# (element name, attribute name, line and column). Subclasses decide who
# is responsible for validating the reference by overriding
# #validation_scope.
class BaseReference < Lutaml::Model::Serializable
  attribute :value, :string
  attribute :element_name, :string
  attribute :attribute_name, :string
  attribute :line_number, :integer
  attribute :column_number, :integer

  # YAML keys mirror the attribute names one-to-one.
  yaml do
    %w[value element_name attribute_name line_number column_number].each do |key|
      map key, to: key.to_sym
    end
  end

  # JSON keys mirror the attribute names one-to-one.
  json do
    %w[value element_name attribute_name line_number column_number].each do |key|
      map key, to: key.to_sym
    end
  end

  # Abstract: subclasses return the scope (:internal or :external) that
  # governs who validates this reference.
  #
  # @raise [NotImplementedError] always, on the base class
  def validation_scope
    raise NotImplementedError, "Subclasses must define validation scope"
  end

  # @return [Boolean] true when the scope is :internal, i.e. svg_conform
  #   can validate the reference itself
  def internally_validatable?
    scope = validation_scope
    scope == :internal
  end

  # @return [Boolean] true when the scope is :external, i.e. validation
  #   is deferred to the consumer
  def requires_consumer_validation?
    scope = validation_scope
    scope == :external
  end

  # Plain-hash view of the reference. The :type entry is the unqualified
  # class name; :validation_scope is computed last, so calling this on the
  # base class raises NotImplementedError.
  #
  # @return [Hash{Symbol => Object}]
  def to_h
    summary = { type: self.class.name.split("::").last }
    %i[value element_name attribute_name line_number column_number].each do |field|
      summary[field] = public_send(field)
    end
    summary[:validation_scope] = validation_scope
    summary
  end
end
58
+
59
# Reference to an element inside the same document (e.g. "#element-id").
class InternalFragmentReference < BaseReference
  # Fragment references are validated internally.
  #
  # @return [Symbol] :internal
  def validation_scope
    :internal
  end

  # The referenced ID: the value without its leading "#".
  #
  # The pattern is anchored with \A so only a "#" at the very start of the
  # value is stripped; a "^" anchor would also match right after an
  # embedded newline and strip a "#" from a later line.
  #
  # @return [String, nil] the target ID, or nil when no value is set
  def target_id
    value&.sub(/\A#/, "")
  end
end
69
+
70
# Reference to an absolute web URL (http:// or https://).
class ExternalUrlReference < BaseReference
  # URL references are left for the consumer to validate.
  #
  # @return [Symbol] :external
  def validation_scope
    :external
  end

  # Scheme part of the URL as reported by URI.parse.
  #
  # @return [String, nil] the scheme, or nil when the value cannot be
  #   parsed (any StandardError is swallowed)
  def protocol
    # Loaded lazily, as in the original implementation.
    require "uri"
    begin
      parsed = URI.parse(value)
      parsed.scheme
    rescue StandardError
      nil
    end
  end
end
83
+
84
# Reference expressed as a URN (urn:*).
class UrnReference < BaseReference
  # URN references are left for the consumer to validate.
  #
  # @return [Symbol] :external
  def validation_scope
    :external
  end

  # Second colon-separated segment of the value (the part right after
  # the leading "urn").
  #
  # @return [String, nil] that segment, or nil when it is absent or the
  #   value cannot be split (any StandardError is swallowed)
  def namespace
    _scheme, namespace_id = value.split(":")
    namespace_id
  rescue StandardError
    nil
  end
end
96
+
97
# Reference given as a relative path, optionally followed by "#fragment".
# Whether it is really internal or external depends on context, so it is
# treated as external.
class RelativePathReference < BaseReference
  # @return [Symbol] :external
  def validation_scope
    :external
  end

  # @return [Boolean] true when the value contains a "#" anywhere
  def has_fragment?
    !value.index("#").nil?
  end

  # @return [String, nil] everything before the first "#"; nil when
  #   splitting on "#" yields no segments
  def path_component
    value.split("#")[0]
  end

  # @return [String, nil] the last "#"-separated segment, or nil when
  #   splitting on "#" yields fewer than two segments
  def fragment_component
    segments = value.split("#")
    return nil if segments.size <= 1

    segments[-1]
  end
end
116
+
117
# Reference that embeds its payload as a data URI (data:*).
class DataUriReference < BaseReference
  # Data URIs carry their content inline, so they are handled internally.
  #
  # @return [Symbol] :internal
  def validation_scope
    :internal
  end

  # Media type declared right after the "data:" scheme.
  #
  # The scheme is matched case-insensitively and anchored to the start of
  # the value, so that a value such as "DATA:image/png;base64,..." (which
  # a case-insensitive classifier accepts as a data URI) still yields its
  # media type.
  #
  # @return [String, nil] the text between "data:" and the first ";" or
  #   ","; nil when the value is unset, does not start with "data:", or
  #   declares no media type
  def media_type
    match = value&.match(/\Adata:([^;,]+)/i)
    match && match[1]
  end
end
129
+ end
130
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module SvgConform
6
+ module References
7
# One ID declared in the SVG document, with the element that declares it
# and its source position.
class IdDefinition < Lutaml::Model::Serializable
  attribute :id_value, :string
  attribute :element_name, :string
  attribute :line_number, :integer
  attribute :column_number, :integer

  # YAML keys mirror the attribute names one-to-one.
  yaml do
    %w[id_value element_name line_number column_number].each do |key|
      map key, to: key.to_sym
    end
  end

  # JSON keys mirror the attribute names one-to-one.
  json do
    %w[id_value element_name line_number column_number].each do |key|
      map key, to: key.to_sym
    end
  end

  # Plain-hash view of the definition, keyed by attribute name.
  #
  # @return [Hash{Symbol => Object}]
  def to_h
    %i[id_value element_name line_number column_number].each_with_object({}) do |field, summary|
      summary[field] = public_send(field)
    end
  end
end
37
+ end
38
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_reference"
4
+
5
+ module SvgConform
6
+ module References
7
# Turns a raw reference value (e.g. an href) into an instance of the
# matching reference class.
class ReferenceClassifier
  # Build a reference object for +href_value+.
  #
  # @param href_value [String, nil] raw reference value
  # @param element_name [String] element carrying the reference
  # @param attribute_name [String] attribute carrying the reference
  # @param line_number [Integer, nil] source line, when known
  # @param column_number [Integer, nil] source column, when known
  # @return [Object, nil] an instance of the class chosen by
  #   .determine_type, or nil for a nil or empty value
  def self.classify(href_value, element_name:, attribute_name:,
                    line_number: nil, column_number: nil)
    return nil if href_value.nil? || href_value.empty?

    determine_type(href_value).new(
      value: href_value,
      element_name: element_name,
      attribute_name: attribute_name,
      line_number: line_number,
      column_number: column_number,
    )
  end

  # Choose the reference class for +href+.
  #
  # Every pattern is anchored with \A: the decision must be made on the
  # start of the whole value. A "^" anchor matches after any newline, so a
  # multi-line value could otherwise be classified by one of its later
  # lines (e.g. "javascript:...\nhttp://..." as an external URL).
  #
  # @param href [String] non-empty reference value
  # @return [Class] the reference class to instantiate
  def self.determine_type(href)
    case href
    when /\A#/
      InternalFragmentReference
    when /\Adata:/i
      DataUriReference
    when /\Aurn:/i
      UrnReference
    when %r{\Ahttps?://}i
      ExternalUrlReference
    else
      # Paths starting with "./" or "/" and anything unrecognised: could
      # be relative or external depending on context, so treat as a
      # relative path (conservative default).
      RelativePathReference
    end
  end
end
44
+ end
45
+ end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+ require_relative "id_definition"
5
+ require_relative "base_reference"
6
+
7
+ module SvgConform
8
+ module References
9
# Comprehensive manifest of all IDs and references found in a document.
# Gives consumers the context they need to validate the references that
# are deferred to them.
class ReferenceManifest < Lutaml::Model::Serializable
  attribute :source_document, :string
  attribute :available_ids, IdDefinition, collection: true, default: -> { [] }
  attribute :internal_references, BaseReference, collection: true, default: -> { [] }
  attribute :external_references, BaseReference, collection: true, default: -> { [] }

  yaml do
    map "source_document", to: :source_document
    map "available_ids", to: :available_ids
    map "internal_references", to: :internal_references
    map "external_references", to: :external_references
  end

  json do
    map "source_document", to: :source_document
    map "available_ids", to: :available_ids
    map "internal_references", to: :internal_references
    map "external_references", to: :external_references
  end

  # @param source_document [String, nil] label of the document this
  #   manifest describes
  def initialize(source_document: nil)
    super()
    @source_document = source_document
    @available_ids = []
    @internal_references = []
    @external_references = []
  end

  # Record an ID definition.
  #
  # @param id_value [String] the ID
  # @param element_name [String] element that declares the ID
  # @param line_number [Integer, nil] source line, when known
  # @param column_number [Integer, nil] source column, when known
  def register_id(id_value, element_name:, line_number: nil,
                  column_number: nil)
    @available_ids << IdDefinition.new(
      id_value: id_value,
      element_name: element_name,
      line_number: line_number,
      column_number: column_number,
    )
  end

  # Record a reference in the internal or external list according to its
  # validation scope. A reference that reports neither scope is not
  # recorded.
  #
  # @param reference [BaseReference]
  def register_reference(reference)
    if reference.internally_validatable?
      @internal_references << reference
    elsif reference.requires_consumer_validation?
      @external_references << reference
    end
  end

  # @param id_value [String]
  # @return [Boolean] true when an ID with this value has been registered
  def id_defined?(id_value)
    @available_ids.any? { |id_def| id_def.id_value == id_value }
  end

  # @param id_value [String]
  # @return [Array<InternalFragmentReference>] internal fragment
  #   references whose target is +id_value+
  def references_to_id(id_value)
    @internal_references.select do |ref|
      ref.is_a?(InternalFragmentReference) &&
        ref.target_id == id_value
    end
  end

  # @return [Hash{String => Array<BaseReference>}] all references keyed
  #   by their unqualified class name
  def references_by_type
    all_refs = @internal_references + @external_references
    all_refs.group_by { |ref| ref.class.name.split("::").last }
  end

  # Internal fragment references whose target ID was never registered.
  #
  # The known IDs are indexed once up front, so the scan is linear in
  # IDs plus references instead of rescanning every ID for every
  # reference.
  #
  # @return [Array<InternalFragmentReference>]
  def unresolved_internal_references
    known_ids = @available_ids.each_with_object({}) do |id_def, seen|
      seen[id_def.id_value] = true
    end

    @internal_references.select do |ref|
      ref.is_a?(InternalFragmentReference) && !known_ids.key?(ref.target_id)
    end
  end

  # @return [Hash{Symbol => Object}] counts of IDs and references,
  #   including unresolved internal references and a per-type breakdown
  def statistics
    {
      total_ids: @available_ids.size,
      total_references: @internal_references.size + @external_references.size,
      internal_references: @internal_references.size,
      external_references: @external_references.size,
      unresolved_internal: unresolved_internal_references.size,
      references_by_type: references_by_type.transform_values(&:size),
    }
  end

  # Export the manifest, including statistics, for consumer processing.
  #
  # @return [Hash{Symbol => Object}]
  def to_h
    {
      source_document: @source_document,
      available_ids: @available_ids.map(&:to_h),
      internal_references: @internal_references.map(&:to_h),
      external_references: @external_references.map(&:to_h),
      statistics: statistics,
    }
  end

  # Export as YAML for easy inspection. Serializes the #to_h hash rather
  # than going through the yaml mapping declared above.
  #
  # @return [String]
  def to_yaml
    require "yaml"
    to_h.to_yaml
  end

  # Export as pretty-printed JSON for programmatic processing. Serializes
  # the #to_h hash; any arguments are ignored.
  #
  # @return [String]
  def to_json(*_args)
    require "json"
    JSON.pretty_generate(to_h)
  end
end
128
+ end
129
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "references/base_reference"
4
+ require_relative "references/id_definition"
5
+ require_relative "references/reference_classifier"
6
+ require_relative "references/reference_manifest"
7
+
8
+ module SvgConform
9
+ module References
10
+ end
11
+ end
@@ -32,11 +32,11 @@ module SvgConform
32
32
  end
33
33
 
34
34
  # Second pass: remove unused namespace declarations
35
- document.traverse do |node|
36
- next unless element?(node)
37
-
38
- changes.concat(remove_unused_namespace_declarations(node,
39
- removed_namespaces))
35
+ # Nokogiri/libxml2 cannot remove namespace declarations via DOM,
36
+ # so we use string manipulation + reparse
37
+ if removed_namespaces.any? || disallowed_namespaces.any?
38
+ changes.concat(remove_namespace_declarations(document,
39
+ removed_namespaces))
40
40
  end
41
41
 
42
42
  changes
@@ -104,54 +104,45 @@ module SvgConform
104
104
  changes
105
105
  end
106
106
 
107
- def remove_unused_namespace_declarations(node, removed_namespaces)
107
# Strip xmlns:prefix declarations for removed/disallowed namespaces.
#
# Works on the serialized XML and reparses afterwards, because namespace
# declarations cannot be removed from the DOM directly in
# Nokogiri/libxml2.
#
# A declaration is removed when one of the identifiers (the entries of
# +removed_namespaces+ plus +disallowed_namespaces+) equals either
#   * its prefix:         xmlns:IDENT="anything", or
#   * its namespace URI:  xmlns:any="IDENT".
# The previous regex only covered the prefix case, so identifiers that are
# namespace URIs never matched.
#
# NOTE(review): the URI is compared against the serialized attribute text;
# a URI containing characters that the serializer entity-escapes (such as
# "&") would need the escaped form to match - confirm whether that occurs.
#
# @param document [Object] document wrapper; must respond to +to_xml+ and
#   +clear_cache+
# @param removed_namespaces [#to_a] namespace identifiers collected by the
#   earlier pass
# @return [Array<Hash>] one :namespace_removed entry per removed prefix
def remove_namespace_declarations(document, removed_namespaces)
  changes = []

  # Get current XML
  xml_str = document.to_xml

  namespaces_to_remove = removed_namespaces.to_a + disallowed_namespaces

  namespaces_to_remove.uniq.each do |ns_identifier|
    escaped = Regexp.escape(ns_identifier)
    by_prefix = /\s+xmlns:(#{escaped})=(?:"[^"]*"|'[^']*')/
    by_uri = /\s+xmlns:([^\s=]+)=(?:"#{escaped}"|'#{escaped}')/

    # Group 1 is set for a prefix match, group 2 for a URI match; either
    # way it holds the prefix that was actually declared.
    removed_prefixes = []
    xml_str = xml_str.gsub(Regexp.union(by_prefix, by_uri)) do
      removed_prefixes << (Regexp.last_match(1) || Regexp.last_match(2))
      ""
    end

    removed_prefixes.uniq.each do |prefix|
      changes << {
        type: :namespace_removed,
        description: "Removed unused namespace declaration 'xmlns:#{prefix}'",
        # Kept as "svg" for compatibility with existing change records;
        # the regex does not track which element carried the declaration.
        node_name: "svg",
        attribute: "xmlns:#{prefix}",
      }
    end
  end

  # Reparse so the document's DOM reflects the edited XML.
  if changes.any?
    new_moxml_doc = Moxml.new.parse(xml_str)

    # @moxml_document has no public writer visible here, hence
    # instance_variable_set.
    document.instance_variable_set(:@moxml_document, new_moxml_doc)
    document.clear_cache
  end

  changes
end
157
148
 
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_requirement"
4
+
5
+ module SvgConform
6
+ module Requirements
7
# Gathers every ID definition in the document so that references can be
# validated against them.
class IdCollectionRequirement < BaseRequirement
  # DOM path: register the node's "id" attribute, if it has one.
  # Non-element nodes are ignored.
  def check(node, context)
    return unless element?(node)

    record_id_definition(context, node, get_attribute(node, "id"))
  end

  # SAX path: register the element's raw "id" attribute, if it has one.
  def validate_sax_element(element, context)
    record_id_definition(context, element, element.raw_attributes["id"])
  end

  private

  # Register +id_value+ on the context together with the source's name
  # and, when the source exposes them, its line and column. Does nothing
  # when there is no ID.
  def record_id_definition(context, source, id_value)
    return unless id_value

    context.register_id(
      id_value,
      element_name: source.name,
      line_number: (source.line if source.respond_to?(:line)),
      column_number: (source.column if source.respond_to?(:column)),
    )
  end
end
37
+ end
38
+ end
@@ -10,6 +10,14 @@ module SvgConform
10
10
  true
11
11
  end
12
12
 
13
# Start every validation run from empty collections so that nothing
# gathered during a previous run leaks into the next one.
def reset_state
  # Reference lists first, then the ID set; the assignments are
  # independent of each other.
  @collected_url_refs = []
  @collected_href_refs = []
  @collected_ids = Set.new
  @collected_other_refs = []
end
20
+
13
21
  def collect_sax_data(element, _context)
14
22
  # Initialize collections on first call
15
23
  @collected_ids ||= Set.new
@@ -67,6 +75,9 @@ module SvgConform
67
75
  end
68
76
 
69
77
  def validate_sax_complete(context)
78
+ # Guard against nil collections (if collect_sax_data was never called)
79
+ return unless @collected_url_refs && @collected_href_refs && @collected_other_refs && @collected_ids
80
+
70
81
  # Validate all collected references
71
82
  @collected_url_refs.each do |element, ref_id, attr_name|
72
83
  next if @collected_ids.include?(ref_id)
@@ -79,6 +79,9 @@ module SvgConform
79
79
  end
80
80
 
81
81
  def validate_sax_complete(context)
82
+ # Guard against nil collections
83
+ return unless @use_element_refs && @other_refs && @collected_ids
84
+
82
85
  # Validate use element references
83
86
  @use_element_refs.each do |element, ref_id, href|
84
87
  next if @collected_ids.include?(ref_id)