sbom 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,307 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "securerandom"
5
+ require "time"
6
+
7
module Sbom
  module Cyclonedx
    # Builds a CycloneDX document (kept as a Ruby Hash, rendered as JSON by
    # #output) from SBOM data supplied as a Hash, or as any object that
    # responds to +to_h+.
    #
    # The specification version comes from the +SBOM_CYCLONEDX_VERSION+
    # environment variable (DEFAULT_VERSION when unset) and is replaced by the
    # +:version+ key of the data given to #generate when that key is present.
    # The +SBOM_ORGANIZATION+ environment variable provides the default
    # metadata supplier name.
    class Generator
      DEFAULT_VERSION = "1.6"
      SUPPORTED_VERSIONS = %w[1.4 1.5 1.6 1.7].freeze

      LIFECYCLE_PHASES = %w[
        design pre-build build post-build operations discovery decommission
      ].freeze

      # Component types accepted by every version in SUPPORTED_VERSIONS.
      BASE_COMPONENT_TYPES = %w[
        application framework library container operating-system
        device firmware file
      ].freeze

      # Component types added by later specification versions, mapped to the
      # first version that accepts them. Older documents fall back to "library".
      VERSIONED_COMPONENT_TYPES = {
        "machine-learning-model" => "1.5",
        "data" => "1.5",
        "device-driver" => "1.5",
        "platform" => "1.5",
        "cryptographic-asset" => "1.6"
      }.freeze

      # Canonical CycloneDX hash algorithm names, keyed by the upper-cased name
      # with "-" and "_" removed, so "SHA3-256", "sha3_256" and "SHA3256" all
      # resolve to the same entry.
      HASH_ALGORITHMS = {
        "MD5" => "MD5",
        "SHA1" => "SHA-1",
        "SHA256" => "SHA-256",
        "SHA384" => "SHA-384",
        "SHA512" => "SHA-512",
        "SHA3256" => "SHA3-256",
        "SHA3384" => "SHA3-384",
        "SHA3512" => "SHA3-512",
        "BLAKE2B256" => "BLAKE2b-256",
        "BLAKE2B384" => "BLAKE2b-384",
        "BLAKE2B512" => "BLAKE2b-512",
        "BLAKE3" => "BLAKE3"
      }.freeze

      private_constant :BASE_COMPONENT_TYPES, :VERSIONED_COMPONENT_TYPES, :HASH_ALGORITHMS

      # @param format [Symbol] stored but not consulted by this class
      # @param application [String] tool name written to metadata.tools
      # @param version [String] tool version written to metadata.tools
      def initialize(format: :json, application: "sbom", version: Sbom::VERSION)
        @format = format
        @application = application
        @app_version = version
        # Normalised like the per-document version so that a malformed or
        # unsupported environment value cannot reach Gem::Version.new later.
        @spec_version = normalize_version(ENV.fetch("SBOM_CYCLONEDX_VERSION", DEFAULT_VERSION))
        @organization = ENV["SBOM_ORGANIZATION"]

        @output = {}
        reset_collections
      end

      # Builds the document for +project_name+ from +sbom_data+.
      #
      # Does nothing (and keeps any previously generated output) when
      # +sbom_data+ is nil or empty. Each successful call starts from empty
      # component and dependency lists, so one generator can be reused.
      #
      # @param project_name [String] name of the metadata component
      # @param sbom_data [Hash, #to_h] keys read: :version, :uuid, :bom_version,
      #   :document, :packages, :relationships
      def generate(project_name, sbom_data)
        return if sbom_data.nil? || (sbom_data.respond_to?(:empty?) && sbom_data.empty?)

        data = sbom_data.is_a?(Hash) ? sbom_data : sbom_data.to_h

        reset_collections
        @spec_version = normalize_version(data[:version]) if data[:version]

        uuid = data[:uuid] || "urn:uuid:#{SecureRandom.uuid}"
        bom_version = data[:bom_version] || "1"

        component_data = extract_component_data(data)
        generate_document_header(project_name, component_data, uuid, bom_version)
        generate_components(data[:packages])
        generate_dependencies(data[:relationships])

        finalize_output
      end

      # @return [String] the generated document as pretty-printed JSON
      def output
        JSON.pretty_generate(@output)
      end

      # @return [Hash] the generated document (empty before the first #generate)
      def to_h
        @output
      end

      private

      # Fresh collections per document; new objects rather than #clear so a
      # Hash returned by an earlier #to_h is not modified afterwards.
      def reset_collections
        @components = []
        @dependencies = []
        @element_refs = {}
      end

      # Returns +version+ when supported, otherwise the first "major.minor"
      # found inside it when that is supported, otherwise DEFAULT_VERSION.
      def normalize_version(version)
        return version if SUPPORTED_VERSIONS.include?(version)

        match = version.to_s.match(/(\d+\.\d+)/)
        return match[1] if match && SUPPORTED_VERSIONS.include?(match[1])

        DEFAULT_VERSION
      end

      # Collects the values for the metadata section from data[:document],
      # using defaults when no document entry is present.
      def extract_component_data(data)
        result = {
          type: "application",
          supplier: @organization,
          version: nil,
          bom_ref: nil,
          timestamp: nil,
          creator: nil,
          lifecycle: nil
        }

        doc = data[:document]
        return result unless doc

        result[:type] = doc[:metadata_type] || "application"
        result[:supplier] = doc[:metadata_supplier] || @organization
        result[:version] = doc[:metadata_version]
        result[:bom_ref] = doc[:bom_ref]
        result[:lifecycle] = doc[:lifecycle]
        result[:timestamp] = doc[:created]
        result[:creator] = doc[:creators]&.first

        result
      end

      # Replaces @output with the top-level fields and the metadata section.
      def generate_document_header(name, component_data, uuid, bom_version)
        timestamp = component_data[:timestamp] || Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

        @output = {
          "bomFormat" => "CycloneDX",
          "specVersion" => @spec_version,
          "serialNumber" => uuid,
          "version" => bom_version.to_i
        }

        metadata = { "timestamp" => timestamp }

        # 1.5 replaced the flat tool list with an object holding components.
        metadata["tools"] =
          if version_at_least?("1.5")
            {
              "components" => [
                {
                  "type" => "application",
                  "name" => @application,
                  "version" => @app_version
                }
              ]
            }
          else
            [
              {
                "vendor" => "sbom",
                "name" => @application,
                "version" => @app_version
              }
            ]
          end

        metadata["supplier"] = { "name" => component_data[:supplier] } if component_data[:supplier]

        # metadata.lifecycles only exists from 1.5 on; emitting it for 1.4
        # would produce a document that fails schema validation.
        lifecycle = component_data[:lifecycle]
        if version_at_least?("1.5") && LIFECYCLE_PHASES.include?(lifecycle)
          metadata["lifecycles"] = [{ "phase" => lifecycle }]
        end

        metadata["component"] = {
          "type" => component_data[:type],
          "name" => name
        }

        metadata["component"]["version"] = component_data[:version] if component_data[:version]
        metadata["component"]["bom-ref"] = component_data[:bom_ref] if component_data[:bom_ref]

        @output["metadata"] = metadata
      end

      # Accepts either a collection of package hashes or a Hash whose values
      # are package hashes.
      def generate_components(packages_data)
        return unless packages_data

        packages = packages_data.is_a?(Hash) ? packages_data.values : packages_data
        packages.each_with_index do |pkg, index|
          generate_component(pkg, index + 1)
        end
      end

      # Appends one component; packages without a :name are skipped.
      # +index+ (1-based) is only used to synthesise a bom-ref.
      def generate_component(pkg, index)
        name = pkg[:name]
        return unless name

        bom_ref = pkg[:bom_ref] || pkg[:id] || "#{index}-#{name}"
        # Remembered so relationships that name a package can be rewritten to
        # its bom-ref in #generate_dependencies.
        @element_refs[name] = bom_ref

        component = {
          "type" => normalize_component_type(pkg[:type]),
          "name" => name,
          "bom-ref" => bom_ref
        }

        component["version"] = pkg[:version] if pkg[:version]
        component["description"] = pkg[:description] if pkg[:description]
        component["copyright"] = pkg[:copyright_text] if pkg[:copyright_text]

        component["supplier"] = { "name" => pkg[:supplier] } if pkg[:supplier] && pkg[:supplier_type]

        if pkg[:originator]
          if version_at_least?("1.7")
            component["authors"] = [{ "name" => pkg[:originator] }]
          else
            component["author"] = pkg[:originator]
          end
        end

        if pkg[:checksums]&.any?
          component["hashes"] = pkg[:checksums].map do |algo, value|
            { "alg" => normalize_algorithm(algo), "content" => value }
          end
        end

        licenses = extract_licenses(pkg)
        component["licenses"] = licenses if licenses.any?

        purl = pkg[:purl] || find_purl(pkg)
        component["purl"] = purl if purl

        if pkg[:external_references]&.any?
          # The purl reference is already emitted as the "purl" field.
          refs = pkg[:external_references].reject { |r| r[1] == "purl" }
          if refs.any?
            component["externalReferences"] = refs.map do |ref|
              { "type" => ref[1], "url" => ref[2] }
            end
          end
        end

        if pkg[:properties]&.any?
          component["properties"] = pkg[:properties].map do |prop|
            { "name" => prop[0], "value" => prop[1].to_s }
          end
        end

        @components << component
      end

      # Groups relationships by source into CycloneDX dependency entries.
      # Relationships missing either end are ignored; duplicate targets are
      # collapsed.
      def generate_dependencies(relationships_data)
        return unless relationships_data&.any?

        deps_map = {}

        relationships_data.each do |rel|
          source = rel[:source] || @element_refs.key(rel[:source_id])
          target = rel[:target] || @element_refs.key(rel[:target_id])

          next unless source && target

          # Package names become bom-refs; anything else is used verbatim.
          source_ref = @element_refs[source] || source
          target_ref = @element_refs[target] || target

          deps_map[source_ref] ||= []
          deps_map[source_ref] << target_ref unless deps_map[source_ref].include?(target_ref)
        end

        deps_map.each do |ref, depends_on|
          @dependencies << {
            "ref" => ref,
            "dependsOn" => depends_on
          }
        end
      end

      # Attaches the collected components/dependencies, omitting empty lists.
      def finalize_output
        @output["components"] = @components if @components.any?
        @output["dependencies"] = @dependencies if @dependencies.any?
      end

      # @return [Boolean] whether the target spec version is >= +version+
      def version_at_least?(version)
        Gem::Version.new(@spec_version) >= Gem::Version.new(version)
      end

      # Maps a package type onto a component type valid for the target spec
      # version. Unknown, missing, or not-yet-available types become "library".
      def normalize_component_type(type)
        return "library" unless type

        normalized = type.to_s.downcase.tr("_", "-")
        return normalized if BASE_COMPONENT_TYPES.include?(normalized)

        introduced_in = VERSIONED_COMPONENT_TYPES[normalized]
        introduced_in && version_at_least?(introduced_in) ? normalized : "library"
      end

      # Returns the CycloneDX spelling of a checksum algorithm name. Names
      # missing from HASH_ALGORITHMS keep the earlier behaviour of inserting a
      # dash after a leading "SHA" that is directly followed by a digit.
      def normalize_algorithm(algo)
        name = algo.to_s
        HASH_ALGORITHMS.fetch(name.upcase.delete("-_")) { name.gsub(/^SHA(\d)/, 'SHA-\1') }
      end

      # Builds the "licenses" array from the concluded licence, or the declared
      # one when nothing was concluded. NOASSERTION/NONE produce no entry.
      def extract_licenses(pkg)
        licenses = []

        license_id = pkg[:license_concluded] || pkg[:license_declared]
        return licenses unless license_id
        return licenses if %w[NOASSERTION NONE].include?(license_id.upcase)

        # AND / OR / WITH operators mean this is an SPDX expression, which is
        # not a valid value for a single licence "id".
        if license_id.match?(/ (?:AND|OR|WITH) /)
          licenses << { "expression" => license_id }
        else
          license_entry = { "license" => {} }

          if license_id.start_with?("LicenseRef")
            license_entry["license"]["name"] = license_id
          else
            license_entry["license"]["id"] = license_id
          end

          if version_at_least?("1.6")
            license_entry["license"]["acknowledgement"] =
              pkg[:license_concluded] ? "concluded" : "declared"
          end

          licenses << license_entry
        end

        licenses
      end

      # Returns the last element of the first external reference whose second
      # element is "purl", or nil.
      def find_purl(pkg)
        return nil unless pkg[:external_references]

        ref = pkg[:external_references].find { |r| r[1] == "purl" }
        ref&.last
      end
    end
  end
end
@@ -0,0 +1,275 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "rexml/document"
5
+
6
module Sbom
  module Cyclonedx
    # Reads a CycloneDX document (JSON or XML) into the project's SBOM data
    # objects (Data::Document, Data::Package, Data::Relationship, Data::Sbom —
    # all defined outside this file).
    class Parser
      FORMAT_JSON = :json
      FORMAT_XML = :xml

      def initialize
        @document = Data::Document.new
        @packages = {}
        @files = {}
        @relationships = []
        @licenses = []
        @version = nil
      end

      # Parses +content+ and returns the populated Data::Sbom.
      #
      # @param content [String] raw document text
      # @param format [Symbol, nil] FORMAT_JSON or FORMAT_XML; detected from
      #   the first non-blank character when nil
      # @raise [ParserError] for an unknown format or unparsable JSON/XML
      def parse(content, format = nil)
        format ||= detect_format(content)

        case format
        when FORMAT_JSON
          parse_json(content)
        when FORMAT_XML
          parse_xml(content)
        else
          raise ParserError, "Unknown CycloneDX format"
        end

        build_sbom
      end

      private

      # "{" means JSON, "<" means XML; anything else is treated as JSON.
      def detect_format(content)
        stripped = content.strip
        return FORMAT_JSON if stripped.start_with?("{")
        return FORMAT_XML if stripped.start_with?("<")

        FORMAT_JSON
      end

      # Input whose top level is not a CycloneDX object is ignored, leaving
      # the result empty. The Hash check stops a valid JSON array or scalar
      # from raising TypeError on the key lookup.
      def parse_json(content)
        data = JSON.parse(content)
        return unless data.is_a?(Hash) && data["bomFormat"] == "CycloneDX"

        @version = data["specVersion"]
        @document.version = @version
        @document.sbom_type = "cyclonedx"
        @document.id = data["serialNumber"]

        parse_metadata(data["metadata"]) if data["metadata"]
        parse_components(data["components"]) if data["components"]
        parse_dependencies(data["dependencies"]) if data["dependencies"]
      rescue JSON::ParserError => e
        raise ParserError, "Invalid JSON: #{e.message}"
      end

      # Documents whose root element is not <bom> are ignored.
      def parse_xml(content)
        doc = REXML::Document.new(content)
        root = doc.root
        return unless root && root.name == "bom"

        @schema = root.namespace
        # The spec version is in the namespace URI (".../schema/bom/1.5"). The
        # root "version" attribute is the BOM revision number, so it is used
        # only as a last resort when the namespace has no version.
        @version = extract_version_from_namespace(@schema) || root.attributes["version"]
        @document.version = @version
        @document.sbom_type = "cyclonedx"
        @document.id = root.attributes["serialNumber"]

        parse_xml_metadata(root.elements["metadata"]) if root.elements["metadata"]
        parse_xml_components(root.elements["components"]) if root.elements["components"]
        parse_xml_dependencies(root.elements["dependencies"]) if root.elements["dependencies"]
      rescue REXML::ParseException => e
        raise ParserError, "Invalid XML: #{e.message}"
      end

      # Copies timestamp, described component, supplier and lifecycle phase
      # from the JSON metadata object onto the document.
      def parse_metadata(metadata)
        @document.created = metadata["timestamp"]

        if metadata["component"]
          @document.name = metadata["component"]["name"]
          @document.metadata_type = metadata["component"]["type"]
          @document.metadata_version = metadata["component"]["version"]
        end

        @document.metadata_supplier = metadata["supplier"]["name"] if metadata["supplier"]

        # "manufacture" only fills in when no supplier name was found.
        @document.metadata_supplier ||= metadata["manufacture"]["name"] if metadata["manufacture"]

        # When several phases are listed the last one wins.
        Array(metadata["lifecycles"]).each do |lc|
          @document.lifecycle = lc["phase"] if lc["phase"]
        end
      end

      def parse_components(components, parent_ref = nil)
        components.each do |comp|
          parse_component(comp, parent_ref)
        end
      end

      # Records one JSON component as a package, plus a DEPENDS_ON relationship
      # from +parent_ref+ when the component is nested, then recurses into its
      # own nested components.
      def parse_component(comp, parent_ref = nil)
        package = Data::Package.new
        package.name = comp["name"]
        package.version = comp["version"]
        package.id = comp["bom-ref"]
        package.package_type = comp["type"]
        package.description = comp["description"]
        package.copyright_text = comp["copyright"]

        package.set_supplier("Organization", comp["supplier"]["name"]) if comp["supplier"]

        # Accept the "authors" array as well as the single "author" string;
        # the first named author is used.
        originator = comp["author"] ||
                     Array(comp["authors"]).map { |a| a["name"] if a.is_a?(Hash) }.compact.first
        package.set_originator("Person", originator) if originator

        Array(comp["hashes"]).each do |hash|
          algo = hash["alg"]&.gsub("-", "")
          package.add_checksum(algo, hash["content"]) if algo
        end

        Array(comp["licenses"]).each do |lic|
          if lic["license"]
            license_id = lic["license"]["id"] || lic["license"]["name"]
            package.license_concluded = license_id
            package.set_license_declared(license_id)
          elsif lic["expression"]
            package.license_concluded = lic["expression"]
            package.set_license_declared(lic["expression"])
          end
        end

        package.purl = comp["purl"] if comp["purl"]

        Array(comp["externalReferences"]).each do |ref|
          package.add_external_reference(ref["type"], ref["type"], ref["url"])
        end

        Array(comp["properties"]).each do |prop|
          package.add_property(prop["name"], prop["value"])
        end

        store_package(package, parent_ref)

        parse_components(comp["components"], package.id || package.name) if comp["components"]
      end

      # Turns every ref/dependsOn pair into a DEPENDS_ON relationship.
      def parse_dependencies(dependencies)
        dependencies.each do |dep|
          ref = dep["ref"]
          Array(dep["dependsOn"]).each do |depends_on|
            add_dependency(ref, depends_on)
          end
        end
      end

      # XML counterpart of #parse_metadata.
      def parse_xml_metadata(metadata)
        timestamp = metadata.elements["timestamp"]
        @document.created = timestamp.text if timestamp

        component = metadata.elements["component"]
        if component
          @document.name = component.elements["name"]&.text
          @document.metadata_type = component.attributes["type"]
          @document.metadata_version = component.elements["version"]&.text
        end

        supplier = metadata.elements["supplier"]
        @document.metadata_supplier = supplier.elements["name"]&.text if supplier

        manufacture = metadata.elements["manufacture"]
        @document.metadata_supplier ||= manufacture.elements["name"]&.text if manufacture

        metadata.elements.each("lifecycles/lifecycle/phase") do |phase|
          @document.lifecycle = phase.text if phase.text
        end
      end

      def parse_xml_components(components, parent_ref = nil)
        components.elements.each("component") do |comp|
          parse_xml_component(comp, parent_ref)
        end
      end

      # XML counterpart of #parse_component; reads the same fields so both
      # serialisations of a document yield the same package data.
      def parse_xml_component(comp, parent_ref = nil)
        package = Data::Package.new
        package.name = comp.elements["name"]&.text
        package.version = comp.elements["version"]&.text
        package.id = comp.attributes["bom-ref"]
        package.package_type = comp.attributes["type"]
        package.description = comp.elements["description"]&.text
        package.copyright_text = comp.elements["copyright"]&.text

        supplier = comp.elements["supplier"]
        package.set_supplier("Organization", supplier.elements["name"]&.text) if supplier

        originator = comp.elements["author"]&.text || comp.elements["authors/author/name"]&.text
        package.set_originator("Person", originator) if originator

        comp.elements.each("hashes/hash") do |hash|
          algo = hash.attributes["alg"]&.gsub("-", "")
          package.add_checksum(algo, hash.text) if algo
        end

        comp.elements.each("licenses/license") do |lic|
          license_id = lic.elements["id"]&.text || lic.elements["name"]&.text
          next unless license_id

          package.license_concluded = license_id
          package.set_license_declared(license_id)
        end

        comp.elements.each("licenses/expression") do |expr|
          next unless expr.text

          package.license_concluded = expr.text
          package.set_license_declared(expr.text)
        end

        purl = comp.elements["purl"]
        package.purl = purl.text if purl

        comp.elements.each("externalReferences/reference") do |ref|
          ref_type = ref.attributes["type"]
          url = ref.elements["url"]&.text
          package.add_external_reference(ref_type, ref_type, url) if url
        end

        comp.elements.each("properties/property") do |prop|
          package.add_property(prop.attributes["name"], prop.text)
        end

        store_package(package, parent_ref)

        nested = comp.elements["components"]
        parse_xml_components(nested, package.id || package.name) if nested
      end

      # Each <dependency> directly inside another one is a dependsOn entry.
      def parse_xml_dependencies(dependencies)
        dependencies.elements.each("dependency") do |dep|
          ref = dep.attributes["ref"]
          dep.elements.each("dependency") do |child|
            add_dependency(ref, child.attributes["ref"])
          end
        end
      end

      # Registers +package+ under its [name, version] key and, for a nested
      # component, links it to +parent_ref+.
      def store_package(package, parent_ref)
        @packages[[package.name, package.version]] = package.to_h
        add_dependency(parent_ref, package.id || package.name) if parent_ref
      end

      # Appends a DEPENDS_ON relationship from +source+ to +target+.
      def add_dependency(source, target)
        rel = Data::Relationship.new
        rel.source = source
        rel.target = target
        rel.relationship_type = "DEPENDS_ON"
        @relationships << rel.to_h
      end

      # Extracts "major.minor" from a namespace such as
      # "http://cyclonedx.org/schema/bom/1.5"; nil when absent.
      def extract_version_from_namespace(namespace)
        return nil unless namespace

        match = namespace.match(%r{bom[/\-](\d+\.\d+)})
        match[1] if match
      end

      # Assembles the Data::Sbom returned by #parse from the collected state.
      def build_sbom
        sbom = Data::Sbom.new(sbom_type: :cyclonedx)
        sbom.version = @document.version
        sbom.add_document(@document.to_h)
        sbom.add_packages(@packages)
        sbom.add_files(@files)
        sbom.add_relationships(@relationships)
        sbom.add_licenses(@licenses)
        sbom
      end
    end
  end
end