sbom 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Sbom
6
+ module License
7
# Resolves license identifiers and names against the bundled SPDX license
# list and checks SPDX license expressions.
#
# License data is read once, at construction, from
# data/spdx_licenses.json next to this file. When that file is absent no
# data is loaded and only the special values and LicenseRef identifiers
# are recognised.
class Scanner
  SPECIAL_VALUES = %w[NOASSERTION NONE].freeze
  OPERATORS = %w[AND OR WITH].freeze
  UNKNOWN = "UNKNOWN".freeze
  NO_ASSERTION = "NOASSERTION".freeze

  # Splits an expression on its operators while keeping the operators.
  OPERATOR_SPLIT = /\s+(AND|OR|WITH)\s+/i.freeze
  # Separates leading "(" / trailing ")" decoration from an operand.
  OPERAND_PARTS = /\A([\s(]*)(.*?)([\s)]*)\z/m.freeze

  class << self
    # @return [Scanner] a lazily created, shared scanner so the license
    #   list is only parsed once per process
    def instance
      @instance ||= new
    end
  end

  # @return [String, nil] "licenseListVersion" of the loaded data file,
  #   nil when no data file was found
  attr_reader :license_list_version

  def initialize
    @licenses = {}
    @license_names = {}
    @deprecated = {}
    load_license_data
  end

  # Maps a license id or license name onto its canonical identifier.
  #
  # Surrounding whitespace is ignored. Special values (NOASSERTION, NONE)
  # and LicenseRef identifiers are passed through. Known licenses are
  # matched exactly first, then case-insensitively by id, then
  # case-insensitively by name.
  #
  # @param license_id [String, nil]
  # @return [String] the canonical identifier, or "UNKNOWN"; always a
  #   String, whichever lookup path matched
  def find_license(license_id)
    candidate = license_id.to_s.strip
    return UNKNOWN if candidate.empty?
    return candidate if SPECIAL_VALUES.include?(candidate.upcase)
    return candidate if candidate.start_with?("LicenseRef")
    return candidate if @licenses.key?(candidate)

    folded = candidate.downcase
    canonical = @licenses.each_key.find { |id| id.downcase == folded }
    return canonical if canonical

    named = @license_names.find { |name, _id| name.downcase == folded }
    named ? named.last : UNKNOWN
  end

  # @param license_id [String, nil]
  # @return [Boolean] true when find_license resolves the value
  def valid?(license_id)
    find_license(license_id) != UNKNOWN
  end

  # @param license_id [String] exact identifier as stored in the data file
  # @return [Boolean] whether the data file flags the id as deprecated
  def deprecated?(license_id)
    @deprecated[license_id] || false
  end

  # @param license_id [String] exact identifier as stored in the data file
  # @return [Boolean] whether the data file flags the license as OSI approved
  def osi_approved?(license_id)
    @licenses.dig(license_id, :osi_approved) || false
  end

  # Replaces unrecognised license operands of an expression with
  # NOASSERTION, leaving operators and parentheses in place.
  #
  # The operand following WITH is a license *exception*; it is not part of
  # the license list, so it is kept as written instead of being looked up.
  #
  # @param expression [String, nil]
  # @return [String] the checked expression, tokens joined by single
  #   spaces; "NOASSERTION" for nil or blank input
  def validate_expression(expression)
    text = expression.to_s.strip
    return NO_ASSERTION if text.empty?

    follows_with = false
    checked = text.split(OPERATOR_SPLIT).map do |token|
      if OPERATORS.include?(token.upcase)
        follows_with = token.casecmp?("WITH")
        next token
      end

      exception_operand = follows_with
      follows_with = false
      exception_operand ? token : check_operand(token)
    end

    checked.join(" ")
  end

  private

  # Returns the operand unchanged when it names a known license, otherwise
  # NOASSERTION wrapped in the operand's original parentheses.
  def check_operand(token)
    bare = token.delete("()").strip
    return token if bare.empty?
    return token unless find_license(bare) == UNKNOWN

    opening, _core, closing = token.match(OPERAND_PARTS).captures
    "#{opening}#{NO_ASSERTION}#{closing}"
  end

  # Reads the SPDX license list shipped next to this file and fills the
  # id, name and deprecation lookup tables. Silently does nothing when the
  # data file is missing.
  def load_license_data
    data_path = File.join(__dir__, "data", "spdx_licenses.json")
    return unless File.exist?(data_path)

    data = JSON.parse(File.read(data_path))
    @license_list_version = data["licenseListVersion"]

    (data["licenses"] || []).each do |license|
      id = license["licenseId"]
      next unless id # an entry without an id cannot be looked up

      name = license["name"]
      deprecated = license["isDeprecatedLicenseId"]

      @licenses[id] = {
        name: name,
        osi_approved: license["isOsiApproved"],
        deprecated: deprecated
      }
      @license_names[name] = id if name
      @deprecated[id] = deprecated
    end
  end
end
100
+ end
101
+ end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "yaml"
5
+
6
+ module Sbom
7
# Renders SBOM data as tag/value text, JSON or YAML and delivers it either
# to a file or to standard output.
class Output
  VALID_FORMATS = %i[tag json yaml].freeze

  # @return [Symbol] the effective format (:tag, :json or :yaml)
  # @return [Symbol] :file when a non-empty filename was given, else :console
  attr_reader :format, :output_type

  # @param filename [String, #to_s, nil] destination path; nil or empty
  #   selects console output
  # @param format [Symbol, String] one of VALID_FORMATS (case-insensitive);
  #   anything else falls back to :tag
  def initialize(filename: nil, format: :tag)
    @filename = filename
    @format = validate_format(format)
    # to_s so that path-like objects are judged by their path text.
    @output_type = filename.to_s.empty? ? :console : :file
  end

  # Formats +data+ for the configured format and writes it out.
  #
  # @param data [Object] structured data, or an already serialized String
  def generate(data)
    send_output(format_data(data))
  end

  private

  def validate_format(format)
    candidate = format.to_s.downcase.to_sym
    VALID_FORMATS.include?(candidate) ? candidate : :tag
  end

  def format_data(data)
    case @format
    when :json then format_json(data)
    when :yaml then format_yaml(data)
    else format_tag(data)
    end
  end

  # A String is passed through unchanged, i.e. treated as ready-made JSON.
  def format_json(data)
    return data if data.is_a?(String)

    JSON.pretty_generate(data)
  end

  # A String is parsed (safely) and re-emitted as YAML.
  def format_yaml(data)
    source = data.is_a?(String) ? YAML.safe_load(data) : data
    source.to_yaml
  end

  def format_tag(data)
    data.is_a?(Array) ? data.join("\n") : data.to_s
  end

  def send_output(content)
    return write_to_file(content) if @output_type == :file

    puts content
  end

  # Writes +content+ to the target file, terminated by exactly one newline
  # (YAML output already ends with one, so none is added there).
  #
  # Any operating-system level failure (missing directory, permissions,
  # target is a directory, disk full, ...) is reported on stderr and the
  # content is printed to stdout instead, so the output is never lost.
  def write_to_file(content)
    text = content.end_with?("\n") ? content : "#{content}\n"
    File.write(@filename, text)
  rescue SystemCallError, IOError => e
    warn "Unable to write to file: #{e.message}"
    puts content
  end
end
88
+ end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sbom
4
# Front door for reading SBOM documents: works out whether the input is
# SPDX or CycloneDX (from the file name, the content, or an explicit
# caller choice) and hands it to the matching format-specific parser.
class Parser
  # File-name suffix => [sbom type, serialization format].
  EXTENSION_MAP = {
    ".spdx" => [:spdx, :tag],
    ".spdx.json" => [:spdx, :json],
    ".spdx.yaml" => [:spdx, :yaml],
    ".spdx.yml" => [:spdx, :yaml],
    ".spdx.xml" => [:spdx, :xml],
    ".spdx.rdf" => [:spdx, :rdf],
    ".cdx.json" => [:cyclonedx, :json],
    ".bom.json" => [:cyclonedx, :json],
    ".cdx.xml" => [:cyclonedx, :xml],
    ".bom.xml" => [:cyclonedx, :xml]
  }.freeze

  # Convenience wrapper around Parser.new(...).parse_file.
  def self.parse_file(filename, sbom_type: :auto)
    new(sbom_type: sbom_type).parse_file(filename)
  end

  # Convenience wrapper around Parser.new(...).parse_string.
  def self.parse_string(content, sbom_type: :auto)
    new(sbom_type: sbom_type).parse_string(content)
  end

  # @param sbom_type [Symbol] :spdx, :cyclonedx, or :auto to detect
  def initialize(sbom_type: :auto)
    @sbom_type = sbom_type
  end

  # Reads and parses an SBOM file.
  #
  # An explicit sbom_type given to the constructor takes precedence over
  # whatever the file name suggests; the file name is only used to pick
  # the type when the parser was created with :auto. The format derived
  # from the name is passed on only when it belongs to the type actually
  # used.
  #
  # @param filename [String, #to_s]
  # @raise [ParserError] when the path is not a regular file or is empty
  def parse_file(filename)
    raise ParserError, "File not found: #{filename}" unless File.file?(filename)
    raise ParserError, "Empty file: #{filename}" if File.size(filename).zero?

    detected_type, format = detect_type_from_filename(filename)
    sbom_type = detected_type

    if @sbom_type && @sbom_type != :auto
      # The name-based format may belong to the other SBOM family.
      format = nil unless detected_type == @sbom_type
      sbom_type = @sbom_type
    end

    parse_string(File.read(filename), sbom_type: sbom_type, format: format)
  end

  # Parses SBOM content held in a String.
  #
  # @param content [String]
  # @param sbom_type [Symbol, nil] overrides the constructor's choice;
  #   with :auto both type and format are detected from the content
  # @param format [Symbol, nil] serialization hint passed to the
  #   format-specific parser
  # @raise [ParserError] when the type is unknown and neither parser
  #   accepts the content
  def parse_string(content, sbom_type: nil, format: nil)
    sbom_type ||= @sbom_type
    sbom_type, format = detect_type_from_content(content) if sbom_type == :auto

    case sbom_type
    when :spdx then Spdx::Parser.new.parse(content, format)
    when :cyclonedx then Cyclonedx::Parser.new.parse(content, format)
    else try_both_parsers(content)
    end
  end

  private

  # Guesses [type, format] from the file name (case-insensitively).
  # Bare .json / .xml names are assumed to be CycloneDX; anything else
  # yields [:auto, nil].
  def detect_type_from_filename(filename)
    name = filename.to_s.downcase

    _suffix, (type, format) = EXTENSION_MAP.find { |suffix, _| name.end_with?(suffix) }
    return [type, format] if type

    return [:cyclonedx, :json] if name.end_with?(".json")
    return [:cyclonedx, :xml] if name.end_with?(".xml")

    [:auto, nil]
  end

  # Guesses [type, format] by sniffing the content; [:auto, nil] when
  # nothing recognisable is found.
  def detect_type_from_content(content)
    text = content.strip

    from_json = detect_json_type(text)
    return from_json if from_json

    # Top-level YAML key of an SPDX document (JSON quotes the key, the
    # tag/value format spells it "SPDXVersion").
    return [:spdx, :yaml] if text.match?(/^spdxVersion:/)
    return [:spdx, :tag] if text.include?("SPDXVersion:")
    return [:spdx, :rdf] if text.include?("<spdx:")
    return [:cyclonedx, :xml] if text.include?("cyclonedx")

    [:auto, nil]
  end

  # Returns [type, :json] for a recognisable JSON document, else nil.
  def detect_json_type(text)
    return nil unless text.start_with?("{")

    document = JSON.parse(text)
    return [:cyclonedx, :json] if document["bomFormat"] == "CycloneDX"
    return [:spdx, :json] if document["spdxVersion"]

    nil
  rescue JSON::ParserError
    nil
  end

  # Last resort when the type could not be determined: accept the SPDX
  # result if it yielded packages or files, otherwise take whatever the
  # CycloneDX parser returns.
  def try_both_parsers(content)
    begin
      document = Spdx::Parser.new.parse(content)
      return document if document.packages.any? || document.files.any?
    rescue StandardError
      # Not usable as SPDX; fall through to CycloneDX.
    end

    Cyclonedx::Parser.new.parse(content)
  rescue StandardError
    raise ParserError, "Unable to parse SBOM content"
  end
end
111
+ end
@@ -0,0 +1,337 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "yaml"
5
+ require "securerandom"
6
+ require "time"
7
+
8
+ module Sbom
9
+ module Spdx
10
# Builds an SPDX document (tag/value, JSON or YAML) from a hash of SBOM
# data with :document, :packages, :files, :relationships and :licenses
# entries.
#
# Call #generate, then read the result with #output (serialized) or
# #to_h (the JSON/YAML structure).
class Generator
  SPDX_VERSION = "SPDX-2.3"
  SPDX_NAMESPACE = "http://spdx.org/spdxdocs/"

  FORMAT_TAG = :tag
  FORMAT_JSON = :json
  FORMAT_YAML = :yaml

  # Lifecycle phase => label used in the "SBOM Type" creator comment.
  LIFECYCLE_MAP = {
    "design" => "Design",
    "pre-build" => "Source",
    "build" => "Build",
    "post-build" => "Analyzed",
    "operations" => "Deployed",
    "discovery" => "Runtime"
  }.freeze

  DOCUMENT_ID = "SPDXRef-DOCUMENT"
  NO_ASSERTION = "NOASSERTION"

  # @param format [Symbol, String] :tag, :json or :yaml; unrecognised
  #   values fall back to :json
  # @param application [String] tool name recorded as creator
  # @param version [String] tool version recorded as creator
  #
  # The SPDX version defaults to SBOM_SPDX_VERSION from the environment
  # (else SPDX_VERSION); the default organization comes from
  # SBOM_ORGANIZATION.
  def initialize(format: FORMAT_TAG, application: "sbom", version: Sbom::VERSION)
    @format = validate_format(format)
    @application = application
    @version = version
    @default_spec_version = ENV.fetch("SBOM_SPDX_VERSION", SPDX_VERSION)
    @organization = ENV["SBOM_ORGANIZATION"]
    reset_state
  end

  # Generates the document. Does nothing for nil or empty data.
  #
  # Each call starts from a clean slate, so one generator can be reused
  # without output of an earlier document leaking into the next.
  #
  # @param project_name [String] document name used when the data carries none
  # @param sbom_data [Hash, #to_h] SBOM data with symbol keys
  def generate(project_name, sbom_data)
    return if sbom_data.nil? || (sbom_data.respond_to?(:empty?) && sbom_data.empty?)

    reset_state
    data = sbom_data.is_a?(Hash) ? sbom_data : sbom_data.to_h

    # to_s: a non-String version (e.g. a number) must not raise here.
    requested_version = data[:version].to_s
    @spec_version = requested_version if requested_version.start_with?("SPDX")

    generate_document_header(
      extract_document_name(data, project_name),
      data[:uuid],
      extract_lifecycle(data),
      extract_organization(data)
    )
    generate_packages(data[:packages])
    generate_files(data[:files])
    generate_relationships(data[:relationships])
    generate_license_info(data[:licenses])

    finalize_output
  end

  # @return [String] the document serialized in the configured format
  def output
    case @format
    when FORMAT_JSON then JSON.pretty_generate(@json_output)
    when FORMAT_YAML then @json_output.to_yaml
    else @tag_output.join("\n")
    end
  end

  # @return [Hash] the JSON/YAML document structure (empty in tag mode)
  def to_h
    @json_output
  end

  private

  def reset_state
    @spec_version = @default_spec_version
    @tag_output = []
    @json_output = {}
    @packages = []
    @files = []
    @relationships = []
    @licenses = []
    @elements = {} # SPDX id => element name
  end

  def tag?
    @format == FORMAT_TAG
  end

  def validate_format(format)
    candidate = format.to_s.downcase.to_sym
    [FORMAT_TAG, FORMAT_JSON, FORMAT_YAML].include?(candidate) ? candidate : FORMAT_JSON
  end

  def extract_document_name(data, default)
    document = data[:document]
    (document && document[:name]) || default
  end

  def extract_organization(data)
    document = data[:document]
    (document && document[:metadata_supplier]) || @organization
  end

  def extract_lifecycle(data)
    document = data[:document]
    document && document[:lifecycle]
  end

  # Emits the document-level fields. +uuid+, when given, is used verbatim
  # as the document namespace; otherwise a unique namespace is generated.
  def generate_document_header(name, uuid, lifecycle, organization)
    namespace = uuid || "#{SPDX_NAMESPACE}#{name}-#{SecureRandom.uuid}"
    timestamp = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
    tool = "Tool: #{@application}-#{@version}"
    sbom_type = lifecycle && (LIFECYCLE_MAP[lifecycle] || lifecycle)

    @elements[DOCUMENT_ID] = name

    if tag?
      @tag_output << "SPDXVersion: #{@spec_version}"
      @tag_output << "DataLicense: CC0-1.0"
      @tag_output << "SPDXID: #{DOCUMENT_ID}"
      @tag_output << "DocumentName: #{name}"
      @tag_output << "DocumentNamespace: #{namespace}"
      @tag_output << "Creator: #{tool}"
      @tag_output << "Creator: Organization: #{organization}" if organization
      @tag_output << "Created: #{timestamp}"
      @tag_output << "CreatorComment: <text>SBOM Type: #{sbom_type}</text>" if sbom_type
      @tag_output << ""
    else
      creation_info = { "created" => timestamp, "creators" => [tool] }
      creation_info["creators"] << "Organization: #{organization}" if organization
      creation_info["comment"] = "SBOM Type: #{sbom_type}" if sbom_type

      @json_output = {
        "spdxVersion" => @spec_version,
        "dataLicense" => "CC0-1.0",
        "SPDXID" => DOCUMENT_ID,
        "name" => name,
        "documentNamespace" => namespace,
        "creationInfo" => creation_info
      }
    end
  end

  def generate_packages(packages_data)
    return unless packages_data

    packages = packages_data.is_a?(Hash) ? packages_data.values : packages_data
    packages.each.with_index(1) { |pkg, position| generate_package(pkg, position) }
  end

  # Emits one package (skipped when it has no name) and records that the
  # document DESCRIBES it.
  def generate_package(pkg, index)
    name = pkg[:name]
    return unless name

    spdx_id = pkg[:id] || "SPDXRef-Package-#{index}-#{sanitize_id(name)}"
    @elements[spdx_id] = name

    if tag?
      @tag_output.concat(package_tag_lines(pkg, name, spdx_id))
    else
      @packages << package_json(pkg, name, spdx_id)
    end

    add_relationship(DOCUMENT_ID, "DESCRIBES", spdx_id)
  end

  def package_tag_lines(pkg, name, spdx_id)
    lines = ["##### Package: #{name}", "", "PackageName: #{name}", "SPDXID: #{spdx_id}"]
    lines << "PackageVersion: #{pkg[:version]}" if pkg[:version]

    supplier = supplier_label(pkg)
    lines << "PackageSupplier: #{supplier}" if supplier

    lines << "PackageDownloadLocation: #{pkg[:download_location] || NO_ASSERTION}"
    lines << "FilesAnalyzed: #{pkg[:files_analyzed] || 'false'}"
    checksum_pairs(pkg).each { |algo, value| lines << "PackageChecksum: #{algo}: #{value}" }
    lines << "PackageLicenseConcluded: #{pkg[:license_concluded] || NO_ASSERTION}"
    lines << "PackageLicenseDeclared: #{pkg[:license_declared] || NO_ASSERTION}"
    lines << "PackageCopyrightText: #{pkg[:copyright_text] || NO_ASSERTION}"

    pkg[:external_references]&.each do |ref|
      lines << "ExternalRef: #{ref[0]} #{ref[1]} #{ref[2]}"
    end

    lines << ""
  end

  def package_json(pkg, name, spdx_id)
    entry = {
      "SPDXID" => spdx_id,
      "name" => name,
      "downloadLocation" => pkg[:download_location] || NO_ASSERTION,
      "filesAnalyzed" => [true, "true"].include?(pkg[:files_analyzed]),
      "licenseConcluded" => pkg[:license_concluded] || NO_ASSERTION,
      "licenseDeclared" => pkg[:license_declared] || NO_ASSERTION,
      "copyrightText" => pkg[:copyright_text] || NO_ASSERTION
    }

    entry["versionInfo"] = pkg[:version] if pkg[:version]

    supplier = supplier_label(pkg)
    entry["supplier"] = supplier if supplier

    checksums = checksum_json(pkg)
    entry["checksums"] = checksums if checksums.any?

    if pkg[:external_references]&.any?
      entry["externalRefs"] = pkg[:external_references].map do |ref|
        {
          "referenceCategory" => ref[0],
          "referenceType" => ref[1],
          "referenceLocator" => ref[2]
        }
      end
    end

    entry
  end

  # "<type>: <name>" when both parts are present, else nil.
  def supplier_label(pkg)
    return nil unless pkg[:supplier_type] && pkg[:supplier]

    "#{pkg[:supplier_type]}: #{pkg[:supplier]}"
  end

  # [[algorithm, value], ...] from a Hash or an Array of pairs.
  def checksum_pairs(entry)
    (entry[:checksums] || []).map { |algo, value| [algo, value] }
  end

  def checksum_json(entry)
    checksum_pairs(entry).map do |algo, value|
      { "algorithm" => algo, "checksumValue" => value }
    end
  end

  def generate_files(files_data)
    return unless files_data

    files = files_data.is_a?(Hash) ? files_data.values : files_data
    files.each.with_index(1) { |file, position| generate_file(file, position) }
  end

  # Emits one file entry (skipped when it has no name).
  def generate_file(file, index)
    name = file[:name]
    return unless name

    spdx_id = file[:id] || "SPDXRef-File-#{index}-#{sanitize_id(name)}"
    @elements[spdx_id] = name

    if tag?
      @tag_output << "FileName: #{name}"
      @tag_output << "SPDXID: #{spdx_id}"
      checksum_pairs(file).each { |algo, value| @tag_output << "FileChecksum: #{algo}: #{value}" }
      @tag_output << "LicenseConcluded: #{file[:license_concluded] || NO_ASSERTION}"
      @tag_output << "FileCopyrightText: #{file[:copyright_text] || NO_ASSERTION}"
      @tag_output << ""
    else
      entry = {
        "SPDXID" => spdx_id,
        "fileName" => name,
        "licenseConcluded" => file[:license_concluded] || NO_ASSERTION,
        "copyrightText" => file[:copyright_text] || NO_ASSERTION
      }

      checksums = checksum_json(file)
      entry["checksums"] = checksums if checksums.any?

      @files << entry
    end
  end

  # Records caller-supplied relationships. Endpoints may be given as ids
  # (:source_id / :target_id) or as element names or ids (:source /
  # :target); entries that cannot be resolved are skipped.
  def generate_relationships(relationships_data)
    return unless relationships_data

    relationships_data.each do |rel|
      source_id = rel[:source_id] || find_element_id(rel[:source])
      target_id = rel[:target_id] || find_element_id(rel[:target])
      rel_type = rel[:type] || rel[:relationship_type]

      next unless source_id && target_id && rel_type

      add_relationship(source_id, rel_type, target_id)
    end
  end

  def add_relationship(source_id, rel_type, target_id)
    @relationships << if tag?
                        [source_id, rel_type, target_id]
                      else
                        {
                          "spdxElementId" => source_id,
                          "relationshipType" => rel_type,
                          "relatedSpdxElement" => target_id
                        }
                      end
  end

  def generate_license_info(licenses_data)
    return unless licenses_data&.any?

    licenses_data.each do |lic|
      if tag?
        @tag_output << "LicenseID: #{lic[:id]}"
        @tag_output << "LicenseName: #{lic[:name]}" if lic[:name]
        @tag_output << "ExtractedText: <text>#{lic[:text]}</text>" if lic[:text]
        @tag_output << ""
      else
        @licenses << {
          "licenseId" => lic[:id],
          "name" => lic[:name],
          "extractedText" => lic[:text]
        }.compact
      end
    end
  end

  # Attaches the collected sections to the output. Relationships are
  # de-duplicated first: input data may repeat the DESCRIBES links that
  # are added automatically for every package.
  def finalize_output
    links = @relationships.uniq

    if tag?
      links.each do |source_id, rel_type, target_id|
        @tag_output << "Relationship: #{source_id} #{rel_type} #{target_id}"
      end
    else
      @json_output["packages"] = @packages if @packages.any?
      @json_output["files"] = @files if @files.any?
      @json_output["relationships"] = links if links.any?
      @json_output["hasExtractedLicensingInfos"] = @licenses if @licenses.any?
    end
  end

  # Resolves a relationship endpoint to a known SPDX id. Accepts either
  # an id that has already been emitted or the name of an emitted element.
  def find_element_id(name)
    return nil if name.nil?
    return name if @elements.key?(name)

    @elements.key(name)
  end

  # SPDX ids may only contain letters, digits, "." and "-".
  def sanitize_id(str)
    str.to_s.gsub(/[^a-zA-Z0-9.\-]/, "-")
  end
end
336
+ end
337
+ end