sbom 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,426 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "yaml"
5
+ require "rexml/document"
6
+
7
+ module Sbom
8
+ module Spdx
9
+ class Parser
10
# Identifiers for the SPDX serialisations that #parse dispatches on.
FORMAT_TAG = :tag   # tag-value text ("Tag: value" lines)
FORMAT_JSON = :json # JSON object
FORMAT_YAML = :yaml # YAML mapping
FORMAT_XML = :xml   # XML with an <SpdxDocument> element
FORMAT_RDF = :rdf   # RDF/XML using the spdx: prefix
15
+
16
# Sets up empty parse state: one document record plus the collections
# that the format-specific parsers fill in.
def initialize
  # Keyed lookups: packages by [name, version], files by name,
  # elements by SPDX id (mapping to the element's name).
  @packages = {}
  @files = {}
  @elements = {}

  # Ordered collections.
  @relationships = []
  @licenses = []

  @document = Data::Document.new
end
24
+
25
# Parses +content+ and returns the assembled SBOM.
#
# @param content [String] serialized SPDX document
# @param format [Symbol, nil] one of the FORMAT_* constants; detected
#   from the content when omitted
# @return the object produced by #build_sbom
# @raise [ParserError] when the format is not one of the FORMAT_* values
def parse(content, format = nil)
  handlers = {
    FORMAT_JSON => :parse_json,
    FORMAT_YAML => :parse_yaml,
    FORMAT_TAG => :parse_tag_value,
    FORMAT_XML => :parse_xml,
    FORMAT_RDF => :parse_rdf
  }

  handler = handlers[format || detect_format(content)]
  raise ParserError, "Unknown SPDX format" unless handler

  # The handlers are private, hence send.
  send(handler, content)
  build_sbom
end
45
+
46
+ private
47
+
48
# Guesses the serialisation of +content+ from cheap textual markers.
#
# Checks run in this order: leading "{" (JSON), leading "<" with an
# <SpdxDocument element (XML) or an spdx: prefixed element (RDF),
# tag-value markers, then YAML. Anything else falls back to tag-value.
#
# @param content [String] raw document text
# @return [Symbol] one of the FORMAT_* constants
def detect_format(content)
  stripped = content.strip
  return FORMAT_JSON if stripped.start_with?("{")

  if stripped.start_with?("<")
    return FORMAT_XML if stripped.include?("<SpdxDocument")
    return FORMAT_RDF if stripped.include?("<spdx:")
  end

  # "SPDXVersion:" is the tag-value spelling (JSON/YAML use "spdxVersion").
  # Without this check a tag-value document that declares no package is
  # itself a loadable YAML mapping containing "SPDXID:" and would be
  # misdetected as YAML.
  return FORMAT_TAG if stripped.include?("PackageName:")
  return FORMAT_TAG if stripped.match?(/^[ \t]*SPDXVersion:/)

  if stripped.include?("SPDXID:")
    begin
      # Only a mapping can be an SPDX YAML document.
      return FORMAT_YAML if YAML.safe_load(stripped).is_a?(Hash)
    rescue StandardError
      # Not loadable as safe YAML: fall through to the tag-value default.
      nil
    end
  end

  FORMAT_TAG
end
64
+
65
# Parses an SPDX JSON document, unwrapping two envelope shapes first:
# a top-level "sbom" key, and a "predicate" key when "predicateType"
# mentions "spdx".
#
# A payload that is not a JSON object after unwrapping (top-level array,
# missing predicate, ...) is treated as "not an SPDX document" and is
# skipped instead of crashing with TypeError/NoMethodError.
#
# @param content [String] JSON text
# @raise [ParserError] when the text is not valid JSON
def parse_json(content)
  data = JSON.parse(content)
  data = data["sbom"] if data.is_a?(Hash) && data["sbom"]
  data = data["predicate"] if data.is_a?(Hash) && data["predicateType"]&.include?("spdx")
  parse_spdx_data(data) if data.is_a?(Hash)
rescue JSON::ParserError => e
  raise ParserError, "Invalid JSON: #{e.message}"
end
73
+
74
# Parses an SPDX YAML document with the safe loader.
#
# Every Psych failure is reported as ParserError, not only syntax errors:
# the safe loader also raises for disallowed classes (for example an
# unquoted date scalar) and for aliases. A document that does not load to
# a mapping (empty text, a list, a scalar) is skipped.
#
# @param content [String] YAML text
# @raise [ParserError] when Psych cannot load the text
def parse_yaml(content)
  data = YAML.safe_load(content)
  parse_spdx_data(data) if data.is_a?(Hash)
rescue Psych::Exception => e
  raise ParserError, "Invalid YAML: #{e.message}"
end
80
+
81
# Copies a decoded SPDX JSON/YAML structure into the parser state.
#
# Input that is not a Hash, or a Hash with neither "spdxVersion" nor
# "SPDXID", is ignored. Previously nil/Array input raised
# NoMethodError/TypeError.
#
# @param data [Hash] decoded document using the SPDX JSON key names
# @return [void]
def parse_spdx_data(data)
  return unless data.is_a?(Hash)
  return unless data["spdxVersion"] || data["SPDXID"]

  @document.version = data["spdxVersion"]
  @document.id = data["SPDXID"]
  @document.name = data["name"]
  @document.data_license = data["dataLicense"]
  @document.namespace = data["documentNamespace"]
  @document.sbom_type = "spdx"

  creation_info = data["creationInfo"]
  if creation_info.is_a?(Hash)
    @document.created = creation_info["created"]
    @document.license_list_version = creation_info["licenseListVersion"]

    Array(creation_info["creators"]).each do |creator|
      # Creators are "Type: name" strings; anything else cannot be split.
      next unless creator.is_a?(String)

      type, name = creator.split(": ", 2)
      if type == "Organization"
        # An organization creator is recorded as the metadata supplier.
        @document.metadata_supplier = name
      else
        @document.add_creator(type, name)
      end
    end
  end

  Array(data["packages"]).each { |pkg_data| parse_json_package(pkg_data) }
  Array(data["files"]).each { |file_data| parse_json_file(file_data) }
  Array(data["relationships"]).each { |rel_data| parse_json_relationship(rel_data) }

  Array(data["hasExtractedLicensingInfos"]).each do |lic_data|
    @licenses << {
      id: lic_data["licenseId"],
      name: lic_data["name"],
      text: lic_data["extractedText"],
      comment: lic_data["comment"]
    }
  end
end
126
+
127
# Builds a package from one entry of the "packages" list and registers it
# in @elements (SPDX id to name) and @packages ([name, version] to hash).
#
# @param data [Hash] package entry using the SPDX JSON key names
def parse_json_package(data)
  pkg = Data::Package.new
  pkg.name = data["name"]
  pkg.id = data["SPDXID"]
  pkg.version = data["versionInfo"]
  pkg.download_location = data["downloadLocation"]
  pkg.files_analyzed = data["filesAnalyzed"]
  pkg.license_concluded = data["licenseConcluded"]
  pkg.set_license_declared(data["licenseDeclared"])
  pkg.copyright_text = data["copyrightText"]
  pkg.description = data["description"]
  pkg.summary = data["summary"]
  pkg.comment = data["comment"]
  pkg.homepage = data["homepage"]
  pkg.filename = data["packageFileName"]

  # Supplier and originator are "Type: name" strings; a value with no
  # ": " separator leaves the name nil and is not recorded.
  if (supplier = data["supplier"])
    kind, who = supplier.split(": ", 2)
    pkg.set_supplier(kind, who) if who
  end

  if (originator = data["originator"])
    kind, who = originator.split(": ", 2)
    pkg.set_originator(kind, who) if who
  end

  purpose = data["primaryPackagePurpose"]
  pkg.package_type = purpose if purpose

  Array(data["checksums"]).each do |sum|
    pkg.add_checksum(sum["algorithm"], sum["checksumValue"])
  end

  Array(data["externalRefs"]).each do |ref|
    pkg.add_external_reference(ref["referenceCategory"], ref["referenceType"], ref["referenceLocator"])
  end

  @elements[pkg.id] = pkg.name
  @packages[[pkg.name, pkg.version]] = pkg.to_h
end
172
+
173
# Builds a file record from one entry of the "files" list and registers it
# in @elements (SPDX id to name) and @files (name to hash).
#
# @param data [Hash] file entry using the SPDX JSON key names
def parse_json_file(data)
  entry = Data::SbomFile.new
  entry.name = data["fileName"]
  entry.id = data["SPDXID"]
  entry.license_concluded = data["licenseConcluded"]
  entry.copyright_text = data["copyrightText"]
  entry.comment = data["comment"]

  Array(data["fileTypes"]).each { |kind| entry.add_file_type(kind) }
  Array(data["checksums"]).each { |sum| entry.add_checksum(sum["algorithm"], sum["checksumValue"]) }

  @elements[entry.id] = entry.name
  @files[entry.name] = entry.to_h
end
192
+
193
# Records one entry of the "relationships" list in @relationships.
# Source and target names are looked up among the elements registered so
# far and stay nil for ids that are not known yet.
#
# @param data [Hash] relationship entry using the SPDX JSON key names
def parse_json_relationship(data)
  relationship = Data::Relationship.new.tap do |rel|
    rel.source_id = data["spdxElementId"]
    rel.target_id = data["relatedSpdxElement"]
    rel.relationship_type = data["relationshipType"]
    rel.source = @elements[rel.source_id]
    rel.target = @elements[rel.target_id]
  end

  @relationships << relationship.to_h
end
203
+
204
# Parses an SPDX tag-value document.
#
# The text is processed one line at a time. A "PackageName" or "FileName"
# tag opens a new package/file, and the tags that follow (including
# "SPDXID") apply to whichever is currently open, or to the document when
# neither is. Blank lines, lines starting with "#" and lines without a
# recognised tag are skipped.
#
# NOTE(review): values are read per line, so a multi-line value keeps only
# its first line here.
#
# @param content [String] tag-value text
# @return [void]
def parse_tag_value(content)
  current_package = nil
  current_file = nil

  content.split("\n").each do |line|
    next if line.strip.empty? || line.start_with?("#")

    tag, value = parse_tag_line(line)
    next unless tag && value

    case tag
    when "SPDXVersion"
      @document.version = value
      @document.sbom_type = "spdx"
    when "DataLicense"
      @document.data_license = value
    when "SPDXID"
      if current_package
        current_package.id = value
        @elements[value] = current_package.name
      elsif current_file
        current_file.id = value
        @elements[value] = current_file.name
      else
        @document.id = value
        @elements[value] = @document.name
      end
    when "DocumentName"
      @document.name = value
      # The document id may have been seen before its name.
      @elements[@document.id] = value if @document.id
    when "DocumentNamespace"
      @document.namespace = value
    when "LicenseListVersion"
      @document.license_list_version = value
    when "Creator"
      # The value is "Type: name". Splitting on whitespace left the colon
      # on the type, so the Organization branch could never match.
      type, name = split_tag_actor(value)
      if type == "Organization"
        @document.metadata_supplier = name
      else
        @document.add_creator(type, name)
      end
    when "Created"
      @document.created = value
    when "PackageName"
      save_package(current_package) if current_package
      # Flush the open file too; it used to be dropped without being saved.
      save_file(current_file) if current_file
      current_file = nil
      current_package = Data::Package.new
      current_package.name = value
    when "PackageVersion"
      current_package&.version = value
    when "PackageSupplier"
      if current_package
        type, name = split_tag_actor(value)
        # Same rule as the JSON path: no name, nothing to record.
        current_package.set_supplier(type, name) if name
      end
    when "PackageOriginator"
      if current_package
        type, name = split_tag_actor(value)
        current_package.set_originator(type, name) if name
      end
    when "PackageDownloadLocation"
      current_package&.download_location = value
    when "FilesAnalyzed"
      current_package&.files_analyzed = value
    when "PackageChecksum"
      if current_package
        algo, checksum = value.split(": ", 2)
        current_package.add_checksum(algo, checksum)
      end
    when "PackageLicenseConcluded"
      current_package&.license_concluded = value
    when "PackageLicenseDeclared"
      current_package&.set_license_declared(value)
    when "PackageCopyrightText"
      current_package&.copyright_text = value
    when "PackageDescription"
      current_package&.description = value
    when "PackageSummary"
      current_package&.summary = value
    when "PackageComment"
      current_package&.comment = value
    when "PackageHomePage"
      current_package&.homepage = value
    when "PackageFileName"
      current_package&.filename = value
    when "PrimaryPackagePurpose"
      current_package&.package_type = value
    when "ExternalRef"
      if current_package
        parts = value.split(" ", 3)
        current_package.add_external_reference(parts[0], parts[1], parts[2]) if parts.length >= 3
      end
    when "FileName"
      save_file(current_file) if current_file
      save_package(current_package) if current_package
      current_package = nil
      current_file = Data::SbomFile.new
      current_file.name = value
    when "FileType"
      current_file&.add_file_type(value)
    when "FileChecksum"
      if current_file
        algo, checksum = value.split(": ", 2)
        current_file.add_checksum(algo, checksum)
      end
    when "LicenseConcluded"
      current_file&.license_concluded = value
    when "FileCopyrightText"
      current_file&.copyright_text = value
    when "Relationship"
      parse_tag_relationship(value)
    end
  end

  # Flush whatever is still open at the end of the document.
  save_package(current_package) if current_package
  save_file(current_file) if current_file
end

# Splits a "Type: name" actor value into [type, name], both stripped.
# The name is nil when the value has no colon (for example "NOASSERTION").
def split_tag_actor(value)
  type, name = value.split(":", 2)
  [type&.strip, name&.strip]
end
322
+
323
# Splits a tag-value line at its first colon.
#
# @param line [String] one line of a tag-value document
# @return [Array(String, String), nil] stripped [tag, value], or nil when
#   the line contains no colon
def parse_tag_line(line)
  tag, separator, value = line.partition(":")
  return nil if separator.empty?

  [tag.strip, value.strip]
end
329
+
330
# Records a tag-value "Relationship" entry in @relationships.
#
# The value is read as whitespace-separated "<source> <type> <target>";
# values with fewer than three fields are ignored and any extra fields are
# dropped. Names are resolved from the elements registered so far.
#
# @param value [String] text after the "Relationship:" tag
def parse_tag_relationship(value)
  source_id, type, target_id = value.split(" ")
  return unless target_id

  rel = Data::Relationship.new
  rel.source_id = source_id
  rel.relationship_type = type
  rel.target_id = target_id
  rel.source = @elements[rel.source_id]
  rel.target = @elements[rel.target_id]

  @relationships << rel.to_h
end
343
+
344
# Stores a finished package in @packages under [name, version] and, when
# it has an id, registers that id in @elements. Packages that are nil or
# have no name are ignored.
#
# @param package [#id, #name, #version, #to_h, nil]
def save_package(package)
  name = package&.name
  return unless name

  id = package.id
  @elements[id] = name if id
  @packages[[name, package.version]] = package.to_h
end
350
+
351
# Stores a finished file record in @files under its name and, when it has
# an id, registers that id in @elements. Records that are nil or have no
# name are ignored.
#
# @param file [#id, #name, #to_h, nil]
def save_file(file)
  name = file&.name
  return unless name

  id = file.id
  @elements[id] = name if id
  @files[name] = file.to_h
end
357
+
358
# Parses an SPDX XML document: reads the document-level fields from the
# root element's children and hands every "packages" child to
# #parse_xml_package. Does nothing when the document has no root element.
#
# @param content [String] XML text
# @raise [ParserError] when REXML cannot parse the text
def parse_xml(content)
  root = REXML::Document.new(content).root
  return unless root

  ns = root.namespace
  child_text = ->(tag) { root.elements[tag]&.text }

  @document.version = child_text.call("spdxVersion")
  @document.id = child_text.call("SPDXID")
  @document.name = child_text.call("name")
  @document.data_license = child_text.call("dataLicense")
  @document.sbom_type = "spdx"

  root.elements.each("packages") { |node| parse_xml_package(node, ns) }
rescue REXML::ParseException => e
  raise ParserError, "Invalid XML: #{e.message}"
end
377
+
378
# Builds a package from a "packages" XML element (name, version and id
# only) and registers it in @packages and, when it has an id, @elements.
#
# @param pkg [REXML::Element] the package element
# @param _namespace [String] root namespace; currently unused
def parse_xml_package(pkg, _namespace)
  child_text = ->(tag) { pkg.elements[tag]&.text }

  record = Data::Package.new
  record.name = child_text.call("name")
  record.version = child_text.call("versionInfo")
  record.id = child_text.call("SPDXID")

  @elements[record.id] = record.name if record.id
  @packages[[record.name, record.version]] = record.to_h
end
387
+
388
# Extracts a minimal view of an SPDX RDF/XML document by scanning it line
# by line for three elements written on a single line:
# spdx:name, spdx:versionInfo and spdx:spdxVersion.
#
# A package is recorded when a versionInfo line follows a name line; the
# pending name is then cleared. A versionInfo with no pending name is
# ignored.
#
# @param content [String] RDF/XML text
def parse_rdf(content)
  pending_name = nil

  content.split("\n").each do |line|
    case line
    when /<spdx:name>(.+?)<\/spdx:name>/
      pending_name = Regexp.last_match(1)
    when /<spdx:versionInfo>(.+?)<\/spdx:versionInfo>/
      next unless pending_name

      version = Regexp.last_match(1)
      package = Data::Package.new
      package.name = pending_name
      package.version = version
      @packages[[pending_name, version]] = package.to_h
      pending_name = nil
    when /<spdx:spdxVersion>(.+?)<\/spdx:spdxVersion>/
      @document.version = Regexp.last_match(1)
      @document.sbom_type = "spdx"
    end
  end
end
413
+
414
# Assembles the SBOM object from everything collected while parsing:
# the document hash, packages, files, relationships and licenses.
#
# @return [Data::Sbom]
def build_sbom
  Data::Sbom.new(sbom_type: :spdx).tap do |result|
    result.version = @document.version
    result.add_document(@document.to_h)
    result.add_packages(@packages)
    result.add_files(@files)
    result.add_relationships(@relationships)
    result.add_licenses(@licenses)
  end
end
424
+ end
425
+ end
426
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module Sbom
  # Outcome of a validation run: a validity flag plus the format and
  # version that were recognised and any error messages.
  class ValidationResult
    attr_reader :format, :version, :errors

    # @param valid [Boolean] whether validation succeeded
    # @param format [Object, nil] recognised format
    # @param version [Object, nil] recognised version
    # @param errors [Array] error messages; empty by default
    def initialize(valid:, format: nil, version: nil, errors: [])
      @errors = errors
      @version = version
      @format = format
      @valid = valid
    end

    # @return the validity flag exactly as given to the constructor
    def valid?
      @valid
    end

    # @return [Boolean] negation of the validity flag
    def invalid?
      !valid?
    end

    # @return [String] "<format> <version>" when valid, otherwise
    #   "Invalid: " followed by the comma-separated errors
    def to_s
      return "Invalid: #{errors.join(', ')}" unless valid?

      "#{format} #{version}"
    end
  end
end