nvd_feed_api 0.0.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,410 @@
1
+ # Ruby internal
2
+ require 'digest'
3
+ require 'net/https'
4
+ require 'date'
5
+ # External
6
+ require 'archive/zip'
7
+ require 'oj'
8
+ # Project internal
9
+ require 'nvd_feed_api/meta'
10
+
11
class NVDFeedScraper
  # Feed object.
  class Feed
    class << self
      # Get / set default feed storage location, where JSON feeds and archives are stored by default.
      # @return [String] default feed storage location. Defaults to +/tmp/+.
      # @example
      #   NVDFeedScraper::Feed.default_storage_location = '/srv/downloads/'
      attr_accessor :default_storage_location
    end
    @default_storage_location = '/tmp/'

    # Pattern of an acceptable CVE ID. Anchored on the whole string (+\A+ / +\z+)
    # so that multi-line input cannot slip through the validation.
    CVE_ID_PATTERN = /\ACVE-[0-9]{4}-[0-9]{4,}\z/i.freeze
    private_constant :CVE_ID_PATTERN

    # @return [String] the name of the feed.
    # @example
    #   'CVE-2007'
    attr_reader :name

    # @return [String] the last update date of the feed information on the NVD website.
    # @example
    #   '10/19/2017 3:27:02 AM -04:00'
    attr_reader :updated

    # @return [String] the URL of the metadata file of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
    attr_reader :meta_url

    # @return [String] the URL of the gz archive of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
    attr_reader :gz_url

    # @return [String] the URL of the zip archive of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
    attr_reader :zip_url

    # @return [Meta] the {Meta} object of the feed.
    # @note
    #   Return nil if not previously loaded by {#meta_pull}.
    #   Note that {#json_pull} also calls {#meta_pull}.
    # @example
    #   s = NVDFeedScraper.new
    #   s.scrap
    #   f = s.feeds("CVE-2014")
    #   f.meta # => nil
    #   f.meta_pull
    #   f.meta # => #<NVDFeedScraper::Meta:0x00555b53027570 ... >
    attr_reader :meta

    # @return [String] the path of the saved JSON file.
    # @note Return nil if not previously loaded by {#json_pull}.
    # @example
    #   s = NVDFeedScraper.new
    #   s.scrap
    #   f = s.feeds("CVE-2014")
    #   f.json_file # => nil
    #   f.json_pull
    #   f.json_file # => "/tmp/nvdcve-1.0-2014.json"
    attr_reader :json_file

    # @return [String] the type of the feed, should always be +CVE+.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_type

    # @return [String] the format of the feed, should always be +MITRE+.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_format

    # @return [Float] the version of the JSON schema of the feed.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_version

    # @return [Integer] the number of CVEs in the feed.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_number_of_cves

    # @return [Date] the date of the last update of the feed by the NVD.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_timestamp

    # A new instance of Feed.
    # @param name [String] see {#name}.
    # @param updated [String] see {#updated}.
    # @param meta_url [String] see {#meta_url}.
    # @param gz_url [String] see {#gz_url}.
    # @param zip_url [String] see {#zip_url}.
    def initialize(name, updated, meta_url, gz_url, zip_url)
      # feed description, as given by the caller
      @name = name
      @updated = updated
      @meta_url = meta_url
      @gz_url = gz_url
      @zip_url = zip_url
      # do not pull meta and json automatically for speed and memory footprint
      @meta = nil
      @json_file = nil
      # feed data, filled by #json_pull
      @data_type = nil
      @data_format = nil
      @data_version = nil
      @data_number_of_cves = nil
      @data_timestamp = nil
    end

    # Create or update the {Meta} object (fill the attribute).
    # @return [Meta] the updated {Meta} object of the feed.
    # @see #meta
    def meta_pull
      fresh_meta = NVDFeedScraper::Meta.new(@meta_url)
      fresh_meta.parse
      # only replace @meta once the new one has been parsed successfully
      @meta = fresh_meta
    end

    # Download the gz archive of the feed.
    # @param opts [Hash] see {#download_file}.
    # @return [String] the saved gz file path.
    # @example
    #   afeed.download_gz
    #   afeed.download_gz(destination_path: '/srv/save/')
    def download_gz(opts = {})
      download_file(@gz_url, opts)
    end

    # Download the zip archive of the feed.
    # @param opts [Hash] see {#download_file}.
    # @return [String] the saved zip file path.
    # @example
    #   afeed.download_zip
    #   afeed.download_zip(destination_path: '/srv/save/')
    def download_zip(opts = {})
      download_file(@zip_url, opts)
    end

    # Download the JSON feed and fill the attribute.
    # @param opts [Hash] see {#download_file}. The hash is not modified.
    # @return [String] the path of the saved JSON file. Default use {Feed#default_storage_location}.
    # @raise [RuntimeError] when the extracted JSON file does not match the SHA256 of the meta file.
    # @note Will download and save the zip of the JSON file, unzip and save it. This massively consume time.
    # @see #json_file
    def json_pull(opts = {})
      # work on local values so the caller's hash is left untouched
      destination_path = opts[:destination_path] || Feed.default_storage_location
      destination_path += '/' unless destination_path[-1] == '/'
      filename = URI(@zip_url).path.split('/').last.chomp('.zip')
      # do not use @json_file for destination_file because of offline loading
      destination_file = destination_path + filename
      meta_pull
      if File.file?(destination_file) && sha256_match?(destination_file, meta.sha256)
        # the local copy is already the latest one: skip the download
        @json_file = destination_file
        load_feed_data if @data_type.nil?
      else
        zip_path = download_zip(opts.merge(destination_path: destination_path))
        Archive::Zip.open(zip_path) do |z|
          z.extract(destination_path, flatten: true)
        end
        @json_file = zip_path.chomp('.zip')
        # Verify hash integrity
        raise "File corruption: #{@json_file}" unless sha256_match?(@json_file, meta.sha256)

        load_feed_data
      end
      @json_file
    end

    # Search for CVE in the feed.
    # @overload cve(cve)
    #   One CVE.
    #   @param cve [String] CVE ID, case insensitive.
    #   @return [Hash] a Ruby Hash corresponding to the CVE. +nil+ if the CVE is not in the feed.
    # @overload cve(cve_arr)
    #   An array of CVEs.
    #   @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
    #   @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
    # @overload cve(cve, *)
    #   Multiple CVEs.
    #   @param cve [String] CVE ID, case insensitive.
    #   @param * [String] As many CVE ID as you want.
    #   @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
    # @raise [RuntimeError] when {#json_pull} was not called, when an argument is not a valid
    #   CVE ID, or (Array / multiple forms only) when a requested CVE is not in the feed.
    # @note {#json_pull} is needed before using this method. Remember you're searching only in the current feed.
    # @todo implement a CVE Class instead of returning a Hash.
    # @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
    # @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
    # @example
    #   s = NVDFeedScraper.new
    #   s.scrap
    #   f = s.feeds("CVE-2014")
    #   f.json_pull
    #   f.cve("CVE-2014-0002", "cve-2014-0001")
    def cve(*arg_cve)
      check_json_file
      raise 'no argument provided, 1 or more expected' if arg_cve.empty?

      # a list of arguments is handled exactly like one Array argument
      target = arg_cve.length == 1 ? arg_cve.first : arg_cve
      case target
      when String then find_single_cve(target)
      when Array then find_many_cves(target)
      else raise "the provided argument (#{target}) is nor a String or an Array"
      end
    end

    # Return a list with the name of all available CVEs in the feed.
    # Can only be called after {#json_pull}.
    # @return [Array<String>] List with the name of all available CVEs. May return thousands CVEs.
    # @raise [RuntimeError] when {#json_pull} was not called or the JSON file disappeared.
    def available_cves
      check_json_file
      with_json_doc do |doc|
        # Iterating on the known count is quicker than doc.fetch('/CVE_Items').size
        (1..@data_number_of_cves).map do |i|
          doc.move("/CVE_Items/#{i}")
          doc.fetch('cve/CVE_data_meta/ID')
        end
      end
    end

    # @param arg_name [String] the new name of the feed.
    # @return [String] the new name of the feed.
    # @example
    #   'CVE-2007'
    def name=(arg_name)
      raise "name (#{arg_name}) is not a string" unless arg_name.is_a?(String)

      @name = arg_name
    end

    # @param arg_updated [String] the last update date of the feed information on the NVD website.
    # @return [String] the new date.
    # @example
    #   '10/19/2017 3:27:02 AM -04:00'
    def updated=(arg_updated)
      raise "updated date (#{arg_updated}) is not a string" unless arg_updated.is_a?(String)

      @updated = arg_updated
    end

    # @param arg_meta_url [String] the new URL of the metadata file of the feed.
    # @return [String] the new URL of the metadata file of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
    def meta_url=(arg_meta_url)
      raise "meta_url (#{arg_meta_url}) is not a string" unless arg_meta_url.is_a?(String)

      @meta_url = arg_meta_url
    end

    # @param arg_gz_url [String] the new URL of the gz archive of the feed.
    # @return [String] the new URL of the gz archive of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
    def gz_url=(arg_gz_url)
      raise "gz_url (#{arg_gz_url}) is not a string" unless arg_gz_url.is_a?(String)

      @gz_url = arg_gz_url
    end

    # @param arg_zip_url [String] the new URL of the zip archive of the feed.
    # @return [String] the new URL of the zip archive of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
    def zip_url=(arg_zip_url)
      raise "zip_url (#{arg_zip_url}) is not a string" unless arg_zip_url.is_a?(String)

      @zip_url = arg_zip_url
    end

    # Download a file.
    # @param file_url [String] the URL of the file.
    # @param opts [Hash] the optional download parameters. The hash is not modified.
    # @option opts [String] :destination_path the destination path (may
    #   overwrite existing file).
    #   Default use {Feed#default_storage_location}.
    # @option opts [String] :sha256 the SHA256 hash to check, if the file
    #   already exist and the hash matches then the download will be skipped.
    # @return [String] the saved file path.
    # @raise [RuntimeError] when the URL has no file name or the server does not answer with a success status.
    # @example
    #   download_file('https://example.org/example.zip') # => '/tmp/example.zip'
    #   download_file('https://example.org/example.zip', destination_path: '/srv/save/') # => '/srv/save/example.zip'
    #   download_file('https://example.org/example.zip', {destination_path: '/srv/save/', sha256: '70d6ea136d5036b6ce771921a949357216866c6442f44cea8497f0528c54642d'}) # => '/srv/save/example.zip'
    def download_file(file_url, opts = {})
      destination_path = opts[:destination_path] || Feed.default_storage_location
      destination_path += '/' unless destination_path[-1] == '/'
      uri = URI(file_url)
      filename = uri.path.split('/').last
      raise "#{file_url} does not contain a file name" if filename.nil?

      destination_file = destination_path + filename
      expected_hash = opts[:sha256]
      # an up-to-date local copy makes the download useless
      up_to_date = expected_hash && File.file?(destination_file) &&
                   sha256_match?(destination_file, expected_hash)
      return destination_file if up_to_date

      res = Net::HTTP.get_response(uri)
      raise "#{file_url} ended with #{res.code} #{res.message}" unless res.is_a?(Net::HTTPSuccess)

      File.open(destination_file, 'wb') do |file|
        file.write(res.body)
      end
      destination_file
    end

    # Update the feed
    # @param fresh_feed [Feed] the fresh feed from which the feed will be updated.
    # @return [Boolean] +true+ if the feed was updated, +false+ if it wasn't.
    # @raise [RuntimeError] when +fresh_feed+ is not a {Feed}.
    # @note Is not intended to be used directly, use {NVDFeedScraper#update_feeds} instead.
    def update!(fresh_feed)
      raise "#{fresh_feed} is not a Feed" unless fresh_feed.is_a?(Feed)
      # same update date on both sides: nothing to refresh
      return false if updated == fresh_feed.updated

      self.name = fresh_feed.name
      self.updated = fresh_feed.updated
      self.meta_url = fresh_feed.meta_url
      self.gz_url = fresh_feed.gz_url
      self.zip_url = fresh_feed.zip_url
      # update if @meta was set
      meta_pull unless @meta.nil?
      # update if @json_file was set, this will also update @data_*
      json_pull unless @json_file.nil?
      true
    end

    protected :name=, :updated=, :meta_url=, :gz_url=, :zip_url=, :download_file

    private

    # Make sure the JSON file was pulled and still exists on disk.
    def check_json_file
      raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
      raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
    end

    # Open the JSON file as an Oj document, yield it and always close it afterwards.
    def with_json_doc
      doc = Oj::Doc.open(File.read(@json_file))
      begin
        yield doc
      ensure
        doc.close
      end
    end

    # Tell if the SHA256 of the file at +path+ equals +expected+ (case insensitive).
    def sha256_match?(path, expected)
      expected.casecmp?(Digest::SHA256.file(path).hexdigest)
    end

    # Fill the @data_* attributes from the header of the JSON file.
    def load_feed_data
      with_json_doc do |doc|
        @data_type = doc.fetch('/CVE_data_type')
        @data_format = doc.fetch('/CVE_data_format')
        @data_version = doc.fetch('/CVE_data_version').to_f
        @data_number_of_cves = doc.fetch('/CVE_data_numberOfCVEs').to_i
        @data_timestamp = Date.strptime(doc.fetch('/CVE_data_timestamp'), '%FT%RZ')
      end
    end

    # Look for one CVE, return its Hash or nil when it is not in the feed.
    def find_single_cve(cve_id)
      raise "bad CVE name (#{cve_id})" unless CVE_ID_PATTERN.match?(cve_id)

      wanted = cve_id.upcase
      with_json_doc do |doc|
        # Iterating on the known count is quicker than doc.fetch('/CVE_Items').size
        index = (1..@data_number_of_cves).find do |i|
          doc.fetch("/CVE_Items/#{i}/cve/CVE_data_meta/ID") == wanted
        end
        index && doc.fetch("/CVE_Items/#{index}")
      end
    end

    # Look for several CVEs, return their Hashes in the order of the feed.
    def find_many_cves(cve_ids)
      # validate before upcasing, otherwise a non String raises NoMethodError
      raise 'one of the provided arguments is not a String' unless cve_ids.all? { |x| x.is_a?(String) }
      raise 'bad CVE name' unless cve_ids.all? { |x| CVE_ID_PATTERN.match?(x) }

      # Upcase to compare with the IDs of the feed
      remaining = cve_ids.map(&:upcase).sort
      found = []
      with_json_doc do |doc|
        (1..@data_number_of_cves).each do |i|
          # stop parsing as soon as everything was found
          break if remaining.empty?

          doc.move("/CVE_Items/#{i}")
          next unless remaining.delete(doc.fetch('cve/CVE_data_meta/ID'))

          found.push(doc.fetch)
        end
      end
      raise "#{remaining.join(', ')} are unexisting CVEs in this feed" unless remaining.empty?

      found
    end
  end
end
@@ -0,0 +1,116 @@
1
+ # Ruby internal
2
+ require 'net/https'
3
+
4
class NVDFeedScraper
  # Manage the meta file from a feed.
  #
  # == Usage
  #
  # @example
  #   s = NVDFeedScraper.new
  #   s.scrap
  #   metaUrl = s.feeds("CVE-2014").meta_url
  #   m = NVDFeedScraper::Meta.new
  #   m.url = metaUrl
  #   m.parse
  #   m.sha256
  #
  # Several ways to set the url:
  #
  #   m = NVDFeedScraper::Meta.new(metaUrl)
  #   m.parse
  #   # or
  #   m = NVDFeedScraper::Meta.new
  #   m.url = metaUrl
  #   m.parse
  #   # or
  #   m = NVDFeedScraper::Meta.new
  #   m.parse(metaUrl)
  class Meta
    # {Meta} last modified date getter
    # @return [String] the last modified date and time.
    # @example
    #   '2017-10-19T03:27:02-04:00'
    attr_reader :last_modified_date

    # {Meta} JSON size getter
    # @return [String] the size of the JSON file uncompressed.
    # @example
    #   '29443314'
    attr_reader :size

    # {Meta} zip size getter
    # @return [String] the size of the zip file.
    # @example
    #   '2008493'
    attr_reader :zip_size

    # {Meta} gz size getter
    # @return [String] the size of the gz file.
    # @example
    #   '2008357'
    attr_reader :gz_size

    # {Meta} JSON sha256 getter
    # @return [String] the SHA256 value of the uncompressed JSON file.
    # @example
    #   '33ED52D451692596D644F23742ED42B4E350258B11ACB900F969F148FCE3777B'
    attr_reader :sha256

    # {Meta} URL getter.
    # @return [String] The URL of the meta file of the feed.
    attr_reader :url

    # @param url [String, nil] see {Feed#meta_url}.
    def initialize(url = nil)
      @url = url
      # nothing is known about the feed until #parse is called
      @last_modified_date = @size = @zip_size = @gz_size = @sha256 = nil
    end

    # {Meta} URL setter. Changing the URL discards the attributes parsed from the previous one.
    # @param url [String] see {Feed#meta_url}.
    def url=(url)
      @url = url
      @last_modified_date = @size = @zip_size = @gz_size = @sha256 = nil
    end

    # Parse the meta file from the URL and set the attributes.
    # @overload parse
    #   Parse the meta file from the URL and set the attributes.
    #   @return [Integer] Returns +0+ when there is no error.
    # @overload parse(url)
    #   Set the URL of the meta file of the feed and
    #   parse the meta file from the URL and set the attributes.
    #   @param url [String] see {Feed.meta_url}
    #   @return [Integer] Returns +0+ when there is no error.
    # @raise [RuntimeError] when more than one argument is given, when no URL is set,
    #   or when an attribute of the meta file is missing or malformed.
    def parse(*arg)
      raise 'Too much arguments' if arg.length > 1

      self.url = arg.first unless arg.empty?
      raise "Can't parse if the URL is empty" if @url.nil?

      content = Net::HTTP.get(URI(@url))
      # Each "key:value" token becomes a Hash entry. Only the first colon splits,
      # because the date value contains colons itself. Tokens without a colon
      # (e.g. the words of an error page) are ignored instead of breaking the Hash
      # construction, so the checks below report what is actually missing.
      fields = content.split
                      .map { |token| token.split(':', 2) }
                      .select { |pair| pair.length == 2 }
                      .to_h

      # The patterns are anchored on the whole value; match?(nil) is false so a
      # missing attribute is rejected as well.
      raise 'no lastModifiedDate attribute found' unless fields['lastModifiedDate']
      raise 'no valid size attribute found' unless /\A[0-9]+\z/.match?(fields['size'])
      raise 'no valid zipSize attribute found' unless /\A[0-9]+\z/.match?(fields['zipSize'])
      raise 'no valid gzSize attribute found' unless /\A[0-9]+\z/.match?(fields['gzSize'])
      raise 'no valid sha256 attribute found' unless /\A[0-9A-F]{64}\z/i.match?(fields['sha256'])

      @last_modified_date = fields['lastModifiedDate']
      @size = fields['size']
      @zip_size = fields['zipSize']
      @gz_size = fields['gzSize']
      @sha256 = fields['sha256']

      0
    end
  end
end