nvd_feed_api 0.0.3 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,410 @@
1
+ # Ruby internal
2
+ require 'digest'
3
+ require 'net/https'
4
+ require 'date'
5
+ # External
6
+ require 'archive/zip'
7
+ require 'oj'
8
+ # Project internal
9
+ require 'nvd_feed_api/meta'
10
+
11
class NVDFeedScraper
  # Feed object.
  class Feed
    class << self
      # Get / set default feed storage location, where will be stored JSON feeds and archives by default.
      # @return [String] default feed storage location. Default to +/tmp/+.
      # @example
      #   NVDFeedScraper::Feed.default_storage_location = '/srv/downloads/'
      attr_accessor :default_storage_location
    end
    @default_storage_location = '/tmp/'

    # Pattern of a valid CVE ID (case insensitive). Anchored with +\A+ / +\z+
    # so that a multi-line string cannot pass validation on one of its lines.
    CVE_ID_PATTERN = /\ACVE-[0-9]{4}-[0-9]{4,}\z/i.freeze
    private_constant :CVE_ID_PATTERN

    # @return [String] the name of the feed.
    # @example
    #   'CVE-2007'
    attr_reader :name

    # @return [String] the last update date of the feed information on the NVD website.
    # @example
    #   '10/19/2017 3:27:02 AM -04:00'
    attr_reader :updated

    # @return [String] the URL of the metadata file of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
    attr_reader :meta_url

    # @return [String] the URL of the gz archive of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
    attr_reader :gz_url

    # @return [String] the URL of the zip archive of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
    attr_reader :zip_url

    # @return [Meta] the {Meta} object of the feed.
    # @note
    #   Return nil if not previously loaded by {#meta_pull}.
    #   Note that {#json_pull} also calls {#meta_pull}.
    # @example
    #   s = NVDFeedScraper.new
    #   s.scrap
    #   f = s.feeds("CVE-2014")
    #   f.meta # => nil
    #   f.meta_pull
    #   f.meta # => #<NVDFeedScraper::Meta:0x00555b53027570 ... >
    attr_reader :meta

    # @return [String] the path of the saved JSON file.
    # @note Return nil if not previously loaded by {#json_pull}.
    # @example
    #   s = NVDFeedScraper.new
    #   s.scrap
    #   f = s.feeds("CVE-2014")
    #   f.json_file # => nil
    #   f.json_pull
    #   f.json_file # => "/tmp/nvdcve-1.0-2014.json"
    attr_reader :json_file

    # @return [String] the type of the feed, should always be +CVE+.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_type

    # @return [String] the format of the feed, should always be +MITRE+.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_format

    # @return [Float] the version of the JSON schema of the feed.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_version

    # @return [Integer] the number of CVEs in the feed.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_number_of_cves

    # @return [Date] the date of the last update of the feed by the NVD.
    # @note Return nil if not previously loaded by {#json_pull}.
    attr_reader :data_timestamp

    # A new instance of Feed.
    # @param name [String] see {#name}.
    # @param updated [String] see {#updated}.
    # @param meta_url [String] see {#meta_url}.
    # @param gz_url [String] see {#gz_url}.
    # @param zip_url [String] see {#zip_url}.
    def initialize(name, updated, meta_url, gz_url, zip_url)
      # From meta file
      @name = name
      @updated = updated
      @meta_url = meta_url
      @gz_url = gz_url
      @zip_url = zip_url
      # do not pull meta and json automatically for speed and memory footprint
      @meta = nil
      @json_file = nil
      # feed data
      @data_type = nil
      @data_format = nil
      @data_version = nil
      @data_number_of_cves = nil
      @data_timestamp = nil
    end

    # Create or update the {Meta} object (fill the attribute).
    # @return [Meta] the updated {Meta} object of the feed.
    # @see #meta
    def meta_pull
      meta_content = NVDFeedScraper::Meta.new(@meta_url)
      meta_content.parse
      # update @meta
      @meta = meta_content
    end

    # Download the gz archive of the feed.
    # @param opts [Hash] see {#download_file}.
    # @return [String] the saved gz file path.
    # @example
    #   afeed.download_gz
    #   afeed.download_gz(destination_path: '/srv/save/')
    def download_gz(opts = {})
      download_file(@gz_url, opts)
    end

    # Download the zip archive of the feed.
    # @param opts [Hash] see {#download_file}.
    # @return [String] the saved zip file path.
    # @example
    #   afeed.download_zip
    #   afeed.download_zip(destination_path: '/srv/save/')
    def download_zip(opts = {})
      download_file(@zip_url, opts)
    end

    # Download the JSON feed and fill the attribute.
    # @param opts [Hash] see {#download_file}. The hash is not modified.
    # @return [String] the path of the saved JSON file. Default use {Feed#default_storage_location}.
    # @raise [RuntimeError] if the extracted JSON file does not match the SHA256 of the meta file.
    # @note Will download and save the zip of the JSON file, unzip and save it. This massively consume time.
    # @see #json_file
    def json_pull(opts = {})
      # Work on a copy so the caller's hash is left untouched.
      opts = opts.dup
      opts[:destination_path] ||= Feed.default_storage_location

      destination_path = with_trailing_slash(opts[:destination_path])
      filename = URI(@zip_url).path.split('/').last.chomp('.zip')
      # do not use @json_file for destination_file because of offline loading
      destination_file = destination_path + filename
      meta_pull
      if File.file?(destination_file) && sha256_match?(destination_file, meta.sha256)
        # The local copy is already the latest one: skip the download.
        @json_file = destination_file
        load_feed_data if @data_type.nil?
      else
        zip_path = download_zip(opts)
        Archive::Zip.open(zip_path) do |z|
          z.extract(destination_path, flatten: true)
        end
        @json_file = zip_path.chomp('.zip')
        # Verify hash integrity
        raise "File corruption: #{@json_file}" unless sha256_match?(@json_file, meta.sha256)

        load_feed_data
      end
      @json_file
    end

    # Search for CVE in the feed.
    # @overload cve(cve)
    #   One CVE.
    #   @param cve [String] CVE ID, case insensitive.
    #   @return [Hash] a Ruby Hash corresponding to the CVE.
    # @overload cve(cve_arr)
    #   An array of CVEs.
    #   @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
    #   @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
    # @overload cve(cve, *)
    #   Multiple CVEs.
    #   @param cve [String] CVE ID, case insensitive.
    #   @param * [String] As many CVE ID as you want.
    #   @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
    # @raise [RuntimeError] if {#json_pull} was not called, if no argument is given,
    #   if an argument is not a valid CVE ID, or (Array / multiple forms only) if a CVE is not in the feed.
    # @note {#json_pull} is needed before using this method. Remember you're searching only in the current feed.
    # @todo implement a CVE Class instead of returning a Hash.
    # @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
    # @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
    # @example
    #   s = NVDFeedScraper.new
    #   s.scrap
    #   f = s.feeds("CVE-2014")
    #   f.json_pull
    #   f.cve("CVE-2014-0002", "cve-2014-0001")
    def cve(*arg_cve)
      ensure_json_file!
      raise 'no argument provided, 1 or more expected' if arg_cve.empty?

      # Overloading a list of arguments as one array argument
      return cve(arg_cve) if arg_cve.length > 1

      target = arg_cve.first
      case target
      when String then find_one_cve(target)
      when Array then find_many_cves(target)
      else raise "the provided argument (#{target}) is nor a String or an Array"
      end
    end

    # Return a list with the name of all available CVEs in the feed.
    # Can only be called after {#json_pull}.
    # @return [Array<String>] List with the name of all available CVEs. May return thousands CVEs.
    def available_cves
      ensure_json_file!

      cve_names = []
      # Block form: the document is closed even if a fetch raises.
      Oj::Doc.open(File.read(@json_file)) do |doc|
        # Quicker than doc.fetch('/CVE_Items').size
        (1..@data_number_of_cves).each do |i|
          doc.move("/CVE_Items/#{i}")
          cve_names.push(doc.fetch('cve/CVE_data_meta/ID'))
        end
      end
      cve_names
    end

    # @param arg_name [String] the new name of the feed.
    # @return [String] the new name of the feed.
    # @example
    #   'CVE-2007'
    def name=(arg_name)
      raise "name (#{arg_name}) is not a string" unless arg_name.is_a?(String)

      @name = arg_name
    end

    # @param arg_updated [String] the last update date of the feed information on the NVD website.
    # @return [String] the new date.
    # @example
    #   '10/19/2017 3:27:02 AM -04:00'
    def updated=(arg_updated)
      raise "updated date (#{arg_updated}) is not a string" unless arg_updated.is_a?(String)

      @updated = arg_updated
    end

    # @param arg_meta_url [String] the new URL of the metadata file of the feed.
    # @return [String] the new URL of the metadata file of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
    def meta_url=(arg_meta_url)
      raise "meta_url (#{arg_meta_url}) is not a string" unless arg_meta_url.is_a?(String)

      @meta_url = arg_meta_url
    end

    # @param arg_gz_url [String] the new URL of the gz archive of the feed.
    # @return [String] the new URL of the gz archive of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
    def gz_url=(arg_gz_url)
      raise "gz_url (#{arg_gz_url}) is not a string" unless arg_gz_url.is_a?(String)

      @gz_url = arg_gz_url
    end

    # @param arg_zip_url [String] the new URL of the zip archive of the feed.
    # @return [String] the new URL of the zip archive of the feed.
    # @example
    #   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
    def zip_url=(arg_zip_url)
      raise "zip_url (#{arg_zip_url}) is not a string" unless arg_zip_url.is_a?(String)

      @zip_url = arg_zip_url
    end

    # Download a file.
    # @param file_url [String] the URL of the file.
    # @param opts [Hash] the optional download parameters. The hash is not modified.
    # @option opts [String] :destination_path the destination path (may
    #   overwrite existing file).
    #   Default use {Feed#default_storage_location}.
    # @option opts [String] :sha256 the SHA256 hash to check, if the file
    #   already exist and the hash matches then the download will be skipped.
    # @return [String] the saved file path.
    # @raise [RuntimeError] if the HTTP response is not a success.
    # @example
    #   download_file('https://example.org/example.zip') # => '/tmp/example.zip'
    #   download_file('https://example.org/example.zip', destination_path: '/srv/save/') # => '/srv/save/example.zip'
    #   download_file('https://example.org/example.zip', {destination_path: '/srv/save/', sha256: '70d6ea136d5036b6ce771921a949357216866c6442f44cea8497f0528c54642d'}) # => '/srv/save/example.zip'
    def download_file(file_url, opts = {})
      destination_path = with_trailing_slash(opts[:destination_path] || Feed.default_storage_location)
      expected_sha256 = opts[:sha256]

      uri = URI(file_url)
      filename = uri.path.split('/').last
      destination_file = destination_path + filename

      # Skip the download only when a hash was given and the existing file matches it.
      up_to_date = !expected_sha256.nil? &&
                   File.file?(destination_file) &&
                   sha256_match?(destination_file, expected_sha256)
      unless up_to_date
        res = Net::HTTP.get_response(uri)
        raise "#{file_url} ended with #{res.code} #{res.message}" unless res.is_a?(Net::HTTPSuccess)

        File.binwrite(destination_file, res.body)
      end
      destination_file
    end

    # Update the feed
    # @param fresh_feed [Feed] the fresh feed from which the feed will be updated.
    # @return [Boolean] +true+ if the feed was updated, +false+ if it wasn't.
    # @raise [RuntimeError] if +fresh_feed+ is not a {Feed}.
    # @note Is not intended to be used directly, use {NVDFeedScraper#update_feeds} instead.
    def update!(fresh_feed)
      raise "#{fresh_feed} is not a Feed" unless fresh_feed.is_a?(Feed)
      # Same update date: nothing changed on the NVD side.
      return false if updated == fresh_feed.updated

      # update attributes
      self.name = fresh_feed.name
      self.updated = fresh_feed.updated
      self.meta_url = fresh_feed.meta_url
      self.gz_url = fresh_feed.gz_url
      self.zip_url = fresh_feed.zip_url
      # update if @meta was set
      meta_pull unless @meta.nil?
      # update if @json_file was set, this will also update @data_*
      json_pull unless @json_file.nil?
      true
    end

    # Raise unless {#json_pull} populated +@json_file+ and the file still exists.
    def ensure_json_file!
      raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
      raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
    end

    # Read the header fields of the JSON feed into the +@data_*+ attributes.
    def load_feed_data
      Oj::Doc.open(File.read(@json_file)) do |doc|
        @data_type = doc.fetch('/CVE_data_type')
        @data_format = doc.fetch('/CVE_data_format')
        @data_version = doc.fetch('/CVE_data_version').to_f
        @data_number_of_cves = doc.fetch('/CVE_data_numberOfCVEs').to_i
        @data_timestamp = Date.strptime(doc.fetch('/CVE_data_timestamp'), '%FT%RZ')
      end
    end

    # Tell whether the SHA256 of the file at +path+ equals +expected+ (case insensitive).
    def sha256_match?(path, expected)
      Digest::SHA256.file(path).hexdigest.casecmp?(expected.to_s)
    end

    # Return +path+ with exactly one trailing slash appended when it has none.
    def with_trailing_slash(path)
      path.end_with?('/') ? path : "#{path}/"
    end

    # Look up a single CVE; returns its Hash, or nil when it is not in the feed.
    def find_one_cve(cve_id)
      raise "bad CVE name (#{cve_id})" unless CVE_ID_PATTERN.match?(cve_id)

      wanted = cve_id.upcase
      found = nil
      Oj::Doc.open(File.read(@json_file)) do |doc|
        # Quicker than doc.fetch('/CVE_Items').size
        (1..@data_number_of_cves).each do |i|
          next unless doc.fetch("/CVE_Items/#{i}/cve/CVE_data_meta/ID") == wanted

          found = doc.fetch("/CVE_Items/#{i}")
          break
        end
      end
      found
    end

    # Look up several CVEs; raises when at least one of them is not in the feed.
    def find_many_cves(cve_ids)
      # Validate before upcasing so a non-String element yields the intended error.
      raise 'one of the provided arguments is not a String' unless cve_ids.all? { |id| id.is_a?(String) }
      raise 'bad CVE name' unless cve_ids.all? { |id| CVE_ID_PATTERN.match?(id) }

      # Upcase to be sure the comparison with the feed IDs works
      remaining = cve_ids.map(&:upcase).sort
      found = []
      Oj::Doc.open(File.read(@json_file)) do |doc|
        # Quicker than doc.fetch('/CVE_Items').size
        (1..@data_number_of_cves).each do |i|
          # Everything was found: no need to walk the rest of the feed.
          break if remaining.empty?

          doc.move("/CVE_Items/#{i}")
          cve_id = doc.fetch('cve/CVE_data_meta/ID')
          # Array#delete returns nil when the ID was not requested.
          found.push(doc.fetch) if remaining.delete(cve_id)
        end
      end
      raise "#{remaining.join(', ')} are unexisting CVEs in this feed" unless remaining.empty?

      found
    end

    protected :name=, :updated=, :meta_url=, :gz_url=, :zip_url=, :download_file
    private :ensure_json_file!, :load_feed_data, :sha256_match?, :with_trailing_slash,
            :find_one_cve, :find_many_cves
  end
end
@@ -0,0 +1,116 @@
1
+ # Ruby internal
2
+ require 'net/https'
3
+
4
class NVDFeedScraper
  # Manage the meta file from a feed.
  #
  # == Usage
  #
  # @example
  #   s = NVDFeedScraper.new
  #   s.scrap
  #   metaUrl = s.feeds("CVE-2014").meta_url
  #   m = NVDFeedScraper::Meta.new
  #   m.url = metaUrl
  #   m.parse
  #   m.sha256
  #
  # Several ways to set the url:
  #
  #   m = NVDFeedScraper::Meta.new(metaUrl)
  #   m.parse
  #   # or
  #   m = NVDFeedScraper::Meta.new
  #   m.url = metaUrl
  #   m.parse
  #   # or
  #   m = NVDFeedScraper::Meta.new
  #   m.parse(metaUrl)
  class Meta
    # {Meta} last modified date getter
    # @return [String] the last modified date and time.
    # @example
    #   '2017-10-19T03:27:02-04:00'
    attr_reader :last_modified_date

    # {Meta} JSON size getter
    # @return [String] the size of the JSON file uncompressed.
    # @example
    #   '29443314'
    attr_reader :size

    # {Meta} zip size getter
    # @return [String] the size of the zip file.
    # @example
    #   '2008493'
    attr_reader :zip_size

    # {Meta} gz size getter
    # @return [String] the size of the gz file.
    # @example
    #   '2008357'
    attr_reader :gz_size

    # {Meta} JSON sha256 getter
    # @return [String] the SHA256 value of the uncompressed JSON file.
    # @example
    #   '33ED52D451692596D644F23742ED42B4E350258B11ACB900F969F148FCE3777B'
    attr_reader :sha256

    # @param url [String, nil] see {Feed#meta_url}.
    def initialize(url = nil)
      @url = url
      # Nothing is known about the meta file until {#parse} is called.
      @last_modified_date = @size = @zip_size = @gz_size = @sha256 = nil
    end

    # {Meta} URL getter.
    # @return [String] The URL of the meta file of the feed.
    attr_reader :url

    # {Meta} URL setter. Changing the URL resets every parsed attribute to nil.
    # @param url [String] see {Feed#meta_url}.
    def url=(url)
      @url = url
      @last_modified_date = @size = @zip_size = @gz_size = @sha256 = nil
    end

    # Parse the meta file from the URL and set the attributes.
    # @overload parse
    #   Parse the meta file from the URL and set the attributes.
    #   @return [Integer] Returns +0+ when there is no error.
    # @overload parse(url)
    #   Set the URL of the meta file of the feed and
    #   parse the meta file from the URL and set the attributes.
    #   @param url [String] see {Feed.meta_url}
    #   @return [Integer] Returns +0+ when there is no error.
    # @raise [RuntimeError] if more than one argument is given, if no URL is set,
    #   if the HTTP response is not a success, or if an attribute is missing or malformed.
    def parse(*arg)
      raise 'Too much arguments' if arg.length > 1

      self.url = arg.first unless arg.empty?
      raise "Can't parse if the URL is empty" if @url.nil?

      res = Net::HTTP.get_response(URI(@url))
      # Do not try to interpret an error page as a meta file.
      raise "#{@url} ended with #{res.code} #{res.message}" unless res.is_a?(Net::HTTPSuccess)

      # One "key:value" token per line; only the first colon separates key and
      # value (the date contains colons). Tokens without a colon are ignored.
      meta = res.body.to_s.split
                .map { |token| token.split(':', 2) }
                .select { |pair| pair.length == 2 }
                .to_h

      raise 'no lastModifiedDate attribute found' unless meta['lastModifiedDate']
      raise 'no valid size attribute found' unless /\A[0-9]+\z/.match?(meta['size'])
      raise 'no valid zipSize attribute found' unless /\A[0-9]+\z/.match?(meta['zipSize'])
      raise 'no valid gzSize attribute found' unless /\A[0-9]+\z/.match?(meta['gzSize'])
      # Case insensitive: the hash is compared case-insensitively by its consumers.
      raise 'no valid sha256 attribute found' unless /\A[0-9A-F]{64}\z/i.match?(meta['sha256'])

      @last_modified_date = meta['lastModifiedDate']
      @size = meta['size']
      @zip_size = meta['zipSize']
      @gz_size = meta['gzSize']
      @sha256 = meta['sha256']

      0
    end
  end
end