nvd_feed_api 0.0.1.rc1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6ce03fb10e963df256a7772e5ddc357d1702a387
4
+ data.tar.gz: 887231a4b7fd59dc8d2c10657c33d5825a775040
5
+ SHA512:
6
+ metadata.gz: 2e46e2ce61301c79339ef96bfba07d8e0a7e684b9390c49c950805932838144ee01ffc93a2d68d08b1dafd71bc0695769198626f5b46dbbeb3f27ee75855e4ef
7
+ data.tar.gz: 5714a1667e0a15edcbcec6a757c3175b957cb037952a29d9cd0d8a675d8bca8dab745b92d4bd53ddebc948cccfd0c74c645be99584eb039d9637289eef55ff91
data/.gitignore ADDED
@@ -0,0 +1,53 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
51
+
52
+ # do not check in Gemfile.lock for gems
53
+ Gemfile.lock
data/.rubocop.yml ADDED
@@ -0,0 +1,46 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.4
3
+
4
+ # Rubocop is too stupid to see that the variable is used
5
+ Lint/UselessAssignment:
6
+ Enabled: false
7
+
8
+ Metrics/AbcSize:
9
+ Enabled: false
10
+
11
+ Metrics/BlockLength:
12
+ Max: 50
13
+
14
+ Metrics/BlockNesting:
15
+ Max: 4
16
+
17
+ Metrics/ClassLength:
18
+ Enabled: false
19
+
20
+ Metrics/CyclomaticComplexity:
21
+ Max: 15
22
+
23
+ Metrics/LineLength:
24
+ Enabled: false
25
+
26
+ Metrics/MethodLength:
27
+ Max: 100
28
+
29
+ Metrics/PerceivedComplexity:
30
+ Enabled: false
31
+
32
+ Naming/VariableName:
33
+ EnforcedStyle: snake_case
34
+
35
+ Security/JSONLoad:
36
+ Enabled: false
37
+
38
+ Style/FrozenStringLiteralComment:
39
+ EnforcedStyle: never
40
+
41
+ Style/PerlBackrefs:
42
+ AutoCorrect: false
43
+
44
+ # Allow explicit return
45
+ Style/RedundantReturn:
46
+ Enabled: false
data/.yardopts ADDED
@@ -0,0 +1,9 @@
1
+ --protected
2
+ --private
3
+ --output-dir doc/
4
+ -
5
+ --main README.md
6
+ LICENSE.txt
7
+ pages/INSTALL.md
8
+ pages/FEATURES.md
9
+ pages/EXAMPLES.md
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in .gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 Alexandre ZANNI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,11 @@
1
+ [![Codacy Badge](https://api.codacy.com/project/badge/Grade/e595382d940a4c6b9439325b9e50d398)](https://www.codacy.com/app/noraj1337/nvd_api?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=noraj1337/nvd_api&amp;utm_campaign=Badge_Grade)
2
+
3
+ # nvd_feed_api
4
+
5
+ Name | Link
6
+ --- | ---
7
+ Website | [link](#)
8
+ Git repository | [link](https://gitlab.com/noraj/nvd_api)
9
+ Merge Requests | [link](https://gitlab.com/noraj/nvd_api/merge_requests)
10
+ Issues | [link](https://gitlab.com/noraj/nvd_api/issues)
11
+ Wiki | [link](https://gitlab.com/noraj/nvd_api/wikis/home)
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
require 'rake/testtask'
require 'bundler/gem_tasks'

# Register the `test` task and put the test/ directory on its load path.
Rake::TestTask.new { |test_task| test_task.libs.push('test') }

# `rake` with no arguments runs the test suite.
desc 'Run tests'
task default: :test
data/bin/nvd_feed_api ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

require 'nvd_feed_api'

# Placeholder executable: no CLI is implemented, so point the user at the
# companion executables instead.
puts 'nvd_feed_api CLI is not existing yet'
puts 'but there are'
puts 'nvd_feed_api_console for launching the API in irb'
puts 'nvd_feed_api_setup for installing dependencies'
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'nvd_feed_api'
5
+
6
+ require 'irb'
7
+ IRB.start(__FILE__)
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
@@ -0,0 +1,711 @@
1
+ # @author Alexandre ZANNI <alexandre.zanni@engineer.com>
2
+
3
+ # Ruby internal
4
+ require 'digest'
5
+ require 'net/https'
6
+ require 'set'
7
+ # External
8
+ require 'archive/zip'
9
+ require 'nokogiri'
10
+ require 'oj'
11
+ # Project internal
12
+ require 'nvd_feed_api/version'
13
+
14
+ # The class that parses the NVD website to get information.
15
+ # @example Initialize a NVDFeedScraper object, get the feeds and see them:
16
+ # scraper = NVDFeedScraper.new
17
+ # scraper.scrap
18
+ # scraper.available_feeds
19
+ # scraper.feeds
20
+ # scraper.feeds("CVE-2007")
21
+ # cve2007, cve2015 = scraper.feeds("CVE-2007", "CVE-2015")
22
+ class NVDFeedScraper
23
+ # The NVD URL where the data feeds are located.
24
+ URL = 'https://nvd.nist.gov/vuln/data-feeds'.freeze
25
+ # Load constants
26
+ include NvdFeedApi
27
+
28
+ # Feed object.
29
+ class Feed
30
+ class << self
31
+ # Get / set the default feed storage location, where JSON feeds and archives are stored by default.
32
+ # @return [String] default feed storage location. Default to +/tmp/+.
33
+ # @example
34
+ # NVDFeedScraper::Feed.default_storage_location = '/srv/downloads/'
35
+ attr_accessor :default_storage_location
36
+ end
37
+ @default_storage_location = '/tmp/'
38
+
39
+ # @return [String] the name of the feed.
40
+ # @example
41
+ # 'CVE-2007'
42
+ attr_reader :name
43
+
44
+ # @return [String] the last update date of the feed information on the NVD website.
45
+ # @example
46
+ # '10/19/2017 3:27:02 AM -04:00'
47
+ attr_reader :updated
48
+
49
+ # @return [String] the URL of the metadata file of the feed.
50
+ # @example
51
+ # 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
52
+ attr_reader :meta_url
53
+
54
+ # @return [String] the URL of the gz archive of the feed.
55
+ # @example
56
+ # 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
57
+ attr_reader :gz_url
58
+
59
+ # @return [String] the URL of the zip archive of the feed.
60
+ # @example
61
+ # 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
62
+ attr_reader :zip_url
63
+
64
+ # @return [Meta] the {Meta} object of the feed.
65
+ # @note
66
+ # Return nil if not previously loaded by {#meta_pull}.
67
+ # Note that {#json_pull} also calls {#meta_pull}.
68
+ # @example
69
+ # s = NVDFeedScraper.new
70
+ # s.scrap
71
+ # f = s.feeds("CVE-2014")
72
+ # f.meta # => nil
73
+ # f.meta_pull
74
+ # f.meta # => #<NVDFeedScraper::Meta:0x00555b53027570 ... >
75
+ attr_reader :meta
76
+
77
+ # @return [String] the path of the saved JSON file.
78
+ # @note Return nil if not previously loaded by {#json_pull}.
79
+ # @example
80
+ # s = NVDFeedScraper.new
81
+ # s.scrap
82
+ # f = s.feeds("CVE-2014")
83
+ # f.json_file # => nil
84
+ # f.json_pull
85
+ # f.json_file # => "/tmp/nvdcve-1.0-2014.json"
86
+ attr_reader :json_file
87
+
88
+ # A new instance of Feed.
89
+ # @param name [String] see {#name}.
90
+ # @param updated [String] see {#updated}.
91
+ # @param meta_url [String] see {#meta_url}.
92
+ # @param gz_url [String] see {#gz_url}.
93
+ # @param zip_url [String] see {#zip_url}.
94
def initialize(name, updated, meta_url, gz_url, zip_url)
  # Identification of the feed and the locations of its remote files.
  @name, @updated = name, updated
  @meta_url, @gz_url, @zip_url = meta_url, gz_url, zip_url
  # Meta and JSON are not pulled automatically, for speed and memory
  # footprint; both stay nil until meta_pull / json_pull fill them.
  @meta = @json_file = nil
end
104
+
105
+ # Create or update the {Meta} object (fill the attribute).
106
+ # @return [Meta] the updated {Meta} object of the feed.
107
+ # @see #meta
108
def meta_pull
  # Build a Meta for this feed's metadata URL and parse it; @meta is only
  # replaced once parsing went through without raising.
  @meta = NVDFeedScraper::Meta.new(@meta_url).tap(&:parse)
end
114
+
115
+ # Download the gz archive of the feed.
116
+ # @param opts [Hash] see {#download_file}.
117
+ # @return [String] the saved gz file path.
118
+ # @example
119
+ # afeed.download_gz
120
+ # afeed.download_gz(destination_path: '/srv/save/')
121
def download_gz(opts = {})
  # Thin wrapper: only the archive URL is specific to this method.
  gz_archive_url = @gz_url
  download_file(gz_archive_url, opts)
end
124
+
125
+ # Download the zip archive of the feed.
126
+ # @param opts [Hash] see {#download_file}.
127
+ # @return [String] the saved zip file path.
128
+ # @example
129
+ # afeed.download_zip
130
+ # afeed.download_zip(destination_path: '/srv/save/')
131
def download_zip(opts = {})
  # Thin wrapper: only the archive URL is specific to this method.
  zip_archive_url = @zip_url
  download_file(zip_archive_url, opts)
end
134
+
135
+ # Download the JSON feed and fill the attribute.
136
+ # @param opts [Hash] see {#download_file}.
137
+ # @return [String] the path of the saved JSON file. Default use {Feed#default_storage_location}.
138
+ # @note Will download and save the zip of the JSON file, then unzip and save it. This is very time-consuming.
139
+ # @see #json_file
140
def json_pull(opts = {})
  opts[:destination_path] ||= Feed.default_storage_location

  target_dir = opts[:destination_path]
  target_dir += '/' unless target_dir[-1] == '/'
  # The JSON file carries the name of the zip archive minus its extension.
  json_name = URI(@zip_url).path.split('/').last.chomp('.zip')
  # Looked up by name rather than through @json_file so that a file left by
  # an earlier run (offline loading) is found as well.
  cached_json = target_dir + json_name
  meta_pull

  # A local copy is current when its hash equals the one published in the meta.
  up_to_date = false
  if File.file?(cached_json)
    local_digest = Digest::SHA256.file(cached_json)
    up_to_date = meta.sha256.casecmp(local_digest.hexdigest).zero?
  end

  if up_to_date
    @json_file = cached_json
    return @json_file
  end

  archive_path = download_zip(opts)
  Archive::Zip.open(archive_path) { |zip| zip.extract(target_dir, flatten: true) }
  @json_file = archive_path.chomp('.zip')
  # Verify the integrity of what was just extracted.
  extracted_digest = Digest::SHA256.file(@json_file)
  raise "File corruption: #{@json_file}" unless meta.sha256.casecmp(extracted_digest.hexdigest).zero?
  @json_file
end
169
+
170
+ # Search for CVE in the feed.
171
+ # @overload cve(cve)
172
+ # One CVE.
173
+ # @param cve [String] CVE ID, case insensitive.
174
+ # @return [Hash] a Ruby Hash corresponding to the CVE.
175
+ # @overload cve(cve_arr)
176
+ # An array of CVEs.
177
+ # @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
178
+ # @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
179
+ # @overload cve(cve, *)
180
+ # Multiple CVEs.
181
+ # @param cve [String] CVE ID, case insensitive.
182
+ # @param * [String] As many CVE ID as you want.
183
+ # @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
184
+ # @note {#json_pull} is needed before using this method. Remember you're searching only in the current feed.
185
+ # @todo implement a CVE Class instead of returning a Hash.
186
+ # @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
187
+ # @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
188
+ # @example
189
+ # s = NVDFeedScraper.new
190
+ # s.scrap
191
+ # f = s.feeds("CVE-2014")
192
+ # f.json_pull
193
+ # f.cve("CVE-2014-0002", "cve-2014-0001")
194
def cve(*arg_cve)
  raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
  raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
  raise 'no argument provided, 1 or more expected' if arg_cve.empty?
  # Overloading a list of arguments as one array argument
  return cve(arg_cve) unless arg_cve.length == 1

  # \A and \z anchor the whole string; ^ and $ only anchor a line and would
  # accept an ID followed by a newline and arbitrary text.
  cve_pattern = /\ACVE-[0-9]{4}-[0-9]{4,}\z/i
  target = arg_cve[0]
  return_value = nil
  case target
  when String
    raise "bad CVE name (#{target})" unless cve_pattern.match?(target)
    wanted_id = target.upcase
    # The block form closes the document even when a fetch raises.
    Oj::Doc.open(File.read(@json_file)) do |doc|
      # Quicker than doc.fetch('/CVE_Items').size
      doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
      (1..doc_size).each do |i|
        next unless wanted_id == doc.fetch("/CVE_Items/#{i}/cve/CVE_data_meta/ID")
        return_value = doc.fetch("/CVE_Items/#{i}")
        break
      end
    end
  when Array
    # Validate before upcasing so a non-String yields the intended error.
    raise 'one of the provided arguments is not a String' unless target.all? { |x| x.is_a?(String) }
    raise 'bad CVE name' unless target.all? { |x| cve_pattern.match?(x) }
    # Upcase to be sure include? works
    cves_to_find = target.map(&:upcase).sort
    return_value = []
    Oj::Doc.open(File.read(@json_file)) do |doc|
      # Quicker than doc.fetch('/CVE_Items').size
      doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
      (1..doc_size).each do |i|
        # Stop scanning as soon as every requested CVE was found.
        break if cves_to_find.empty?
        doc.move("/CVE_Items/#{i}")
        cve_id = doc.fetch('cve/CVE_data_meta/ID')
        next unless cves_to_find.include?(cve_id)
        return_value.push(doc.fetch)
        cves_to_find.delete(cve_id)
      end
    end
    raise "#{cves_to_find.join(', ')} are unexisting CVEs in this feed" unless cves_to_find.empty?
  else
    raise "the provided argument (#{target}) is nor a String or an Array"
  end
  return_value
end
242
+
243
+ # Return a list with the name of all available CVEs in the feed.
244
+ # Can only be called after {#json_pull}.
245
+ # @return [Array<String>] List with the name of all available CVEs. May return thousands of CVEs.
246
def available_cves
  raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
  raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
  cve_names = []
  # The block form closes the document even when a fetch raises.
  Oj::Doc.open(File.read(@json_file)) do |doc|
    # Quicker than doc.fetch('/CVE_Items').size
    doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
    (1..doc_size).each do |i|
      doc.move("/CVE_Items/#{i}")
      cve_names.push(doc.fetch('cve/CVE_data_meta/ID'))
    end
  end
  cve_names
end
260
+
261
+ protected
262
+
263
+ # @param arg_name [String] the new name of the feed.
264
+ # @return [String] the new name of the feed.
265
+ # @example
266
+ # 'CVE-2007'
267
def name=(arg_name)
  # Only Strings are accepted so the attribute keeps a single type.
  raise "name (#{arg_name}) is not a string" unless arg_name.is_a?(String)
  @name = arg_name
end
271
+
272
+ # @param arg_updated [String] the last update date of the feed information on the NVD website.
273
+ # @return [String] the new date.
274
+ # @example
275
+ # '10/19/2017 3:27:02 AM -04:00'
276
def updated=(arg_updated)
  # Only Strings are accepted so the attribute keeps a single type.
  raise "updated date (#{arg_updated}) is not a string" unless arg_updated.is_a?(String)
  @updated = arg_updated
end
280
+
281
+ # @param arg_meta_url [String] the new URL of the metadata file of the feed.
282
+ # @return [String] the new URL of the metadata file of the feed.
283
+ # @example
284
+ # 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
285
def meta_url=(arg_meta_url)
  # Only Strings are accepted so the attribute keeps a single type.
  raise "meta_url (#{arg_meta_url}) is not a string" unless arg_meta_url.is_a?(String)
  @meta_url = arg_meta_url
end
289
+
290
+ # @param arg_gz_url [String] the new URL of the gz archive of the feed.
291
+ # @return [String] the new URL of the gz archive of the feed.
292
+ # @example
293
+ # 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
294
def gz_url=(arg_gz_url)
  # Only Strings are accepted so the attribute keeps a single type.
  raise "gz_url (#{arg_gz_url}) is not a string" unless arg_gz_url.is_a?(String)
  @gz_url = arg_gz_url
end
298
+
299
+ # @param arg_zip_url [String] the new URL of the zip archive of the feed.
300
+ # @return [String] the new URL of the zip archive of the feed.
301
+ # @example
302
+ # 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
303
def zip_url=(arg_zip_url)
  # Only Strings are accepted so the attribute keeps a single type.
  raise "zip_url (#{arg_zip_url}) is not a string" unless arg_zip_url.is_a?(String)
  @zip_url = arg_zip_url
end
307
+
308
+ # Download a file.
309
+ # @param file_url [String] the URL of the file.
310
+ # @param opts [Hash] the optional download parameters.
311
+ # @option opts [String] :destination_path the destination path (may
312
+ # overwrite existing file).
313
+ # Default use {Feed#default_storage_location}.
314
+ # @option opts [String] :sha256 the SHA256 hash to check, if the file
315
+ # already exist and the hash matches then the download will be skipped.
316
+ # @return [String] the saved file path.
317
+ # @example
318
+ # download_file('https://example.org/example.zip') # => '/tmp/example.zip'
319
+ # download_file('https://example.org/example.zip', destination_path: '/srv/save/') # => '/srv/save/example.zip'
320
+ # download_file('https://example.org/example.zip', {destination_path: '/srv/save/', sha256: '70d6ea136d5036b6ce771921a949357216866c6442f44cea8497f0528c54642d'}) # => '/srv/save/example.zip'
321
def download_file(file_url, opts = {})
  # Read the options without writing defaults back into the caller's hash.
  destination_path = opts[:destination_path] || Feed.default_storage_location
  destination_path += '/' unless destination_path.end_with?('/')
  expected_sha256 = opts[:sha256]

  uri = URI(file_url)
  # The local file takes the last segment of the URL path as its name.
  filename = uri.path.split('/').last
  raise "no file name in #{file_url}" if filename.nil? || filename.empty?
  destination_file = destination_path + filename

  # Skip the download when a local copy already has the expected hash.
  if expected_sha256 && File.file?(destination_file)
    computed_h = Digest::SHA256.file(destination_file)
    return destination_file if expected_sha256.casecmp(computed_h.hexdigest).zero?
  end

  res = Net::HTTP.get_response(uri)
  raise "#{file_url} ended with #{res.code} #{res.message}" unless res.is_a?(Net::HTTPSuccess)
  # File.binwrite rather than Kernel#open: the latter treats a path that
  # starts with "|" as a command to run.
  File.binwrite(destination_file, res.body)
  destination_file
end
347
+ end
348
+
349
+ # Initialize the scraper
350
def initialize
  # No feed is known until the NVD page has been scraped.
  @feeds = nil
  # Page listing the data feeds.
  @url = URL
end
354
+
355
+ # Scrap / parse the website to get the feeds and fill the {#feeds} attribute.
356
+ # @note {#scrap} needs to be called only once but can be called again to update if the NVD feed page changed.
357
+ # @return [Integer] +0+ when there is no error.
358
def scrap
  uri = URI(@url)
  html = Net::HTTP.get(uri)

  doc = Nokogiri::HTML(html)
  rows = doc.css('h3#JSON_FEED ~ div.row:first-of-type table.xml-feed-table > tbody > tr[data-testid*=desc]')
  # Build the list first and assign it in one go, so a row that fails to
  # parse does not leave @feeds half-filled.
  @feeds = rows.map do |tr|
    cells = tr.css('td')
    name = cells[0].text
    updated = cells[1].text
    meta = cells[2].css('> a').attr('href').value
    # The gz and zip links are read from the two rows following this one.
    gz = tr.css('+ tr > td > a').attr('href').value
    zip = tr.css('+ tr + tr > td > a').attr('href').value
    Feed.new(name, updated, meta, gz, zip)
  end
  # 0 signals success, as documented.
  0
end
373
+
374
+ # Return feeds. Can only be called after {#scrap}.
375
+ # @overload feeds
376
+ # All the feeds.
377
+ # @return [Array<Feed>] Attributes of all feeds. It's an array of {Feed} object.
378
+ # @overload feeds(feed)
379
+ # One feed.
380
+ # @param feed [String] Feed name as written on NVD website. Names can be obtained with {#available_feeds}.
381
+ # @return [Feed] Attributes of one feed. It's a {Feed} object.
382
+ # @overload feeds(feed_arr)
383
+ # An array of feeds.
384
+ # @param feed_arr [Array<String>] An array of feed names as written on NVD website. Names can be obtained with {#available_feeds}.
385
+ # @return [Array<Feed>] Attributes of the feeds. It's an array of {Feed} object.
386
+ # @overload feeds(feed, *)
387
+ # Multiple feeds.
388
+ # @param feed [String] Feed name as written on NVD website. Names can be obtained with {#available_feeds}.
389
+ # @param * [String] As many feeds as you want.
390
+ # @return [Array<Feed>] Attributes of the feeds. It's an array of {Feed} object.
391
+ # @example
392
+ # scraper.feeds # => all feeds
393
+ # scraper.feeds('CVE-2010') # => return only CVE-2010 feed
394
+ # scraper.feeds("CVE-2005", "CVE-2002") # => return CVE-2005 and CVE-2002 feeds
395
+ # @see https://nvd.nist.gov/vuln/data-feeds
396
def feeds(*arg_feeds)
  raise 'call scrap method before using feeds method' if @feeds.nil?
  return @feeds if arg_feeds.empty?
  # Overloading a list of arguments as one array argument
  return feeds(arg_feeds) unless arg_feeds.length == 1

  # Upcase only the first three characters (the "CVE" prefix): upcasing the
  # whole name would break CVE-Recent and CVE-Modified. Names shorter than
  # three characters are handled without raising.
  normalize = ->(feed_name) { feed_name[0, 3].upcase + feed_name[3..-1].to_s }

  requested = arg_feeds[0]
  case requested
  when String
    wanted = normalize.call(requested)
    # nil when no feed carries that name
    @feeds.select { |feed| feed.name == wanted }.last
  when Array
    raise 'one of the provided arguments is not a String' unless requested.all? { |x| x.is_a?(String) }
    feeds_to_find = requested.map(&normalize).sort
    matched_feeds = []
    @feeds.each do |feed| # feed is an object
      # Stop scanning as soon as every requested feed was found.
      break if feeds_to_find.empty?
      next unless feeds_to_find.include?(feed.name)
      matched_feeds.push(feed)
      feeds_to_find.delete(feed.name)
    end
    raise "#{feeds_to_find.join(', ')} are unexisting feeds" unless feeds_to_find.empty?
    matched_feeds
  else
    raise "the provided argument (#{requested}) is nor a String or an Array"
  end
end
433
+
434
+ # Return a list with the name of all available feeds. Returned feed names can be used as arguments for the {#feeds} method. Can only be called after {#scrap}.
435
+ # @return [Array<String>] List with the name of all available feeds.
436
+ # @example
437
+ # scraper.available_feeds => ["CVE-Modified", "CVE-Recent", "CVE-2017", "CVE-2016", "CVE-2015", "CVE-2014", "CVE-2013", "CVE-2012", "CVE-2011", "CVE-2010", "CVE-2009", "CVE-2008", "CVE-2007", "CVE-2006", "CVE-2005", "CVE-2004", "CVE-2003", "CVE-2002"]
438
def available_feeds
  raise 'call scrap method before using available_feeds method' if @feeds.nil?
  # Each element of @feeds is a feed object; only its name is kept.
  @feeds.map(&:name)
end
446
+
447
+ # Search for CVE in all year feeds.
448
+ # @overload cve(cve)
449
+ # One CVE.
450
+ # @param cve [String] CVE ID, case insensitive.
451
+ # @return [Hash] a Ruby Hash corresponding to the CVE.
452
+ # @overload cve(cve_arr)
453
+ # An array of CVEs.
454
+ # @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
455
+ # @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
456
+ # @overload cve(cve, *)
457
+ # Multiple CVEs.
458
+ # @param cve [String] CVE ID, case insensitive.
459
+ # @param * [String] As many CVE ID as you want.
460
+ # @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
461
+ # @todo implement a CVE Class instead of returning a Hash.
462
+ # @note {#scrap} is needed before using this method.
463
+ # @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
464
+ # @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
465
+ # @example
466
+ # s = NVDFeedScraper.new
467
+ # s.scrap
468
+ # s.cve("CVE-2014-0002", "cve-2014-0001")
469
def cve(*arg_cve)
  raise 'no argument provided, 1 or more expected' if arg_cve.empty?
  # Overloading a list of arguments as one array argument
  return cve(arg_cve) unless arg_cve.length == 1

  # Group 1 is the yearly feed name ("CVE-2014"), group 2 the year alone.
  # \A and \z anchor the whole string; ^ and $ would accept multi-line input.
  cve_pattern = /\A(CVE-([0-9]{4}))-[0-9]{4,}\z/i
  # CVE-Modified and CVE-Recent are not yearly feeds and are never searched.
  yearly_feeds = -> { available_feeds - %w[CVE-Modified CVE-Recent] }

  requested = arg_cve[0]
  case requested
  when String
    parsed = cve_pattern.match(requested)
    raise 'bad CVE name' if parsed.nil?
    year = parsed[2]
    matched_feed = yearly_feeds.call.find { |feed| feed.include?(year) }
    raise "bad CVE year in #{requested}" if matched_feed.nil?
    f = feeds(matched_feed)
    f.json_pull
    f.cve(requested)
  when Array
    raise 'one of the provided arguments is not a String' unless requested.all? { |x| x.is_a?(String) }
    raise 'bad CVE name' unless requested.all? { |x| cve_pattern.match?(x) }
    # Upcase to be sure the feed name comparison works
    cves_to_find = requested.map(&:upcase).sort
    feeds_to_match = cves_to_find.map { |cve_id| cve_pattern.match(cve_id)[1] }.to_set
    raise 'unexisting CVE year was provided in some CVE' unless feeds_to_match.subset?(yearly_feeds.call.to_set)
    return_value = []
    feeds(feeds_to_match.to_a).each do |feed|
      feed.json_pull
      # Hand each feed only the CVE IDs of its own year.
      cves_obj = feed.cve(cves_to_find.select { |cve_id| cve_id.start_with?("#{feed.name}-") })
      case cves_obj
      when Hash then return_value.push(cves_obj)
      when Array then return_value.push(*cves_obj)
      else raise 'cve() method of the feed instance returns wrong value'
      end
    end
    return_value
  else
    raise "the provided argument (#{requested}) is nor a String or an Array"
  end
end
527
+
528
+ # Update the feeds
529
+ # @overload update_feeds(feed)
530
+ # One feed.
531
+ # @param feed [Feed] feed object to update.
532
+ # @return [Boolean] +true+ if the feed was updated, +false+ if it wasn't.
533
+ # @overload update_feeds(feed_arr)
534
+ # An array of feed.
535
+ # @param feed_arr [Array<Feed>] array of feed objects to update.
536
+ # @return [Array<Boolean>] +true+ if the feed was updated, +false+ if it wasn't.
537
+ # @overload update_feeds(feed, *)
538
+ # Multiple feeds.
539
+ # @param feed [Feed] feed object to update.
540
+ # @param * [Feed] As many feed objects as you want.
541
+ # @return [Array<Boolean>] +true+ if the feed was updated, +false+ if it wasn't.
542
+ # @example
543
+ # s = NVDFeedScraper.new
544
+ # s.scrap
545
+ # f2015, f2017 = s.feeds("CVE-2015", "CVE-2017")
546
+ # s.update_feeds(f2015, f2017) # => [false, false]
547
def update_feeds(*arg_feed)
  raise 'no argument provided, 1 or more expected' if arg_feed.empty?
  # Overloading a list of arguments as one array argument
  return update_feeds(arg_feed) unless arg_feed.length == 1

  # Refresh the list of feeds once for the whole call.
  scrap

  # Bring one Feed in line with its freshly scraped counterpart.
  # Yields nil when the feed is no longer listed, false when it is already
  # current and true when it was updated.
  refresh_feed = lambda do |feed|
    new_feed = feeds(feed.name)
    return nil if new_feed.nil?
    return false if feed.updated == new_feed.updated
    # Feed's setters are protected and this object is not a Feed, hence send.
    %i[name updated meta_url gz_url zip_url].each do |attribute|
      feed.send("#{attribute}=", new_feed.public_send(attribute))
    end
    # update if @meta was set
    feed.meta_pull unless feed.meta.nil?
    # update if @json_file was set
    feed.json_pull unless feed.json_file.nil?
    true
  end

  # Dispatch on the argument type; arrays are refreshed element by element.
  refresh = lambda do |target|
    case target
    when Feed then refresh_feed.call(target)
    when Array
      target.map do |f|
        res = refresh.call(f)
        puts "#{f} not found" if res.nil?
        res
      end
    else
      raise "the provided argument #{target} is not a Feed or an Array"
    end
  end
  refresh.call(arg_feed[0])
end
583
+
584
+ # Return a list with the name of all available CVEs in the feed.
585
+ # Can only be called after {#scrap}.
586
+ # @return [Array<String>] List with the name of all available CVEs. May return tens of thousands of CVEs.
587
def available_cves
  # CVE-Modified and CVE-Recent are left out; only the other feeds are read.
  yearly_feed_names = available_feeds.reject { |feed_name| %w[CVE-Modified CVE-Recent].include?(feed_name) }
  yearly_feed_names.reduce([]) do |all_names, feed_name|
    yearly_feed = feeds(feed_name)
    yearly_feed.json_pull
    # Array union: merges while dropping duplicates
    all_names | yearly_feed.available_cves
  end
end
600
+
601
+ # Manage the meta file from a feed.
602
+ #
603
+ # == Usage
604
+ #
605
+ # @example
606
+ # s = NVDFeedScraper.new
607
+ # s.scrap
608
+ # metaUrl = s.feeds("CVE-2014").meta_url
609
+ # m = NVDFeedScraper::Meta.new
610
+ # m.url = metaUrl
611
+ # m.parse
612
+ # m.sha256
613
+ #
614
+ # Several ways to set the url:
615
+ #
616
+ # m = NVDFeedScraper::Meta.new(metaUrl)
617
+ # m.parse
618
+ # # or
619
+ # m = NVDFeedScraper::Meta.new
620
+ # m.url = metaUrl
621
+ # m.parse
622
+ # # or
623
+ # m = NVDFeedScraper::Meta.new
624
+ # m.parse(metaUrl)
625
+ class Meta
626
+ # {Meta} last modified date getter
627
+ # @return [String] the last modified date and time.
628
+ # @example
629
+ # '2017-10-19T03:27:02-04:00'
630
+ attr_reader :last_modified_date
631
+
632
+ # {Meta} JSON size getter
633
+ # @return [String] the size of the JSON file uncompressed.
634
+ # @example
635
+ # '29443314'
636
+ attr_reader :size
637
+
638
+ # {Meta} zip size getter
639
+ # @return [String] the size of the zip file.
640
+ # @example
641
+ # '2008493'
642
+ attr_reader :zip_size
643
+
644
+ # {Meta} gz size getter
645
+ # @return [String] the size of the gz file.
646
+ # @example
647
+ # '2008357'
648
+ attr_reader :gz_size
649
+
650
+ # {Meta} JSON sha256 getter
651
+ # @return [String] the SHA256 value of the uncompressed JSON file.
652
+ # @example
653
+ # '33ED52D451692596D644F23742ED42B4E350258B11ACB900F969F148FCE3777B'
654
+ attr_reader :sha256
655
+
656
+ # @param url [String, nil] see {Feed#meta_url}.
657
def initialize(url = nil)
  # Only the location is stored here; the other attributes stay unset until
  # the meta file is parsed.
  @url = url
end
660
+
661
+ # {Meta} URL getter.
662
+ # @return [String] The URL of the meta file of the feed.
663
+ attr_reader :url
664
+
665
+ # {Meta} URL setter.
666
+ # @param url [String] see {Feed#meta_url}.
667
def url=(url)
  @url = url
  # Values parsed from the previous URL no longer apply.
  @sha256 = nil
  @gz_size = nil
  @zip_size = nil
  @size = nil
  @last_modified_date = nil
end
671
+
672
+ # Parse the meta file from the URL and set the attributes.
673
+ # @overload parse
674
+ # Parse the meta file from the URL and set the attributes.
675
+ # @return [Integer] Returns +0+ when there is no error.
676
+ # @overload parse(url)
677
+ # Set the URL of the meta file of the feed and
678
+ # parse the meta file from the URL and set the attributes.
679
+ # @param url [String] see {Feed#meta_url}
680
+ # @return [Integer] Returns +0+ when there is no error.
681
def parse(*arg)
  raise 'Too much arguments' if arg.length > 1
  # An optional single argument is the URL to parse.
  self.url = arg[0] unless arg.empty?

  raise "Can't parse if the URL is empty" if @url.nil?
  uri = URI(@url)

  meta = Net::HTTP.get(uri)

  # Every whitespace-separated token is expected to be "key:value". Only the
  # first colon splits, because the date value contains colons itself.
  # Tokens without a colon are ignored so that an unexpected body ends in one
  # of the "no ... attribute found" errors below.
  meta = meta.split.map { |x| x.split(':', 2) }.select { |pair| pair.length == 2 }.to_h

  # The whole value has to match, not just a part of it.
  raise 'no lastModifiedDate attribute found' unless meta['lastModifiedDate']
  raise 'no valid size attribute found' unless /\A[0-9]+\z/.match?(meta['size'])
  raise 'no valid zipSize attribute found' unless /\A[0-9]+\z/.match?(meta['zipSize'])
  raise 'no valid gzSize attribute found' unless /\A[0-9]+\z/.match?(meta['gzSize'])
  raise 'no valid sha256 attribute found' unless /\A[0-9A-Fa-f]{64}\z/.match?(meta['sha256'])

  @last_modified_date = meta['lastModifiedDate']
  @size = meta['size']
  @zip_size = meta['zipSize']
  @gz_size = meta['gzSize']
  @sha256 = meta['sha256']

  0
end
710
+ end
711
+ end