nvd_feed_api 0.0.1.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6ce03fb10e963df256a7772e5ddc357d1702a387
4
+ data.tar.gz: 887231a4b7fd59dc8d2c10657c33d5825a775040
5
+ SHA512:
6
+ metadata.gz: 2e46e2ce61301c79339ef96bfba07d8e0a7e684b9390c49c950805932838144ee01ffc93a2d68d08b1dafd71bc0695769198626f5b46dbbeb3f27ee75855e4ef
7
+ data.tar.gz: 5714a1667e0a15edcbcec6a757c3175b957cb037952a29d9cd0d8a675d8bca8dab745b92d4bd53ddebc948cccfd0c74c645be99584eb039d9637289eef55ff91
data/.gitignore ADDED
@@ -0,0 +1,53 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
51
+
52
+ # do not check in Gemfile.lock for gems
53
+ Gemfile.lock
data/.rubocop.yml ADDED
@@ -0,0 +1,46 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.4
3
+
4
+ # Rubocop is too stupid to see that the variable is used
5
+ Lint/UselessAssignment:
6
+ Enabled: false
7
+
8
+ Metrics/AbcSize:
9
+ Enabled: false
10
+
11
+ Metrics/BlockLength:
12
+ Max: 50
13
+
14
+ Metrics/BlockNesting:
15
+ Max: 4
16
+
17
+ Metrics/ClassLength:
18
+ Enabled: false
19
+
20
+ Metrics/CyclomaticComplexity:
21
+ Max: 15
22
+
23
+ Metrics/LineLength:
24
+ Enabled: false
25
+
26
+ Metrics/MethodLength:
27
+ Max: 100
28
+
29
+ Metrics/PerceivedComplexity:
30
+ Enabled: false
31
+
32
+ Naming/VariableName:
33
+ EnforcedStyle: snake_case
34
+
35
+ Security/JSONLoad:
36
+ Enabled: false
37
+
38
+ Style/FrozenStringLiteralComment:
39
+ EnforcedStyle: never
40
+
41
+ Style/PerlBackrefs:
42
+ AutoCorrect: false
43
+
44
+ # Allow explicit return
45
+ Style/RedundantReturn:
46
+ Enabled: false
data/.yardopts ADDED
@@ -0,0 +1,9 @@
1
+ --protected
2
+ --private
3
+ --output-dir doc/
4
+ -
5
+ --main README.md
6
+ LICENSE.txt
7
+ pages/INSTALL.md
8
+ pages/FEATURES.md
9
+ pages/EXAMPLES.md
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in .gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 Alexandre ZANNI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,11 @@
1
+ [![Codacy Badge](https://api.codacy.com/project/badge/Grade/e595382d940a4c6b9439325b9e50d398)](https://www.codacy.com/app/noraj1337/nvd_api?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=noraj1337/nvd_api&amp;utm_campaign=Badge_Grade)
2
+
3
+ # nvd_feed_api
4
+
5
+ Name | Link
6
+ --- | ---
7
+ Website | [link](#)
8
+ Git repository | [link](https://gitlab.com/noraj/nvd_api)
9
+ Merge Requests | [link](https://gitlab.com/noraj/nvd_api/merge_requests)
10
+ Issues | [link](https://gitlab.com/noraj/nvd_api/issues)
11
+ Wiki | [link](https://gitlab.com/noraj/nvd_api/wikis/home)
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'rake/testtask'
2
+ require 'bundler/gem_tasks'
3
+
4
+ Rake::TestTask.new do |t|
5
+ t.libs << 'test'
6
+ end
7
+
8
+ desc 'Run tests'
9
+ task default: :test
data/bin/nvd_feed_api ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Placeholder executable: the gem has no command line interface yet, so this
# script only loads the library and points the user to the helper scripts.
require 'nvd_feed_api'

puts 'nvd_feed_api CLI does not exist yet'
puts 'but there are'
puts 'nvd_feed_api_console for launching the API in irb'
puts 'nvd_feed_api_setup for installing dependencies'
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'nvd_feed_api'
5
+
6
+ require 'irb'
7
+ IRB.start(__FILE__)
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
@@ -0,0 +1,711 @@
1
+ # @author Alexandre ZANNI <alexandre.zanni@engineer.com>
2
+
3
+ # Ruby internal
4
+ require 'digest'
5
+ require 'net/https'
6
+ require 'set'
7
+ # External
8
+ require 'archive/zip'
9
+ require 'nokogiri'
10
+ require 'oj'
11
+ # Project internal
12
+ require 'nvd_feed_api/version'
13
+
14
+ # The class that parse NVD website to get information.
15
+ # @example Initialize a NVDFeedScraper object, get the feeds and see them:
16
+ # scraper = NVDFeedScraper.new
17
+ # scraper.scrap
18
+ # scraper.available_feeds
19
+ # scraper.feeds
20
+ # scraper.feeds("CVE-2007")
21
+ # cve2007, cve2015 = scraper.feeds("CVE-2007", "CVE-2015")
22
+ class NVDFeedScraper
23
+ # The NVD url where is located the data feeds.
24
+ URL = 'https://nvd.nist.gov/vuln/data-feeds'.freeze
25
+ # Load constants
26
+ include NvdFeedApi
27
+
28
+ # Feed object.
29
+ class Feed
30
+ class << self
31
+ # Get / set default feed storage location, where will be stored JSON feeds and archives by default.
32
+ # @return [String] default feed storage location. Default to +/tmp/+.
33
+ # @example
34
+ # NVDFeedScraper::Feed.default_storage_location = '/srv/downloads/'
35
+ attr_accessor :default_storage_location
36
+ end
37
+ @default_storage_location = '/tmp/'
38
+
39
+ # @return [String] the name of the feed.
40
+ # @example
41
+ # 'CVE-2007'
42
+ attr_reader :name
43
+
44
+ # @return [String] the last update date of the feed information on the NVD website.
45
+ # @example
46
+ # '10/19/2017 3:27:02 AM -04:00'
47
+ attr_reader :updated
48
+
49
+ # @return [String] the URL of the metadata file of the feed.
50
+ # @example
51
+ # 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
52
+ attr_reader :meta_url
53
+
54
+ # @return [String] the URL of the gz archive of the feed.
55
+ # @example
56
+ # 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
57
+ attr_reader :gz_url
58
+
59
+ # @return [String] the URL of the zip archive of the feed.
60
+ # @example
61
+ # 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
62
+ attr_reader :zip_url
63
+
64
+ # @return [Meta] the {Meta} object of the feed.
65
+ # @note
66
+ # Return nil if not previously loaded by {#meta_pull}.
67
+ # Note that {#json_pull} also calls {#meta_pull}.
68
+ # @example
69
+ # s = NVDFeedScraper.new
70
+ # s.scrap
71
+ # f = s.feeds("CVE-2014")
72
+ # f.meta # => nil
73
+ # f.meta_pull
74
+ # f.meta # => #<NVDFeedScraper::Meta:0x00555b53027570 ... >
75
+ attr_reader :meta
76
+
77
+ # @return [String] the path of the saved JSON file.
78
+ # @note Return nil if not previously loaded by {#json_pull}.
79
+ # @example
80
+ # s = NVDFeedScraper.new
81
+ # s.scrap
82
+ # f = s.feeds("CVE-2014")
83
+ # f.json_file # => nil
84
+ # f.json_pull
85
+ # f.json_file # => "/tmp/nvdcve-1.0-2014.json"
86
+ attr_reader :json_file
87
+
88
# Build a Feed from the values read on the NVD data feeds page.
# Nothing is downloaded here: {#meta} and {#json_file} stay +nil+ until
# {#meta_pull} / {#json_pull} are called (speed and memory footprint).
# @param name [String] see {#name}.
# @param updated [String] see {#updated}.
# @param meta_url [String] see {#meta_url}.
# @param gz_url [String] see {#gz_url}.
# @param zip_url [String] see {#zip_url}.
def initialize(name, updated, meta_url, gz_url, zip_url)
  @name, @updated = name, updated
  @meta_url, @gz_url, @zip_url = meta_url, gz_url, zip_url
  # Lazily filled attributes.
  @meta = @json_file = nil
end
104
+
105
# Fetch and parse the meta file of the feed, then store the result in {#meta}.
# {#meta} is only replaced when the parsing succeeded.
# @return [Meta] the freshly parsed {Meta} object of the feed.
# @see #meta
def meta_pull
  @meta = NVDFeedScraper::Meta.new(@meta_url).tap(&:parse)
end
114
+
115
# Save the gz archive of the feed on disk.
# @param opts [Hash] forwarded as is to {#download_file}.
# @return [String] the path of the saved gz file.
# @example
#   afeed.download_gz
#   afeed.download_gz(destination_path: '/srv/save/')
def download_gz(opts = {})
  archive_url = @gz_url
  download_file(archive_url, opts)
end
124
+
125
# Save the zip archive of the feed on disk.
# @param opts [Hash] forwarded as is to {#download_file}.
# @return [String] the path of the saved zip file.
# @example
#   afeed.download_zip
#   afeed.download_zip(destination_path: '/srv/save/')
def download_zip(opts = {})
  archive_url = @zip_url
  download_file(archive_url, opts)
end
134
+
135
# Download the JSON feed and fill the {#json_file} attribute.
#
# The meta file is always refreshed first ({#meta_pull}). When a JSON file is
# already present in the destination directory and its SHA256 matches the one
# of the meta file, nothing is downloaded. Otherwise the zip archive is
# downloaded, extracted, and the extracted file is checked against the SHA256.
# @param opts [Hash] see {#download_file}. The Hash is not modified.
# @return [String] the path of the saved JSON file. Default use {Feed#default_storage_location}.
# @raise [RuntimeError] if the extracted file does not match the SHA256 of the meta file.
# @note Will download and save the zip of the JSON file, unzip and save it. This can take a lot of time.
# @see #json_file
def json_pull(opts = {})
  # Work on a copy: the Hash given by the caller must not be modified.
  opts = opts.dup
  opts[:destination_path] ||= Feed.default_storage_location

  destination_path = opts[:destination_path]
  destination_path += '/' unless destination_path[-1] == '/'
  filename = URI(@zip_url).path.split('/').last.chomp('.zip')
  # Computed from the URL and not from @json_file, which may still be nil.
  destination_file = destination_path + filename
  meta_pull
  # Compare a file on disk with the SHA256 published in the meta file.
  up_to_date = lambda do |path|
    meta.sha256.casecmp(Digest::SHA256.file(path).hexdigest).zero?
  end
  unless File.file?(destination_file) && up_to_date.call(destination_file)
    zip_path = download_zip(opts)
    Archive::Zip.open(zip_path) do |z|
      z.extract(destination_path, flatten: true)
    end
    extracted_file = zip_path.chomp('.zip')
    # Verify integrity before exposing the file through @json_file.
    raise "File corruption: #{extracted_file}" unless up_to_date.call(extracted_file)
    destination_file = extracted_file
  end
  @json_file = destination_file
end
169
+
170
# Search for CVE in the feed.
# @overload cve(cve)
#   One CVE.
#   @param cve [String] CVE ID, case insensitive.
#   @return [Hash, nil] a Ruby Hash corresponding to the CVE, +nil+ if it is not in the feed.
# @overload cve(cve_arr)
#   An array of CVEs.
#   @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
#   @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
# @overload cve(cve, *)
#   Multiple CVEs.
#   @param cve [String] CVE ID, case insensitive.
#   @param * [String] As many CVE ID as you want.
#   @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
# @raise [RuntimeError] if {#json_pull} was not called, if an argument is not a
#   valid CVE ID, or (Array form) if one of the CVEs is not in the feed.
# @note {#json_pull} is needed before using this method. Remember you're searching only in the current feed.
# @todo implement a CVE Class instead of returning a Hash.
# @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
# @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
# @example
#   s = NVDFeedScraper.new
#   s.scrap
#   f = s.feeds("CVE-2014")
#   f.json_pull
#   f.cve("CVE-2014-0002", "cve-2014-0001")
def cve(*arg_cve)
  raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
  raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
  raise 'no argument provided, 1 or more expected' if arg_cve.empty?
  # Several arguments are handled exactly like a single Array argument.
  return cve(arg_cve) if arg_cve.length > 1

  # \A and \z anchor the whole String (^ and $ would accept multi-line input).
  cve_pattern = /\ACVE-[0-9]{4}-[0-9]{4,}\z/i
  target = arg_cve[0]
  case target
  when String
    raise "bad CVE name (#{target})" unless cve_pattern.match?(target)
    wanted_id = target.upcase
    found = nil
    # Block form: the document is released when the block exits.
    Oj::Doc.open(File.read(@json_file)) do |doc|
      # Quicker than doc.fetch('/CVE_Items').size
      doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
      (1..doc_size).each do |i| # Oj::Doc array indexes start at 1
        next unless doc.fetch("/CVE_Items/#{i}/cve/CVE_data_meta/ID") == wanted_id
        found = doc.fetch("/CVE_Items/#{i}")
        break
      end
    end
    found
  when Array
    # Validate types before calling String methods on the items.
    raise 'one of the provided arguments is not a String' unless target.all? { |x| x.is_a?(String) }
    raise 'bad CVE name' unless target.all? { |x| cve_pattern.match?(x) }
    # Upcase to be sure the IDs compare equal to the ones of the feed.
    cves_to_find = target.map(&:upcase).sort
    found = []
    Oj::Doc.open(File.read(@json_file)) do |doc|
      # Quicker than doc.fetch('/CVE_Items').size
      doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
      (1..doc_size).each do |i|
        break if cves_to_find.empty? # everything was found, stop reading
        doc.move("/CVE_Items/#{i}")
        found.push(doc.fetch) if cves_to_find.delete(doc.fetch('cve/CVE_data_meta/ID'))
      end
    end
    raise "#{cves_to_find.join(', ')} are unexisting CVEs in this feed" unless cves_to_find.empty?
    found
  else
    raise "the provided argument (#{target}) is nor a String or an Array"
  end
end
242
+
243
# Return a list with the name of all available CVEs in the feed.
# Can only be called after {#json_pull}.
# @return [Array<String>] List with the name of all available CVEs. May return thousands CVEs.
# @raise [RuntimeError] if {#json_file} is +nil+ or is not an existing file.
def available_cves
  raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
  raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
  cve_names = []
  # Block form: the document is released when the block exits.
  Oj::Doc.open(File.read(@json_file)) do |doc|
    # Quicker than doc.fetch('/CVE_Items').size
    doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
    # Oj::Doc array indexes start at 1
    cve_names = (1..doc_size).map { |i| doc.fetch("/CVE_Items/#{i}/cve/CVE_data_meta/ID") }
  end
  cve_names
end
260
+
261
+ protected
262
+
263
# Set the name of the feed.
# @param arg_name [String] the new name of the feed.
# @return [String] the new name of the feed.
# @raise [RuntimeError] if +arg_name+ is not a String.
# @example
#   'CVE-2007'
def name=(arg_name)
  # The type predicate is Object#is_a? (with the question mark).
  raise "name (#{arg_name}) is not a string" unless arg_name.is_a?(String)
  @name = arg_name
end
271
+
272
# Set the last update date of the feed.
# @param arg_updated [String] the last update date of the feed information on the NVD website.
# @return [String] the new date.
# @raise [RuntimeError] if +arg_updated+ is not a String.
# @example
#   '10/19/2017 3:27:02 AM -04:00'
def updated=(arg_updated)
  # The type predicate is Object#is_a? (with the question mark).
  raise "updated date (#{arg_updated}) is not a string" unless arg_updated.is_a?(String)
  @updated = arg_updated
end
280
+
281
# Set the URL of the metadata file of the feed.
# @param arg_meta_url [String] the new URL of the metadata file of the feed.
# @return [String] the new URL of the metadata file of the feed.
# @raise [RuntimeError] if +arg_meta_url+ is not a String.
# @example
#   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
def meta_url=(arg_meta_url)
  # The type predicate is Object#is_a? (with the question mark).
  raise "meta_url (#{arg_meta_url}) is not a string" unless arg_meta_url.is_a?(String)
  @meta_url = arg_meta_url
end
289
+
290
# Set the URL of the gz archive of the feed.
# @param arg_gz_url [String] the new URL of the gz archive of the feed.
# @return [String] the new URL of the gz archive of the feed.
# @raise [RuntimeError] if +arg_gz_url+ is not a String.
# @example
#   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
def gz_url=(arg_gz_url)
  # The type predicate is Object#is_a? (with the question mark).
  raise "gz_url (#{arg_gz_url}) is not a string" unless arg_gz_url.is_a?(String)
  @gz_url = arg_gz_url
end
298
+
299
# Set the URL of the zip archive of the feed.
# @param arg_zip_url [String] the new URL of the zip archive of the feed.
# @return [String] the new URL of the zip archive of the feed.
# @raise [RuntimeError] if +arg_zip_url+ is not a String.
# @example
#   'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
def zip_url=(arg_zip_url)
  # The type predicate is Object#is_a? (with the question mark).
  raise "zip_url (#{arg_zip_url}) is not a string" unless arg_zip_url.is_a?(String)
  @zip_url = arg_zip_url
end
307
+
308
# Download a file.
# @param file_url [String] the URL of the file.
# @param opts [Hash] the optional download parameters. The Hash is not modified.
# @option opts [String] :destination_path the destination path (may
#   overwrite existing file).
#   Default use {Feed#default_storage_location}.
# @option opts [String] :sha256 the SHA256 hash to check, if the file
#   already exist and the hash matches then the download will be skipped.
# @return [String] the saved file path.
# @raise [RuntimeError] if the URL has no file name or if the HTTP response is not a success.
# @example
#   download_file('https://example.org/example.zip') # => '/tmp/example.zip'
#   download_file('https://example.org/example.zip', destination_path: '/srv/save/') # => '/srv/save/example.zip'
#   download_file('https://example.org/example.zip', {destination_path: '/srv/save/', sha256: '70d6ea136d5036b6ce771921a949357216866c6442f44cea8497f0528c54642d'}) # => '/srv/save/example.zip'
def download_file(file_url, opts = {})
  # Read the options without writing defaults back into the caller's Hash.
  destination_path = opts[:destination_path] || Feed.default_storage_location
  expected_sha256 = opts[:sha256]

  destination_path += '/' unless destination_path[-1] == '/'
  uri = URI(file_url)
  filename = uri.path.split('/').last
  raise "#{file_url} doesn't contain a file name" if filename.nil? || filename.empty?
  destination_file = destination_path + filename
  # Skip the download when the file on disk already has the expected hash.
  already_downloaded = !expected_sha256.nil? &&
                       File.file?(destination_file) &&
                       expected_sha256.casecmp(Digest::SHA256.file(destination_file).hexdigest).zero?
  unless already_downloaded
    res = Net::HTTP.get_response(uri)
    raise "#{file_url} ended with #{res.code} #{res.message}" unless res.is_a?(Net::HTTPSuccess)
    # File.binwrite only ever writes a file, unlike the generic Kernel#open.
    File.binwrite(destination_file, res.body)
  end
  destination_file
end
347
+ end
348
+
349
# Create a scraper bound to the NVD data feeds page ({URL}).
# The feed list stays +nil+ until {#scrap} is called.
def initialize
  @feeds = nil
  @url = URL
end
354
+
355
# Scrap / parse the website to get the feeds and fill the {#feeds} attribute.
# The previous feed list is only replaced once the whole page was parsed.
# @note {#scrap} need to be called only once but can be called again to update if the NVD feed page changed.
# @return [Integer] +0+ when there is no error.
def scrap
  html = Net::HTTP.get(URI(@url))
  doc = Nokogiri::HTML(html)
  # One "desc" row per feed; the gz and zip links are in the two following rows.
  rows = doc.css('h3#JSON_FEED ~ div.row:first-of-type table.xml-feed-table > tbody > tr[data-testid*=desc]')
  scraped_feeds = rows.map do |tr|
    cells = tr.css('td')
    name = cells[0].text
    updated = cells[1].text
    meta = cells[2].css('> a').attr('href').value
    gz = tr.css('+ tr > td > a').attr('href').value
    zip = tr.css('+ tr + tr > td > a').attr('href').value
    Feed.new(name, updated, meta, gz, zip)
  end
  @feeds = scraped_feeds
  # Documented return value (the iteration result used to leak out instead).
  0
end
373
+
374
+ # Return feeds. Can only be called after {#scrap}.
375
+ # @overload feeds
376
+ # All the feeds.
377
+ # @return [Array<Feed>] Attributes of all feeds. It's an array of {Feed} object.
378
+ # @overload feeds(feed)
379
+ # One feed.
380
+ # @param feed [String] Feed name as written on NVD website. Names can be obtains with {#available_feeds}.
381
+ # @return [Feed] Attributes of one feed. It's a {Feed} object.
382
+ # @overload feeds(feed_arr)
383
+ # An array of feeds.
384
+ # @param feed_arr [Array<String>] An array of feed names as written on NVD website. Names can be obtains with {#available_feeds}.
385
+ # @return [Array<Feed>] Attributes of the feeds. It's an array of {Feed} object.
386
+ # @overload feeds(feed, *)
387
+ # Multiple feeds.
388
+ # @param feed [String] Feed name as written on NVD website. Names can be obtains with {#available_feeds}.
389
+ # @param * [String] As many feeds as you want.
390
+ # @return [Array<Feed>] Attributes of the feeds. It's an array of {Feed} object.
391
+ # @example
392
+ # scraper.feeds # => all feeds
393
+ # scraper.feeds('CVE-2010') # => return only CVE-2010 feed
394
+ # scraper.feeds("CVE-2005", "CVE-2002") # => return CVE-2005 and CVE-2002 feeds
395
+ # @see https://nvd.nist.gov/vuln/data-feeds
396
+ def feeds(*arg_feeds)
397
+ raise 'call scrap method before using feeds method' if @feeds.nil?
398
+ return_value = nil
399
+ if arg_feeds.empty?
400
+ return_value = @feeds
401
+ elsif arg_feeds.length == 1
402
+ if arg_feeds[0].is_a?(String)
403
+ @feeds.each do |feed| # feed is an object
404
+ return_value = feed if arg_feeds.include?(feed.name)
405
+ end
406
+ # if nothing found return nil
407
+ elsif arg_feeds[0].is_a?(Array)
408
+ raise 'one of the provided arguments is not a String' unless arg_feeds[0].all? { |x| x.is_a?(String) }
409
+ # Sorting CVE can allow us to parse quicker
410
+ # Upcase to be sure include? works
411
+ # Does not use map(&:upcase) to preserve CVE-Recent and CVE-Modified
412
+ feeds_to_find = arg_feeds[0].map { |x| x[0..2].upcase.concat(x[3..x.size]) }.sort
413
+ matched_feeds = []
414
+ @feeds.each do |feed| # feed is an object
415
+ if feeds_to_find.include?(feed.name)
416
+ matched_feeds.push(feed)
417
+ feeds_to_find.delete(feed.name)
418
+ elsif feeds_to_find.empty?
419
+ break
420
+ end
421
+ end
422
+ return_value = matched_feeds
423
+ raise "#{feeds_to_find.join(', ')} are unexisting feeds" unless feeds_to_find.empty?
424
+ else
425
+ raise "the provided argument (#{arg_feeds[0]}) is nor a String or an Array"
426
+ end
427
+ else
428
+ # Overloading a list of arguments as one array argument
429
+ return_value = feeds(arg_feeds)
430
+ end
431
+ return return_value
432
+ end
433
+
434
# List the name of every feed found by {#scrap}. The names can be given to
# the {#feeds} method. Can only be called after {#scrap}.
# @return [Array<String>] List with the name of all available feeds.
# @example
#   scraper.available_feeds => ["CVE-Modified", "CVE-Recent", "CVE-2017", "CVE-2016", "CVE-2015", "CVE-2014", "CVE-2013", "CVE-2012", "CVE-2011", "CVE-2010", "CVE-2009", "CVE-2008", "CVE-2007", "CVE-2006", "CVE-2005", "CVE-2004", "CVE-2003", "CVE-2002"]
def available_feeds
  raise 'call scrap method before using available_feeds method' if @feeds.nil?
  @feeds.map(&:name)
end
446
+
447
# Search for CVE in all year feeds.
# The year of each CVE ID selects the feed to search in; that feed is pulled
# with {Feed#json_pull} before the search.
# @overload cve(cve)
#   One CVE.
#   @param cve [String] CVE ID, case insensitive.
#   @return [Hash] a Ruby Hash corresponding to the CVE.
# @overload cve(cve_arr)
#   An array of CVEs.
#   @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
#   @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
# @overload cve(cve, *)
#   Multiple CVEs.
#   @param cve [String] CVE ID, case insensitive.
#   @param * [String] As many CVE ID as you want.
#   @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
# @raise [RuntimeError] if an argument is not a valid CVE ID or if there is no feed for its year.
# @todo implement a CVE Class instead of returning a Hash.
# @note {#scrap} is needed before using this method.
# @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
# @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
# @example
#   s = NVDFeedScraper.new
#   s.scrap
#   s.cve("CVE-2014-0002", "cve-2014-0001")
def cve(*arg_cve)
  raise 'no argument provided, 1 or more expected' if arg_cve.empty?
  # Several arguments are handled exactly like a single Array argument.
  return cve(arg_cve) if arg_cve.length > 1

  # Group 1 is the feed name (CVE-YYYY), group 2 the year.
  # \A and \z anchor the whole String (^ and $ would accept multi-line input).
  cve_pattern = /\A(CVE-([0-9]{4}))-[0-9]{4,}\z/i
  special_feeds = %w[CVE-Modified CVE-Recent]
  wanted = arg_cve[0]
  case wanted
  when String
    match = cve_pattern.match(wanted)
    raise 'bad CVE name' if match.nil?
    year = match[2]
    matched_feed = (available_feeds - special_feeds).find { |feed_name| feed_name.include?(year) }
    raise "bad CVE year in #{wanted}" if matched_feed.nil?
    feed = feeds(matched_feed)
    feed.json_pull
    feed.cve(wanted)
  when Array
    raise 'one of the provided arguments is not a String' unless wanted.all? { |x| x.is_a?(String) }
    raise 'bad CVE name' unless wanted.all? { |x| cve_pattern.match?(x) }
    # Upcase to be sure the IDs compare equal to the feed names.
    cves_to_find = wanted.map(&:upcase).sort
    feeds_to_match = cves_to_find.map { |id| cve_pattern.match(id)[1] }.to_set
    feed_names = available_feeds.to_set - special_feeds
    raise 'unexisting CVE year was provided in some CVE' unless feeds_to_match.subset?(feed_names)
    feeds(feeds_to_match.to_a).flat_map do |feed|
      feed.json_pull
      # Only ask a feed for the CVEs of its own year.
      feed.cve(cves_to_find.select { |id| id.start_with?("#{feed.name}-") })
    end
  else
    raise "the provided argument (#{wanted}) is nor a String or an Array"
  end
end
527
+
528
# Update the feeds.
# The NVD page is scraped again (once per call), then each given feed whose
# update date changed gets its attributes refreshed. Its meta and JSON file
# are pulled again only if they were already loaded.
# @overload update_feeds(feed)
#   One feed.
#   @param feed [Feed] feed object to update.
#   @return [Boolean, nil] +true+ if the feed was updated, +false+ if it wasn't,
#     +nil+ if the feed is not listed on the NVD page anymore.
# @overload update_feeds(feed_arr)
#   An array of feed.
#   @param feed_arr [Array<Feed>] array of feed objects to update.
#   @return [Array<Boolean>] +true+ if the feed was updated, +false+ if it wasn't.
# @overload update_feeds(feed, *)
#   Multiple feeds.
#   @param feed [Feed] feed object to update.
#   @param * [Feed] As many feed objects as you want.
#   @return [Array<Boolean>] +true+ if the feed was updated, +false+ if it wasn't.
# @raise [RuntimeError] if no argument is given or if an argument is not a Feed or an Array.
# @example
#   s = NVDFeedScraper.new
#   s.scrap
#   f2015, f2017 = s.feeds("CVE-2015", "CVE-2017")
#   s.update_feeds(f2015, f2017) # => [false, false]
def update_feeds(*arg_feed)
  raise 'no argument provided, 1 or more expected' if arg_feed.empty?
  scrap
  # Several arguments are handled exactly like a single Array argument.
  target = arg_feed.length == 1 ? arg_feed[0] : arg_feed
  refresh = lambda do |item|
    case item
    when Feed
      new_feed = feeds(item.name)
      if new_feed.nil?
        nil # the feed is not on the NVD page anymore
      elsif item.updated == new_feed.updated
        false
      else
        # The attribute writers of Feed are protected, hence the use of send.
        %i[name updated meta_url gz_url zip_url].each do |attribute|
          item.send("#{attribute}=", new_feed.public_send(attribute))
        end
        # update if @meta was set
        item.meta_pull unless item.meta.nil?
        # update if @json_file was set
        item.json_pull unless item.json_file.nil?
        true
      end
    when Array
      item.map do |f|
        res = refresh.call(f)
        puts "#{f} not found" if res.nil?
        res
      end
    else
      raise "the provided argument #{item} is not a Feed or an Array"
    end
  end
  refresh.call(target)
end
583
+
584
# List the name of every CVE found in the year feeds (the CVE-Modified and
# CVE-Recent feeds are left out). Each year feed is pulled with
# {Feed#json_pull} first.
# Can only be called after {#scrap}.
# @return [Array<String>] List with the name of all available CVEs. May return tens thousands CVEs.
def available_cves
  year_feeds = available_feeds
  %w[CVE-Modified CVE-Recent].each { |special| year_feeds.delete(special) }
  year_feeds.reduce([]) do |cve_names, feed_name|
    feed = feeds(feed_name)
    feed.json_pull
    # merge removing duplicates
    cve_names | feed.available_cves
  end
end
600
+
601
# Manage the meta file from a feed.
#
# == Usage
#
# @example
#   s = NVDFeedScraper.new
#   s.scrap
#   metaUrl = s.feeds("CVE-2014").meta_url
#   m = NVDFeedScraper::Meta.new
#   m.url = metaUrl
#   m.parse
#   m.sha256
#
# Several ways to set the url:
#
#   m = NVDFeedScraper::Meta.new(metaUrl)
#   m.parse
#   # or
#   m = NVDFeedScraper::Meta.new
#   m.url = metaUrl
#   m.parse
#   # or
#   m = NVDFeedScraper::Meta.new
#   m.parse(metaUrl)
class Meta
  # {Meta} last modified date getter
  # @return [String] the last modified date and time.
  # @example
  #   '2017-10-19T03:27:02-04:00'
  attr_reader :last_modified_date

  # {Meta} JSON size getter
  # @return [String] the size of the JSON file uncompressed.
  # @example
  #   '29443314'
  attr_reader :size

  # {Meta} zip size getter
  # @return [String] the size of the zip file.
  # @example
  #   '2008493'
  attr_reader :zip_size

  # {Meta} gz size getter
  # @return [String] the size of the gz file.
  # @example
  #   '2008357'
  attr_reader :gz_size

  # {Meta} JSON sha256 getter
  # @return [String] the SHA256 value of the uncompressed JSON file.
  # @example
  #   '33ED52D451692596D644F23742ED42B4E350258B11ACB900F969F148FCE3777B'
  attr_reader :sha256

  # {Meta} URL getter.
  # @return [String] The URL of the meta file of the feed.
  attr_reader :url

  # @param url [String, nil] see {Feed#meta_url}.
  def initialize(url = nil)
    @url = url
  end

  # {Meta} URL setter. The attributes parsed from the previous URL are reset.
  # @param url [String] see {Feed#meta_url}.
  def url=(url)
    @url = url
    @last_modified_date = @size = @zip_size = @gz_size = @sha256 = nil
  end

  # Parse the meta file from the URL and set the attributes.
  # @overload parse
  #   Parse the meta file from the URL and set the attributes.
  #   @return [Integer] Returns +0+ when there is no error.
  # @overload parse(url)
  #   Set the URL of the meta file of the feed and
  #   parse the meta file from the URL and set the attributes.
  #   @param url [String] see {Feed#meta_url}
  #   @return [Integer] Returns +0+ when there is no error.
  # @raise [RuntimeError] if more than one argument is given, if no URL is
  #   set, or if an attribute of the meta file is missing or malformed.
  def parse(*arg)
    raise 'Too much arguments' if arg.length > 1
    self.url = arg[0] unless arg.empty?
    raise "Can't parse if the URL is empty" if @url.nil?

    raw_meta = Net::HTTP.get(URI(@url))
    # Every line looks like "key:value". Split on the first colon only (the
    # date contains colons) and ignore anything that is not a key/value pair.
    pairs = raw_meta.split.map { |line| line.split(':', 2) }
    meta = pairs.select { |pair| pair.length == 2 }.to_h

    # \A and \z: the whole value must match, not just a part of it.
    raise 'no lastModifiedDate attribute found' unless meta['lastModifiedDate']
    raise 'no valid size attribute found' unless /\A[0-9]+\z/.match?(meta['size'])
    raise 'no valid zipSize attribute found' unless /\A[0-9]+\z/.match?(meta['zipSize'])
    raise 'no valid gzSize attribute found' unless /\A[0-9]+\z/.match?(meta['gzSize'])
    raise 'no valid sha256 attribute found' unless /\A[0-9A-F]{64}\z/i.match?(meta['sha256'])

    @last_modified_date = meta['lastModifiedDate']
    @size = meta['size']
    @zip_size = meta['zipSize']
    @gz_size = meta['gzSize']
    @sha256 = meta['sha256']

    0
  end
end
711
+ end