nvd_feed_api 0.0.1.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +53 -0
- data/.rubocop.yml +46 -0
- data/.yardopts +9 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +11 -0
- data/Rakefile +9 -0
- data/bin/nvd_feed_api +8 -0
- data/bin/nvd_feed_api_console +7 -0
- data/bin/nvd_feed_api_setup +6 -0
- data/lib/nvd_feed_api.rb +711 -0
- data/lib/nvd_feed_api/version.rb +3 -0
- data/nvd_feed_api.gemspec +45 -0
- data/pages/EXAMPLES.md +21 -0
- data/pages/FEATURES.md +9 -0
- data/pages/INSTALL.md +64 -0
- data/test/test_nvd_feed_api.rb +251 -0
- metadata +211 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6ce03fb10e963df256a7772e5ddc357d1702a387
|
4
|
+
data.tar.gz: 887231a4b7fd59dc8d2c10657c33d5825a775040
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2e46e2ce61301c79339ef96bfba07d8e0a7e684b9390c49c950805932838144ee01ffc93a2d68d08b1dafd71bc0695769198626f5b46dbbeb3f27ee75855e4ef
|
7
|
+
data.tar.gz: 5714a1667e0a15edcbcec6a757c3175b957cb037952a29d9cd0d8a675d8bca8dab745b92d4bd53ddebc948cccfd0c74c645be99584eb039d9637289eef55ff91
|
data/.gitignore
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
## Specific to RubyMotion:
|
17
|
+
.dat*
|
18
|
+
.repl_history
|
19
|
+
build/
|
20
|
+
*.bridgesupport
|
21
|
+
build-iPhoneOS/
|
22
|
+
build-iPhoneSimulator/
|
23
|
+
|
24
|
+
## Specific to RubyMotion (use of CocoaPods):
|
25
|
+
#
|
26
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
27
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
28
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
29
|
+
#
|
30
|
+
# vendor/Pods/
|
31
|
+
|
32
|
+
## Documentation cache and generated files:
|
33
|
+
/.yardoc/
|
34
|
+
/_yardoc/
|
35
|
+
/doc/
|
36
|
+
/rdoc/
|
37
|
+
|
38
|
+
## Environment normalization:
|
39
|
+
/.bundle/
|
40
|
+
/vendor/bundle
|
41
|
+
/lib/bundler/man/
|
42
|
+
|
43
|
+
# for a library or gem, you might want to ignore these files since the code is
|
44
|
+
# intended to run in multiple environments; otherwise, check them in:
|
45
|
+
# Gemfile.lock
|
46
|
+
# .ruby-version
|
47
|
+
# .ruby-gemset
|
48
|
+
|
49
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
50
|
+
.rvmrc
|
51
|
+
|
52
|
+
# do not check in Gemfile.lock for gems
|
53
|
+
Gemfile.lock
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.4
|
3
|
+
|
4
|
+
# Rubocop is too stupid to see that the variable is used
|
5
|
+
Lint/UselessAssignment:
|
6
|
+
Enabled: false
|
7
|
+
|
8
|
+
Metrics/AbcSize:
|
9
|
+
Enabled: false
|
10
|
+
|
11
|
+
Metrics/BlockLength:
|
12
|
+
Max: 50
|
13
|
+
|
14
|
+
Metrics/BlockNesting:
|
15
|
+
Max: 4
|
16
|
+
|
17
|
+
Metrics/ClassLength:
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
Metrics/CyclomaticComplexity:
|
21
|
+
Max: 15
|
22
|
+
|
23
|
+
Metrics/LineLength:
|
24
|
+
Enabled: false
|
25
|
+
|
26
|
+
Metrics/MethodLength:
|
27
|
+
Max: 100
|
28
|
+
|
29
|
+
Metrics/PerceivedComplexity:
|
30
|
+
Enabled: false
|
31
|
+
|
32
|
+
Naming/VariableName:
|
33
|
+
EnforcedStyle: snake_case
|
34
|
+
|
35
|
+
Security/JSONLoad:
|
36
|
+
Enabled: false
|
37
|
+
|
38
|
+
Style/FrozenStringLiteralComment:
|
39
|
+
EnforcedStyle: never
|
40
|
+
|
41
|
+
Style/PerlBackrefs:
|
42
|
+
AutoCorrect: false
|
43
|
+
|
44
|
+
# Allow explicit return
|
45
|
+
Style/RedundantReturn:
|
46
|
+
Enabled: false
|
data/.yardopts
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Alexandre ZANNI
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/e595382d940a4c6b9439325b9e50d398)](https://www.codacy.com/app/noraj1337/nvd_api?utm_source=github.com&utm_medium=referral&utm_content=noraj1337/nvd_api&utm_campaign=Badge_Grade)
|
2
|
+
|
3
|
+
# nvd_feed_api
|
4
|
+
|
5
|
+
Name | Link
|
6
|
+
--- | ---
|
7
|
+
Website | [link](#)
|
8
|
+
Git repository | [link](https://gitlab.com/noraj/nvd_api)
|
9
|
+
Merge Requests | [link](https://gitlab.com/noraj/nvd_api/merge_requests)
|
10
|
+
Issues | [link](https://gitlab.com/noraj/nvd_api/issues)
|
11
|
+
Wiki | [link](https://gitlab.com/noraj/nvd_api/wikis/home)
|
data/Rakefile
ADDED
data/bin/nvd_feed_api
ADDED
data/lib/nvd_feed_api.rb
ADDED
@@ -0,0 +1,711 @@
|
|
1
|
+
# @author Alexandre ZANNI <alexandre.zanni@engineer.com>
|
2
|
+
|
3
|
+
# Ruby internal
|
4
|
+
require 'digest'
|
5
|
+
require 'net/https'
|
6
|
+
require 'set'
|
7
|
+
# External
|
8
|
+
require 'archive/zip'
|
9
|
+
require 'nokogiri'
|
10
|
+
require 'oj'
|
11
|
+
# Project internal
|
12
|
+
require 'nvd_feed_api/version'
|
13
|
+
|
14
|
+
# The class that parses the NVD website to get information.
|
15
|
+
# @example Initialize a NVDFeedScraper object, get the feeds and see them:
|
16
|
+
# scraper = NVDFeedScraper.new
|
17
|
+
# scraper.scrap
|
18
|
+
# scraper.available_feeds
|
19
|
+
# scraper.feeds
|
20
|
+
# scraper.feeds("CVE-2007")
|
21
|
+
# cve2007, cve2015 = scraper.feeds("CVE-2007", "CVE-2015")
|
22
|
+
class NVDFeedScraper
|
23
|
+
# The NVD URL where the data feeds are located.
|
24
|
+
URL = 'https://nvd.nist.gov/vuln/data-feeds'.freeze
|
25
|
+
# Load constants
|
26
|
+
include NvdFeedApi
|
27
|
+
|
28
|
+
# Feed object.
|
29
|
+
class Feed
|
30
|
+
class << self
|
31
|
+
# Get / set default feed storage location, where will be stored JSON feeds and archives by default.
|
32
|
+
# @return [String] default feed storage location. Default to +/tmp/+.
|
33
|
+
# @example
|
34
|
+
# NVDFeedScraper::Feed.default_storage_location = '/srv/downloads/'
|
35
|
+
attr_accessor :default_storage_location
|
36
|
+
end
|
37
|
+
@default_storage_location = '/tmp/'
|
38
|
+
|
39
|
+
# @return [String] the name of the feed.
|
40
|
+
# @example
|
41
|
+
# 'CVE-2007'
|
42
|
+
attr_reader :name
|
43
|
+
|
44
|
+
# @return [String] the last update date of the feed information on the NVD website.
|
45
|
+
# @example
|
46
|
+
# '10/19/2017 3:27:02 AM -04:00'
|
47
|
+
attr_reader :updated
|
48
|
+
|
49
|
+
# @return [String] the URL of the metadata file of the feed.
|
50
|
+
# @example
|
51
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
|
52
|
+
attr_reader :meta_url
|
53
|
+
|
54
|
+
# @return [String] the URL of the gz archive of the feed.
|
55
|
+
# @example
|
56
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
|
57
|
+
attr_reader :gz_url
|
58
|
+
|
59
|
+
# @return [String] the URL of the zip archive of the feed.
|
60
|
+
# @example
|
61
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
|
62
|
+
attr_reader :zip_url
|
63
|
+
|
64
|
+
# @return [Meta] the {Meta} object of the feed.
|
65
|
+
# @note
|
66
|
+
# Return nil if not previously loaded by {#meta_pull}.
|
67
|
+
# Note that {#json_pull} also calls {#meta_pull}.
|
68
|
+
# @example
|
69
|
+
# s = NVDFeedScraper.new
|
70
|
+
# s.scrap
|
71
|
+
# f = s.feeds("CVE-2014")
|
72
|
+
# f.meta # => nil
|
73
|
+
# f.meta_pull
|
74
|
+
# f.meta # => #<NVDFeedScraper::Meta:0x00555b53027570 ... >
|
75
|
+
attr_reader :meta
|
76
|
+
|
77
|
+
# @return [String] the path of the saved JSON file.
|
78
|
+
# @note Return nil if not previously loaded by {#json_pull}.
|
79
|
+
# @example
|
80
|
+
# s = NVDFeedScraper.new
|
81
|
+
# s.scrap
|
82
|
+
# f = s.feeds("CVE-2014")
|
83
|
+
# f.json_file # => nil
|
84
|
+
# f.json_pull
|
85
|
+
# f.json_file # => "/tmp/nvdcve-1.0-2014.json"
|
86
|
+
attr_reader :json_file
|
87
|
+
|
88
|
+
# A new instance of Feed.
|
89
|
+
# @param name [String] see {#name}.
|
90
|
+
# @param updated [String] see {#updated}.
|
91
|
+
# @param meta_url [String] see {#meta_url}.
|
92
|
+
# @param gz_url [String] see {#gz_url}.
|
93
|
+
# @param zip_url [String] see {#zip_url}.
|
94
|
+
def initialize(name, updated, meta_url, gz_url, zip_url)
  # Descriptive attributes of the feed, stored exactly as provided.
  @name, @updated = name, updated
  @meta_url, @gz_url, @zip_url = meta_url, gz_url, zip_url
  # do not pull meta and json automatically for speed and memory footprint:
  # both stay nil until #meta_pull / #json_pull are called.
  @meta = @json_file = nil
end
|
104
|
+
|
105
|
+
# Create or update the {Meta} object (fill the attribute).
|
106
|
+
# @return [Meta] the updated {Meta} object of the feed.
|
107
|
+
# @see #meta
|
108
|
+
def meta_pull
  # Build a Meta for the feed's metadata URL and parse it; @meta is only
  # replaced once parsing has completed.
  parsed_meta = NVDFeedScraper::Meta.new(@meta_url).tap(&:parse)
  @meta = parsed_meta
end
|
114
|
+
|
115
|
+
# Download the gz archive of the feed.
|
116
|
+
# @param opts [Hash] see {#download_file}.
|
117
|
+
# @return [String] the saved gz file path.
|
118
|
+
# @example
|
119
|
+
# afeed.download_gz
|
120
|
+
# afeed.download_gz(destination_path: '/srv/save/')
|
121
|
+
def download_gz(opts = {})
  # Delegates to the generic downloader with this feed's gz archive URL.
  download_file(gz_url, opts)
end
|
124
|
+
|
125
|
+
# Download the zip archive of the feed.
|
126
|
+
# @param opts [Hash] see {#download_file}.
|
127
|
+
# @return [String] the saved zip file path.
|
128
|
+
# @example
|
129
|
+
# afeed.download_zip
|
130
|
+
# afeed.download_zip(destination_path: '/srv/save/')
|
131
|
+
def download_zip(opts = {})
  # Delegates to the generic downloader with this feed's zip archive URL.
  download_file(zip_url, opts)
end
|
134
|
+
|
135
|
+
# Download the JSON feed and fill the attribute.
|
136
|
+
# @param opts [Hash] see {#download_file}.
|
137
|
+
# @return [String] the path of the saved JSON file. Default use {Feed#default_storage_location}.
|
138
|
+
# @note Will download and save the zip of the JSON file, unzip and save it. This is very time-consuming.
|
139
|
+
# @see #json_file
|
140
|
+
# @raise [RuntimeError] if the extracted JSON file does not match the SHA256
#   announced by the meta file. In that case the json_file attribute is left
#   untouched.
def json_pull(opts = {})
  # Work on local values only: the caller's opts hash is never modified.
  destination_path = opts[:destination_path] || Feed.default_storage_location
  destination_path += '/' unless destination_path[-1] == '/'
  # The JSON file is named like the zip archive without its ".zip" suffix.
  filename = URI(@zip_url).path.split('/').last.chomp('.zip')
  # do not use @json_file for destination_file because of offline loading
  destination_file = destination_path + filename
  # Refresh the metadata so the comparison uses the currently published hash.
  meta_pull
  expected_sha256 = meta.sha256

  # An existing file whose hash matches the meta file is already the latest.
  up_to_date = File.file?(destination_file) &&
               expected_sha256.casecmp(Digest::SHA256.file(destination_file).hexdigest).zero?
  unless up_to_date
    zip_path = download_zip(opts.merge(destination_path: destination_path))
    Archive::Zip.open(zip_path) do |z|
      z.extract(destination_path, flatten: true)
    end
    extracted_file = zip_path.chomp('.zip')
    # Verify hash integrity before publishing the path in @json_file
    computed_h = Digest::SHA256.file(extracted_file)
    raise "File corruption: #{extracted_file}" unless expected_sha256.casecmp(computed_h.hexdigest).zero?
    destination_file = extracted_file
  end
  @json_file = destination_file
end
|
169
|
+
|
170
|
+
# Search for CVE in the feed.
|
171
|
+
# @overload cve(cve)
|
172
|
+
# One CVE.
|
173
|
+
# @param cve [String] CVE ID, case insensitive.
|
174
|
+
# @return [Hash] a Ruby Hash corresponding to the CVE.
|
175
|
+
# @overload cve(cve_arr)
|
176
|
+
# An array of CVEs.
|
177
|
+
# @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
|
178
|
+
# @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
|
179
|
+
# @overload cve(cve, *)
|
180
|
+
# Multiple CVEs.
|
181
|
+
# @param cve [String] CVE ID, case insensitive.
|
182
|
+
# @param * [String] As many CVE ID as you want.
|
183
|
+
# @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
|
184
|
+
# @note {#json_pull} is needed before using this method. Remember you're searching only in the current feed.
|
185
|
+
# @todo implement a CVE Class instead of returning a Hash.
|
186
|
+
# @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
|
187
|
+
# @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
|
188
|
+
# @example
|
189
|
+
# s = NVDFeedScraper.new
|
190
|
+
# s.scrap
|
191
|
+
# f = s.feeds("CVE-2014")
|
192
|
+
# f.json_pull
|
193
|
+
# f.cve("CVE-2014-0002", "cve-2014-0001")
|
194
|
+
# @raise [RuntimeError] if {#json_pull} was not called before, if an argument
#   is not a well-formed CVE ID, or if a CVE requested through an Array / a
#   list of arguments does not exist in this feed.
def cve(*arg_cve)
  raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
  raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
  raise 'no argument provided, 1 or more expected' if arg_cve.empty?
  # Overloading a list of arguments as one array argument
  return cve(arg_cve) if arg_cve.length > 1

  # \A and \z anchor the whole string; ^ and $ would accept extra lines.
  cve_id_format = /\ACVE-[0-9]{4}-[0-9]{4,}\z/i
  requested = arg_cve[0]
  if requested.is_a?(String)
    raise "bad CVE name (#{requested})" unless cve_id_format.match?(requested)
    wanted_id = requested.upcase
    # Stays nil when the CVE is not part of the feed
    return_value = nil
    doc = Oj::Doc.open(File.read(@json_file))
    begin
      # Quicker than doc.fetch('/CVE_Items').size
      doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
      (1..doc_size).each do |i|
        next unless wanted_id == doc.fetch("/CVE_Items/#{i}/cve/CVE_data_meta/ID")
        return_value = doc.fetch("/CVE_Items/#{i}")
        break
      end
    ensure
      # Always release the parsed document, even if a fetch fails
      doc.close
    end
  elsif requested.is_a?(Array)
    # Validate before upcasing so a non-String element is reported as such
    raise 'one of the provided arguments is not a String' unless requested.all? { |x| x.is_a?(String) }
    raise 'bad CVE name' unless requested.all? { |x| cve_id_format.match?(x) }
    # Upcase to be sure the lookup works; found IDs are removed from this list
    cves_to_find = requested.map(&:upcase).sort
    return_value = []
    doc = Oj::Doc.open(File.read(@json_file))
    begin
      # Quicker than doc.fetch('/CVE_Items').size
      doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
      (1..doc_size).each do |i|
        # Stop scanning as soon as everything requested was found
        break if cves_to_find.empty?
        doc.move("/CVE_Items/#{i}")
        cve_id = doc.fetch('cve/CVE_data_meta/ID')
        next unless cves_to_find.delete(cve_id)
        return_value.push(doc.fetch)
      end
    ensure
      doc.close
    end
    raise "#{cves_to_find.join(', ')} are unexisting CVEs in this feed" unless cves_to_find.empty?
  else
    raise "the provided argument (#{requested}) is nor a String or an Array"
  end
  return_value
end
|
242
|
+
|
243
|
+
# Return a list with the name of all available CVEs in the feed.
|
244
|
+
# Can only be called after {#json_pull}.
|
245
|
+
# @return [Array<String>] List with the name of all available CVEs. May return thousands of CVEs.
|
246
|
+
# @raise [RuntimeError] if {#json_pull} was not called before or if the JSON
#   file no longer exists.
def available_cves
  raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
  raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
  doc = Oj::Doc.open(File.read(@json_file))
  begin
    # Quicker than doc.fetch('/CVE_Items').size
    doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
    # Items are addressed from 1 to doc_size in the document paths
    (1..doc_size).map { |i| doc.fetch("/CVE_Items/#{i}/cve/CVE_data_meta/ID") }
  ensure
    # Always release the parsed document, even if a fetch fails
    doc.close
  end
end
|
260
|
+
|
261
|
+
protected
|
262
|
+
|
263
|
+
# @param arg_name [String] the new name of the feed.
|
264
|
+
# @return [String] the new name of the feed.
|
265
|
+
# @example
|
266
|
+
# 'CVE-2007'
|
267
|
+
# @raise [RuntimeError] if arg_name is not a String.
def name=(arg_name)
  # is_a? (with the question mark) is the type predicate; is_a does not exist
  raise "name (#{arg_name}) is not a string" unless arg_name.is_a?(String)
  @name = arg_name
end
|
271
|
+
|
272
|
+
# @param arg_updated [String] the last update date of the feed information on the NVD website.
|
273
|
+
# @return [String] the new date.
|
274
|
+
# @example
|
275
|
+
# '10/19/2017 3:27:02 AM -04:00'
|
276
|
+
# @raise [RuntimeError] if arg_updated is not a String.
def updated=(arg_updated)
  # is_a? (with the question mark) is the type predicate; is_a does not exist
  raise "updated date (#{arg_updated}) is not a string" unless arg_updated.is_a?(String)
  @updated = arg_updated
end
|
280
|
+
|
281
|
+
# @param arg_meta_url [String] the new URL of the metadata file of the feed.
|
282
|
+
# @return [String] the new URL of the metadata file of the feed.
|
283
|
+
# @example
|
284
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
|
285
|
+
# @raise [RuntimeError] if arg_meta_url is not a String.
def meta_url=(arg_meta_url)
  # is_a? (with the question mark) is the type predicate; is_a does not exist
  raise "meta_url (#{arg_meta_url}) is not a string" unless arg_meta_url.is_a?(String)
  @meta_url = arg_meta_url
end
|
289
|
+
|
290
|
+
# @param arg_gz_url [String] the new URL of the gz archive of the feed.
|
291
|
+
# @return [String] the new URL of the gz archive of the feed.
|
292
|
+
# @example
|
293
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
|
294
|
+
# @raise [RuntimeError] if arg_gz_url is not a String.
def gz_url=(arg_gz_url)
  # is_a? (with the question mark) is the type predicate; is_a does not exist
  raise "gz_url (#{arg_gz_url}) is not a string" unless arg_gz_url.is_a?(String)
  @gz_url = arg_gz_url
end
|
298
|
+
|
299
|
+
# @param arg_zip_url [String] the new URL of the zip archive of the feed.
|
300
|
+
# @return [String] the new URL of the zip archive of the feed.
|
301
|
+
# @example
|
302
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
|
303
|
+
# @raise [RuntimeError] if arg_zip_url is not a String.
def zip_url=(arg_zip_url)
  # is_a? (with the question mark) is the type predicate; is_a does not exist
  raise "zip_url (#{arg_zip_url}) is not a string" unless arg_zip_url.is_a?(String)
  @zip_url = arg_zip_url
end
|
307
|
+
|
308
|
+
# Download a file.
|
309
|
+
# @param file_url [String] the URL of the file.
|
310
|
+
# @param opts [Hash] the optional download parameters.
|
311
|
+
# @option opts [String] :destination_path the destination path (may
|
312
|
+
# overwrite existing file).
|
313
|
+
# Default use {Feed#default_storage_location}.
|
314
|
+
# @option opts [String] :sha256 the SHA256 hash to check, if the file
|
315
|
+
# already exist and the hash matches then the download will be skipped.
|
316
|
+
# @return [String] the saved file path.
|
317
|
+
# @example
|
318
|
+
# download_file('https://example.org/example.zip') # => '/tmp/example.zip'
|
319
|
+
# download_file('https://example.org/example.zip', destination_path: '/srv/save/') # => '/srv/save/example.zip'
|
320
|
+
# download_file('https://example.org/example.zip', {destination_path: '/srv/save/', sha256: '70d6ea136d5036b6ce771921a949357216866c6442f44cea8497f0528c54642d'}) # => '/srv/save/example.zip'
|
321
|
+
# @raise [RuntimeError] if the HTTP response is not a success (2xx).
def download_file(file_url, opts = {})
  # Read the options without writing into the caller's hash.
  destination_path = opts[:destination_path] || Feed.default_storage_location
  destination_path += '/' unless destination_path[-1] == '/'
  expected_sha256 = opts[:sha256]
  uri = URI(file_url)
  # The file is saved under the last segment of the URL path
  filename = uri.path.split('/').last
  destination_file = destination_path + filename
  # The download is skipped only when a hash was provided AND the file
  # already on disk matches it (comparison ignores hex digit case).
  up_to_date = !expected_sha256.nil? &&
               File.file?(destination_file) &&
               expected_sha256.casecmp(Digest::SHA256.file(destination_file).hexdigest).zero?
  unless up_to_date
    res = Net::HTTP.get_response(uri)
    raise "#{file_url} ended with #{res.code} #{res.message}" unless res.is_a?(Net::HTTPSuccess)
    # File.open rather than Kernel#open: the path is always treated as a file
    File.open(destination_file, 'wb') do |file|
      file.write(res.body)
    end
  end
  destination_file
end
|
347
|
+
end
|
348
|
+
|
349
|
+
# Initialize the scraper
|
350
|
+
def initialize
  # No feed is known yet: @feeds stays nil until #scrap fills it.
  @feeds = nil
  # Page to scrape, taken from the URL constant.
  @url = URL
end
|
354
|
+
|
355
|
+
# Scrap / parse the website to get the feeds and fill the {#feeds} attribute.
|
356
|
+
# @note {#scrap} needs to be called only once but can be called again to update if the NVD feed page changed.
|
357
|
+
# @return [Integer] +0+ when there is no error.
|
358
|
+
def scrap
  uri = URI(@url)
  html = Net::HTTP.get(uri)

  doc = Nokogiri::HTML(html)
  # One "desc" row per feed in the first table following the JSON feed title
  rows = doc.css('h3#JSON_FEED ~ div.row:first-of-type table.xml-feed-table > tbody > tr[data-testid*=desc]')
  # Build the list aside so that @feeds is only replaced once every row was
  # parsed; a failure in the middle leaves the previous list untouched.
  scraped_feeds = rows.map do |tr|
    cells = tr.css('td')
    name = cells[0].text
    updated = cells[1].text
    meta = cells[2].css('> a').attr('href').value
    # The gz and zip links are read from the two rows following the "desc" row
    gz = tr.css('+ tr > td > a').attr('href').value
    zip = tr.css('+ tr + tr > td > a').attr('href').value
    Feed.new(name, updated, meta, gz, zip)
  end
  @feeds = scraped_feeds
  # Documented contract: 0 when there is no error
  0
end
|
373
|
+
|
374
|
+
# Return feeds. Can only be called after {#scrap}.
|
375
|
+
# @overload feeds
|
376
|
+
# All the feeds.
|
377
|
+
# @return [Array<Feed>] Attributes of all feeds. It's an array of {Feed} object.
|
378
|
+
# @overload feeds(feed)
|
379
|
+
# One feed.
|
380
|
+
# @param feed [String] Feed name as written on NVD website. Names can be obtained with {#available_feeds}.
|
381
|
+
# @return [Feed] Attributes of one feed. It's a {Feed} object.
|
382
|
+
# @overload feeds(feed_arr)
|
383
|
+
# An array of feeds.
|
384
|
+
# @param feed_arr [Array<String>] An array of feed names as written on NVD website. Names can be obtained with {#available_feeds}.
|
385
|
+
# @return [Array<Feed>] Attributes of the feeds. It's an array of {Feed} object.
|
386
|
+
# @overload feeds(feed, *)
|
387
|
+
# Multiple feeds.
|
388
|
+
# @param feed [String] Feed name as written on NVD website. Names can be obtained with {#available_feeds}.
|
389
|
+
# @param * [String] As many feeds as you want.
|
390
|
+
# @return [Array<Feed>] Attributes of the feeds. It's an array of {Feed} object.
|
391
|
+
# @example
|
392
|
+
# scraper.feeds # => all feeds
|
393
|
+
# scraper.feeds('CVE-2010') # => return only CVE-2010 feed
|
394
|
+
# scraper.feeds("CVE-2005", "CVE-2002") # => return CVE-2005 and CVE-2002 feeds
|
395
|
+
# @see https://nvd.nist.gov/vuln/data-feeds
|
396
|
+
# @note The first three characters of a requested name are upcased before
#   the lookup, so "cve-2010" finds the "CVE-2010" feed.
# @raise [RuntimeError] if {#scrap} was not called before, if an argument has
#   a wrong type, or if a feed requested through an Array / a list of
#   arguments does not exist.
def feeds(*arg_feeds)
  raise 'call scrap method before using feeds method' if @feeds.nil?
  return @feeds if arg_feeds.empty?
  # Overloading a list of arguments as one array argument
  return feeds(arg_feeds) if arg_feeds.length > 1

  # Does not upcase the whole name to preserve CVE-Recent and CVE-Modified.
  # to_s covers names shorter than 3 characters, where the tail slice is nil.
  normalize = ->(feed_name) { feed_name[0, 3].upcase + feed_name[3..-1].to_s }
  requested = arg_feeds[0]
  if requested.is_a?(String)
    wanted_name = normalize.call(requested)
    # if nothing found return nil
    @feeds.find { |feed| feed.name == wanted_name }
  elsif requested.is_a?(Array)
    raise 'one of the provided arguments is not a String' unless requested.all? { |x| x.is_a?(String) }
    # Names still to be found; each one is removed once its feed is matched
    feeds_to_find = requested.map(&normalize).sort
    matched_feeds = []
    @feeds.each do |feed| # feed is an object
      break if feeds_to_find.empty?
      matched_feeds.push(feed) if feeds_to_find.delete(feed.name)
    end
    raise "#{feeds_to_find.join(', ')} are unexisting feeds" unless feeds_to_find.empty?
    matched_feeds
  else
    raise "the provided argument (#{requested}) is nor a String or an Array"
  end
end
|
433
|
+
|
434
|
+
# Return a list with the name of all available feeds. Returned feed names can be used as arguments for the {#feeds} method. Can only be called after {#scrap}.
|
435
|
+
# @return [Array<String>] List with the name of all available feeds.
|
436
|
+
# @example
|
437
|
+
# scraper.available_feeds => ["CVE-Modified", "CVE-Recent", "CVE-2017", "CVE-2016", "CVE-2015", "CVE-2014", "CVE-2013", "CVE-2012", "CVE-2011", "CVE-2010", "CVE-2009", "CVE-2008", "CVE-2007", "CVE-2006", "CVE-2005", "CVE-2004", "CVE-2003", "CVE-2002"]
|
438
|
+
def available_feeds
  raise 'call scrap method before using available_feeds method' if @feeds.nil?
  # Every element of @feeds is a Feed object; keep only its name.
  @feeds.map(&:name)
end
|
446
|
+
|
447
|
+
# Search for CVE in all year feeds.
|
448
|
+
# @overload cve(cve)
|
449
|
+
# One CVE.
|
450
|
+
# @param cve [String] CVE ID, case insensitive.
|
451
|
+
# @return [Hash] a Ruby Hash corresponding to the CVE.
|
452
|
+
# @overload cve(cve_arr)
|
453
|
+
# An array of CVEs.
|
454
|
+
# @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
|
455
|
+
# @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
|
456
|
+
# @overload cve(cve, *)
|
457
|
+
# Multiple CVEs.
|
458
|
+
# @param cve [String] CVE ID, case insensitive.
|
459
|
+
# @param * [String] As many CVE ID as you want.
|
460
|
+
# @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
|
461
|
+
# @todo implement a CVE Class instead of returning a Hash.
|
462
|
+
# @note {#scrap} is needed before using this method.
|
463
|
+
# @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
|
464
|
+
# @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
|
465
|
+
# @example
|
466
|
+
# s = NVDFeedScraper.new
|
467
|
+
# s.scrap
|
468
|
+
# s.cve("CVE-2014-0002", "cve-2014-0001")
|
469
|
+
# @raise [RuntimeError] if no argument is given, if an argument is not a
#   well-formed CVE ID, or if there is no year feed for the year of a CVE.
def cve(*arg_cve)
  raise 'no argument provided, 1 or more expected' if arg_cve.empty?
  # Overloading a list of arguments as one array argument
  return cve(arg_cve) if arg_cve.length > 1

  # \A and \z anchor the whole string; ^ and $ would accept extra lines.
  # The capture group holds the year of the CVE.
  cve_id_format = /\ACVE-([0-9]{4})-[0-9]{4,}\z/i
  # Only the year feeds are searched, not CVE-Modified and CVE-Recent
  year_feed_names = -> { available_feeds - %w[CVE-Modified CVE-Recent] }
  requested = arg_cve[0]
  if requested.is_a?(String)
    id_match = cve_id_format.match(requested)
    raise 'bad CVE name' if id_match.nil?
    year = id_match[1]
    matched_feed = year_feed_names.call.find { |feed_name| feed_name.include?(year) }
    raise "bad CVE year in #{arg_cve}" if matched_feed.nil?
    f = feeds(matched_feed)
    f.json_pull
    f.cve(requested)
  elsif requested.is_a?(Array)
    raise 'one of the provided arguments is not a String' unless requested.all? { |x| x.is_a?(String) }
    raise 'bad CVE name' unless requested.all? { |x| cve_id_format.match?(x) }
    # Upcase to be sure the comparison with feed names works
    cves_to_find = requested.map(&:upcase).sort
    # "CVE-2014-0001" is looked up in the "CVE-2014" feed
    feeds_to_match = cves_to_find.map { |cve_id| cve_id[/\ACVE-[0-9]{4}/] }.uniq
    raise 'unexisting CVE year was provided in some CVE' unless (feeds_to_match - year_feed_names.call).empty?
    feeds(feeds_to_match).flat_map do |feed|
      feed.json_pull
      cves_obj = feed.cve(cves_to_find.select { |cve_id| cve_id.start_with?("#{feed.name}-") })
      unless cves_obj.is_a?(Hash) || cves_obj.is_a?(Array)
        raise 'cve() method of the feed instance returns wrong value'
      end
      # flat_map appends the elements of an Array and a Hash as a single item
      cves_obj
    end
  else
    raise "the provided argument (#{requested}) is nor a String or an Array"
  end
end
|
527
|
+
|
528
|
+
# Update the feeds
|
529
|
+
# @overload update_feeds(feed)
|
530
|
+
# One feed.
|
531
|
+
# @param feed [Feed] feed object to update.
|
532
|
+
# @return [Boolean] +true+ if the feed was updated, +false+ if it wasn't.
|
533
|
+
# @overload update_feeds(feed_arr)
|
534
|
+
# An array of feed.
|
535
|
+
# @param feed_arr [Array<Feed>] array of feed objects to update.
|
536
|
+
# @return [Array<Boolean>] +true+ if the feed was updated, +false+ if it wasn't.
|
537
|
+
# @overload update_feeds(feed, *)
|
538
|
+
# Multiple feeds.
|
539
|
+
# @param feed [Feed] feed object to update.
|
540
|
+
# @param * [Feed] As many feed objects as you want.
|
541
|
+
# @return [Array<Boolean>] +true+ if the feed was updated, +false+ if it wasn't.
|
542
|
+
# @example
|
543
|
+
# s = NVDFeedScraper.new
|
544
|
+
# s.scrap
|
545
|
+
# f2015, f2017 = s.feeds("CVE-2015", "CVE-2017")
|
546
|
+
# s.update_feeds(f2015, f2017) # => [false, false]
|
547
|
+
# @note The NVD page is scraped once per call, whatever the number of feeds.
#   +nil+ is returned (in place of a Boolean) for a feed that is no longer
#   listed on the NVD page.
# @raise [RuntimeError] if no argument is given or if an argument is neither
#   a Feed nor an Array.
def update_feeds(*arg_feed)
  raise 'no argument provided, 1 or more expected' if arg_feed.empty?
  # Refresh the list of feeds a single time for all the feeds to update
  scrap
  # Overloading a list of arguments as one array argument
  target = arg_feed.length == 1 ? arg_feed[0] : arg_feed

  refresh = lambda do |item|
    case item
    when Feed
      new_feed = feeds(item.name)
      # feeds returns nil when the name is unknown: report "not found"
      next nil if new_feed.nil?
      next false if item.updated == new_feed.updated
      # update attributes; the Feed setters are protected, hence send
      item.send(:name=, new_feed.name)
      item.send(:updated=, new_feed.updated)
      item.send(:meta_url=, new_feed.meta_url)
      item.send(:gz_url=, new_feed.gz_url)
      item.send(:zip_url=, new_feed.zip_url)
      # update if @meta was set
      item.meta_pull unless item.meta.nil?
      # update if @json_file was set
      item.json_pull unless item.json_file.nil?
      true
    when Array
      item.map do |f|
        res = refresh.call(f)
        puts "#{f} not found" if res.nil?
        res
      end
    else
      raise "the provided argument #{item} is not a Feed or an Array"
    end
  end
  refresh.call(target)
end
|
583
|
+
|
584
|
+
# Return a list with the name of all available CVEs in the feed.
|
585
|
+
# Can only be called after {#scrap}.
|
586
|
+
# @return [Array<String>] List with the name of all available CVEs. May return tens of thousands of CVEs.
|
587
|
+
def available_cves
  # Array difference builds a new array, so whatever #available_feeds
  # returned is never modified (Array#delete would edit it in place).
  # 'CVE-Modified' and 'CVE-Recent' are skipped by name.
  feed_names = available_feeds - %w[CVE-Modified CVE-Recent]

  cve_names = feed_names.flat_map do |feed_name|
    f = feeds(feed_name)
    # json_pull is called before the feed is asked for its CVE names.
    f.json_pull
    f.available_cves
  end

  # merge removing duplicates (first occurrence wins, order preserved)
  cve_names.uniq
end
|
600
|
+
|
601
|
+
# Manage the meta file from a feed.
|
602
|
+
#
|
603
|
+
# == Usage
|
604
|
+
#
|
605
|
+
# @example
|
606
|
+
# s = NVDFeedScraper.new
|
607
|
+
# s.scrap
|
608
|
+
# metaUrl = s.feeds("CVE-2014").meta_url
|
609
|
+
# m = NVDFeedScraper::Meta.new
|
610
|
+
# m.url = metaUrl
|
611
|
+
# m.parse
|
612
|
+
# m.sha256
|
613
|
+
#
|
614
|
+
# Several ways to set the url:
|
615
|
+
#
|
616
|
+
# m = NVDFeedScraper::Meta.new(metaUrl)
|
617
|
+
# m.parse
|
618
|
+
# # or
|
619
|
+
# m = NVDFeedScraper::Meta.new
|
620
|
+
# m.url = metaUrl
|
621
|
+
# m.parse
|
622
|
+
# # or
|
623
|
+
# m = NVDFeedScraper::Meta.new
|
624
|
+
# m.parse(metaUrl)
|
625
|
+
class Meta
  # Whole-string pattern for the byte counts (+size+, +zipSize+, +gzSize+).
  # Anchored with \A and \z so a value such as '12abc' is rejected.
  SIZE_FORMAT = /\A[0-9]+\z/

  # Whole-string pattern for the checksum: exactly 64 uppercase hex digits.
  SHA256_FORMAT = /\A[0-9A-F]{64}\z/

  # {Meta} last modified date getter
  # @return [String] the last modified date and time.
  # @example
  #   '2017-10-19T03:27:02-04:00'
  attr_reader :last_modified_date

  # {Meta} JSON size getter
  # @return [String] the size of the JSON file uncompressed.
  # @example
  #   '29443314'
  attr_reader :size

  # {Meta} zip size getter
  # @return [String] the size of the zip file.
  # @example
  #   '2008493'
  attr_reader :zip_size

  # {Meta} gz size getter
  # @return [String] the size of the gz file.
  # @example
  #   '2008357'
  attr_reader :gz_size

  # {Meta} JSON sha256 getter
  # @return [String] the SHA256 value of the uncompressed JSON file.
  # @example
  #   '33ED52D451692596D644F23742ED42B4E350258B11ACB900F969F148FCE3777B'
  attr_reader :sha256

  # {Meta} URL getter.
  # @return [String] The URL of the meta file of the feed.
  attr_reader :url

  # @param url [String, nil] see {Feed#meta_url}.
  def initialize(url = nil)
    @url = url
  end

  # {Meta} URL setter.
  # Changing the URL clears every attribute parsed from the previous one.
  # @param url [String] see {Feed#meta_url}.
  def url=(url)
    @url = url
    @last_modified_date = @size = @zip_size = @gz_size = @sha256 = nil
  end

  # Parse the meta file from the URL and set the attributes.
  # @overload parse
  #   Parse the meta file from the URL and set the attributes.
  #   @return [Integer] Returns +0+ when there is no error.
  # @overload parse(url)
  #   Set the URL of the meta file of the feed and
  #   parse the meta file from the URL and set the attributes.
  #   @param url [String] see {Feed#meta_url}
  #   @return [Integer] Returns +0+ when there is no error.
  # @raise [RuntimeError] if more than one argument is given, if no URL is
  #   set, or if an expected attribute is missing or malformed.
  def parse(*arg)
    raise 'Too much arguments' if arg.length > 1

    # A single argument is the URL; going through the setter also resets
    # the attributes read from any earlier URL.
    self.url = arg.first unless arg.empty?

    raise "Can't parse if the URL is empty" if @url.nil?

    body = Net::HTTP.get(URI(@url))

    # The body is read as whitespace-separated "key:value" tokens. Only the
    # first ':' splits, so the colons inside the timestamp stay in the
    # value. Tokens without any ':' (e.g. the words of an HTML error page)
    # are dropped so the checks below report what is actually missing.
    pairs = body.split.map { |token| token.split(':', 2) }
    meta = pairs.select { |pair| pair.length == 2 }.to_h

    raise 'no lastModifiedDate attribute found' unless meta['lastModifiedDate']
    raise 'no valid size attribute found' unless SIZE_FORMAT.match?(meta['size'])
    raise 'no valid zipSize attribute found' unless SIZE_FORMAT.match?(meta['zipSize'])
    raise 'no valid gzSize attribute found' unless SIZE_FORMAT.match?(meta['gzSize'])
    raise 'no valid sha256 attribute found' unless SHA256_FORMAT.match?(meta['sha256'])

    # Attributes are assigned only after every check has passed.
    @last_modified_date = meta['lastModifiedDate']
    @size = meta['size']
    @zip_size = meta['zipSize']
    @gz_size = meta['gzSize']
    @sha256 = meta['sha256']

    0
  end
end
|
711
|
+
end
|