nvd_feed_api 0.0.1.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +53 -0
- data/.rubocop.yml +46 -0
- data/.yardopts +9 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +11 -0
- data/Rakefile +9 -0
- data/bin/nvd_feed_api +8 -0
- data/bin/nvd_feed_api_console +7 -0
- data/bin/nvd_feed_api_setup +6 -0
- data/lib/nvd_feed_api.rb +711 -0
- data/lib/nvd_feed_api/version.rb +3 -0
- data/nvd_feed_api.gemspec +45 -0
- data/pages/EXAMPLES.md +21 -0
- data/pages/FEATURES.md +9 -0
- data/pages/INSTALL.md +64 -0
- data/test/test_nvd_feed_api.rb +251 -0
- metadata +211 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6ce03fb10e963df256a7772e5ddc357d1702a387
|
4
|
+
data.tar.gz: 887231a4b7fd59dc8d2c10657c33d5825a775040
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2e46e2ce61301c79339ef96bfba07d8e0a7e684b9390c49c950805932838144ee01ffc93a2d68d08b1dafd71bc0695769198626f5b46dbbeb3f27ee75855e4ef
|
7
|
+
data.tar.gz: 5714a1667e0a15edcbcec6a757c3175b957cb037952a29d9cd0d8a675d8bca8dab745b92d4bd53ddebc948cccfd0c74c645be99584eb039d9637289eef55ff91
|
data/.gitignore
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
## Specific to RubyMotion:
|
17
|
+
.dat*
|
18
|
+
.repl_history
|
19
|
+
build/
|
20
|
+
*.bridgesupport
|
21
|
+
build-iPhoneOS/
|
22
|
+
build-iPhoneSimulator/
|
23
|
+
|
24
|
+
## Specific to RubyMotion (use of CocoaPods):
|
25
|
+
#
|
26
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
27
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
28
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
29
|
+
#
|
30
|
+
# vendor/Pods/
|
31
|
+
|
32
|
+
## Documentation cache and generated files:
|
33
|
+
/.yardoc/
|
34
|
+
/_yardoc/
|
35
|
+
/doc/
|
36
|
+
/rdoc/
|
37
|
+
|
38
|
+
## Environment normalization:
|
39
|
+
/.bundle/
|
40
|
+
/vendor/bundle
|
41
|
+
/lib/bundler/man/
|
42
|
+
|
43
|
+
# for a library or gem, you might want to ignore these files since the code is
|
44
|
+
# intended to run in multiple environments; otherwise, check them in:
|
45
|
+
# Gemfile.lock
|
46
|
+
# .ruby-version
|
47
|
+
# .ruby-gemset
|
48
|
+
|
49
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
50
|
+
.rvmrc
|
51
|
+
|
52
|
+
# do not check in Gemfile.lock for gems
|
53
|
+
Gemfile.lock
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.4
|
3
|
+
|
4
|
+
# RuboCop fails to see that the variable is used
|
5
|
+
Lint/UselessAssignment:
|
6
|
+
Enabled: false
|
7
|
+
|
8
|
+
Metrics/AbcSize:
|
9
|
+
Enabled: false
|
10
|
+
|
11
|
+
Metrics/BlockLength:
|
12
|
+
Max: 50
|
13
|
+
|
14
|
+
Metrics/BlockNesting:
|
15
|
+
Max: 4
|
16
|
+
|
17
|
+
Metrics/ClassLength:
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
Metrics/CyclomaticComplexity:
|
21
|
+
Max: 15
|
22
|
+
|
23
|
+
Metrics/LineLength:
|
24
|
+
Enabled: false
|
25
|
+
|
26
|
+
Metrics/MethodLength:
|
27
|
+
Max: 100
|
28
|
+
|
29
|
+
Metrics/PerceivedComplexity:
|
30
|
+
Enabled: false
|
31
|
+
|
32
|
+
Naming/VariableName:
|
33
|
+
EnforcedStyle: snake_case
|
34
|
+
|
35
|
+
Security/JSONLoad:
|
36
|
+
Enabled: false
|
37
|
+
|
38
|
+
Style/FrozenStringLiteralComment:
|
39
|
+
EnforcedStyle: never
|
40
|
+
|
41
|
+
Style/PerlBackrefs:
|
42
|
+
AutoCorrect: false
|
43
|
+
|
44
|
+
# Allow explicit return
|
45
|
+
Style/RedundantReturn:
|
46
|
+
Enabled: false
|
data/.yardopts
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Alexandre ZANNI
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
[](https://www.codacy.com/app/noraj1337/nvd_api?utm_source=github.com&utm_medium=referral&utm_content=noraj1337/nvd_api&utm_campaign=Badge_Grade)
|
2
|
+
|
3
|
+
# nvd_feed_api
|
4
|
+
|
5
|
+
Name | Link
|
6
|
+
--- | ---
|
7
|
+
Website | [link](#)
|
8
|
+
Git repository | [link](https://gitlab.com/noraj/nvd_api)
|
9
|
+
Merge Requests | [link](https://gitlab.com/noraj/nvd_api/merge_requests)
|
10
|
+
Issues | [link](https://gitlab.com/noraj/nvd_api/issues)
|
11
|
+
Wiki | [link](https://gitlab.com/noraj/nvd_api/wikis/home)
|
data/Rakefile
ADDED
data/bin/nvd_feed_api
ADDED
data/lib/nvd_feed_api.rb
ADDED
@@ -0,0 +1,711 @@
|
|
1
|
+
# @author Alexandre ZANNI <alexandre.zanni@engineer.com>
|
2
|
+
|
3
|
+
# Ruby internal
|
4
|
+
require 'digest'
|
5
|
+
require 'net/https'
|
6
|
+
require 'set'
|
7
|
+
# External
|
8
|
+
require 'archive/zip'
|
9
|
+
require 'nokogiri'
|
10
|
+
require 'oj'
|
11
|
+
# Project internal
|
12
|
+
require 'nvd_feed_api/version'
|
13
|
+
|
14
|
+
# The class that parses the NVD website to get information.
|
15
|
+
# @example Initialize a NVDFeedScraper object, get the feeds and see them:
|
16
|
+
# scraper = NVDFeedScraper.new
|
17
|
+
# scraper.scrap
|
18
|
+
# scraper.available_feeds
|
19
|
+
# scraper.feeds
|
20
|
+
# scraper.feeds("CVE-2007")
|
21
|
+
# cve2007, cve2015 = scraper.feeds("CVE-2007", "CVE-2015")
|
22
|
+
class NVDFeedScraper
|
23
|
+
# The NVD URL where the data feeds are located.
|
24
|
+
URL = 'https://nvd.nist.gov/vuln/data-feeds'.freeze
|
25
|
+
# Load constants
|
26
|
+
include NvdFeedApi
|
27
|
+
|
28
|
+
# Feed object.
|
29
|
+
class Feed
|
30
|
+
class << self
|
31
|
+
# Get / set the default feed storage location, where JSON feeds and archives are stored by default.
|
32
|
+
# @return [String] default feed storage location. Default to +/tmp/+.
|
33
|
+
# @example
|
34
|
+
# NVDFeedScraper::Feed.default_storage_location = '/srv/downloads/'
|
35
|
+
attr_accessor :default_storage_location
|
36
|
+
end
|
37
|
+
@default_storage_location = '/tmp/'
|
38
|
+
|
39
|
+
# @return [String] the name of the feed.
|
40
|
+
# @example
|
41
|
+
# 'CVE-2007'
|
42
|
+
attr_reader :name
|
43
|
+
|
44
|
+
# @return [String] the last update date of the feed information on the NVD website.
|
45
|
+
# @example
|
46
|
+
# '10/19/2017 3:27:02 AM -04:00'
|
47
|
+
attr_reader :updated
|
48
|
+
|
49
|
+
# @return [String] the URL of the metadata file of the feed.
|
50
|
+
# @example
|
51
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
|
52
|
+
attr_reader :meta_url
|
53
|
+
|
54
|
+
# @return [String] the URL of the gz archive of the feed.
|
55
|
+
# @example
|
56
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
|
57
|
+
attr_reader :gz_url
|
58
|
+
|
59
|
+
# @return [String] the URL of the zip archive of the feed.
|
60
|
+
# @example
|
61
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
|
62
|
+
attr_reader :zip_url
|
63
|
+
|
64
|
+
# @return [Meta] the {Meta} object of the feed.
|
65
|
+
# @note
|
66
|
+
# Return nil if not previously loaded by {#meta_pull}.
|
67
|
+
# Note that {#json_pull} also calls {#meta_pull}.
|
68
|
+
# @example
|
69
|
+
# s = NVDFeedScraper.new
|
70
|
+
# s.scrap
|
71
|
+
# f = s.feeds("CVE-2014")
|
72
|
+
# f.meta # => nil
|
73
|
+
# f.meta_pull
|
74
|
+
# f.meta # => #<NVDFeedScraper::Meta:0x00555b53027570 ... >
|
75
|
+
attr_reader :meta
|
76
|
+
|
77
|
+
# @return [String] the path of the saved JSON file.
|
78
|
+
# @note Return nil if not previously loaded by {#json_pull}.
|
79
|
+
# @example
|
80
|
+
# s = NVDFeedScraper.new
|
81
|
+
# s.scrap
|
82
|
+
# f = s.feeds("CVE-2014")
|
83
|
+
# f.json_file # => nil
|
84
|
+
# f.json_pull
|
85
|
+
# f.json_file # => "/tmp/nvdcve-1.0-2014.json"
|
86
|
+
attr_reader :json_file
|
87
|
+
|
88
|
+
# A new instance of Feed.
|
89
|
+
# @param name [String] see {#name}.
|
90
|
+
# @param updated [String] see {#updated}.
|
91
|
+
# @param meta_url [String] see {#meta_url}.
|
92
|
+
# @param gz_url [String] see {#gz_url}.
|
93
|
+
# @param zip_url [String] see {#zip_url}.
|
94
|
+
def initialize(name, updated, meta_url, gz_url, zip_url)
  @name = name
  @updated = updated
  @meta_url = meta_url
  @gz_url = gz_url
  @zip_url = zip_url
  # Meta and JSON are deliberately not pulled here (speed and memory
  # footprint); both stay nil until #meta_pull / #json_pull is called.
  @meta = nil
  @json_file = nil
end
|
104
|
+
|
105
|
+
# Create or update the {Meta} object (fill the attribute).
|
106
|
+
# @return [Meta] the updated {Meta} object of the feed.
|
107
|
+
# @see #meta
|
108
|
+
def meta_pull
  # Parse before storing: if #parse raises, the previous @meta is kept.
  fresh_meta = NVDFeedScraper::Meta.new(@meta_url)
  fresh_meta.parse
  @meta = fresh_meta
end
|
114
|
+
|
115
|
+
# Download the gz archive of the feed.
|
116
|
+
# @param opts [Hash] see {#download_file}.
|
117
|
+
# @return [String] the saved gz file path.
|
118
|
+
# @example
|
119
|
+
# afeed.download_gz
|
120
|
+
# afeed.download_gz(destination_path: '/srv/save/')
|
121
|
+
def download_gz(opts = {})
  # Thin wrapper: all the work (and option handling) is done by #download_file.
  download_file(@gz_url, opts)
end
|
124
|
+
|
125
|
+
# Download the zip archive of the feed.
|
126
|
+
# @param opts [Hash] see {#download_file}.
|
127
|
+
# @return [String] the saved zip file path.
|
128
|
+
# @example
|
129
|
+
# afeed.download_zip
|
130
|
+
# afeed.download_zip(destination_path: '/srv/save/')
|
131
|
+
def download_zip(opts = {})
  # Thin wrapper: all the work (and option handling) is done by #download_file.
  download_file(@zip_url, opts)
end
|
134
|
+
|
135
|
+
# Download the JSON feed and fill the attribute.
|
136
|
+
# @param opts [Hash] see {#download_file}.
|
137
|
+
# @return [String] the path of the saved JSON file. Default use {Feed#default_storage_location}.
|
138
|
+
# @note Will download and save the zip of the JSON file, then unzip and save it. This is very time-consuming.
|
139
|
+
# @see #json_file
|
140
|
+
def json_pull(opts = {})
  # Work on a copy so the hash supplied by the caller is never modified.
  opts = opts.merge(destination_path: opts[:destination_path] || Feed.default_storage_location)
  destination_path = opts[:destination_path]
  destination_path += '/' unless destination_path[-1] == '/'

  # The JSON name is derived from the zip URL, not from @json_file, so that a
  # file already present on disk can be picked up (offline loading).
  filename = URI(@zip_url).path.split('/').last.chomp('.zip')
  destination_file = destination_path + filename

  # Refresh the metadata: its SHA256 is the reference for both checks below.
  meta_pull
  matches_meta = lambda do |path|
    meta.sha256.casecmp(Digest::SHA256.file(path).hexdigest).zero?
  end

  if File.file?(destination_file) && matches_meta.call(destination_file)
    # The local copy is already the latest one: skip the download.
    @json_file = destination_file
  else
    zip_path = download_zip(opts)
    Archive::Zip.open(zip_path) do |z|
      z.extract(destination_path, flatten: true)
    end
    @json_file = zip_path.chomp('.zip')
    # Verify hash integrity of the freshly extracted file
    raise "File corruption: #{@json_file}" unless matches_meta.call(@json_file)
  end
  @json_file
end
|
169
|
+
|
170
|
+
# Search for CVE in the feed.
|
171
|
+
# @overload cve(cve)
|
172
|
+
# One CVE.
|
173
|
+
# @param cve [String] CVE ID, case insensitive.
|
174
|
+
# @return [Hash] a Ruby Hash corresponding to the CVE.
|
175
|
+
# @overload cve(cve_arr)
|
176
|
+
# An array of CVEs.
|
177
|
+
# @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
|
178
|
+
# @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
|
179
|
+
# @overload cve(cve, *)
|
180
|
+
# Multiple CVEs.
|
181
|
+
# @param cve [String] CVE ID, case insensitive.
|
182
|
+
# @param * [String] As many CVE ID as you want.
|
183
|
+
# @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
|
184
|
+
# @note {#json_pull} is needed before using this method. Remember you're searching only in the current feed.
|
185
|
+
# @todo implement a CVE Class instead of returning a Hash.
|
186
|
+
# @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
|
187
|
+
# @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
|
188
|
+
# @example
|
189
|
+
# s = NVDFeedScraper.new
|
190
|
+
# s.scrap
|
191
|
+
# f = s.feeds("CVE-2014")
|
192
|
+
# f.json_pull
|
193
|
+
# f.cve("CVE-2014-0002", "cve-2014-0001")
|
194
|
+
def cve(*arg_cve)
  raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
  raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
  raise 'no argument provided, 1 or more expected' if arg_cve.empty?
  # Overloading a list of arguments as one array argument
  return cve(arg_cve) if arg_cve.length > 1

  cve_pattern = /^CVE-[0-9]{4}-[0-9]{4,}$/i
  wanted = arg_cve[0]
  case wanted
  when String
    raise "bad CVE name (#{wanted})" unless cve_pattern.match?(wanted)
    target = wanted.upcase
    doc = Oj::Doc.open(File.read(@json_file))
    begin
      # Quicker than doc.fetch('/CVE_Items').size
      doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
      index = (1..doc_size).find do |i|
        doc.fetch("/CVE_Items/#{i}/cve/CVE_data_meta/ID") == target
      end
      # nil when the CVE is not part of this feed
      index.nil? ? nil : doc.fetch("/CVE_Items/#{index}")
    ensure
      doc.close
    end
  when Array
    # Validate types before upcasing so a non-String yields the intended
    # error message rather than a NoMethodError.
    raise 'one of the provided arguments is not a String' unless wanted.all? { |x| x.is_a?(String) }
    raise 'bad CVE name' unless wanted.all? { |x| cve_pattern.match?(x) }
    # Upcase to be sure include? works
    cves_to_find = wanted.map(&:upcase).sort
    found = []
    doc = Oj::Doc.open(File.read(@json_file))
    begin
      doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
      (1..doc_size).each do |i|
        # Stop scanning as soon as everything requested was found
        break if cves_to_find.empty?
        doc.move("/CVE_Items/#{i}")
        cve_id = doc.fetch('cve/CVE_data_meta/ID')
        next unless cves_to_find.include?(cve_id)
        found.push(doc.fetch)
        cves_to_find.delete(cve_id)
      end
    ensure
      # The document is now released on every path, including errors.
      doc.close
    end
    raise "#{cves_to_find.join(', ')} are unexisting CVEs in this feed" unless cves_to_find.empty?
    found
  else
    raise "the provided argument (#{wanted}) is nor a String or an Array"
  end
end
|
242
|
+
|
243
|
+
# Return a list with the name of all available CVEs in the feed.
|
244
|
+
# Can only be called after {#json_pull}.
|
245
|
+
# @return [Array<String>] List with the name of all available CVEs. May return thousands of CVEs.
|
246
|
+
def available_cves
  raise 'json_file is nil, it needs to be populated with json_pull' if @json_file.nil?
  raise "json_file (#{@json_file}) doesn't exist" unless File.file?(@json_file)
  doc = Oj::Doc.open(File.read(@json_file))
  begin
    # Quicker than doc.fetch('/CVE_Items').size
    doc_size = doc.fetch('/CVE_data_numberOfCVEs').to_i
    (1..doc_size).map do |i|
      doc.move("/CVE_Items/#{i}")
      doc.fetch('cve/CVE_data_meta/ID')
    end
  ensure
    # Release the document even if a fetch raised.
    doc.close
  end
end
|
260
|
+
|
261
|
+
protected
|
262
|
+
|
263
|
+
# @param arg_name [String] the new name of the feed.
|
264
|
+
# @return [String] the new name of the feed.
|
265
|
+
# @example
|
266
|
+
# 'CVE-2007'
|
267
|
+
def name=(arg_name)
  # is_a? (with the question mark) is the predicate; is_a does not exist.
  raise "name (#{arg_name}) is not a string" unless arg_name.is_a?(String)
  @name = arg_name
end
|
271
|
+
|
272
|
+
# @param arg_updated [String] the last update date of the feed information on the NVD website.
|
273
|
+
# @return [String] the new date.
|
274
|
+
# @example
|
275
|
+
# '10/19/2017 3:27:02 AM -04:00'
|
276
|
+
def updated=(arg_updated)
  # is_a? (with the question mark) is the predicate; is_a does not exist.
  raise "updated date (#{arg_updated}) is not a string" unless arg_updated.is_a?(String)
  @updated = arg_updated
end
|
280
|
+
|
281
|
+
# @param arg_meta_url [String] the new URL of the metadata file of the feed.
|
282
|
+
# @return [String] the new URL of the metadata file of the feed.
|
283
|
+
# @example
|
284
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.meta'
|
285
|
+
def meta_url=(arg_meta_url)
  # is_a? (with the question mark) is the predicate; is_a does not exist.
  raise "meta_url (#{arg_meta_url}) is not a string" unless arg_meta_url.is_a?(String)
  @meta_url = arg_meta_url
end
|
289
|
+
|
290
|
+
# @param arg_gz_url [String] the new URL of the gz archive of the feed.
|
291
|
+
# @return [String] the new URL of the gz archive of the feed.
|
292
|
+
# @example
|
293
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.gz'
|
294
|
+
def gz_url=(arg_gz_url)
  # is_a? (with the question mark) is the predicate; is_a does not exist.
  raise "gz_url (#{arg_gz_url}) is not a string" unless arg_gz_url.is_a?(String)
  @gz_url = arg_gz_url
end
|
298
|
+
|
299
|
+
# @param arg_zip_url [String] the new URL of the zip archive of the feed.
|
300
|
+
# @return [String] the new URL of the zip archive of the feed.
|
301
|
+
# @example
|
302
|
+
# 'https://static.nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2007.json.zip'
|
303
|
+
def zip_url=(arg_zip_url)
  # is_a? (with the question mark) is the predicate; is_a does not exist.
  raise "zip_url (#{arg_zip_url}) is not a string" unless arg_zip_url.is_a?(String)
  @zip_url = arg_zip_url
end
|
307
|
+
|
308
|
+
# Download a file.
|
309
|
+
# @param file_url [String] the URL of the file.
|
310
|
+
# @param opts [Hash] the optional download parameters.
|
311
|
+
# @option opts [String] :destination_path the destination path (may
|
312
|
+
# overwrite existing file).
|
313
|
+
# Default use {Feed#default_storage_location}.
|
314
|
+
# @option opts [String] :sha256 the SHA256 hash to check, if the file
|
315
|
+
# already exist and the hash matches then the download will be skipped.
|
316
|
+
# @return [String] the saved file path.
|
317
|
+
# @example
|
318
|
+
# download_file('https://example.org/example.zip') # => '/tmp/example.zip'
|
319
|
+
# download_file('https://example.org/example.zip', destination_path: '/srv/save/') # => '/srv/save/example.zip'
|
320
|
+
# download_file('https://example.org/example.zip', {destination_path: '/srv/save/', sha256: '70d6ea136d5036b6ce771921a949357216866c6442f44cea8497f0528c54642d'}) # => '/srv/save/example.zip'
|
321
|
+
def download_file(file_url, opts = {})
  # Read the options without writing defaults back into the caller's hash.
  destination_path = opts[:destination_path] || Feed.default_storage_location
  destination_path += '/' unless destination_path[-1] == '/'
  expected_sha256 = opts[:sha256]

  uri = URI(file_url)
  destination_file = destination_path + uri.path.split('/').last

  # Skip the download when a local copy already has the expected hash.
  if !expected_sha256.nil? && File.file?(destination_file)
    computed_h = Digest::SHA256.file(destination_file)
    return destination_file if expected_sha256.casecmp(computed_h.hexdigest).zero?
  end

  res = Net::HTTP.get_response(uri)
  raise "#{file_url} ended with #{res.code} #{res.message}" unless res.is_a?(Net::HTTPSuccess)
  # File.open instead of Kernel#open: the latter treats a path starting with
  # "|" as a command to run.
  File.open(destination_file, 'wb') do |file|
    file.write(res.body)
  end
  destination_file
end
|
347
|
+
end
|
348
|
+
|
349
|
+
# Initialize the scraper
|
350
|
+
def initialize
  @url = URL
  # Stays nil until #scrap is called; the feed accessors check for this.
  @feeds = nil
end
|
354
|
+
|
355
|
+
# Scrap / parse the website to get the feeds and fill the {#feeds} attribute.
|
356
|
+
# @note {#scrap} needs to be called only once but can be called again to update if the NVD feed page changed.
|
357
|
+
# @return [Integer] +0+ when there is no error.
|
358
|
+
def scrap
  uri = URI(@url)
  html = Net::HTTP.get(uri)

  doc = Nokogiri::HTML(html)
  rows = doc.css('h3#JSON_FEED ~ div.row:first-of-type table.xml-feed-table > tbody > tr[data-testid*=desc]')
  # Build the whole list first: @feeds is only replaced once every row was
  # parsed, so a failure half-way does not leave a truncated list behind.
  @feeds = rows.map do |tr|
    cells = tr.css('td')
    name = cells[0].text
    updated = cells[1].text
    meta = cells[2].css('> a').attr('href').value
    # The gz and zip links are read from the two rows following this one.
    gz = tr.css('+ tr > td > a').attr('href').value
    zip = tr.css('+ tr + tr > td > a').attr('href').value
    Feed.new(name, updated, meta, gz, zip)
  end
  # Documented contract: 0 when there is no error.
  0
end
|
373
|
+
|
374
|
+
# Return feeds. Can only be called after {#scrap}.
|
375
|
+
# @overload feeds
|
376
|
+
# All the feeds.
|
377
|
+
# @return [Array<Feed>] Attributes of all feeds. It's an array of {Feed} object.
|
378
|
+
# @overload feeds(feed)
|
379
|
+
# One feed.
|
380
|
+
# @param feed [String] Feed name as written on NVD website. Names can be obtained with {#available_feeds}.
|
381
|
+
# @return [Feed] Attributes of one feed. It's a {Feed} object.
|
382
|
+
# @overload feeds(feed_arr)
|
383
|
+
# An array of feeds.
|
384
|
+
# @param feed_arr [Array<String>] An array of feed names as written on NVD website. Names can be obtained with {#available_feeds}.
|
385
|
+
# @return [Array<Feed>] Attributes of the feeds. It's an array of {Feed} object.
|
386
|
+
# @overload feeds(feed, *)
|
387
|
+
# Multiple feeds.
|
388
|
+
# @param feed [String] Feed name as written on NVD website. Names can be obtained with {#available_feeds}.
|
389
|
+
# @param * [String] As many feeds as you want.
|
390
|
+
# @return [Array<Feed>] Attributes of the feeds. It's an array of {Feed} object.
|
391
|
+
# @example
|
392
|
+
# scraper.feeds # => all feeds
|
393
|
+
# scraper.feeds('CVE-2010') # => return only CVE-2010 feed
|
394
|
+
# scraper.feeds("CVE-2005", "CVE-2002") # => return CVE-2005 and CVE-2002 feeds
|
395
|
+
# @see https://nvd.nist.gov/vuln/data-feeds
|
396
|
+
def feeds(*arg_feeds)
  raise 'call scrap method before using feeds method' if @feeds.nil?
  return @feeds if arg_feeds.empty?
  # Overloading a list of arguments as one array argument
  return feeds(arg_feeds) if arg_feeds.length > 1

  # Upcase only the first three characters ("cve" -> "CVE") instead of the
  # whole name, to preserve CVE-Recent and CVE-Modified. to_s covers names
  # shorter than three characters, for which the tail slice is nil.
  normalize = ->(feed_name) { feed_name[0, 3].upcase + feed_name[3..-1].to_s }

  wanted = arg_feeds[0]
  case wanted
  when String
    target = normalize.call(wanted)
    # if nothing found return nil
    @feeds.find { |feed| feed.name == target }
  when Array
    raise 'one of the provided arguments is not a String' unless wanted.all? { |x| x.is_a?(String) }
    feeds_to_find = wanted.map(&normalize).sort
    matched_feeds = @feeds.select { |feed| feeds_to_find.include?(feed.name) }
    missing = feeds_to_find - matched_feeds.map(&:name)
    raise "#{missing.join(', ')} are unexisting feeds" unless missing.empty?
    matched_feeds
  else
    raise "the provided argument (#{wanted}) is nor a String or an Array"
  end
end
|
433
|
+
|
434
|
+
# Return a list with the name of all available feeds. Returned feed names can be used as arguments for the {#feeds} method. Can only be called after {#scrap}.
|
435
|
+
# @return [Array<String>] List with the name of all available feeds.
|
436
|
+
# @example
|
437
|
+
# scraper.available_feeds => ["CVE-Modified", "CVE-Recent", "CVE-2017", "CVE-2016", "CVE-2015", "CVE-2014", "CVE-2013", "CVE-2012", "CVE-2011", "CVE-2010", "CVE-2009", "CVE-2008", "CVE-2007", "CVE-2006", "CVE-2005", "CVE-2004", "CVE-2003", "CVE-2002"]
|
438
|
+
def available_feeds
  raise 'call scrap method before using available_feeds method' if @feeds.nil?
  # A new Array is built on every call, so callers may modify the result
  # without touching @feeds.
  @feeds.map(&:name)
end
|
446
|
+
|
447
|
+
# Search for CVE in all year feeds.
|
448
|
+
# @overload cve(cve)
|
449
|
+
# One CVE.
|
450
|
+
# @param cve [String] CVE ID, case insensitive.
|
451
|
+
# @return [Hash] a Ruby Hash corresponding to the CVE.
|
452
|
+
# @overload cve(cve_arr)
|
453
|
+
# An array of CVEs.
|
454
|
+
# @param cve_arr [Array<String>] Array of CVE ID, case insensitive.
|
455
|
+
# @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
|
456
|
+
# @overload cve(cve, *)
|
457
|
+
# Multiple CVEs.
|
458
|
+
# @param cve [String] CVE ID, case insensitive.
|
459
|
+
# @param * [String] As many CVE ID as you want.
|
460
|
+
# @return [Array] an Array of CVE, each CVE is a Ruby Hash. May not be in the same order as provided.
|
461
|
+
# @todo implement a CVE Class instead of returning a Hash.
|
462
|
+
# @note {#scrap} is needed before using this method.
|
463
|
+
# @see https://scap.nist.gov/schema/nvd/feed/0.1/nvd_cve_feed_json_0.1_beta.schema
|
464
|
+
# @see https://scap.nist.gov/schema/nvd/feed/0.1/CVE_JSON_4.0_min.schema
|
465
|
+
# @example
|
466
|
+
# s = NVDFeedScraper.new
|
467
|
+
# s.scrap
|
468
|
+
# s.cve("CVE-2014-0002", "cve-2014-0001")
|
469
|
+
def cve(*arg_cve)
  raise 'no argument provided, 1 or more expected' if arg_cve.empty?
  # Overloading a list of arguments as one array argument
  return cve(arg_cve) if arg_cve.length > 1

  # The capture group is the year, which selects the yearly feed.
  cve_pattern = /^CVE-([0-9]{4})-[0-9]{4,}$/i
  # CVE-Modified and CVE-Recent are left out of the search.
  non_year_feeds = %w[CVE-Modified CVE-Recent]

  wanted = arg_cve[0]
  case wanted
  when String
    name_match = cve_pattern.match(wanted)
    raise 'bad CVE name' if name_match.nil?
    year = name_match[1]
    matched_feed = (available_feeds - non_year_feeds).find { |feed_name| feed_name.include?(year) }
    raise "bad CVE year in #{arg_cve}" if matched_feed.nil?
    year_feed = feeds(matched_feed)
    year_feed.json_pull
    year_feed.cve(wanted)
  when Array
    raise 'one of the provided arguments is not a String' unless wanted.all? { |x| x.is_a?(String) }
    raise 'bad CVE name' unless wanted.all? { |x| cve_pattern.match?(x) }
    # Upcase to be sure include? works
    cves_to_find = wanted.map(&:upcase).sort
    feeds_to_match = cves_to_find.map { |id| "CVE-#{cve_pattern.match(id)[1]}" }.uniq
    unknown_feeds = feeds_to_match - (available_feeds - non_year_feeds)
    raise 'unexisting CVE year was provided in some CVE' unless unknown_feeds.empty?
    feeds(feeds_to_match).each_with_object([]) do |year_feed, found|
      year_feed.json_pull
      # Each feed is only asked for the CVEs whose ID carries its name.
      cves_obj = year_feed.cve(cves_to_find.select { |id| id.include?(year_feed.name) })
      case cves_obj
      when Hash then found.push(cves_obj)
      when Array then found.concat(cves_obj)
      else raise 'cve() method of the feed instance returns wrong value'
      end
    end
  else
    raise "the provided argument (#{wanted}) is nor a String or an Array"
  end
end
|
527
|
+
|
528
|
+
# Update the feeds
|
529
|
+
# @overload update_feeds(feed)
|
530
|
+
# One feed.
|
531
|
+
# @param feed [Feed] feed object to update.
|
532
|
+
# @return [Boolean] +true+ if the feed was updated, +false+ if it wasn't.
|
533
|
+
# @overload update_feeds(feed_arr)
|
534
|
+
# An array of feed.
|
535
|
+
# @param feed_arr [Array<Feed>] array of feed objects to update.
|
536
|
+
# @return [Array<Boolean>] +true+ if the feed was updated, +false+ if it wasn't.
|
537
|
+
# @overload update_feeds(feed, *)
|
538
|
+
# Multiple feeds.
|
539
|
+
# @param feed [Feed] feed object to update.
|
540
|
+
# @param * [Feed] As many feed objects as you want.
|
541
|
+
# @return [Array<Boolean>] +true+ if the feed was updated, +false+ if it wasn't.
|
542
|
+
# @example
|
543
|
+
# s = NVDFeedScraper.new
|
544
|
+
# s.scrap
|
545
|
+
# f2015, f2017 = s.feeds("CVE-2015", "CVE-2017")
|
546
|
+
# s.update_feeds(f2015, f2017) # => [false, false]
|
547
|
+
def update_feeds(*arg_feed)
  raise 'no argument provided, 1 or more expected' if arg_feed.empty?
  # Re-scrape once for the whole call rather than once per feed.
  scrap

  refresh = lambda do |item|
    case item
    when Feed
      new_feed = feeds(item.name)
      # nil (not false) when the feed is no longer listed on the page
      next nil if new_feed.nil?
      next false if item.updated == new_feed.updated
      # Feed's attribute writers are protected and so not callable from the
      # scraper; copy the refreshed values straight into the instance.
      %i[name updated meta_url gz_url zip_url].each do |attribute|
        item.instance_variable_set("@#{attribute}", new_feed.public_send(attribute))
      end
      # update if @meta was set
      item.meta_pull unless item.meta.nil?
      # update if @json_file was set
      item.json_pull unless item.json_file.nil?
      true
    when Array
      item.map do |f|
        res = refresh.call(f)
        puts "#{f} not found" if res.nil?
        res
      end
    else
      raise "the provided argument #{item} is not a Feed or an Array"
    end
  end

  # Overloading a list of arguments as one array argument
  refresh.call(arg_feed.length == 1 ? arg_feed[0] : arg_feed)
end
|
583
|
+
|
584
|
+
# Return a list with the name of all available CVEs in the feed.
|
585
|
+
# Can only be called after {#scrap}.
|
586
|
+
# @return [Array<String>] List with the name of all available CVEs. May return tens of thousands of CVEs.
|
587
|
+
def available_cves
  remaining_feeds = available_feeds
  # The CVE-Modified and CVE-Recent feeds are left out of the listing.
  %w[CVE-Modified CVE-Recent].each { |skipped| remaining_feeds.delete(skipped) }

  remaining_feeds.reduce([]) do |collected, feed_name|
    current = feeds(feed_name)
    # The JSON file is pulled before asking the feed for its CVE names.
    current.json_pull
    # Array union keeps first-seen order and drops duplicates.
    collected | current.available_cves
  end
end
|
600
|
+
|
601
|
+
# Manage the meta file from a feed.
|
602
|
+
#
|
603
|
+
# == Usage
|
604
|
+
#
|
605
|
+
# @example
|
606
|
+
# s = NVDFeedScraper.new
|
607
|
+
# s.scrap
|
608
|
+
# metaUrl = s.feeds("CVE-2014").meta_url
|
609
|
+
# m = NVDFeedScraper::Meta.new
|
610
|
+
# m.url = metaUrl
|
611
|
+
# m.parse
|
612
|
+
# m.sha256
|
613
|
+
#
|
614
|
+
# Several ways to set the url:
|
615
|
+
#
|
616
|
+
# m = NVDFeedScraper::Meta.new(metaUrl)
|
617
|
+
# m.parse
|
618
|
+
# # or
|
619
|
+
# m = NVDFeedScraper::Meta.new
|
620
|
+
# m.url = metaUrl
|
621
|
+
# m.parse
|
622
|
+
# # or
|
623
|
+
# m = NVDFeedScraper::Meta.new
|
624
|
+
# m.parse(metaUrl)
|
625
|
+
class Meta
  # {Meta} last modified date getter
  # @return [String] the last modified date and time.
  # @example
  #   '2017-10-19T03:27:02-04:00'
  attr_reader :last_modified_date

  # {Meta} JSON size getter
  # @return [String] the size of the JSON file uncompressed.
  # @example
  #   '29443314'
  attr_reader :size

  # {Meta} zip size getter
  # @return [String] the size of the zip file.
  # @example
  #   '2008493'
  attr_reader :zip_size

  # {Meta} gz size getter
  # @return [String] the size of the gz file.
  # @example
  #   '2008357'
  attr_reader :gz_size

  # {Meta} JSON sha256 getter
  # @return [String] the SHA256 value of the uncompressed JSON file.
  # @example
  #   '33ED52D451692596D644F23742ED42B4E350258B11ACB900F969F148FCE3777B'
  attr_reader :sha256

  # @param url [String, nil] see {Feed#meta_url}.
  def initialize(url = nil)
    @url = url
  end

  # {Meta} URL getter.
  # @return [String] The URL of the meta file of the feed.
  attr_reader :url

  # {Meta} URL setter. Also clears every attribute parsed from the
  # previous URL, so stale values are never reported for the new one.
  # @param url [String] see {Feed#meta_url}.
  def url=(url)
    @url = url
    @last_modified_date = @size = @zip_size = @gz_size = @sha256 = nil
  end

  # Parse the meta file from the URL and set the attributes.
  # @overload parse
  #   Parse the meta file from the URL and set the attributes.
  #   @return [Integer] Returns +0+ when there is no error.
  # @overload parse(url)
  #   Set the URL of the meta file of the feed and
  #   parse the meta file from the URL and set the attributes.
  #   @param url [String] see {Feed#meta_url}
  #   @return [Integer] Returns +0+ when there is no error.
  # @raise [RuntimeError] if more than one argument is given, if no URL is
  #   set, or if an attribute is missing or malformed in the meta file.
  def parse(*arg)
    raise 'Too much arguments' if arg.length > 1

    self.url = arg[0] unless arg.empty? # arg = url

    raise "Can't parse if the URL is empty" if @url.nil?
    uri = URI(@url)

    body = Net::HTTP.get(uri)

    # The body is read as whitespace-separated "key:value" tokens. Tokens
    # without a colon are skipped so that an unexpected body ends in one of
    # the explicit errors below instead of a hash construction error.
    meta = body.split
               .map { |token| token.split(':', 2) }
               .select { |pair| pair.length == 2 }
               .to_h

    # Patterns are anchored so the whole value must match, not a substring.
    raise 'no lastModifiedDate attribute found' unless meta['lastModifiedDate']
    raise 'no valid size attribute found' unless /\A[0-9]+\z/.match?(meta['size'])
    raise 'no valid zipSize attribute found' unless /\A[0-9]+\z/.match?(meta['zipSize'])
    raise 'no valid gzSize attribute found' unless /\A[0-9]+\z/.match?(meta['gzSize'])
    raise 'no valid sha256 attribute found' unless /\A[0-9A-F]{64}\z/.match?(meta['sha256'])

    # Attributes are only assigned once every value has been validated.
    @last_modified_date = meta['lastModifiedDate']
    @size = meta['size']
    @zip_size = meta['zipSize']
    @gz_size = meta['gzSize']
    @sha256 = meta['sha256']

    0
  end
end
|
711
|
+
end
|