nsastorage 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +15 -0
- data/README.md +51 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/exe/nsastorage +7 -0
- data/lib/nsastorage/address.rb +61 -0
- data/lib/nsastorage/cli.rb +58 -0
- data/lib/nsastorage/config.rb +58 -0
- data/lib/nsastorage/crawl.rb +37 -0
- data/lib/nsastorage/crawler.rb +59 -0
- data/lib/nsastorage/dimensions.rb +67 -0
- data/lib/nsastorage/facility.rb +126 -0
- data/lib/nsastorage/features.rb +83 -0
- data/lib/nsastorage/fetch_error.rb +12 -0
- data/lib/nsastorage/geocode.rb +45 -0
- data/lib/nsastorage/link.rb +26 -0
- data/lib/nsastorage/price.rb +61 -0
- data/lib/nsastorage/rates.rb +59 -0
- data/lib/nsastorage/sitemap.rb +43 -0
- data/lib/nsastorage/version.rb +5 -0
- data/lib/nsastorage.rb +26 -0
- metadata +139 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ecae367c1dc75c720c89af10a90e6325bfe10c03dbfee675a125a45c70fcc6f5
|
4
|
+
data.tar.gz: dbd35a90238e4a806a9e569bae4c99f474f101d92e906eef91cec2a7c85867b2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3fa08993d81f797354e095db5f367e650a4c58c67763fa147f65de1cc4794113c7b4394bb5656219484ac62dd530c4dc70fc5b2aa86a6b826140b0c1cd767575
|
7
|
+
data.tar.gz: 2b0dbea4c879f0d44d0716772830b330d497ee1c8170969909db7dcdb375203b50672cba4d1f4611e29e3cd800e183aeee405e4f90b040b5fc7994a936691695
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# NSAStorage
|
2
|
+
|
3
|
+
[License](https://github.com/ksylvest/nsastorage/blob/main/LICENSE)
|
4
|
+
[RubyGems](https://rubygems.org/gems/nsastorage)
|
5
|
+
[GitHub](https://github.com/ksylvest/nsastorage)
|
6
|
+
[Documentation](https://nsastorage.ksylvest.com)
|
7
|
+
[CircleCI](https://circleci.com/gh/ksylvest/nsastorage)
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
```bash
|
12
|
+
gem install nsastorage
|
13
|
+
```
|
14
|
+
|
15
|
+
## Configuration
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
require 'nsastorage'
|
19
|
+
|
20
|
+
NSAStorage.configure do |config|
|
21
|
+
config.user_agent = '../..' # ENV['NSASTORAGE_USER_AGENT']
|
22
|
+
config.timeout = 30 # ENV['NSASTORAGE_TIMEOUT']
|
23
|
+
config.proxy_url = 'http://user:pass@superproxy.zenrows.com:1337' # ENV['NSASTORAGE_PROXY_URL']
|
24
|
+
end
|
25
|
+
```
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
require 'nsastorage'
|
31
|
+
|
32
|
+
sitemap = NSAStorage::Facility.sitemap
|
33
|
+
sitemap.links.each do |link|
|
34
|
+
url = link.loc
|
35
|
+
facility = NSAStorage::Facility.fetch(url:)
|
36
|
+
|
37
|
+
puts facility.text
|
38
|
+
|
39
|
+
facility.prices.each do |price|
|
40
|
+
puts price.text
|
41
|
+
end
|
42
|
+
|
43
|
+
puts
|
44
|
+
end
|
45
|
+
```
|
46
|
+
|
47
|
+
## CLI
|
48
|
+
|
49
|
+
```bash
|
50
|
+
nsastorage crawl
|
51
|
+
```
|
data/bin/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'nsastorage'
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
require 'irb'
|
11
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/nsastorage
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # A facility's postal address, made up of a street, city, state and zip.
  class Address
    # @attribute [rw] street
    #   @return [String]
    attr_accessor :street

    # @attribute [rw] city
    #   @return [String]
    attr_accessor :city

    # @attribute [rw] state
    #   @return [String]
    attr_accessor :state

    # @attribute [rw] zip
    #   @return [String]
    attr_accessor :zip

    # Builds an address from a hash keyed by 'streetAddress', 'addressLocality',
    # 'addressRegion' and 'postalCode'.
    #
    # @param data [Hash]
    #
    # @return [Address]
    def self.parse(data:)
      street = data['streetAddress']
      city = data['addressLocality']
      state = data['addressRegion']
      zip = data['postalCode']

      new(street: street, city: city, state: state, zip: zip)
    end

    # @param street [String]
    # @param city [String]
    # @param state [String]
    # @param zip [String]
    def initialize(street:, city:, state:, zip:)
      @street = street
      @city = city
      @state = state
      @zip = zip
    end

    # @return [String] a debugging representation listing every field
    def inspect
      details = { street: @street, city: @city, state: @state, zip: @zip }
                .map { |field, value| "#{field}=#{value.inspect}" }
                .join(' ')

      "#<#{self.class.name} #{details}>"
    end

    # @return [String] the address on one line, "<street>, <city>, <state> <zip>"
    def text
      format('%s, %s, %s %s', street, city, state, zip)
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
module NSAStorage
  # Used when interacting with the library from the command line interface (CLI).
  #
  # Usage:
  #
  #   cli = NSAStorage::CLI.new
  #   cli.parse
  class CLI
    # Process exit codes used by the CLI.
    module Code
      OK = 0
      ERROR = 1
    end

    # Parses options, then dispatches the first remaining argument as a command.
    # Every path terminates the process via `exit`: `Code::OK` after a command,
    # `--help` or `--version`, and `Code::ERROR` for an unsupported command or
    # an option that cannot be parsed.
    #
    # @param argv [Array<String>] mutated in place (options and the command are removed)
    def parse(argv = ARGV)
      parser.parse!(argv)
      command = argv.shift

      case command
      when 'crawl' then crawl
      else
        warn("unsupported command=#{command.inspect}")
        exit(Code::ERROR)
      end
    rescue OptionParser::ParseError => e
      # Unknown or malformed flags are reported as a one-line message instead
      # of an uncaught exception with a backtrace.
      warn(e.message)
      exit(Code::ERROR)
    end

    private

    # Runs the crawl command and exits successfully.
    def crawl
      Crawl.run
      exit(Code::OK)
    end

    # Prints the usage text and exits successfully.
    #
    # @param options [OptionParser]
    def help(options)
      puts(options)
      exit(Code::OK)
    end

    # Prints the gem version and exits successfully.
    def version
      puts(VERSION)
      exit(Code::OK)
    end

    # @return [OptionParser]
    def parser
      OptionParser.new do |options|
        options.banner = 'usage: nsastorage [options] <command> [<args>]'

        options.on('-h', '--help', 'help') { help(options) }
        options.on('-v', '--version', 'version') { version }
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # The core configuration. Defaults are read from NSASTORAGE_* environment
  # variables when the config is built and may be overridden via the accessors.
  class Config
    # @attribute [rw] accept_language
    #   @return [String]
    attr_accessor :accept_language

    # @attribute [rw] user_agent
    #   @return [String]
    attr_accessor :user_agent

    # @attribute [rw] timeout
    #   @return [Integer]
    attr_accessor :timeout

    # @attribute [rw] proxy_url
    #   @return [String]
    attr_accessor :proxy_url

    def initialize
      @accept_language = ENV.fetch('NSASTORAGE_ACCEPT_LANGUAGE', 'en-US,en;q=0.9')
      @user_agent = ENV.fetch('NSASTORAGE_USER_AGENT', "nsastorage.rb/#{VERSION}")
      @timeout = Integer(ENV.fetch('NSASTORAGE_TIMEOUT', 60))
      @proxy_url = ENV.fetch('NSASTORAGE_PROXY_URL', nil)
    end

    # @return [Boolean] true when at least one header has a value to send
    def headers?
      !headers.empty?
    end

    # @return [Boolean] false when the timeout is unset (nil) or zero
    def timeout?
      !(@timeout.nil? || @timeout.zero?)
    end

    # @return [Boolean] false when the proxy URL is unset (nil) or blank
    def proxy?
      !@proxy_url.to_s.strip.empty?
    end

    # Headers whose value is nil are omitted, so clearing one setting does not
    # suppress (or nil out) the other.
    #
    # @return [Hash<String, String>] e.g { 'User-Agent' => 'nsastorage.rb/1.0.0' }
    def headers
      {
        'Accept-Language' => @accept_language,
        'User-Agent' => @user_agent
      }.compact
    end

    # Only meaningful when {#proxy?} is true.
    #
    # @return [Array] e.g. ['proxy.example.com', 8080, 'user', 'pass']
    def via
      proxy_uri = URI.parse(@proxy_url)
      [proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password]
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # Implements the `crawl` CLI command: lists every facility in the sitemap
  # together with its prices.
  class Crawl
    # Convenience entry point: builds a crawl with the given arguments and runs it.
    def self.run(...)
      crawl = new(...)
      crawl.run
    end

    # @param stdout [IO] optional
    # @param stderr [IO] optional
    # @param options [Hash] optional
    def initialize(stdout: $stdout, stderr: $stderr, options: {})
      @stdout = stdout
      @stderr = stderr
      @options = options
    end

    # Prints the number of sitemap links, then processes each link in turn.
    def run
      links = Facility.sitemap.links

      @stdout.puts("count=#{links.count}")
      @stdout.puts

      links.each do |link|
        process(url: link.loc)
      end
    end

    # Prints the URL, the facility summary and one line per price. A fetch
    # failure is reported on stderr and does not stop the crawl.
    #
    # @param url [String]
    def process(url:)
      @stdout.puts(url)

      facility = Facility.fetch(url: url)
      @stdout.puts(facility.text)
      facility.prices.each do |price|
        @stdout.puts(price.text)
      end

      @stdout.puts
    rescue FetchError => e
      @stderr.puts("url=#{url} error=#{e.message}")
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # Used to fetch and parse either HTML or XML via a URL.
  class Crawler
    HOST = 'https://www.nsastorage.com'

    # Fetches and parses an HTML page with a one-off crawler. The crawler's
    # persistent connection is closed before returning so repeated calls do
    # not accumulate open sockets.
    #
    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::HTML::Document]
    def self.html(url:)
      crawler = new
      crawler.html(url: url)
    ensure
      crawler&.close
    end

    # Fetches and parses an XML document with a one-off crawler. The crawler's
    # persistent connection is closed before returning so repeated calls do
    # not accumulate open sockets.
    #
    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::XML::Document]
    def self.xml(url:)
      crawler = new
      crawler.xml(url: url)
    ensure
      crawler&.close
    end

    # Lazily builds (and memoizes) a persistent client for HOST, applying the
    # headers, timeout and proxy from the global config when they are enabled.
    #
    # @return [HTTP::Client]
    def connection
      @connection ||= begin
        config = NSAStorage.config

        connection = HTTP.use(:auto_deflate).use(:auto_inflate).persistent(HOST)
        connection = connection.headers(config.headers) if config.headers?
        connection = connection.timeout(config.timeout) if config.timeout?
        connection = connection.via(*config.via) if config.proxy?

        connection
      end
    end

    # Closes the memoized connection, if one was opened. Calling {#connection}
    # afterwards builds a fresh client.
    #
    # @return [void]
    def close
      @connection&.close
      @connection = nil
    end

    # @param url [String]
    # @raise [FetchError] when the response status is not 200 OK
    # @return [HTTP::Response]
    def fetch(url:)
      response = connection.get(url)
      # flush reads the body so the persistent connection is left reusable.
      raise FetchError.new(url: url, response: response.flush) unless response.status.ok?

      response
    end

    # The body is read fully into a String before parsing.
    #
    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::HTML::Document]
    def html(url:)
      Nokogiri::HTML(String(fetch(url: url).body))
    end

    # The body is read fully into a String before parsing.
    #
    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::XML::Document]
    def xml(url:)
      Nokogiri::XML(String(fetch(url: url).body))
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # The dimensions (width + depth + height) of a price, in feet.
  class Dimensions
    DEFAULT_HEIGHT = 8.0 # feet

    # Matches e.g. "10'x10'", "7.5' x 10'" or "5' × 5'" (separator is
    # case-insensitive and may be surrounded by whitespace).
    PARSE_REGEX = /(?<width>\d*\.?\d+)'\s*[x×]\s*(?<depth>\d*\.?\d+)'/i

    # @attribute [rw] depth
    #   @return [Float]
    attr_accessor :depth

    # @attribute [rw] width
    #   @return [Float]
    attr_accessor :width

    # @attribute [rw] height
    #   @return [Float]
    attr_accessor :height

    # @param depth [Float]
    # @param width [Float]
    # @param height [Float]
    def initialize(depth:, width:, height: DEFAULT_HEIGHT)
      @depth = depth
      @width = width
      @height = height
    end

    # @return [String]
    def inspect
      props = [
        "depth=#{@depth.inspect}",
        "width=#{@width.inspect}",
        "height=#{@height.inspect}"
      ]
      "#<#{self.class.name} #{props.join(' ')}>"
    end

    # @return [Integer] floor area (width × depth), truncated to a whole number
    def sqft
      Integer(@width * @depth)
    end

    # @return [Integer] volume (width × depth × height), truncated to a whole number
    def cuft
      Integer(@width * @depth * @height)
    end

    # @return [String] e.g. "10' × 10' (100 sqft)"
    def text
      "#{format('%g', @width)}' × #{format('%g', @depth)}' (#{sqft} sqft)"
    end

    # Extracts the first "<width>'x<depth>'" pair found in the element's text.
    # The height is not parsed; DEFAULT_HEIGHT is always used.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @raise [RuntimeError] when the text contains no dimensions
    #
    # @return [Dimensions]
    def self.parse(element:)
      text = element.text
      match = PARSE_REGEX.match(text)
      raise "unable to parse dimensions from text=#{text.inspect}" if match.nil?

      width = Float(match[:width])
      depth = Float(match[:depth])
      new(depth: depth, width: width, height: DEFAULT_HEIGHT)
    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# JSON.parse is used below; required explicitly rather than relying on another gem loading it.
require 'json'

module NSAStorage
  # A facility (address + geocode + prices) on nsastorage.com.
  #
  # e.g. https://www.nsastorage.com/storage/california/storage-units-paramount/7752-Jackson-St-876
  class Facility
    # Raised when a facility page or URL lacks the data needed to build a facility.
    class ParseError < StandardError; end

    DEFAULT_EMAIL = 'TODO'
    DEFAULT_PHONE = 'TODO'

    SITEMAP_URL = 'https://www.nsastorage.com/sitemap.xml'

    # Captures the first run of digits that directly follows a "/".
    # NOTE(review): for the example URL above this yields "7752" (the start of
    # the street segment), not the trailing "876" — confirm which is the real ID.
    ID_REGEX = %r{/(?<id>\d+)}

    JSON_LD_XPATH = '//script[@type="application/ld+json"]'
    JSON_LD_TYPE = 'SelfStorage'

    # @attribute [rw] id
    #   @return [String]
    attr_accessor :id

    # @attribute [rw] url
    #   @return [String]
    attr_accessor :url

    # @attribute [rw] name
    #   @return [String]
    attr_accessor :name

    # @attribute [rw] phone
    #   @return [String]
    attr_accessor :phone

    # @attribute [rw] email
    #   @return [String]
    attr_accessor :email

    # @attribute [rw] address
    #   @return [Address]
    attr_accessor :address

    # @attribute [rw] geocode
    #   @return [Geocode]
    attr_accessor :geocode

    # @attribute [rw] prices
    #   @return [Array<Price>]
    attr_accessor :prices

    # @return [Sitemap]
    def self.sitemap
      Sitemap.fetch(url: SITEMAP_URL)
    end

    # @param url [String]
    #
    # @raise [ParseError]
    #
    # @return [Facility]
    def self.fetch(url:)
      document = Crawler.html(url: url)
      parse(url: url, document: document)
    end

    # Builds a facility from the page's "SelfStorage" JSON-LD entry. Phone,
    # email and prices are not parsed here and keep their defaults.
    #
    # @param url [String]
    # @param document [Nokogiri::HTML::Document]
    #
    # @raise [ParseError] when the JSON-LD entry or the ID cannot be found
    #
    # @return [Facility]
    def self.parse(url:, document:)
      data = parse_json_ld(document: document)
      id = parse_id(url: url)

      address = Address.parse(data: data['address'])
      # NOTE(review): the geocode is read from the 'address' entry — confirm the
      # page really carries latitude/longitude there.
      geocode = Geocode.parse(data: data['address'])

      new(id: id, url: url, name: data['name'], address: address, geocode: geocode)
    end

    # Finds the first JSON-LD hash whose '@type' is "SelfStorage". A script
    # holding a top-level array contributes each of its elements; non-hash
    # values are ignored.
    #
    # @param document [Nokogiri::HTML::Document]
    #
    # @raise [ParseError] when no matching entry exists
    #
    # @return [Hash]
    def self.parse_json_ld(document:)
      entries = document.xpath(JSON_LD_XPATH).flat_map do |script|
        parsed = JSON.parse(script.text)
        parsed.is_a?(Array) ? parsed : [parsed]
      end

      data = entries.find { |entry| entry.is_a?(Hash) && entry['@type'] == JSON_LD_TYPE }
      raise ParseError, "unable to find JSON-LD with @type=#{JSON_LD_TYPE.inspect}" if data.nil?

      data
    end

    # @param url [String]
    #
    # @raise [ParseError] when the URL does not match ID_REGEX
    #
    # @return [String]
    def self.parse_id(url:)
      match = ID_REGEX.match(url)
      raise ParseError, "unable to parse an ID from url=#{url.inspect}" if match.nil?

      match[:id]
    end
    private_class_method :parse_id

    # @param id [String]
    # @param url [String]
    # @param name [String]
    # @param address [Address]
    # @param geocode [Geocode]
    # @param phone [String]
    # @param email [String]
    # @param prices [Array<Price>]
    def initialize(id:, url:, name:, address:, geocode:, phone: DEFAULT_PHONE, email: DEFAULT_EMAIL, prices: [])
      @id = id
      @url = url
      @name = name
      @address = address
      @geocode = geocode
      @phone = phone
      @email = email
      @prices = prices
    end

    # @return [String]
    def inspect
      props = [
        "id=#{@id.inspect}",
        "url=#{@url.inspect}",
        "name=#{@name.inspect}",
        "address=#{@address.inspect}",
        "geocode=#{@geocode.inspect}",
        "phone=#{@phone.inspect}",
        "email=#{@email.inspect}",
        "prices=#{@prices.inspect}"
      ]
      "#<#{self.class.name} #{props.join(' ')}>"
    end

    # @return [String]
    def text
      "#{@id} | #{@name} | #{@phone} | #{@email} | #{@address.text} | #{@geocode.text}"
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # The amenities of a price: climate control, inside / outside drive-up
  # access and first floor access.
  class Features
    # Detects each feature by looking for its phrase in the element's text.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @return [Features]
    def self.parse(element:)
      content = element.text
      phrases = {
        climate_controlled: 'Climate controlled',
        inside_drive_up_access: 'Inside drive-up access',
        outside_drive_up_access: 'Outside drive-up access',
        first_floor_access: '1st floor access'
      }

      new(**phrases.transform_values { |phrase| content.include?(phrase) })
    end

    # @param climate_controlled [Boolean]
    # @param inside_drive_up_access [Boolean]
    # @param outside_drive_up_access [Boolean]
    # @param first_floor_access [Boolean]
    def initialize(climate_controlled:, inside_drive_up_access:, outside_drive_up_access:, first_floor_access:)
      @climate_controlled = climate_controlled
      @inside_drive_up_access = inside_drive_up_access
      @outside_drive_up_access = outside_drive_up_access
      @first_floor_access = first_floor_access
    end

    # @return [String] a debugging representation listing every flag
    def inspect
      flags = [
        ['climate_controlled', @climate_controlled],
        ['inside_drive_up_access', @inside_drive_up_access],
        ['outside_drive_up_access', @outside_drive_up_access],
        ['first_floor_access', @first_floor_access]
      ]
      details = flags.map { |name, value| "#{name}=#{value}" }.join(' ')

      "#<#{self.class.name} #{details}>"
    end

    # @return [String] e.g. "Climate Controlled + First Floor Access"
    def text
      amenities.join(' + ')
    end

    # @return [Array<String>] the display names of the enabled features, in a fixed order
    def amenities
      {
        'Climate Controlled' => climate_controlled?,
        'Inside Drive-Up Access' => inside_drive_up_access?,
        'Outside Drive-Up Access' => outside_drive_up_access?,
        'First Floor Access' => first_floor_access?
      }.select { |_label, enabled| enabled }.keys
    end

    # @return [Boolean]
    def climate_controlled?
      @climate_controlled
    end

    # @return [Boolean]
    def inside_drive_up_access?
      @inside_drive_up_access
    end

    # @return [Boolean]
    def outside_drive_up_access?
      @outside_drive_up_access
    end

    # @return [Boolean] true when either kind of drive-up access is available
    def drive_up_access?
      inside = inside_drive_up_access?
      inside || outside_drive_up_access?
    end

    # @return [Boolean]
    def first_floor_access?
      @first_floor_access
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # Raised when an HTTP response is not the one expected; the message records
  # the URL, the response status and the response body.
  class FetchError < Error
    # @param url [String]
    # @param response [HTTP::Response]
    def initialize(url:, response:)
      status = response.status.inspect
      body = String(response.body).inspect

      super("url=#{url} status=#{status} body=#{body}")
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # A facility's geographic coordinates (latitude and longitude).
  class Geocode
    # @attribute [rw] latitude
    #   @return [Float]
    attr_accessor :latitude

    # @attribute [rw] longitude
    #   @return [Float]
    attr_accessor :longitude

    # Builds a geocode from a hash keyed by 'latitude' and 'longitude'; the
    # values are stored as given.
    #
    # @param data [Hash]
    #
    # @return [Geocode]
    def self.parse(data:)
      latitude = data['latitude']
      longitude = data['longitude']

      new(latitude: latitude, longitude: longitude)
    end

    # @param latitude [Float]
    # @param longitude [Float]
    def initialize(latitude:, longitude:)
      @latitude = latitude
      @longitude = longitude
    end

    # @return [String] a debugging representation listing both coordinates
    def inspect
      "#<#{self.class.name} latitude=#{@latitude.inspect} longitude=#{@longitude.inspect}>"
    end

    # @return [String] "<latitude>,<longitude>"
    def text
      format('%s,%s', @latitude, @longitude)
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Time.parse lives in the 'time' standard library extension.
require 'time'

module NSAStorage
  # A link in a sitemap.
  class Link
    # @attribute [rw] loc
    #   @return [String]
    attr_accessor :loc

    # @attribute [rw] lastmod
    #   @return [Time, nil] nil when the sitemap entry carried no modification time
    attr_accessor :lastmod

    # @param loc [String]
    # @param lastmod [String, Time, nil] a parseable timestamp, an already-built
    #   Time, or nil / blank when the entry has no <lastmod>
    def initialize(loc:, lastmod: nil)
      @loc = loc
      @lastmod = parse_lastmod(lastmod)
    end

    # @return [String]
    def inspect
      "#<#{self.class.name} loc=#{@loc.inspect} lastmod=#{@lastmod.inspect}>"
    end

    private

    # Converts the raw <lastmod> value into a Time, tolerating its absence.
    #
    # @param value [String, Time, nil]
    #
    # @return [Time, nil]
    def parse_lastmod(value)
      return value if value.is_a?(Time)
      return if value.nil? || value.strip.empty?

      Time.parse(value)
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # A price for a facility, combining an id, dimensions, features and rates.
  class Price
    # @attribute [rw] id
    #   @return [String]
    attr_accessor :id

    # @attribute [rw] dimensions
    #   @return [Dimensions]
    attr_accessor :dimensions

    # @attribute [rw] features
    #   @return [Features]
    attr_accessor :features

    # @attribute [rw] rates
    #   @return [Rates]
    attr_accessor :rates

    # Builds a price from one element: the id comes from its 'id' attribute and
    # the dimensions, features and rates are each parsed from the same element.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @return [Price]
    def self.parse(element:)
      id = element.attr('id')
      dimensions = Dimensions.parse(element: element)
      features = Features.parse(element: element)
      rates = Rates.parse(element: element)

      new(id: id, dimensions: dimensions, features: features, rates: rates)
    end

    # @param id [String]
    # @param dimensions [Dimensions]
    # @param features [Features]
    # @param rates [Rates]
    def initialize(id:, dimensions:, features:, rates:)
      @id = id
      @dimensions = dimensions
      @features = features
      @rates = rates
    end

    # @return [String] a debugging representation listing every field
    def inspect
      details = { id: @id, dimensions: @dimensions, features: @features, rates: @rates }
                .map { |field, value| "#{field}=#{value.inspect}" }
                .join(' ')

      "#<#{self.class.name} #{details}>"
    end

    # @return [String] "<id> | <dimensions> | <rates> | <features>"
    def text
      format('%s | %s | %s | %s', @id, @dimensions.text, @rates.text, @features.text)
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
|
4
|
+
# The rates (street + web) for a facility
|
5
|
+
class Rates
|
6
|
+
STREET_SELECTOR = '.ptOriginalPriceSpan'
|
7
|
+
WEB_SELECTOR = '.ptDiscountPriceSpan'
|
8
|
+
VALUE_REGEX = /(?<value>[\d\.]+)/
|
9
|
+
|
10
|
+
# @attribute [rw] street
|
11
|
+
# @return [Integer]
|
12
|
+
attr_accessor :street
|
13
|
+
|
14
|
+
# @attribute [rw] web
|
15
|
+
# @return [Integer]
|
16
|
+
attr_accessor :web
|
17
|
+
|
18
|
+
# @param element [Nokogiri::XML::Element]
|
19
|
+
#
|
20
|
+
# @return [Rates]
|
21
|
+
def self.parse(element:)
|
22
|
+
street = parse_value(element: element.at_css(STREET_SELECTOR))
|
23
|
+
web = parse_value(element: element.at_css(WEB_SELECTOR))
|
24
|
+
|
25
|
+
new(street: street || web, web: web || street)
|
26
|
+
end
|
27
|
+
|
28
|
+
# @param element [Nokogiri::XML::Element]
|
29
|
+
#
|
30
|
+
# @return [Float, nil]
|
31
|
+
def self.parse_value(element:)
|
32
|
+
return if element.nil?
|
33
|
+
|
34
|
+
match = VALUE_REGEX.match(element.text)
|
35
|
+
Float(match[:value]) if match
|
36
|
+
end
|
37
|
+
|
38
|
+
# @param street [Integer]
|
39
|
+
# @param web [Integer]
|
40
|
+
def initialize(street:, web:)
|
41
|
+
@street = street
|
42
|
+
@web = web
|
43
|
+
end
|
44
|
+
|
45
|
+
# @return [String]
|
46
|
+
def inspect
|
47
|
+
props = [
|
48
|
+
"street=#{@street.inspect}",
|
49
|
+
"web=#{@web.inspect}"
|
50
|
+
]
|
51
|
+
"#<#{self.class.name} #{props.join(' ')}>"
|
52
|
+
end
|
53
|
+
|
54
|
+
# @return [String] e.g. "$80 (street) | $60 (web)"
|
55
|
+
def text
|
56
|
+
"$#{@street} (street) | $#{@web} (web)"
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
|
4
|
+
# A sitemap on nsastorage.com.
|
5
|
+
#
|
6
|
+
# e.g. https://www.nsastorage.com/sitemap-facility.xml
|
7
|
+
class Sitemap
|
8
|
+
# @attribute [rw] links
|
9
|
+
# @return [Array<Link>]
|
10
|
+
attr_accessor :links
|
11
|
+
|
12
|
+
# @param document [NokoGiri::XML::Document]
|
13
|
+
#
|
14
|
+
# @return [Sitemap]
|
15
|
+
def self.parse(document:)
|
16
|
+
links = document.xpath('//xmlns:url').map do |url|
|
17
|
+
loc = url.at_xpath('xmlns:loc')&.text
|
18
|
+
lastmod = url.at_xpath('xmlns:lastmod')&.text
|
19
|
+
Link.new(loc:, lastmod:)
|
20
|
+
end
|
21
|
+
|
22
|
+
new(links: links)
|
23
|
+
end
|
24
|
+
|
25
|
+
# @param url [String]
|
26
|
+
#
|
27
|
+
# @return [Sitemap]
|
28
|
+
def self.fetch(url:)
|
29
|
+
document = Crawler.xml(url:)
|
30
|
+
parse(document:)
|
31
|
+
end
|
32
|
+
|
33
|
+
# @param links [Array<Link>]
|
34
|
+
def initialize(links:)
|
35
|
+
@links = links
|
36
|
+
end
|
37
|
+
|
38
|
+
# @return [String]
|
39
|
+
def inspect
|
40
|
+
"#<#{self.class.name} links=#{@links.inspect}>"
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
data/lib/nsastorage.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'http'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'zeitwerk'
|
6
|
+
|
7
|
+
loader = Zeitwerk::Loader.for_gem
|
8
|
+
loader.inflector.inflect 'nsastorage' => 'NSAStorage'
|
9
|
+
loader.inflector.inflect 'cli' => 'CLI'
|
10
|
+
loader.setup
|
11
|
+
|
12
|
+
# An interface for NSAStorage.
|
13
|
+
# The top-level namespace: holds the base error class and the shared configuration.
module NSAStorage
  # Base class for the errors raised by this library.
  class Error < StandardError; end

  class << self
    # The shared configuration, built on first use and reused afterwards.
    #
    # @return [Config]
    def config
      @config ||= Config.new
    end

    # Yields the shared configuration so that it can be customized.
    #
    # @yield [config]
    # @yieldparam config [Config]
    def configure
      yield(config)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nsastorage
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kevin Sylvestre
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-12-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: http
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: json
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: optparse
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: zeitwerk
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: Uses HTTP.rb to scrape nsastorage.com.
|
84
|
+
email:
|
85
|
+
- kevin@ksylvest.com
|
86
|
+
executables:
|
87
|
+
- nsastorage
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- Gemfile
|
92
|
+
- README.md
|
93
|
+
- bin/console
|
94
|
+
- bin/setup
|
95
|
+
- exe/nsastorage
|
96
|
+
- lib/nsastorage.rb
|
97
|
+
- lib/nsastorage/address.rb
|
98
|
+
- lib/nsastorage/cli.rb
|
99
|
+
- lib/nsastorage/config.rb
|
100
|
+
- lib/nsastorage/crawl.rb
|
101
|
+
- lib/nsastorage/crawler.rb
|
102
|
+
- lib/nsastorage/dimensions.rb
|
103
|
+
- lib/nsastorage/facility.rb
|
104
|
+
- lib/nsastorage/features.rb
|
105
|
+
- lib/nsastorage/fetch_error.rb
|
106
|
+
- lib/nsastorage/geocode.rb
|
107
|
+
- lib/nsastorage/link.rb
|
108
|
+
- lib/nsastorage/price.rb
|
109
|
+
- lib/nsastorage/rates.rb
|
110
|
+
- lib/nsastorage/sitemap.rb
|
111
|
+
- lib/nsastorage/version.rb
|
112
|
+
homepage: https://github.com/ksylvest/nsastorage
|
113
|
+
licenses:
|
114
|
+
- MIT
|
115
|
+
metadata:
|
116
|
+
rubygems_mfa_required: 'true'
|
117
|
+
homepage_uri: https://github.com/ksylvest/nsastorage
|
118
|
+
source_code_uri: https://github.com/ksylvest/nsastorage
|
119
|
+
changelog_uri: https://github.com/ksylvest/nsastorage
|
120
|
+
post_install_message:
|
121
|
+
rdoc_options: []
|
122
|
+
require_paths:
|
123
|
+
- lib
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: 3.2.0
|
129
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
requirements: []
|
135
|
+
rubygems_version: 3.5.22
|
136
|
+
signing_key:
|
137
|
+
specification_version: 4
|
138
|
+
summary: A crawler for NSAStorage.
|
139
|
+
test_files: []
|