nsastorage 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +15 -0
- data/README.md +51 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/exe/nsastorage +7 -0
- data/lib/nsastorage/address.rb +61 -0
- data/lib/nsastorage/cli.rb +58 -0
- data/lib/nsastorage/config.rb +58 -0
- data/lib/nsastorage/crawl.rb +37 -0
- data/lib/nsastorage/crawler.rb +59 -0
- data/lib/nsastorage/dimensions.rb +67 -0
- data/lib/nsastorage/facility.rb +126 -0
- data/lib/nsastorage/features.rb +83 -0
- data/lib/nsastorage/fetch_error.rb +12 -0
- data/lib/nsastorage/geocode.rb +45 -0
- data/lib/nsastorage/link.rb +26 -0
- data/lib/nsastorage/price.rb +61 -0
- data/lib/nsastorage/rates.rb +59 -0
- data/lib/nsastorage/sitemap.rb +43 -0
- data/lib/nsastorage/version.rb +5 -0
- data/lib/nsastorage.rb +26 -0
- metadata +139 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ecae367c1dc75c720c89af10a90e6325bfe10c03dbfee675a125a45c70fcc6f5
|
4
|
+
data.tar.gz: dbd35a90238e4a806a9e569bae4c99f474f101d92e906eef91cec2a7c85867b2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3fa08993d81f797354e095db5f367e650a4c58c67763fa147f65de1cc4794113c7b4394bb5656219484ac62dd530c4dc70fc5b2aa86a6b826140b0c1cd767575
|
7
|
+
data.tar.gz: 2b0dbea4c879f0d44d0716772830b330d497ee1c8170969909db7dcdb375203b50672cba4d1f4611e29e3cd800e183aeee405e4f90b040b5fc7994a936691695
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# NSAStorage
|
2
|
+
|
3
|
+
[![LICENSE](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/ksylvest/nsastorage/blob/main/LICENSE)
|
4
|
+
[![RubyGems](https://img.shields.io/gem/v/nsastorage)](https://rubygems.org/gems/nsastorage)
|
5
|
+
[![GitHub](https://img.shields.io/badge/github-repo-blue.svg)](https://github.com/ksylvest/nsastorage)
|
6
|
+
[![Yard](https://img.shields.io/badge/docs-site-blue.svg)](https://nsastorage.ksylvest.com)
|
7
|
+
[![CircleCI](https://img.shields.io/circleci/build/github/ksylvest/nsastorage)](https://circleci.com/gh/ksylvest/nsastorage)
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
```bash
|
12
|
+
gem install nsastorage
|
13
|
+
```
|
14
|
+
|
15
|
+
## Configuration
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
require 'nsastorage'
|
19
|
+
|
20
|
+
NSAStorage.configure do |config|
|
21
|
+
config.user_agent = '../..' # ENV['NSASTORAGE_USER_AGENT']
|
22
|
+
config.timeout = 30 # ENV['NSASTORAGE_TIMEOUT']
|
23
|
+
config.proxy_url = 'http://user:pass@superproxy.zenrows.com:1337' # ENV['NSASTORAGE_PROXY_URL']
|
24
|
+
end
|
25
|
+
```
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
require 'nsastorage'
|
31
|
+
|
32
|
+
sitemap = NSAStorage::Facility.sitemap
|
33
|
+
sitemap.links.each do |link|
|
34
|
+
url = link.loc
|
35
|
+
facility = NSAStorage::Facility.fetch(url:)
|
36
|
+
|
37
|
+
puts facility.text
|
38
|
+
|
39
|
+
facility.prices.each do |price|
|
40
|
+
puts price.text
|
41
|
+
end
|
42
|
+
|
43
|
+
puts
|
44
|
+
end
|
45
|
+
```
|
46
|
+
|
47
|
+
## CLI
|
48
|
+
|
49
|
+
```bash
|
50
|
+
nsastorage crawl
|
51
|
+
```
|
data/bin/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'nsastorage'
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
require 'irb'
|
11
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/nsastorage
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # A facility's postal address, made up of a street, city, state and zip.
  class Address
    # @attribute [rw] street
    #   @return [String]
    attr_accessor :street

    # @attribute [rw] city
    #   @return [String]
    attr_accessor :city

    # @attribute [rw] state
    #   @return [String]
    attr_accessor :state

    # @attribute [rw] zip
    #   @return [String]
    attr_accessor :zip

    # Builds an address from a hash using the keys 'streetAddress', 'addressLocality',
    # 'addressRegion' and 'postalCode'.
    #
    # @param data [Hash]
    #
    # @return [Address]
    def self.parse(data:)
      street = data['streetAddress']
      city = data['addressLocality']
      state = data['addressRegion']
      zip = data['postalCode']

      new(street: street, city: city, state: state, zip: zip)
    end

    # @param street [String]
    # @param city [String]
    # @param state [String]
    # @param zip [String]
    def initialize(street:, city:, state:, zip:)
      @street = street
      @city = city
      @state = state
      @zip = zip
    end

    # @return [String] a debugging representation listing every attribute
    def inspect
      fields = { street: @street, city: @city, state: @state, zip: @zip }
      details = fields.map { |label, value| "#{label}=#{value.inspect}" }.join(' ')
      "#<#{self.class.name} #{details}>"
    end

    # @return [String] e.g. "7752 Jackson St, Paramount, CA 90723"
    def text
      format('%s, %s, %s %s', street, city, state, zip)
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
module NSAStorage
  # Used when interacting with the library from the command line interface (CLI).
  #
  # Usage:
  #
  #   cli = NSAStorage::CLI.new
  #   cli.parse
  class CLI
    # Process exit codes used by the CLI.
    module Code
      OK = 0
      ERROR = 1
    end

    # Parses options, then dispatches the first remaining argument as a command.
    # Every path ends the process via `exit`: `Code::OK` after a successful command,
    # `--help` or `--version`; `Code::ERROR` for an unsupported command or an
    # option that cannot be parsed.
    #
    # @param argv [Array<String>] mutated in place (options and the command are removed)
    def parse(argv = ARGV)
      parse_options!(argv)
      command = argv.shift

      case command
      when 'crawl' then crawl
      else
        warn("unsupported command=#{command.inspect}")
        exit(Code::ERROR)
      end
    end

    private

    # Strips recognized options from argv. An unparseable option is reported on
    # stderr and ends the process with an error code instead of surfacing a backtrace.
    #
    # @param argv [Array<String>]
    def parse_options!(argv)
      parser.parse!(argv)
    rescue OptionParser::ParseError => e
      warn(e.message)
      exit(Code::ERROR)
    end

    def crawl
      Crawl.run
      exit(Code::OK)
    end

    # @param options [OptionParser] printed as the usage summary
    def help(options)
      puts(options)
      exit(Code::OK)
    end

    def version
      puts(VERSION)
      exit(Code::OK)
    end

    # @return [OptionParser]
    def parser
      OptionParser.new do |options|
        options.banner = 'usage: nsastorage [options] <command> [<args>]'

        options.on('-h', '--help', 'help') { help(options) }
        options.on('-v', '--version', 'version') { version }
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # The core configuration. Defaults are read from NSASTORAGE_* environment
  # variables when the config is built and may be overridden via the accessors.
  class Config
    # @attribute [rw] accept_language
    #   @return [String]
    attr_accessor :accept_language

    # @attribute [rw] user_agent
    #   @return [String]
    attr_accessor :user_agent

    # @attribute [rw] timeout
    #   @return [Integer, nil]
    attr_accessor :timeout

    # @attribute [rw] proxy_url
    #   @return [String, nil]
    attr_accessor :proxy_url

    def initialize
      @accept_language = ENV.fetch('NSASTORAGE_ACCEPT_LANGUAGE', 'en-US,en;q=0.9')
      @user_agent = ENV.fetch('NSASTORAGE_USER_AGENT', "nsastorage.rb/#{VERSION}")
      @timeout = Integer(ENV.fetch('NSASTORAGE_TIMEOUT', 60))
      @proxy_url = ENV.fetch('NSASTORAGE_PROXY_URL', nil)
    end

    # @return [Boolean] true when a user agent is configured
    def headers?
      !@user_agent.nil?
    end

    # A timeout of nil or zero both mean "no timeout"; nil is tolerated so that
    # `config.timeout = nil` does not raise here.
    #
    # @return [Boolean]
    def timeout?
      !(@timeout.nil? || @timeout.zero?)
    end

    # @return [Boolean] true when a proxy URL is configured
    def proxy?
      !@proxy_url.nil?
    end

    # @return [Hash<String, String>] e.g { 'Accept-Language' => 'en-US', 'User-Agent' => 'nsastorage.rb/1.0.0' }
    def headers
      {
        'Accept-Language' => @accept_language,
        'User-Agent' => @user_agent
      }
    end

    # Splits the proxy URL into its parts. Call only when `proxy?` is true, since
    # a nil proxy URL cannot be parsed.
    #
    # @return [Array] e.g. ['proxy.example.com', 8080, 'user', 'pass']
    def via
      proxy_uri = URI.parse(@proxy_url)
      [proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password]
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # Handles the crawl command via CLI: walks the facility sitemap and prints each
  # facility and its prices.
  class Crawl
    # Convenience wrapper that builds a crawl with the given arguments and runs it.
    def self.run(...)
      new(...).run
    end

    # @param stdout [IO] optional
    # @param stderr [IO] optional
    # @param options [Hash] optional
    def initialize(stdout: $stdout, stderr: $stderr, options: {})
      @stdout = stdout
      @stderr = stderr
      @options = options
    end

    # Prints the number of sitemap links, a blank line, then processes every link in order.
    def run
      links = Facility.sitemap.links
      @stdout.puts("count=#{links.count}")
      @stdout.puts

      links.each do |link|
        process(url: link.loc)
      end
    end

    # Prints the URL, the facility summary and one line per price, followed by a
    # blank line. A FetchError is reported on stderr rather than aborting the crawl.
    #
    # @param url [String]
    def process(url:)
      begin
        @stdout.puts(url)
        facility = Facility.fetch(url: url)
        @stdout.puts(facility.text)
        facility.prices.each do |price|
          @stdout.puts(price.text)
        end
        @stdout.puts
      rescue FetchError => e
        @stderr.puts("url=#{url} error=#{e.message}")
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # Used to fetch and parse either HTML or XML via a URL.
  class Crawler
    HOST = 'https://www.nsastorage.com'

    # Fetches the URL with a fresh crawler and parses the body as HTML.
    #
    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::HTML::Document]
    def self.html(url:)
      new.html(url: url)
    end

    # Fetches the URL with a fresh crawler and parses the body as XML.
    #
    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::XML::Document]
    def self.xml(url:)
      new.xml(url: url)
    end

    # Lazily builds (and memoizes) a persistent client for HOST, applying the
    # configured headers, timeout and proxy only when each is enabled.
    #
    # NOTE(review): nothing visible here closes the persistent connection — confirm
    # whether callers are expected to.
    #
    # @return [HTTP::Client]
    def connection
      return @connection if @connection

      config = NSAStorage.config
      client = HTTP.use(:auto_deflate).use(:auto_inflate).persistent(HOST)
      client = client.headers(config.headers) if config.headers?
      client = client.timeout(config.timeout) if config.timeout?
      client = client.via(*config.via) if config.proxy?

      @connection = client
    end

    # Performs a GET and returns the response when its status is OK.
    #
    # @param url [String]
    # @raise [FetchError] for any non-OK status (the response is flushed first)
    # @return [HTTP::Response]
    def fetch(url:)
      response = connection.get(url)
      return response if response.status.ok?

      raise FetchError.new(url: url, response: response.flush)
    end

    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::HTML::Document]
    def html(url:)
      body = String(fetch(url: url).body)
      Nokogiri::HTML(body)
    end

    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::XML::Document]
    def xml(url:)
      body = String(fetch(url: url).body)
      Nokogiri::XML(body)
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # The dimensions (width + depth + height) of a price, with derived square and cubic footage.
  class Dimensions
    DEFAULT_HEIGHT = 8.0 # feet

    # @attribute [rw] depth
    #   @return [Float]
    attr_accessor :depth

    # @attribute [rw] width
    #   @return [Float]
    attr_accessor :width

    # @attribute [rw] height
    #   @return [Float]
    attr_accessor :height

    # Extracts the first "<width>'x<depth>'" pair found in the element's text.
    # The height is always DEFAULT_HEIGHT.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @raise [RuntimeError] with the inspected text when no dimensions are found
    #
    # @return [Dimensions]
    def self.parse(element:)
      label = element.text
      found = /(?<width>[\d\.]+)'x(?<depth>[\d\.]+)'/.match(label)
      raise label.inspect unless found

      width, depth = %i[width depth].map { |key| Float(found[key]) }
      new(depth: depth, width: width, height: DEFAULT_HEIGHT)
    end

    # @param depth [Float]
    # @param width [Float]
    # @param height [Float]
    def initialize(depth:, width:, height: DEFAULT_HEIGHT)
      @depth = depth
      @width = width
      @height = height
    end

    # @return [String]
    def inspect
      fields = { depth: @depth, width: @width, height: @height }
      details = fields.map { |label, value| "#{label}=#{value.inspect}" }.join(' ')
      "#<#{self.class.name} #{details}>"
    end

    # @return [Integer] width × depth, converted with Integer()
    def sqft
      footprint = @width * @depth
      Integer(footprint)
    end

    # @return [Integer] width × depth × height, converted with Integer()
    def cuft
      footprint = @width * @depth
      Integer(footprint * @height)
    end

    # @return [String] e.g. "10' × 10' (100 sqft)"
    def text
      format("%g' × %g' (%d sqft)", @width, @depth, sqft)
    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # A facility (address + geocode + prices) on nsastorage.com.
  #
  # e.g. https://www.nsastorage.com/storage/california/storage-units-paramount/7752-Jackson-St-876
  class Facility
    # Raised when a facility page or URL lacks the data needed to build a facility.
    class ParseError < StandardError; end

    DEFAULT_EMAIL = 'TODO'
    DEFAULT_PHONE = 'TODO'

    SITEMAP_URL = 'https://www.nsastorage.com/sitemap.xml'

    # Captures the first run of digits that directly follows a '/'.
    # NOTE(review): for the example URL above this yields "7752", not the trailing
    # "876" — confirm which of the two is the intended facility id.
    ID_REGEX = %r{/(?<id>\d+)}

    # @attribute [rw] id
    #   @return [String]
    attr_accessor :id

    # @attribute [rw] url
    #   @return [String]
    attr_accessor :url

    # @attribute [rw] name
    #   @return [String]
    attr_accessor :name

    # @attribute [rw] phone
    #   @return [String]
    attr_accessor :phone

    # @attribute [rw] email
    #   @return [String]
    attr_accessor :email

    # @attribute [rw] address
    #   @return [Address]
    attr_accessor :address

    # @attribute [rw] geocode
    #   @return [Geocode]
    attr_accessor :geocode

    # @attribute [rw] prices
    #   @return [Array<Price>]
    attr_accessor :prices

    # @return [Sitemap]
    def self.sitemap
      Sitemap.fetch(url: SITEMAP_URL)
    end

    # @param url [String]
    #
    # @raise [FetchError]
    # @raise [ParseError]
    #
    # @return [Facility]
    def self.fetch(url:)
      document = Crawler.html(url: url)
      parse(url: url, document: document)
    end

    # Builds a facility from the page's 'SelfStorage' JSON-LD entry and the id in
    # the URL. Phone, email and prices are not parsed here and keep their defaults.
    #
    # @param url [String]
    # @param document [Nokogiri::HTML::Document]
    #
    # @raise [ParseError] when the JSON-LD entry or the id is missing
    #
    # @return [Facility]
    def self.parse(url:, document:)
      data = parse_json_ld(document: document)

      match = ID_REGEX.match(url)
      raise ParseError, "missing id in url=#{url.inspect}" if match.nil?

      address = Address.parse(data: data['address'])
      # NOTE(review): the geocode is read from the same 'address' hash as the
      # address — confirm that is where the page puts latitude/longitude.
      geocode = Geocode.parse(data: data['address'])

      new(id: match[:id], url: url, name: data['name'], address: address, geocode: geocode)
    end

    # Finds the JSON-LD entry whose '@type' is 'SelfStorage'. A script may hold
    # either a single object or an array of objects; non-object entries are skipped.
    #
    # @param document [Nokogiri::HTML::Document]
    #
    # @raise [ParseError] when no such entry exists
    #
    # @return [Hash]
    def self.parse_json_ld(document:)
      data = document
        .xpath('//script[@type="application/ld+json"]')
        .flat_map { |script| [JSON.parse(script.text)].flatten }
        .find { |entry| entry.is_a?(Hash) && entry['@type'] == 'SelfStorage' }
      raise ParseError, 'missing JSON-LD entry with @type="SelfStorage"' if data.nil?

      data
    end

    # @param id [String]
    # @param url [String]
    # @param name [String]
    # @param address [Address]
    # @param geocode [Geocode]
    # @param phone [String]
    # @param email [String]
    # @param prices [Array<Price>]
    def initialize(id:, url:, name:, address:, geocode:, phone: DEFAULT_PHONE, email: DEFAULT_EMAIL, prices: [])
      @id = id
      @url = url
      @name = name
      @address = address
      @geocode = geocode
      @phone = phone
      @email = email
      @prices = prices
    end

    # @return [String]
    def inspect
      props = [
        "id=#{@id.inspect}",
        "url=#{@url.inspect}",
        "address=#{@address.inspect}",
        "geocode=#{@geocode.inspect}",
        "phone=#{@phone.inspect}",
        "email=#{@email.inspect}",
        "prices=#{@prices.inspect}"
      ]
      "#<#{self.class.name} #{props.join(' ')}>"
    end

    # @return [String] id, name, phone, email, address and geocode joined by " | "
    def text
      "#{@id} | #{@name} | #{@phone} | #{@email} | #{@address.text} | #{@geocode.text}"
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # The features (e.g. climate-controlled, inside-drive-up-access, outside-drive-up-access, etc) of a price.
  class Features
    # Detects each feature by looking for its label in the element's text.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @return [Features]
    def self.parse(element:)
      content = element.text
      climate = content.include?('Climate controlled')
      inside = content.include?('Inside drive-up access')
      outside = content.include?('Outside drive-up access')
      first_floor = content.include?('1st floor access')

      new(
        climate_controlled: climate,
        inside_drive_up_access: inside,
        outside_drive_up_access: outside,
        first_floor_access: first_floor
      )
    end

    # @param climate_controlled [Boolean]
    # @param inside_drive_up_access [Boolean]
    # @param outside_drive_up_access [Boolean]
    # @param first_floor_access [Boolean]
    def initialize(climate_controlled:, inside_drive_up_access:, outside_drive_up_access:, first_floor_access:)
      @climate_controlled = climate_controlled
      @inside_drive_up_access = inside_drive_up_access
      @outside_drive_up_access = outside_drive_up_access
      @first_floor_access = first_floor_access
    end

    # @return [String]
    def inspect
      flags = {
        climate_controlled: @climate_controlled,
        inside_drive_up_access: @inside_drive_up_access,
        outside_drive_up_access: @outside_drive_up_access,
        first_floor_access: @first_floor_access
      }
      details = flags.map { |label, value| "#{label}=#{value}" }.join(' ')

      "#<#{self.class.name} #{details}>"
    end

    # @return [String] e.g. "Climate Controlled + First Floor Access"
    def text
      amenities.join(' + ')
    end

    # @return [Array<String>] display names of the enabled features, in a fixed order
    def amenities
      {
        'Climate Controlled' => climate_controlled?,
        'Inside Drive-Up Access' => inside_drive_up_access?,
        'Outside Drive-Up Access' => outside_drive_up_access?,
        'First Floor Access' => first_floor_access?
      }.select { |_label, enabled| enabled }.keys
    end

    # @return [Boolean]
    def climate_controlled?
      @climate_controlled
    end

    # @return [Boolean]
    def inside_drive_up_access?
      @inside_drive_up_access
    end

    # @return [Boolean]
    def outside_drive_up_access?
      @outside_drive_up_access
    end

    # @return [Boolean] true when either kind of drive-up access is available
    def drive_up_access?
      inside_drive_up_access? || outside_drive_up_access?
    end

    # @return [Boolean]
    def first_floor_access?
      @first_floor_access
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # Raised for unexpected HTTP responses.
  class FetchError < Error
    # Builds a message containing the URL, the inspected status and the inspected body.
    #
    # @param url [String]
    # @param response [HTTP::Response]
    def initialize(url:, response:)
      status = response.status.inspect
      body = String(response.body).inspect

      super("url=#{url} status=#{status} body=#{body}")
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # The geocode (latitude + longitude) of a facility.
  class Geocode
    # @attribute [rw] latitude
    #   @return [Float]
    attr_accessor :latitude

    # @attribute [rw] longitude
    #   @return [Float]
    attr_accessor :longitude

    # Reads the 'latitude' and 'longitude' keys as-is (no numeric conversion is applied).
    #
    # @param data [Hash]
    #
    # @return [Geocode]
    def self.parse(data:)
      latitude = data['latitude']
      longitude = data['longitude']

      new(latitude: latitude, longitude: longitude)
    end

    # @param latitude [Float]
    # @param longitude [Float]
    def initialize(latitude:, longitude:)
      @latitude = latitude
      @longitude = longitude
    end

    # @return [String]
    def inspect
      fields = { latitude: @latitude, longitude: @longitude }
      details = fields.map { |label, value| "#{label}=#{value.inspect}" }.join(' ')
      "#<#{self.class.name} #{details}>"
    end

    # @return [String] e.g. "33.89,-118.16"
    def text
      format('%s,%s', @latitude, @longitude)
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # A link in a sitemap.
  class Link
    # @attribute [rw] loc
    #   @return [String]
    attr_accessor :loc

    # @attribute [rw] lastmod
    #   @return [Time, nil] nil when the sitemap entry carried no modification time
    attr_accessor :lastmod

    # @param loc [String]
    # @param lastmod [String, nil] a timestamp understood by Time.parse; nil or blank
    #   is accepted because the sitemap parser passes nil when the entry has no
    #   lastmod element
    def initialize(loc:, lastmod:)
      @loc = loc

      stamp = lastmod.to_s.strip
      @lastmod = stamp.empty? ? nil : Time.parse(stamp)
    end

    # @return [String]
    def inspect
      "#<#{self.class.name} loc=#{@loc.inspect} lastmod=#{@lastmod.inspect}>"
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # The price (id + dimensions + features + rates) for a facility.
  class Price
    # @attribute [rw] id
    #   @return [String]
    attr_accessor :id

    # @attribute [rw] dimensions
    #   @return [Dimensions]
    attr_accessor :dimensions

    # @attribute [rw] features
    #   @return [Features]
    attr_accessor :features

    # @attribute [rw] rates
    #   @return [Rates]
    attr_accessor :rates

    # Builds a price from a single element: the id comes from its 'id' attribute,
    # and the same element is handed to the dimensions, features and rates parsers.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @return [Price]
    def self.parse(element:)
      id = element.attr('id')
      dimensions = Dimensions.parse(element: element)
      features = Features.parse(element: element)
      rates = Rates.parse(element: element)

      new(id: id, dimensions: dimensions, features: features, rates: rates)
    end

    # @param id [String]
    # @param dimensions [Dimensions]
    # @param features [Features]
    # @param rates [Rates]
    def initialize(id:, dimensions:, features:, rates:)
      @id = id
      @dimensions = dimensions
      @features = features
      @rates = rates
    end

    # @return [String]
    def inspect
      fields = { id: @id, dimensions: @dimensions, features: @features, rates: @rates }
      details = fields.map { |label, value| "#{label}=#{value.inspect}" }.join(' ')
      "#<#{self.class.name} #{details}>"
    end

    # @return [String] the id, dimensions text, rates text and features text joined by " | "
    def text
      format('%s | %s | %s | %s', @id, @dimensions.text, @rates.text, @features.text)
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # The rates (street + web) for a facility
  class Rates
    STREET_SELECTOR = '.ptOriginalPriceSpan'
    WEB_SELECTOR = '.ptDiscountPriceSpan'

    # Matches the first number in a string. It must start with a digit (so stray
    # punctuation such as the '.' in "approx." is never captured on its own), may
    # contain thousands separators ("1,234") and may end with a decimal part.
    VALUE_REGEX = /(?<value>\d+(?:,\d{3})*(?:\.\d+)?)/

    # @attribute [rw] street
    #   @return [Float, nil]
    attr_accessor :street

    # @attribute [rw] web
    #   @return [Float, nil]
    attr_accessor :web

    # Reads the street and web rates from their respective child elements. When
    # only one of the two is present it is used for both.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @return [Rates]
    def self.parse(element:)
      street = parse_value(element: element.at_css(STREET_SELECTOR))
      web = parse_value(element: element.at_css(WEB_SELECTOR))

      new(street: street || web, web: web || street)
    end

    # @param element [Nokogiri::XML::Element, nil]
    #
    # @return [Float, nil] nil when the element is missing or its text holds no number
    def self.parse_value(element:)
      return if element.nil?

      match = VALUE_REGEX.match(element.text)
      # Thousands separators are removed because Float() rejects them.
      Float(match[:value].delete(',')) if match
    end

    # @param street [Float, nil]
    # @param web [Float, nil]
    def initialize(street:, web:)
      @street = street
      @web = web
    end

    # @return [String]
    def inspect
      props = [
        "street=#{@street.inspect}",
        "web=#{@web.inspect}"
      ]
      "#<#{self.class.name} #{props.join(' ')}>"
    end

    # @return [String] e.g. "$80.0 (street) | $60.0 (web)"
    def text
      "$#{@street} (street) | $#{@web} (web)"
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module NSAStorage
  # A sitemap on nsastorage.com.
  #
  # e.g. https://www.nsastorage.com/sitemap-facility.xml
  class Sitemap
    # @attribute [rw] links
    #   @return [Array<Link>]
    attr_accessor :links

    # Downloads the XML at the URL and parses it into a sitemap.
    #
    # @param url [String]
    #
    # @return [Sitemap]
    def self.fetch(url:)
      parse(document: Crawler.xml(url: url))
    end

    # Builds one Link per `url` element of the document's default namespace. A
    # missing `loc` or `lastmod` child is passed to the link as nil.
    #
    # @param document [Nokogiri::XML::Document]
    #
    # @return [Sitemap]
    def self.parse(document:)
      entries = document.xpath('//xmlns:url')
      links = entries.map do |entry|
        Link.new(
          loc: entry.at_xpath('xmlns:loc')&.text,
          lastmod: entry.at_xpath('xmlns:lastmod')&.text
        )
      end

      new(links: links)
    end

    # @param links [Array<Link>]
    def initialize(links:)
      @links = links
    end

    # @return [String]
    def inspect
      format('#<%s links=%s>', self.class.name, @links.inspect)
    end
  end
end
|
data/lib/nsastorage.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'http'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'zeitwerk'
|
6
|
+
|
7
|
+
loader = Zeitwerk::Loader.for_gem
|
8
|
+
loader.inflector.inflect 'nsastorage' => 'NSAStorage'
|
9
|
+
loader.inflector.inflect 'cli' => 'CLI'
|
10
|
+
loader.setup
|
11
|
+
|
12
|
+
# An interface for NSAStorage.
|
13
|
+
module NSAStorage
  # Base class for errors raised by this library.
  class Error < StandardError; end

  class << self
    # The shared configuration, built on first use and reused afterwards.
    #
    # @return [Config]
    def config
      @config ||= Config.new
    end

    # Yields the shared configuration so that it can be customized.
    #
    # @yield [config]
    # @yieldparam config [Config]
    def configure
      yield config
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nsastorage
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kevin Sylvestre
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-12-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: http
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: json
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: optparse
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: zeitwerk
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: Uses HTTP.rb to scrape nsastorage.com.
|
84
|
+
email:
|
85
|
+
- kevin@ksylvest.com
|
86
|
+
executables:
|
87
|
+
- nsastorage
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- Gemfile
|
92
|
+
- README.md
|
93
|
+
- bin/console
|
94
|
+
- bin/setup
|
95
|
+
- exe/nsastorage
|
96
|
+
- lib/nsastorage.rb
|
97
|
+
- lib/nsastorage/address.rb
|
98
|
+
- lib/nsastorage/cli.rb
|
99
|
+
- lib/nsastorage/config.rb
|
100
|
+
- lib/nsastorage/crawl.rb
|
101
|
+
- lib/nsastorage/crawler.rb
|
102
|
+
- lib/nsastorage/dimensions.rb
|
103
|
+
- lib/nsastorage/facility.rb
|
104
|
+
- lib/nsastorage/features.rb
|
105
|
+
- lib/nsastorage/fetch_error.rb
|
106
|
+
- lib/nsastorage/geocode.rb
|
107
|
+
- lib/nsastorage/link.rb
|
108
|
+
- lib/nsastorage/price.rb
|
109
|
+
- lib/nsastorage/rates.rb
|
110
|
+
- lib/nsastorage/sitemap.rb
|
111
|
+
- lib/nsastorage/version.rb
|
112
|
+
homepage: https://github.com/ksylvest/nsastorage
|
113
|
+
licenses:
|
114
|
+
- MIT
|
115
|
+
metadata:
|
116
|
+
rubygems_mfa_required: 'true'
|
117
|
+
homepage_uri: https://github.com/ksylvest/nsastorage
|
118
|
+
source_code_uri: https://github.com/ksylvest/nsastorage
|
119
|
+
changelog_uri: https://github.com/ksylvest/nsastorage
|
120
|
+
post_install_message:
|
121
|
+
rdoc_options: []
|
122
|
+
require_paths:
|
123
|
+
- lib
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: 3.2.0
|
129
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
requirements: []
|
135
|
+
rubygems_version: 3.5.22
|
136
|
+
signing_key:
|
137
|
+
specification_version: 4
|
138
|
+
summary: A crawler for NSAStorage.
|
139
|
+
test_files: []
|