publicstorage 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +19 -2
- data/lib/publicstorage/cli.rb +9 -3
- data/lib/publicstorage/config.rb +19 -0
- data/lib/publicstorage/crawl.rb +40 -0
- data/lib/publicstorage/crawler.rb +14 -9
- data/lib/publicstorage/dimensions.rb +3 -3
- data/lib/publicstorage/fetch_error.rb +12 -0
- data/lib/publicstorage/price.rb +6 -2
- data/lib/publicstorage/rates.rb +4 -7
- data/lib/publicstorage/version.rb +1 -1
- data/lib/publicstorage.rb +12 -0
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 385e04642b7995999e422333ae7c251c4e3556ae9958b0df0f35a387ca35297a
|
4
|
+
data.tar.gz: 0c99f850bedd80f482187364625aa583518768ad6fc7f4b749f5dafdfe0871d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25af8483c84aded81acac087be7e7bf7c42a404200d8d1091b7f32184de6a61fd6abccdff9cd4d6b2efcf582f3619c37250adeb21440232d3afa90c685ea2894
|
7
|
+
data.tar.gz: d623ad587cb3c5b3d33aad2221002a411211f1cc5d39741b86e1ffa9f64732bc1ba9220a72e7e05bec40e852589a8ce09097a89ac22db489bda30df7776d3322
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Public Storage
|
2
2
|
|
3
3
|
[](https://github.com/ksylvest/publicstorage/blob/main/LICENSE)
|
4
4
|
[](https://rubygems.org/gems/publicstorage)
|
@@ -6,12 +6,25 @@
|
|
6
6
|
[](https://publicstorage.ksylvest.com)
|
7
7
|
[](https://circleci.com/gh/ksylvest/publicstorage)
|
8
8
|
|
9
|
+
A Ruby library offering both a CLI and API for scraping [Public Storage](https://www.publicstorage.com/) self-storage facilities and prices.
|
10
|
+
|
9
11
|
## Installation
|
10
12
|
|
11
13
|
```bash
|
12
14
|
gem install publicstorage
|
13
15
|
```
|
14
16
|
|
17
|
+
## Configuration
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
require 'publicstorage'
|
21
|
+
|
22
|
+
PublicStorage.configure do |config|
|
23
|
+
config.user_agent = '../..' # ENV['PUBLICSTORAGE_USER_AGENT']
|
24
|
+
config.timeout = 30 # ENV['PUBLICSTORAGE_TIMEOUT']
|
25
|
+
end
|
26
|
+
```
|
27
|
+
|
15
28
|
## Usage
|
16
29
|
|
17
30
|
```ruby
|
@@ -20,7 +33,7 @@ require 'publicstorage'
|
|
20
33
|
sitemap = PublicStorage::Facility.sitemap
|
21
34
|
sitemap.links.each do |link|
|
22
35
|
url = link.loc
|
23
|
-
facility =
|
36
|
+
facility = PublicStorage::Facility.fetch(url:)
|
24
37
|
|
25
38
|
puts facility.text
|
26
39
|
|
@@ -37,3 +50,7 @@ end
|
|
37
50
|
```bash
|
38
51
|
publicstorage crawl
|
39
52
|
```
|
53
|
+
|
54
|
+
```bash
|
55
|
+
publicstorage crawl "https://www.publicstorage.com/self-storage-ca-venice/120.html"
|
56
|
+
```
|
data/lib/publicstorage/cli.rb
CHANGED
@@ -21,7 +21,7 @@ module PublicStorage
|
|
21
21
|
command = argv.shift
|
22
22
|
|
23
23
|
case command
|
24
|
-
when 'crawl' then crawl
|
24
|
+
when 'crawl' then crawl(*argv)
|
25
25
|
else
|
26
26
|
warn("unsupported command=#{command.inspect}")
|
27
27
|
exit(Code::ERROR)
|
@@ -30,8 +30,9 @@ module PublicStorage
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
|
34
|
-
|
33
|
+
# @url [String] optional
|
34
|
+
def crawl(url = nil)
|
35
|
+
Crawl.run(url: url)
|
35
36
|
exit(Code::OK)
|
36
37
|
end
|
37
38
|
|
@@ -52,6 +53,11 @@ module PublicStorage
|
|
52
53
|
|
53
54
|
options.on('-h', '--help', 'help') { help(options) }
|
54
55
|
options.on('-v', '--version', 'version') { version }
|
56
|
+
|
57
|
+
options.separator <<~COMMANDS
|
58
|
+
commands:
|
59
|
+
crawl [url]
|
60
|
+
COMMANDS
|
55
61
|
end
|
56
62
|
end
|
57
63
|
end
|
data/lib/publicstorage/config.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PublicStorage
|
4
|
+
# The core configuration.
|
5
|
+
class Config
|
6
|
+
# @attribute [rw] user_agent
|
7
|
+
# @return [String]
|
8
|
+
attr_accessor :user_agent
|
9
|
+
|
10
|
+
# @attribute [rw] timeout
|
11
|
+
# @return [Integer]
|
12
|
+
attr_accessor :timeout
|
13
|
+
|
14
|
+
def initialize
|
15
|
+
@user_agent = ENV.fetch('PUBLICSTORAGE_USER_AGENT', "publicstorage.rb/#{VERSION}")
|
16
|
+
@timeout = Integer(ENV.fetch('PUBLICSTORAGE_TIMEOUT', 60))
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/publicstorage/crawl.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PublicStorage
|
4
|
+
# Handles the crawl command via CLI.
|
5
|
+
class Crawl
|
6
|
+
def self.run(...)
|
7
|
+
new(...).run
|
8
|
+
end
|
9
|
+
|
10
|
+
# @param stdout [IO] optional
|
11
|
+
# @param stderr [IO] optional
|
12
|
+
# @param url [String] optional
|
13
|
+
def initialize(stdout: $stdout, stderr: $stderr, url: nil)
|
14
|
+
@stdout = stdout
|
15
|
+
@stderr = stderr
|
16
|
+
@url = url
|
17
|
+
end
|
18
|
+
|
19
|
+
def run
|
20
|
+
if @url
|
21
|
+
process(url: @url)
|
22
|
+
else
|
23
|
+
sitemap = Facility.sitemap
|
24
|
+
@stdout.puts("count=#{sitemap.links.count}")
|
25
|
+
@stdout.puts
|
26
|
+
sitemap.links.each { |link| process(url: link.loc) }
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def process(url:)
|
31
|
+
@stdout.puts(url)
|
32
|
+
facility = Facility.fetch(url: url)
|
33
|
+
@stdout.puts(facility.text)
|
34
|
+
facility.prices.each { |price| @stdout.puts(price.text) }
|
35
|
+
@stdout.puts
|
36
|
+
rescue FetchError => e
|
37
|
+
@stderr.puts("url=#{url} error=#{e.message}")
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/publicstorage/crawler.rb
CHANGED
@@ -3,14 +3,7 @@
|
|
3
3
|
module PublicStorage
|
4
4
|
# Used to fetch and parse either HTML or XML via a URL.
|
5
5
|
class Crawler
|
6
|
-
|
7
|
-
class FetchError < StandardError
|
8
|
-
# @param url [String]
|
9
|
-
# @param response [HTTP::Response]
|
10
|
-
def initialize(url:, response:)
|
11
|
-
super("url=#{url} status=#{response.status.inspect} body=#{response.body.inspect}")
|
12
|
-
end
|
13
|
-
end
|
6
|
+
HOST = 'https://www.publicstorage.com'
|
14
7
|
|
15
8
|
# @param url [String]
|
16
9
|
# @raise [FetchError]
|
@@ -26,10 +19,22 @@ module PublicStorage
|
|
26
19
|
new.xml(url:)
|
27
20
|
end
|
28
21
|
|
22
|
+
# @return [HTTP::Client]
|
23
|
+
def connection
|
24
|
+
@connection ||= begin
|
25
|
+
config = PublicStorage.config
|
26
|
+
|
27
|
+
connection = HTTP.persistent(HOST)
|
28
|
+
connection = connection.headers('User-Agent' => config.user_agent) if config.user_agent
|
29
|
+
connection = connection.timeout(config.timeout) if config.timeout
|
30
|
+
connection
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
29
34
|
# @param url [String]
|
30
35
|
# @return [HTTP::Response]
|
31
36
|
def fetch(url:)
|
32
|
-
response =
|
37
|
+
response = connection.get(url)
|
33
38
|
raise FetchError.new(url:, response: response.flush) unless response.status.ok?
|
34
39
|
|
35
40
|
response
|
data/lib/publicstorage/dimensions.rb
CHANGED
@@ -41,11 +41,11 @@
|
|
41
41
|
"#{format('%g', @width)}' × #{format('%g', @depth)}' (#{@sqft} sqft)"
|
42
42
|
end
|
43
43
|
|
44
|
-
# @param
|
44
|
+
# @param data [Hash]
|
45
45
|
#
|
46
46
|
# @return [Dimensions]
|
47
|
-
def self.parse(
|
48
|
-
match =
|
47
|
+
def self.parse(data:)
|
48
|
+
match = data['dimension'].match(/(?<depth>[\d\.]+)'x(?<width>[\d\.]+)'/)
|
49
49
|
depth = Float(match[:depth])
|
50
50
|
width = Float(match[:width])
|
51
51
|
sqft = Integer(depth * width)
|
data/lib/publicstorage/fetch_error.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PublicStorage
|
4
|
+
# Raised for unexpected HTTP responses.
|
5
|
+
class FetchError < StandardError
|
6
|
+
# @param url [String]
|
7
|
+
# @param response [HTTP::Response]
|
8
|
+
def initialize(url:, response:)
|
9
|
+
super("url=#{url} status=#{response.status.inspect} body=#{response.body.inspect}")
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
data/lib/publicstorage/price.rb
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
module PublicStorage
|
4
4
|
# The price (id + dimensions + rate) for a facility
|
5
5
|
class Price
|
6
|
+
GTM_SELECTOR = 'button[data-gtmdata]'
|
7
|
+
|
6
8
|
# @attribute [rw] id
|
7
9
|
# @return [String]
|
8
10
|
attr_accessor :id
|
@@ -43,8 +45,10 @@ module PublicStorage
|
|
43
45
|
#
|
44
46
|
# @return [Price]
|
45
47
|
def self.parse(element:)
|
46
|
-
|
47
|
-
|
48
|
+
data = JSON.parse(element.at(GTM_SELECTOR).attribute('data-gtmdata'))
|
49
|
+
|
50
|
+
rates = Rates.parse(data:)
|
51
|
+
dimensions = Dimensions.parse(data:)
|
48
52
|
|
49
53
|
new(
|
50
54
|
id: element.attr('data-unitid'),
|
data/lib/publicstorage/rates.rb
CHANGED
@@ -3,9 +3,6 @@
|
|
3
3
|
module PublicStorage
|
4
4
|
# The rates (street + web) for a facility
|
5
5
|
class Rates
|
6
|
-
STREET_SELECTOR = '.unit-prices .unit-pricing .unit-strike-through-price'
|
7
|
-
WEB_SELECTOR = '.unit-prices .unit-pricing .unit-price'
|
8
|
-
|
9
6
|
# @attribute [rw] street
|
10
7
|
# @return [Integer]
|
11
8
|
attr_accessor :street
|
@@ -35,12 +32,12 @@ module PublicStorage
|
|
35
32
|
"$#{@street} (street) | $#{@web} (web)"
|
36
33
|
end
|
37
34
|
|
38
|
-
# @param
|
35
|
+
# @param data [Hash]
|
39
36
|
#
|
40
37
|
# @return [Rates]
|
41
|
-
def self.parse(
|
42
|
-
street =
|
43
|
-
web =
|
38
|
+
def self.parse(data:)
|
39
|
+
street = data['listprice']
|
40
|
+
web = data['saleprice']
|
44
41
|
new(street:, web:)
|
45
42
|
end
|
46
43
|
end
|
data/lib/publicstorage.rb
CHANGED
@@ -9,6 +9,18 @@ loader.inflector.inflect 'publicstorage' => 'PublicStorage'
|
|
9
9
|
loader.inflector.inflect 'cli' => 'CLI'
|
10
10
|
loader.setup
|
11
11
|
|
12
|
+
# An interface for PublicStorage.
|
12
13
|
module PublicStorage
|
13
14
|
class Error < StandardError; end
|
15
|
+
|
16
|
+
# @return [Config]
|
17
|
+
def self.config
|
18
|
+
@config ||= Config.new
|
19
|
+
end
|
20
|
+
|
21
|
+
# @yield [config]
|
22
|
+
# @yieldparam config [Config]
|
23
|
+
def self.configure
|
24
|
+
yield config
|
25
|
+
end
|
14
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: publicstorage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin Sylvestre
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|
@@ -82,9 +82,12 @@ files:
|
|
82
82
|
- lib/publicstorage.rb
|
83
83
|
- lib/publicstorage/address.rb
|
84
84
|
- lib/publicstorage/cli.rb
|
85
|
+
- lib/publicstorage/config.rb
|
86
|
+
- lib/publicstorage/crawl.rb
|
85
87
|
- lib/publicstorage/crawler.rb
|
86
88
|
- lib/publicstorage/dimensions.rb
|
87
89
|
- lib/publicstorage/facility.rb
|
90
|
+
- lib/publicstorage/fetch_error.rb
|
88
91
|
- lib/publicstorage/geocode.rb
|
89
92
|
- lib/publicstorage/link.rb
|
90
93
|
- lib/publicstorage/price.rb
|
@@ -97,8 +100,9 @@ licenses:
|
|
97
100
|
metadata:
|
98
101
|
rubygems_mfa_required: 'true'
|
99
102
|
homepage_uri: https://github.com/ksylvest/publicstorage
|
100
|
-
source_code_uri: https://github.com/ksylvest/publicstorage
|
101
|
-
changelog_uri: https://github.com/ksylvest/publicstorage
|
103
|
+
source_code_uri: https://github.com/ksylvest/publicstorage/tree/v1.0.0
|
104
|
+
changelog_uri: https://github.com/ksylvest/publicstorage/releases/tag/v1.0.0
|
105
|
+
documentation_uri: https://publicstorage.ksylvest.com/
|
102
106
|
post_install_message:
|
103
107
|
rdoc_options: []
|
104
108
|
require_paths:
|