uhaul 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 70a37ead64b21ccff91595e73dfedf11b378c2e5ea14acdd0d18e9d45735e3a5
4
+ data.tar.gz: d355639b46eb03ce4ce4be447520f05551ecfc8f23f7ebacc8260a6bb5d71203
5
+ SHA512:
6
+ metadata.gz: b73ef2ae65031b627177dd1b8c5fa647ce6273b3f09efb8975a42182d6be90e05d6f6e27c5c50dfae3c836f81cd48e1211fd57bf2918081cc6664999b3daef24
7
+ data.tar.gz: bec905b2070dd9a60d0d32b1f3f3de1fbe184d27f495152524911f90d9eff9c1b3c2b6b833b1e3f3cb26960de25a606396d3ca0b9dcf9292f42a1498bed1fc98
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ gemspec
6
+
7
+ gem 'rake'
8
+ gem 'rspec'
9
+ gem 'rspec_junit_formatter'
10
+ gem 'rubocop'
11
+ gem 'rubocop-rake'
12
+ gem 'rubocop-rspec'
13
+ gem 'vcr'
14
+ gem 'webmock'
15
+ gem 'yard'
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # UHaul
2
+
3
+ [![LICENSE](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/ksylvest/uhaul/blob/main/LICENSE)
4
+ [![RubyGems](https://img.shields.io/gem/v/uhaul)](https://rubygems.org/gems/uhaul)
5
+ [![GitHub](https://img.shields.io/badge/github-repo-blue.svg)](https://github.com/ksylvest/uhaul)
6
+ [![Yard](https://img.shields.io/badge/docs-site-blue.svg)](https://uhaul.ksylvest.com)
7
+ [![CircleCI](https://img.shields.io/circleci/build/github/ksylvest/uhaul)](https://circleci.com/gh/ksylvest/uhaul)
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ gem install uhaul
13
+ ```
14
+
15
+ ## Configuration
16
+
17
+ ```ruby
18
+ require 'uhaul'
19
+
20
+ UHaul.configure do |config|
21
+ config.user_agent = '../..' # ENV['NSASTORAGE_USER_AGENT']
22
+ config.timeout = 30 # ENV['NSASTORAGE_TIMEOUT']
23
+ config.proxy_url = 'http://user:pass@superproxy.zenrows.com:1337' # ENV['NSASTORAGE_PROXY_URL']
24
+ end
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ```ruby
30
+ require 'uhaul'
31
+
32
+ sitemap = UHaul::Facility.sitemap
33
+ sitemap.links.each do |link|
34
+ url = link.loc
35
+ facility = UHaul::Facility.fetch(url:)
36
+
37
+ puts facility.text
38
+
39
+ facility.prices.each do |price|
40
+ puts price.text
41
+ end
42
+
43
+ puts
44
+ end
45
+ ```
46
+
47
+ ## CLI
48
+
49
+ ```bash
50
+ uhaul crawl
51
+ ```
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'uhaul'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ require 'irb'
11
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/exe/uhaul ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'uhaul'
5
+
6
+ cli = UHaul::CLI.new
7
+ cli.parse
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # The postal address of a facility: street, city, state and zip.
  class Address
    ADDRESS_SELECTOR = '.item-des-box .text-box .part_title_1'
    ADDRESS_REGEX = /(?<street>.+),\s+(?<city>.+),\s+(?<state>.+)\s+(?<zip>\d{5})/

    # @!attribute [rw] street
    #   @return [String]
    # @!attribute [rw] city
    #   @return [String]
    # @!attribute [rw] state
    #   @return [String]
    # @!attribute [rw] zip
    #   @return [String]
    attr_accessor :street, :city, :state, :zip

    # Builds an address from a hash by reading its 'streetAddress', 'addressLocality',
    # 'addressRegion' and 'postalCode' keys.
    #
    # @param data [Hash]
    #
    # @return [Address]
    def self.parse(data:)
      street, city, state, zip = data.values_at('streetAddress', 'addressLocality', 'addressRegion', 'postalCode')
      new(street:, city:, state:, zip:)
    end

    # @param street [String]
    # @param city [String]
    # @param state [String]
    # @param zip [String]
    def initialize(street:, city:, state:, zip:)
      @street = street
      @city = city
      @state = state
      @zip = zip
    end

    # @return [String] e.g. '#<UHaul::Address street="..." city="..." state="..." zip="...">'
    def inspect
      details = { street: @street, city: @city, state: @state, zip: @zip }
      summary = details.map { |label, value| "#{label}=#{value.inspect}" }.join(' ')
      "#<#{self.class.name} #{summary}>"
    end

    # @return [String] the address on a single line, e.g. "1 Main St, Inglewood, CA 90301"
    def text
      format('%s, %s, %s %s', street, city, state, zip)
    end
  end
end
data/lib/uhaul/cli.rb ADDED
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'optparse'
4
+
5
module UHaul
  # Used when interacting with the library from the command line interface (CLI).
  #
  # Usage:
  #
  #   cli = UHaul::CLI.new
  #   cli.parse
  class CLI
    # Process exit statuses used by the CLI.
    module Code
      OK = 0
      ERROR = 1
    end

    # Parses the options and then dispatches the first remaining argument as the command.
    #
    # Every path ends the process via `exit`: `Code::OK` after a successful command (or after
    # printing help / version), `Code::ERROR` for an unsupported command or an invalid option.
    #
    # @param argv [Array<String>] consumed in place (options and the command are removed)
    def parse(argv = ARGV)
      parser.parse!(argv)
      command = argv.shift

      case command
      when 'crawl' then crawl
      else failure("unsupported command=#{command.inspect}")
      end
    rescue OptionParser::ParseError => e
      # Unknown or malformed options are user errors: report them instead of leaking a backtrace.
      failure(e.message)
    end

    private

    # Prints the message to stderr and exits with the error status.
    #
    # @param message [String]
    def failure(message)
      warn(message)
      exit(Code::ERROR)
    end

    # Runs the crawl and exits successfully once it completes.
    def crawl
      Crawl.run
      exit(Code::OK)
    end

    # Prints the usage text and exits.
    #
    # @param options [OptionParser]
    def help(options)
      puts(options)
      exit(Code::OK)
    end

    # Prints the gem version and exits.
    def version
      puts(VERSION)
      exit(Code::OK)
    end

    # @return [OptionParser]
    def parser
      OptionParser.new do |options|
        options.banner = 'usage: uhaul [options] <command> [<args>]'

        options.on('-h', '--help', 'help') { help(options) }
        options.on('-v', '--version', 'version') { version }
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # The core configuration.
  #
  # Defaults are read from the environment when the config is built. Each setting is looked up
  # as `UHAUL_<NAME>` first and then as `NSASTORAGE_<NAME>` (the prefix originally shipped),
  # so existing deployments keep working.
  class Config
    # Environment variable prefixes, in lookup order.
    ENV_PREFIXES = %w[UHAUL NSASTORAGE].freeze

    # @attribute [rw] accept_language
    #   @return [String]
    attr_accessor :accept_language

    # @attribute [rw] user_agent
    #   @return [String]
    attr_accessor :user_agent

    # @attribute [rw] timeout
    #   @return [Integer]
    attr_accessor :timeout

    # @attribute [rw] proxy_url
    #   @return [String]
    attr_accessor :proxy_url

    # @raise [ArgumentError] if the configured timeout is not an integer
    def initialize
      @accept_language = env('ACCEPT_LANGUAGE', 'en-US,en;q=0.9')
      @user_agent = env('USER_AGENT', "uhaul.rb/#{VERSION}")
      @timeout = Integer(env('TIMEOUT', 60))
      @proxy_url = env('PROXY_URL', nil)
    end

    # @return [Boolean] true when a user agent is set
    def headers?
      !@user_agent.nil?
    end

    # @return [Boolean] false when the timeout is nil or zero
    def timeout?
      !(@timeout.nil? || @timeout.zero?)
    end

    # @return [Boolean] true when a proxy URL is set
    def proxy?
      !@proxy_url.nil?
    end

    # @return [Hash<String, String>] e.g { 'User-Agent' => 'uhaul.rb/1.0.0' }
    def headers
      {
        'Accept-Language' => @accept_language,
        'User-Agent' => @user_agent
      }
    end

    # Splits the proxy URL into its parts. Only meaningful when `proxy?` is true.
    #
    # @return [Array] e.g. ['proxy.example.com', 8080, 'user', 'pass']
    def via
      proxy_uri = URI.parse(@proxy_url)
      [proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password]
    end

    private

    # Reads a setting from the first environment variable (by prefix order) that is defined.
    #
    # @param name [String] the setting name without prefix, e.g. 'TIMEOUT'
    # @param default [Object] returned when no prefixed variable is set
    #
    # @return [Object]
    def env(name, default)
      key = ENV_PREFIXES.map { |prefix| "#{prefix}_#{name}" }.find { |candidate| ENV.key?(candidate) }
      key ? ENV.fetch(key) : default
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # Handles the crawl command via CLI.
  class Crawl
    # Builds a crawl with the given arguments and runs it.
    def self.run(...)
      new(...).run
    end

    # @param stdout [IO] optional
    # @param stderr [IO] optional
    # @param options [Hash] optional
    def initialize(stdout: $stdout, stderr: $stderr, options: {})
      @stdout = stdout
      @stderr = stderr
      @options = options
    end

    # Prints the number of facility links in the sitemap, then processes each link in order.
    def run
      links = Facility.sitemap.links
      @stdout.puts("count=#{links.count}")
      @stdout.puts

      links.each { |link| process(url: link.loc) }
    end

    # Fetches one facility and prints its summary followed by one line per price.
    #
    # A page that cannot be fetched or parsed is reported on stderr and skipped, so a single
    # bad facility does not abort the rest of the crawl.
    #
    # @param url [String]
    def process(url:)
      @stdout.puts(url)
      facility = Facility.fetch(url: url)
      @stdout.puts(facility.text)
      facility.prices.each { |price| @stdout.puts(price.text) }
      @stdout.puts
    rescue FetchError, Facility::ParseError => e
      @stderr.puts("url=#{url} error=#{e.message}")
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # Fetches a URL and parses the response body as JSON, HTML or XML.
  class Crawler
    HOST = 'https://www.uhaul.com'

    class << self
      # @param url [String]
      # @raise [FetchError]
      # @return [Hash]
      def json(url:)
        new.json(url:)
      end

      # @param url [String]
      # @raise [FetchError]
      # @return [Nokogiri::HTML::Document]
      def html(url:)
        new.html(url:)
      end

      # @param url [String]
      # @raise [FetchError]
      # @return [Nokogiri::XML::Document]
      def xml(url:)
        new.xml(url:)
      end
    end

    # The memoized client, built on first use from the global configuration.
    #
    # @return [HTTP::Client]
    def connection
      @connection ||= build_connection
    end

    # Performs a GET and returns the response only when its status is OK.
    #
    # @param url [String]
    # @raise [FetchError] for any non-OK status
    # @return [HTTP::Response]
    def fetch(url:)
      connection.get(url).tap do |response|
        next if response.status.ok?

        raise FetchError.new(url:, response: response.flush)
      end
    end

    # @param url [String]
    # @raise [FetchError]
    # @return [Hash]
    def json(url:)
      payload = fetch(url:).body
      JSON.parse(String(payload))
    end

    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::HTML::Document]
    def html(url:)
      payload = fetch(url:).body
      Nokogiri::HTML(String(payload))
    end

    # @param url [String]
    # @raise [FetchError]
    # @return [Nokogiri::XML::Document]
    def xml(url:)
      payload = fetch(url:).body
      Nokogiri::XML(String(payload))
    end

    private

    # Builds a persistent client for HOST, applying headers, timeout and proxy only when the
    # configuration reports them as set.
    #
    # @return [HTTP::Client]
    def build_connection
      settings = UHaul.config

      client = HTTP.use(:auto_deflate).use(:auto_inflate).persistent(HOST)
      client = client.headers(settings.headers) if settings.headers?
      client = client.timeout(settings.timeout) if settings.timeout?
      client = client.via(*settings.via) if settings.proxy?
      client
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # The dimensions (width + depth + height) of a price, in feet.
  class Dimensions
    DEFAULT_WIDTH = 5.0 # feet
    DEFAULT_DEPTH = 5.0 # feet
    DEFAULT_HEIGHT = 8.0 # feet

    DIMENSIONS_REGEX = /(?<width>[\d\.]+) x (?<depth>[\d\.]+)/

    # @attribute [rw] depth
    #   @return [Float]
    attr_accessor :depth

    # @attribute [rw] width
    #   @return [Float]
    attr_accessor :width

    # @attribute [rw] height
    #   @return [Float]
    attr_accessor :height

    # Reads "<width> x <depth>" from the element's `.unit-select-item-detail` node.
    #
    # Falls back to the default width and depth when the node is missing or its text does not
    # contain a "<width> x <depth>" pair. The height is always the default.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @return [Dimensions]
    def self.parse(element:)
      text = element.at_css('.unit-select-item-detail')&.text
      match = DIMENSIONS_REGEX.match(text) if text

      width = match ? Float(match[:width]) : DEFAULT_WIDTH
      depth = match ? Float(match[:depth]) : DEFAULT_DEPTH
      new(depth:, width:, height: DEFAULT_HEIGHT)
    end

    # @param depth [Float]
    # @param width [Float]
    # @param height [Float]
    def initialize(depth:, width:, height: DEFAULT_HEIGHT)
      @depth = depth
      @width = width
      @height = height
    end

    # @return [String]
    def inspect
      props = [
        "depth=#{@depth.inspect}",
        "width=#{@width.inspect}",
        "height=#{@height.inspect}"
      ]
      "#<#{self.class.name} #{props.join(' ')}>"
    end

    # @return [String] e.g. "5×5"
    def id
      "#{format('%g', @width)}×#{format('%g', @depth)}"
    end

    # @return [Integer] the floor area, truncated to a whole number
    def sqft
      Integer(@width * @depth)
    end

    # @return [Integer] the volume, truncated to a whole number
    def cuft
      Integer(@width * @depth * @height)
    end

    # @return [String] e.g. "10' × 10' (100 sqft)"
    def text
      "#{format('%g', @width)}' × #{format('%g', @depth)}' (#{sqft} sqft)"
    end
  end
end
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # A facility (address + geocode + prices) on uhaul.com.
  #
  # e.g. https://www.uhaul.com/Locations/Self-Storage-near-Inglewood-CA-90301/712030/
  class Facility
    # Raised when a facility page does not contain the expected structured data.
    class ParseError < StandardError; end

    # The state (and DC) codes that have a storage sitemap, in fetch order.
    SITEMAP_STATES = %w[
      AL AK AZ AR CA CO CT DC DE FL GA HI ID IL IN IA KS KY LA ME MD MA MI MN MS MO MT
      NE NV NH NJ NM NY NC ND OH OK OR PA RI SC SD TN TX UT VT VA WA WV WI WY
    ].freeze

    SITEMAP_URLS = SITEMAP_STATES.map do |state|
      "https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-#{state}.ashx"
    end.freeze

    # Extracts the numeric facility ID from an ld+json "@id" ending in "<id>/#schema".
    ID_REGEX = %r{(?<id>\d+)/#schema$}

    DEFAULT_EMAIL = 'service@uhaul.com'
    DEFAULT_PHONE = '+1-800-468-4285'

    # @attribute [rw] id
    #   @return [String]
    attr_accessor :id

    # @attribute [rw] url
    #   @return [String]
    attr_accessor :url

    # @attribute [rw] name
    #   @return [String]
    attr_accessor :name

    # @attribute [rw] phone
    #   @return [String]
    attr_accessor :phone

    # @attribute [rw] email
    #   @return [String]
    attr_accessor :email

    # @attribute [rw] address
    #   @return [Address]
    attr_accessor :address

    # @attribute [rw] geocode
    #   @return [Geocode, nil]
    attr_accessor :geocode

    # @attribute [rw] prices
    #   @return [Array<Price>]
    attr_accessor :prices

    # Fetches every per-state sitemap and merges their links into a single sitemap.
    #
    # @return [Sitemap]
    def self.sitemap
      Sitemap.new(links: sitemaps.flat_map(&:links))
    end

    # @return [Array<Sitemap>]
    def self.sitemaps
      SITEMAP_URLS.map do |url|
        Sitemap.fetch(url:)
      end
    end

    # @param url [String]
    #
    # @raise [ParseError]
    #
    # @return [Facility]
    def self.fetch(url:)
      document = Crawler.html(url:)
      parse(url:, document:)
    end

    # Builds a facility from the "SelfStorage" ld+json data embedded in the page.
    #
    # @param url [String]
    # @param document [Nokogiri::HTML::Document]
    #
    # @raise [ParseError] if the ld+json is missing or its "@id" carries no facility ID
    #
    # @return [Facility]
    def self.parse(url:, document:)
      data = parse_ld_json_script(document:)

      match = ID_REGEX.match(String(data['@id']))
      raise ParseError, "missing id in ld+json @id=#{data['@id'].inspect}" unless match

      # The geocode is optional: pages without a "geo" entry yield a facility with no geocode.
      geo = data['geo']

      new(
        id: match[:id],
        url:,
        name: data['name'],
        address: Address.parse(data: data['address']),
        geocode: geo && Geocode.parse(data: geo),
        prices: [] # TODO
      )
    end

    # @param document [Nokogiri::HTML::Document]
    #
    # @raise [ParseError]
    #
    # @return [Hash]
    def self.parse_ld_json_script(document:)
      parse_ld_json_scripts(document:).find do |data|
        data['@type'] == 'SelfStorage'
      end || raise(ParseError, 'missing ld+json')
    end

    # Parses every ld+json script on the page. A blank script yields an empty hash; a script
    # holding a JSON array contributes each of its objects.
    #
    # @param document [Nokogiri::HTML::Document]
    #
    # @return [Array<Hash>]
    def self.parse_ld_json_scripts(document:)
      document.xpath('//script[@type="application/ld+json"]').flat_map do |element|
        text = element.text.strip
        next [{}] if text.empty?

        parsed = JSON.parse(text)
        (parsed.is_a?(Array) ? parsed : [parsed]).grep(Hash)
      end
    end

    # @param id [String]
    # @param url [String]
    # @param name [String]
    # @param address [Address]
    # @param geocode [Geocode, nil]
    # @param phone [String]
    # @param email [String]
    # @param prices [Array<Price>]
    def initialize(id:, url:, name:, address:, geocode:, phone: DEFAULT_PHONE, email: DEFAULT_EMAIL, prices: [])
      @id = id
      @url = url
      @name = name
      @address = address
      @geocode = geocode
      @phone = phone
      @email = email
      @prices = prices
    end

    # @return [String]
    def inspect
      props = [
        "id=#{@id.inspect}",
        "url=#{@url.inspect}",
        "name=#{@name.inspect}",
        "address=#{@address.inspect}",
        "geocode=#{@geocode.inspect}",
        "phone=#{@phone.inspect}",
        "email=#{@email.inspect}",
        "prices=#{@prices.inspect}"
      ]
      "#<#{self.class.name} #{props.join(' ')}>"
    end

    # @return [String] a single-line summary; the geocode is shown as "N/A" when absent
    def text
      "#{@id} | #{@name} | #{@phone} | #{@email} | #{@address.text} | #{@geocode ? @geocode.text : 'N/A'}"
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # The features of a price: climate controlled, drive-up access and first floor access.
  class Features
    # Detects each feature by looking for its marker phrase in the element's text.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @return [Features]
    def self.parse(element:)
      content = element.text
      climate_controlled = content.include?('Heated and Cooled')
      drive_up_access = content.include?('Drive Up Access')
      first_floor_access = content.include?('1st Floor')

      new(climate_controlled:, drive_up_access:, first_floor_access:)
    end

    # @param climate_controlled [Boolean]
    # @param drive_up_access [Boolean]
    # @param first_floor_access [Boolean]
    def initialize(climate_controlled:, drive_up_access:, first_floor_access:)
      @climate_controlled = climate_controlled
      @drive_up_access = drive_up_access
      @first_floor_access = first_floor_access
    end

    # @return [String]
    def inspect
      flags = {
        climate_controlled: @climate_controlled,
        drive_up_access: @drive_up_access,
        first_floor_access: @first_floor_access
      }
      summary = flags.map { |label, value| "#{label}=#{value}" }.join(' ')

      "#<#{self.class.name} #{summary}>"
    end

    # @return [String] the short codes of the enabled features joined by "-", e.g. "cc-ffa" (or "")
    def id
      codes = {
        'cc' => climate_controlled?,
        'dua' => drive_up_access?,
        'ffa' => first_floor_access?
      }
      codes.select { |_code, enabled| enabled }.keys.join('-')
    end

    # @return [String] e.g. "Climate Controlled + First Floor Access"
    def text
      amenities.join(' + ')
    end

    # @return [Array<String>] the display names of the enabled features
    def amenities
      labels = {
        'Climate Controlled' => climate_controlled?,
        'Drive-Up Access' => drive_up_access?,
        'First Floor Access' => first_floor_access?
      }
      labels.select { |_label, enabled| enabled }.keys
    end

    # @return [Boolean]
    def climate_controlled?
      @climate_controlled
    end

    # @return [Boolean]
    def drive_up_access?
      @drive_up_access
    end

    # @return [Boolean]
    def first_floor_access?
      @first_floor_access
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # Raised when an HTTP response does not have the expected status.
  class FetchError < Error
    # Builds a message containing the URL, the response status and the response body.
    #
    # @param url [String]
    # @param response [HTTP::Response]
    def initialize(url:, response:)
      status = response.status.inspect
      body = String(response.body).inspect

      super("url=#{url} status=#{status} body=#{body}")
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # The geocode (latitude + longitude) of a facility.
  class Geocode
    # NOTE: these patterns are not referenced by this class; they are kept as public constants.
    LATITUDE_REGEX = /\\u0022lat\\u0022:(?<latitude>[\+\-\d\.]+)/
    LONGITUDE_REGEX = /\\u0022long\\u0022:(?<longitude>[\+\-\d\.]+)/

    # @attribute [rw] latitude
    #   @return [Float]
    attr_accessor :latitude

    # @attribute [rw] longitude
    #   @return [Float]
    attr_accessor :longitude

    # Builds a geocode from a hash holding "latitude" and "longitude".
    #
    # JSON-parsed data has string keys, so those are read first; symbol keys are accepted as
    # a fallback for hand-built hashes.
    #
    # @param data [Hash, nil]
    #
    # @return [Geocode, nil] nil when no data is given
    def self.parse(data:)
      return if data.nil?

      latitude = data['latitude'] || data[:latitude]
      longitude = data['longitude'] || data[:longitude]

      new(latitude:, longitude:)
    end

    # @param latitude [Float]
    # @param longitude [Float]
    def initialize(latitude:, longitude:)
      @latitude = latitude
      @longitude = longitude
    end

    # @return [String]
    def inspect
      props = [
        "latitude=#{@latitude.inspect}",
        "longitude=#{@longitude.inspect}"
      ]
      "#<#{self.class.name} #{props.join(' ')}>"
    end

    # @return [String] e.g. "33.96,-118.35"
    def text
      "#{@latitude},#{@longitude}"
    end
  end
end
data/lib/uhaul/link.rb ADDED
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # A single entry in a sitemap, identified by its location.
  class Link
    # @attribute [rw] loc
    #   @return [String]
    attr_accessor :loc

    # @param loc [String] the location (URL) of the entry
    def initialize(loc:)
      @loc = loc
    end

    # @return [String] e.g. '#<UHaul::Link loc="https://...">'
    def inspect
      format('#<%<name>s loc=%<loc>s>', name: self.class.name, loc: @loc.inspect)
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # The price (id + dimensions + features + rates) for a unit at a facility.
  class Price
    ID_REGEX = %r{(?<id>\d+)/(?:rent|reserve)/}
    PRICE_SELECTOR = '[data-unit-size="small"],[data-unit-size="medium"],[data-unit-size="large"]'

    # Both alternatives are relative (".//") so only links inside the unit's own element match.
    LINK_XPATH = ".//a[contains(text(), 'Rent')]|.//a[contains(text(), 'Reserve')]"

    # @attribute [rw] id
    #   @return [String]
    attr_accessor :id

    # @attribute [rw] dimensions
    #   @return [Dimensions]
    attr_accessor :dimensions

    # @attribute [rw] features
    #   @return [Features]
    attr_accessor :features

    # @attribute [rw] rates
    #   @return [Rates]
    attr_accessor :rates

    # Fetches the units of a facility and parses one price per matching unit element.
    #
    # @param facility_id [Integer, String]
    #
    # @raise [FetchError]
    #
    # @return [Array<Price>] empty when the response data reports an error
    def self.fetch(facility_id:)
      url = "https://www.uhaul.com/facility-units/#{facility_id}"
      data = Crawler.json(url:)['data']
      return [] if data['error']

      html = data['html']['units']
      Nokogiri::HTML(html).css(PRICE_SELECTOR).map { |element| parse(element:) }
    end

    # Builds a price from a unit element.
    #
    # The ID comes from the unit's Rent / Reserve link; when there is no such link, or its href
    # does not carry an ID, a synthetic "<dimensions id>-<features id>" ID is used instead.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @return [Price]
    def self.parse(element:)
      dimensions = Dimensions.parse(element:)
      features = Features.parse(element:)
      rates = Rates.parse(element:)

      id = parse_id(element:) || "#{dimensions.id}-#{features.id}"

      new(id:, dimensions:, features:, rates:)
    end

    # @param element [Nokogiri::XML::Element]
    #
    # @return [String, nil] the ID found in the unit's Rent / Reserve link, if any
    def self.parse_id(element:)
      link = element.at_xpath(LINK_XPATH)
      return unless link

      match = ID_REGEX.match(String(link['href']))
      match[:id] if match
    end
    private_class_method :parse_id

    # @param id [String]
    # @param dimensions [Dimensions]
    # @param features [Features]
    # @param rates [Rates]
    def initialize(id:, dimensions:, features:, rates:)
      @id = id
      @dimensions = dimensions
      @features = features
      @rates = rates
    end

    # @return [String]
    def inspect
      props = [
        "id=#{@id.inspect}",
        "dimensions=#{@dimensions.inspect}",
        "features=#{@features.inspect}",
        "rates=#{@rates.inspect}"
      ]
      "#<#{self.class.name} #{props.join(' ')}>"
    end

    # @return [String] the id, dimensions, rates and features joined by " | "
    def text
      "#{@id} | #{@dimensions.text} | #{@rates.text} | #{@features.text}"
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # The rates (street + web) for a price.
  class Rates
    STREET_SELECTOR = '.part_item_old_price'
    WEB_SELECTOR = '.part_item_price'
    VALUE_REGEX = /(?<value>[\d\.]+)/

    # @attribute [rw] street
    #   @return [Float, nil]
    attr_accessor :street

    # @attribute [rw] web
    #   @return [Float, nil]
    attr_accessor :web

    # Reads the street and web rates from the element. When only one of the two is present it
    # is used for both.
    #
    # @param element [Nokogiri::XML::Element]
    #
    # @return [Rates]
    def self.parse(element:)
      street, web = [STREET_SELECTOR, WEB_SELECTOR].map do |selector|
        parse_value(element: element.at_css(selector))
      end

      new(street: street || web, web: web || street)
    end

    # Extracts the first run of digits and dots from the element's text as a number.
    #
    # @param element [Nokogiri::XML::Element, nil]
    #
    # @return [Float, nil] nil when the element is missing or its text holds no value
    def self.parse_value(element:)
      captured = element && VALUE_REGEX.match(element.text)
      captured && Float(captured[:value])
    end

    # @param street [Float, nil]
    # @param web [Float, nil]
    def initialize(street:, web:)
      @street = street
      @web = web
    end

    # @return [String]
    def inspect
      summary = { street: @street, web: @web }.map { |label, value| "#{label}=#{value.inspect}" }
      "#<#{self.class.name} #{summary.join(' ')}>"
    end

    # @return [String] e.g. "$80.0 (street) | $60.0 (web)"
    def text
      format('$%s (street) | $%s (web)', @street, @web)
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # A sitemap on uhaul.com.
  class Sitemap
    # @attribute [rw] links
    #   @return [Array<Link>]
    attr_accessor :links

    # Collects the location of every <url> entry and keeps those that look like facility pages.
    #
    # NOTE(review): the filter requires a literal "/Self-Storage/" path segment followed later by
    # "/<digits>"; confirm this matches the facility URLs that the sitemaps actually list.
    #
    # @param document [Nokogiri::XML::Document]
    #
    # @return [Sitemap]
    def self.parse(document:)
      locations = document.xpath('//xmlns:url').map { |entry| entry.at_xpath('xmlns:loc').text }
      facilities = locations.grep(%r{/Self-Storage/.*/\d+})

      new(links: facilities.map { |loc| Link.new(loc:) })
    end

    # Downloads the sitemap at the URL and parses it.
    #
    # @param url [String]
    #
    # @return [Sitemap]
    def self.fetch(url:)
      parse(document: Crawler.xml(url:))
    end

    # @param links [Array<Link>]
    def initialize(links:)
      @links = links
    end

    # @return [String]
    def inspect
      format('#<%<name>s links=%<links>s>', name: self.class.name, links: @links.inspect)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module UHaul
  # The version of this gem.
  VERSION = '0.1.0'
end
data/lib/uhaul.rb ADDED
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'http'
4
+ require 'nokogiri'
5
+ require 'zeitwerk'
6
+
7
+ loader = Zeitwerk::Loader.for_gem
8
+ loader.inflector.inflect 'uhaul' => 'UHaul'
9
+ loader.inflector.inflect 'cli' => 'CLI'
10
+ loader.setup
11
+
12
# An interface for UHaul: holds the shared configuration and the base error class.
module UHaul
  # The base error class; FetchError inherits from it.
  class Error < StandardError; end

  class << self
    # The shared configuration, created on first access.
    #
    # @return [Config]
    def config
      @config ||= Config.new
    end

    # Yields the shared configuration so that it can be modified.
    #
    # @yield [config]
    # @yieldparam config [Config]
    def configure
      yield config
    end
  end
end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uhaul
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kevin Sylvestre
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-12-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: http
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: optparse
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: zeitwerk
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Uses HTTP.rb to scrape uhaul.com.
84
+ email:
85
+ - kevin@ksylvest.com
86
+ executables:
87
+ - uhaul
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - Gemfile
92
+ - README.md
93
+ - bin/console
94
+ - bin/setup
95
+ - exe/uhaul
96
+ - lib/uhaul.rb
97
+ - lib/uhaul/address.rb
98
+ - lib/uhaul/cli.rb
99
+ - lib/uhaul/config.rb
100
+ - lib/uhaul/crawl.rb
101
+ - lib/uhaul/crawler.rb
102
+ - lib/uhaul/dimensions.rb
103
+ - lib/uhaul/facility.rb
104
+ - lib/uhaul/features.rb
105
+ - lib/uhaul/fetch_error.rb
106
+ - lib/uhaul/geocode.rb
107
+ - lib/uhaul/link.rb
108
+ - lib/uhaul/price.rb
109
+ - lib/uhaul/rates.rb
110
+ - lib/uhaul/sitemap.rb
111
+ - lib/uhaul/version.rb
112
+ homepage: https://github.com/ksylvest/uhaul
113
+ licenses:
114
+ - MIT
115
+ metadata:
116
+ rubygems_mfa_required: 'true'
117
+ homepage_uri: https://github.com/ksylvest/uhaul
118
+ source_code_uri: https://github.com/ksylvest/uhaul
119
+ changelog_uri: https://github.com/ksylvest/uhaul
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: 3.2.0
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubygems_version: 3.5.22
136
+ signing_key:
137
+ specification_version: 4
138
+ summary: A crawler for UHaul.
139
+ test_files: []