uhaul 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 70a37ead64b21ccff91595e73dfedf11b378c2e5ea14acdd0d18e9d45735e3a5
4
+ data.tar.gz: d355639b46eb03ce4ce4be447520f05551ecfc8f23f7ebacc8260a6bb5d71203
5
+ SHA512:
6
+ metadata.gz: b73ef2ae65031b627177dd1b8c5fa647ce6273b3f09efb8975a42182d6be90e05d6f6e27c5c50dfae3c836f81cd48e1211fd57bf2918081cc6664999b3daef24
7
+ data.tar.gz: bec905b2070dd9a60d0d32b1f3f3de1fbe184d27f495152524911f90d9eff9c1b3c2b6b833b1e3f3cb26960de25a606396d3ca0b9dcf9292f42a1498bed1fc98
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ gemspec
6
+
7
+ gem 'rake'
8
+ gem 'rspec'
9
+ gem 'rspec_junit_formatter'
10
+ gem 'rubocop'
11
+ gem 'rubocop-rake'
12
+ gem 'rubocop-rspec'
13
+ gem 'vcr'
14
+ gem 'webmock'
15
+ gem 'yard'
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # UHaul
2
+
3
+ [![LICENSE](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/ksylvest/uhaul/blob/main/LICENSE)
4
+ [![RubyGems](https://img.shields.io/gem/v/uhaul)](https://rubygems.org/gems/uhaul)
5
+ [![GitHub](https://img.shields.io/badge/github-repo-blue.svg)](https://github.com/ksylvest/uhaul)
6
+ [![Yard](https://img.shields.io/badge/docs-site-blue.svg)](https://uhaul.ksylvest.com)
7
+ [![CircleCI](https://img.shields.io/circleci/build/github/ksylvest/uhaul)](https://circleci.com/gh/ksylvest/uhaul)
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ gem install uhaul
13
+ ```
14
+
15
+ ## Configuration
16
+
17
+ ```ruby
18
+ require 'uhaul'
19
+
20
+ UHaul.configure do |config|
21
+ config.user_agent = '../..' # ENV['NSASTORAGE_USER_AGENT']
22
+ config.timeout = 30 # ENV['NSASTORAGE_TIMEOUT']
23
+ config.proxy_url = 'http://user:pass@superproxy.zenrows.com:1337' # ENV['NSASTORAGE_PROXY_URL']
24
+ end
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ```ruby
30
+ require 'uhaul'
31
+
32
+ sitemap = UHaul::Facility.sitemap
33
+ sitemap.links.each do |link|
34
+ url = link.loc
35
+ facility = UHaul::Facility.fetch(url:)
36
+
37
+ puts facility.text
38
+
39
+ facility.prices.each do |price|
40
+ puts price.text
41
+ end
42
+
43
+ puts
44
+ end
45
+ ```
46
+
47
+ ## CLI
48
+
49
+ ```bash
50
+ uhaul crawl
51
+ ```
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'uhaul'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ require 'irb'
11
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/exe/uhaul ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'uhaul'
5
+
6
+ cli = UHaul::CLI.new
7
+ cli.parse
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # The address (street + city + state + zip) of a facility.
5
+ class Address
6
+ ADDRESS_SELECTOR = '.item-des-box .text-box .part_title_1'
7
+ ADDRESS_REGEX = /(?<street>.+),\s+(?<city>.+),\s+(?<state>.+)\s+(?<zip>\d{5})/
8
+ # @attribute [rw] street
9
+ # @return [String]
10
+ attr_accessor :street
11
+
12
+ # @attribute [rw] city
13
+ # @return [String]
14
+ attr_accessor :city
15
+
16
+ # @attribute [rw] state
17
+ # @return [String]
18
+ attr_accessor :state
19
+
20
+ # @attribute [rw] zip
21
+ # @return [String]
22
+ attr_accessor :zip
23
+
24
+ # @param street [String]
25
+ # @param city [String]
26
+ # @param state [String]
27
+ # @param zip [String]
28
+ def initialize(street:, city:, state:, zip:)
29
+ @street = street
30
+ @city = city
31
+ @state = state
32
+ @zip = zip
33
+ end
34
+
35
+ # @return [String]
36
+ def inspect
37
+ props = [
38
+ "street=#{@street.inspect}",
39
+ "city=#{@city.inspect}",
40
+ "state=#{@state.inspect}",
41
+ "zip=#{@zip.inspect}"
42
+ ]
43
+ "#<#{self.class.name} #{props.join(' ')}>"
44
+ end
45
+
46
+ # @return [String]
47
+ def text
48
+ "#{street}, #{city}, #{state} #{zip}"
49
+ end
50
+
51
+ # @param data [Hash]
52
+ #
53
+ # @return [Address]
54
+ def self.parse(data:)
55
+ new(
56
+ street: data['streetAddress'],
57
+ city: data['addressLocality'],
58
+ state: data['addressRegion'],
59
+ zip: data['postalCode']
60
+ )
61
+ end
62
+ end
63
+ end
data/lib/uhaul/cli.rb ADDED
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'optparse'
4
+
5
+ module UHaul
6
+ # Used when interacting with the library from the command line interface (CLI).
7
+ #
8
+ # Usage:
9
+ #
10
+ # cli = UHaul::CLI.new
11
+ # cli.parse
12
+ class CLI
13
+ module Code
14
+ OK = 0
15
+ ERROR = 1
16
+ end
17
+
18
+ # @param argv [Array<String>]
19
+ def parse(argv = ARGV)
20
+ parser.parse!(argv)
21
+ command = argv.shift
22
+
23
+ case command
24
+ when 'crawl' then crawl
25
+ else
26
+ warn("unsupported command=#{command.inspect}")
27
+ exit(Code::ERROR)
28
+ end
29
+ end
30
+
31
+ private
32
+
33
+ def crawl
34
+ Crawl.run
35
+ exit(Code::OK)
36
+ end
37
+
38
+ def help(options)
39
+ puts(options)
40
+ exit(Code::OK)
41
+ end
42
+
43
+ def version
44
+ puts(VERSION)
45
+ exit(Code::OK)
46
+ end
47
+
48
+ # @return [OptionParser]
49
+ def parser
50
+ OptionParser.new do |options|
51
+ options.banner = 'usage: uhaul [options] <command> [<args>]'
52
+
53
+ options.on('-h', '--help', 'help') { help(options) }
54
+ options.on('-v', '--version', 'version') { version }
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # The core configuration.
5
+ class Config
6
+ # @attribute [rw] accept_language
7
+ # @return [String]
8
+ attr_accessor :accept_language
9
+
10
+ # @attribute [rw] user_agent
11
+ # @return [String]
12
+ attr_accessor :user_agent
13
+
14
+ # @attribute [rw] timeout
15
+ # @return [Integer]
16
+ attr_accessor :timeout
17
+
18
+ # @attribute [rw] proxy_url
19
+ # @return [String]
20
+ attr_accessor :proxy_url
21
+
22
+ def initialize
23
+ @accept_language = ENV.fetch('NSASTORAGE_ACCEPT_LANGUAGE', 'en-US,en;q=0.9')
24
+ @user_agent = ENV.fetch('NSASTORAGE_USER_AGENT', "uhaul.rb/#{VERSION}")
25
+ @timeout = Integer(ENV.fetch('NSASTORAGE_TIMEOUT', 60))
26
+ @proxy_url = ENV.fetch('NSASTORAGE_PROXY_URL', nil)
27
+ end
28
+
29
+ # @return [Boolean]
30
+ def headers?
31
+ !@user_agent.nil?
32
+ end
33
+
34
+ # @return [Boolean]
35
+ def timeout?
36
+ !@timeout.zero?
37
+ end
38
+
39
+ # @return [Boolean]
40
+ def proxy?
41
+ !@proxy_url.nil?
42
+ end
43
+
44
+ # @return [Hash<String, String>] e.g { 'User-Agent' => 'uhaul.rb/1.0.0' }
45
+ def headers
46
+ {
47
+ 'Accept-Language' => @accept_language,
48
+ 'User-Agent' => @user_agent
49
+ }
50
+ end
51
+
52
+ # @return [Array] e.g. ['proxy.example.com', 8080, 'user', 'pass']
53
+ def via
54
+ proxy_uri = URI.parse(@proxy_url)
55
+ [proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password]
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # Handles the crawl command via CLI.
5
+ class Crawl
6
+ def self.run(...)
7
+ new(...).run
8
+ end
9
+
10
+ # @param stdout [IO] optional
11
+ # @param stderr [IO] optional
12
+ # @param options [Hash] optional
13
+ def initialize(stdout: $stdout, stderr: $stderr, options: {})
14
+ @stdout = stdout
15
+ @stderr = stderr
16
+ @options = options
17
+ end
18
+
19
+ def run
20
+ sitemap = Facility.sitemap
21
+ @stdout.puts("count=#{sitemap.links.count}")
22
+ @stdout.puts
23
+
24
+ sitemap.links.each { |link| process(url: link.loc) }
25
+ end
26
+
27
+ def process(url:)
28
+ @stdout.puts(url)
29
+ facility = Facility.fetch(url: url)
30
+ @stdout.puts(facility.text)
31
+ facility.prices.each { |price| @stdout.puts(price.text) }
32
+ @stdout.puts
33
+ rescue FetchError, Facility::ParseError => e # report the failing page on stderr so the crawl can continue
34
+ @stderr.puts("url=#{url} error=#{e.message}")
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # Used to fetch and parse either HTML or XML via a URL.
5
+ class Crawler
6
+ HOST = 'https://www.uhaul.com'
7
+
8
+ # @param url [String]
9
+ # @raise [FetchError]
10
+ # @return [Hash]
11
+ def self.json(url:)
12
+ new.json(url:)
13
+ end
14
+
15
+ # @param url [String]
16
+ # @raise [FetchError]
17
+ # @return [Nokogiri::HTML::Document]
18
+ def self.html(url:)
19
+ new.html(url:)
20
+ end
21
+
22
+ # @param url [String]
23
+ # @raise [FetchError]
24
+ # @return [Nokogiri::XML::Document]
25
+ def self.xml(url:)
26
+ new.xml(url:)
27
+ end
28
+
29
+ # @return [HTTP::Client]
30
+ def connection
31
+ @connection ||= begin
32
+ config = UHaul.config
33
+
34
+ connection = HTTP.use(:auto_deflate).use(:auto_inflate).persistent(HOST)
35
+ connection = connection.headers(config.headers) if config.headers?
36
+ connection = connection.timeout(config.timeout) if config.timeout?
37
+ connection = connection.via(*config.via) if config.proxy?
38
+
39
+ connection
40
+ end
41
+ end
42
+
43
+ # @param url [String]
44
+ # @return [HTTP::Response]
45
+ def fetch(url:)
46
+ response = connection.get(url)
47
+ raise FetchError.new(url:, response: response.flush) unless response.status.ok?
48
+
49
+ response
50
+ end
51
+
52
+ # @param url [String]
53
+ # @raise [FetchError]
54
+ # @return [Hash]
55
+ def json(url:)
56
+ JSON.parse(String(fetch(url:).body))
57
+ end
58
+
59
+ # @param url [String]
60
+ # @raise [FetchError]
61
+ # @return [Nokogiri::HTML::Document]
62
+ def html(url:)
63
+ Nokogiri::HTML(String(fetch(url:).body))
64
+ end
65
+
66
+ # @param url [String]
67
+ # @raise [FetchError]
68
+ # @return [Nokogiri::XML::Document]
69
+ def xml(url:)
70
+ Nokogiri::XML(String(fetch(url:).body))
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # The dimensions (width + depth + sqft) of a price.
5
+ class Dimensions
6
+ DEFAULT_WIDTH = 5.0 # feet
7
+ DEFAULT_DEPTH = 5.0 # feet
8
+ DEFAULT_HEIGHT = 8.0 # feet
9
+
10
+ DIMENSIONS_REGEX = /(?<width>[\d\.]+) x (?<depth>[\d\.]+)/
11
+
12
+ # @attribute [rw] depth
13
+ # @return [Float]
14
+ attr_accessor :depth
15
+
16
+ # @attribute [rw] width
17
+ # @return [Float]
18
+ attr_accessor :width
19
+
20
+ # @attribute [rw] height
21
+ # @return [Float]
22
+ attr_accessor :height
23
+
24
+ # @param depth [Float]
25
+ # @param width [Float]
26
+ # @param height [Float]
27
+ def initialize(depth:, width:, height: DEFAULT_HEIGHT)
28
+ @depth = depth
29
+ @width = width
30
+ @height = height
31
+ end
32
+
33
+ # @return [String]
34
+ def inspect
35
+ props = [
36
+ "depth=#{@depth.inspect}",
37
+ "width=#{@width.inspect}",
38
+ "height=#{@height.inspect}"
39
+ ]
40
+ "#<#{self.class.name} #{props.join(' ')}>"
41
+ end
42
+
43
+ # @return [String] e.g. "5×5"
44
+ def id
45
+ "#{format('%g', @width)}×#{format('%g', @depth)}"
46
+ end
47
+
48
+ # @return [Integer]
49
+ def sqft
50
+ Integer(@width * @depth)
51
+ end
52
+
53
+ # @return [Integer]
54
+ def cuft
55
+ Integer(@width * @depth * @height)
56
+ end
57
+
58
+ # @return [String] e.g. "10' × 10' (100 sqft)"
59
+ def text
60
+ "#{format('%g', @width)}' × #{format('%g', @depth)}' (#{sqft} sqft)"
61
+ end
62
+
63
+ # @param element [Nokogiri::XML::Element]
64
+ #
65
+ # @return [Dimensions]
66
+ def self.parse(element:)
67
+ text = element.at_css('.unit-select-item-detail').text
68
+ match = DIMENSIONS_REGEX.match(text)
69
+
70
+ width = match ? Float(match[:width]) : DEFAULT_WIDTH
71
+ depth = match ? Float(match[:depth]) : DEFAULT_DEPTH
72
+ new(depth:, width:, height: DEFAULT_HEIGHT)
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # A facility (address + geocode + prices) on uhaul.com.
5
+ #
6
+ # e.g. https://www.uhaul.com/Locations/Self-Storage-near-Inglewood-CA-90301/712030/
7
+ class Facility
8
+ class ParseError < StandardError; end
9
+
10
+ SITEMAP_URLS = %w[
11
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-AL.ashx
12
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-AK.ashx
13
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-AZ.ashx
14
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-AR.ashx
15
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-CA.ashx
16
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-CO.ashx
17
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-CT.ashx
18
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-DC.ashx
19
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-DE.ashx
20
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-FL.ashx
21
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-GA.ashx
22
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-HI.ashx
23
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-ID.ashx
24
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-IL.ashx
25
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-IN.ashx
26
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-IA.ashx
27
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-KS.ashx
28
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-KY.ashx
29
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-LA.ashx
30
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-ME.ashx
31
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-MD.ashx
32
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-MA.ashx
33
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-MI.ashx
34
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-MN.ashx
35
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-MS.ashx
36
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-MO.ashx
37
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-MT.ashx
38
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-NE.ashx
39
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-NV.ashx
40
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-NH.ashx
41
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-NJ.ashx
42
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-NM.ashx
43
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-NY.ashx
44
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-NC.ashx
45
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-ND.ashx
46
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-OH.ashx
47
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-OK.ashx
48
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-OR.ashx
49
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-PA.ashx
50
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-RI.ashx
51
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-SC.ashx
52
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-SD.ashx
53
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-TN.ashx
54
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-TX.ashx
55
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-UT.ashx
56
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-VT.ashx
57
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-VA.ashx
58
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-WA.ashx
59
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-WV.ashx
60
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-WI.ashx
61
+ https://www.uhaul.com/Locations/Sitemaps/Sitemap-for-Storage-in-WY.ashx
62
+ ].freeze
63
+
64
+ DEFAULT_EMAIL = 'service@uhaul.com'
65
+ DEFAULT_PHONE = '+1-800-468-4285
66
+ '
67
+
68
+ # @attribute [rw] id
69
+ # @return [String]
70
+ attr_accessor :id
71
+
72
+ # @attribute [rw] url
73
+ # @return [String]
74
+ attr_accessor :url
75
+
76
+ # @attribute [rw] name
77
+ # @return [String]
78
+ attr_accessor :name
79
+
80
+ # @attribute [rw] phone
81
+ # @return [String]
82
+ attr_accessor :phone
83
+
84
+ # @attribute [rw] email
85
+ # @return [String]
86
+ attr_accessor :email
87
+
88
+ # @attribute [rw] address
89
+ # @return [Address]
90
+ attr_accessor :address
91
+
92
+ # @attribute [rw] geocode
93
+ # @return [Geocode, nil]
94
+ attr_accessor :geocode
95
+
96
+ # @attribute [rw] prices
97
+ # @return [Array<Price>]
98
+ attr_accessor :prices
99
+
100
+ # @return [Sitemap]
101
+ def self.sitemap
102
+ links = sitemaps.map(&:links).reduce(&:+)
103
+ Sitemap.new(links:)
104
+ end
105
+
106
+ # @return [Array<Sitemap>]
107
+ def self.sitemaps
108
+ SITEMAP_URLS.map do |url|
109
+ Sitemap.fetch(url:)
110
+ end
111
+ end
112
+
113
+ # @param url [String]
114
+ #
115
+ # @return [Facility]
116
+ def self.fetch(url:)
117
+ document = Crawler.html(url:)
118
+ parse(url:, document:)
119
+ end
120
+
121
+ # @param url [String]
122
+ # @param document [Nokogiri::HTML::Document]
123
+ #
124
+ # @return [Facility]
125
+ def self.parse(url:, document:)
126
+ data = parse_ld_json_script(document:)
127
+
128
+ id = data['@id'].match(%r{(?<id>\d+)/#schema$})[:id]
129
+ name = data['name']
130
+
131
+ geocode = Geocode.parse(data: data['geo'])
132
+ address = Address.parse(data: data['address'])
133
+ prices = [] # TODO
134
+
135
+ new(id:, url:, name:, address:, geocode:, prices:)
136
+ end
137
+
138
+ # @param document [Nokogiri::HTML::Document]
139
+ #
140
+ # @raise [ParseError]
141
+ #
142
+ # @return [Hash]
143
+ def self.parse_ld_json_script(document:)
144
+ parse_ld_json_scripts(document:).find do |data|
145
+ data['@type'] == 'SelfStorage'
146
+ end || raise(ParseError, 'missing ld+json')
147
+ end
148
+
149
+ # @param document [Nokogiri::HTML::Document]
150
+ #
151
+ # @return [Array<Hash>]
152
+ def self.parse_ld_json_scripts(document:)
153
+ elements = document.xpath('//script[@type="application/ld+json"]')
154
+
155
+ elements.map { |element| element.text.empty? ? {} : JSON.parse(element.text) }
156
+ end
157
+
158
+ # @param id [String]
159
+ # @param url [String]
160
+ # @param name [String]
161
+ # @param address [Address]
162
+ # @param geocode [Geocode]
163
+ # @param phone [String]
164
+ # @param email [String]
165
+ # @param prices [Array<Price>]
166
+ def initialize(id:, url:, name:, address:, geocode:, phone: DEFAULT_PHONE, email: DEFAULT_EMAIL, prices: [])
167
+ @id = id
168
+ @url = url
169
+ @name = name
170
+ @address = address
171
+ @geocode = geocode
172
+ @phone = phone
173
+ @email = email
174
+ @prices = prices
175
+ end
176
+
177
+ # @return [String]
178
+ def inspect
179
+ props = [
180
+ "id=#{@id.inspect}",
181
+ "url=#{@url.inspect}",
182
+ "address=#{@address.inspect}",
183
+ "geocode=#{@geocode.inspect}",
184
+ "phone=#{@phone.inspect}",
185
+ "email=#{@email.inspect}",
186
+ "prices=#{@prices.inspect}"
187
+ ]
188
+ "#<#{self.class.name} #{props.join(' ')}>"
189
+ end
190
+
191
+ # @return [String]
192
+ def text
193
+ "#{@id} | #{@name} | #{@phone} | #{@email} | #{@address.text} | #{@geocode ? @geocode.text : 'N/A'}"
194
+ end
195
+ end
196
+ end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # The features (e.g. climate-controlled, inside-drive-up-access, outside-drive-up-access, etc) of a price.
5
+ class Features
6
+ # @param element [Nokogiri::XML::Element]
7
+ #
8
+ # @return [Features]
9
+ def self.parse(element:)
10
+ text = element.text
11
+
12
+ new(
13
+ climate_controlled: text.include?('Heated and Cooled'),
14
+ drive_up_access: text.include?('Drive Up Access'),
15
+ first_floor_access: text.include?('1st Floor')
16
+ )
17
+ end
18
+
19
+ # @param climate_controlled [Boolean]
20
+ # @param drive_up_access [Boolean]
21
+ # @param first_floor_access [Boolean]
22
+ def initialize(climate_controlled:, drive_up_access:, first_floor_access:)
23
+ @climate_controlled = climate_controlled
24
+ @drive_up_access = drive_up_access
25
+ @first_floor_access = first_floor_access
26
+ end
27
+
28
+ # @return [String]
29
+ def inspect
30
+ props = [
31
+ "climate_controlled=#{@climate_controlled}",
32
+ "drive_up_access=#{@drive_up_access}",
33
+ "first_floor_access=#{@first_floor_access}"
34
+ ]
35
+
36
+ "#<#{self.class.name} #{props.join(' ')}>"
37
+ end
38
+
39
+ # @return [String] e.g. ""
40
+ def id
41
+ [].tap do |ids|
42
+ ids << 'cc' if climate_controlled?
43
+ ids << 'dua' if drive_up_access?
44
+ ids << 'ffa' if first_floor_access?
45
+ end.join('-')
46
+ end
47
+
48
+ # @return [String] e.g. "Climate Controlled + First Floor Access"
49
+ def text
50
+ amenities.join(' + ')
51
+ end
52
+
53
+ # @return [Array<String>]
54
+ def amenities
55
+ [].tap do |amenities|
56
+ amenities << 'Climate Controlled' if climate_controlled?
57
+ amenities << 'Drive-Up Access' if drive_up_access?
58
+ amenities << 'First Floor Access' if first_floor_access?
59
+ end
60
+ end
61
+
62
+ # @return [Boolean]
63
+ def climate_controlled?
64
+ @climate_controlled
65
+ end
66
+
67
+ # @return [Boolean]
68
+ def drive_up_access?
69
+ @drive_up_access
70
+ end
71
+
72
+ # @return [Boolean]
73
+ def first_floor_access?
74
+ @first_floor_access
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # Raised for unexpected HTTP responses.
5
+ class FetchError < Error
6
+ # @param url [String]
7
+ # @param response [HTTP::Response]
8
+ def initialize(url:, response:)
9
+ super("url=#{url} status=#{response.status.inspect} body=#{String(response.body).inspect}")
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # The geocode (latitude + longitude) of a facility.
5
+ class Geocode
6
+ LATITUDE_REGEX = /\\u0022lat\\u0022:(?<latitude>[\+\-\d\.]+)/
7
+ LONGITUDE_REGEX = /\\u0022long\\u0022:(?<longitude>[\+\-\d\.]+)/
8
+
9
+ # @attribute [rw] latitude
10
+ # @return [Float]
11
+ attr_accessor :latitude
12
+
13
+ # @attribute [rw] longitude
14
+ # @return [Float]
15
+ attr_accessor :longitude
16
+
17
+ # @param data [Hash]
18
+ #
19
+ # @return [Geocode]
20
+ def self.parse(data:)
21
+ latitude = data[:latitude]
22
+ longitude = data[:longitude]
23
+
24
+ new(latitude:, longitude:)
25
+ end
26
+
27
+ # @param latitude [Float]
28
+ # @param longitude [Float]
29
+ def initialize(latitude:, longitude:)
30
+ @latitude = latitude
31
+ @longitude = longitude
32
+ end
33
+
34
+ # @return [String]
35
+ def inspect
36
+ props = [
37
+ "latitude=#{@latitude.inspect}",
38
+ "longitude=#{@longitude.inspect}"
39
+ ]
40
+ "#<#{self.class.name} #{props.join(' ')}>"
41
+ end
42
+
43
+ # @return [String]
44
+ def text
45
+ "#{@latitude},#{@longitude}"
46
+ end
47
+ end
48
+ end
data/lib/uhaul/link.rb ADDED
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # A link in a sitemap.
5
+ class Link
6
+ # @attribute [rw] loc
7
+ # @return [String]
8
+ attr_accessor :loc
9
+
10
+ # @param loc [String] the location this link points to
11
+ #   (Sitemap.parse passes the text of a sitemap entry's loc element)
12
+ def initialize(loc:)
13
+ @loc = loc
14
+ end
15
+
16
+ # @return [String]
17
+ def inspect
18
+ "#<#{self.class.name} loc=#{@loc.inspect}>"
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # The price (id + dimensions + rate) for a facility.
5
+ class Price
6
+ ID_REGEX = %r{(?<id>\d+)/(?:rent|reserve)/}
7
+ PRICE_SELECTOR = '[data-unit-size="small"],[data-unit-size="medium"],[data-unit-size="large"]'
8
+
9
+ # @attribute [rw] id
10
+ # @return [String]
11
+ attr_accessor :id
12
+
13
+ # @attribute [rw] dimensions
14
+ # @return [Dimensions]
15
+ attr_accessor :dimensions
16
+
17
+ # @attribute [rw] features
18
+ # @return [Features]
19
+ attr_accessor :features
20
+
21
+ # @attribute [rw] rates
22
+ # @return [Rates]
23
+ attr_accessor :rates
24
+
25
+ # @param facility_id [Integer]
26
+ #
27
+ # @return [Array<Price>]
28
+ def self.fetch(facility_id:)
29
+ url = "https://www.uhaul.com/facility-units/#{facility_id}"
30
+ data = Crawler.json(url:)['data']
31
+ return [] if data['error']
32
+
33
+ html = data['html']['units']
34
+ Nokogiri::HTML(html).css(PRICE_SELECTOR).map { |element| parse(element:) }
35
+ end
36
+
37
+ # @param id [String]
38
+ # @param dimensions [Dimensions]
39
+ # @param features [Features]
40
+ # @param rates [Rates]
41
+ def initialize(id:, dimensions:, features:, rates:)
42
+ @id = id
43
+ @dimensions = dimensions
44
+ @features = features
45
+ @rates = rates
46
+ end
47
+
48
+ # @return [String]
49
+ def inspect
50
+ props = [
51
+ "id=#{@id.inspect}",
52
+ "dimensions=#{@dimensions.inspect}",
53
+ "features=#{@features.inspect}",
54
+ "rates=#{@rates.inspect}"
55
+ ]
56
+ "#<#{self.class.name} #{props.join(' ')}>"
57
+ end
58
+
59
+ # @return [String] e.g. "123 | 5' × 5' (25 sqft) | $100 (street) / $90 (web)"
60
+ def text
61
+ "#{@id} | #{@dimensions.text} | #{@rates.text} | #{@features.text}"
62
+ end
63
+
64
+ # @param element [Nokogiri::XML::Element]
65
+ #
66
+ # @return [Price]
67
+ def self.parse(element:)
68
+ link = element.at_xpath(".//a[contains(text(), 'Rent')]|//a[contains(text(), 'Reserve')]")
69
+ dimensions = Dimensions.parse(element:)
70
+ features = Features.parse(element:)
71
+ rates = Rates.parse(element:)
72
+
73
+ id = link ? ID_REGEX.match(link['href'])[:id] : "#{dimensions.id}-#{features.id}"
74
+
75
+ new(id:, dimensions:, features:, rates:)
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # The rates (street + web) for a facility
5
+ class Rates
6
+ STREET_SELECTOR = '.part_item_old_price'
7
+ WEB_SELECTOR = '.part_item_price'
8
+ VALUE_REGEX = /(?<value>[\d\.]+)/
9
+
10
+ # @attribute [rw] street
11
+ # @return [Float, nil]
12
+ attr_accessor :street
13
+
14
+ # @attribute [rw] web
15
+ # @return [Float, nil]
16
+ attr_accessor :web
17
+
18
+ # @param element [Nokogiri::XML::Element]
19
+ #
20
+ # @return [Rates]
21
+ def self.parse(element:)
22
+ street = parse_value(element: element.at_css(STREET_SELECTOR))
23
+ web = parse_value(element: element.at_css(WEB_SELECTOR))
24
+
25
+ new(street: street || web, web: web || street)
26
+ end
27
+
28
+ # @param element [Nokogiri::XML::Element]
29
+ #
30
+ # @return [Float, nil]
31
+ def self.parse_value(element:)
32
+ return if element.nil?
33
+
34
+ match = VALUE_REGEX.match(element.text)
35
+ Float(match[:value]) if match
36
+ end
37
+
38
+ # @param street [Float, nil]
39
+ # @param web [Float, nil]
40
+ def initialize(street:, web:)
41
+ @street = street
42
+ @web = web
43
+ end
44
+
45
+ # @return [String]
46
+ def inspect
47
+ props = [
48
+ "street=#{@street.inspect}",
49
+ "web=#{@web.inspect}"
50
+ ]
51
+ "#<#{self.class.name} #{props.join(' ')}>"
52
+ end
53
+
54
+ # @return [String] e.g. "$80 (street) | $60 (web)"
55
+ def text
56
+ "$#{@street} (street) | $#{@web} (web)"
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ # A sitemap on uhaul.com.
5
+ class Sitemap
6
+ # @attribute [rw] links
7
+ # @return [Array<Link>]
8
+ attr_accessor :links
9
+
10
+ # @param document [Nokogiri::XML::Document]
11
+ #
12
+ # @return [Sitemap]
13
+ def self.parse(document:)
14
+ links = document.xpath('//xmlns:url').map do |url|
15
+ loc = url.at_xpath('xmlns:loc').text
16
+ Link.new(loc:)
17
+ end
18
+
19
+ new(links: links.filter { |link| link.loc.match(%r{/Self-Storage/.*/\d+}) })
20
+ end
21
+
22
+ # @param url [String]
23
+ #
24
+ # @return [Sitemap]
25
+ def self.fetch(url:)
26
+ document = Crawler.xml(url:)
27
+ parse(document:)
28
+ end
29
+
30
+ # @param links [Array<Link>]
31
+ def initialize(links:)
32
+ @links = links
33
+ end
34
+
35
+ # @return [String]
36
+ def inspect
37
+ "#<#{self.class.name} links=#{@links.inspect}>"
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UHaul
4
+ VERSION = '0.1.0'
5
+ end
data/lib/uhaul.rb ADDED
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'http'
4
+ require 'nokogiri'
5
+ require 'zeitwerk'
6
+
7
+ loader = Zeitwerk::Loader.for_gem
8
+ loader.inflector.inflect 'uhaul' => 'UHaul'
9
+ loader.inflector.inflect 'cli' => 'CLI'
10
+ loader.setup
11
+
12
+ # An interface for UHaul.
13
+ module UHaul
14
+ class Error < StandardError; end
15
+
16
+ # @return [Config]
17
+ def self.config
18
+ @config ||= Config.new
19
+ end
20
+
21
+ # @yield [config]
22
+ # @yieldparam config [Config]
23
+ def self.configure
24
+ yield config
25
+ end
26
+ end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uhaul
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kevin Sylvestre
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-12-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: http
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: optparse
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: zeitwerk
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Uses HTTP.rb to scrape uhaul.com.
84
+ email:
85
+ - kevin@ksylvest.com
86
+ executables:
87
+ - uhaul
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - Gemfile
92
+ - README.md
93
+ - bin/console
94
+ - bin/setup
95
+ - exe/uhaul
96
+ - lib/uhaul.rb
97
+ - lib/uhaul/address.rb
98
+ - lib/uhaul/cli.rb
99
+ - lib/uhaul/config.rb
100
+ - lib/uhaul/crawl.rb
101
+ - lib/uhaul/crawler.rb
102
+ - lib/uhaul/dimensions.rb
103
+ - lib/uhaul/facility.rb
104
+ - lib/uhaul/features.rb
105
+ - lib/uhaul/fetch_error.rb
106
+ - lib/uhaul/geocode.rb
107
+ - lib/uhaul/link.rb
108
+ - lib/uhaul/price.rb
109
+ - lib/uhaul/rates.rb
110
+ - lib/uhaul/sitemap.rb
111
+ - lib/uhaul/version.rb
112
+ homepage: https://github.com/ksylvest/uhaul
113
+ licenses:
114
+ - MIT
115
+ metadata:
116
+ rubygems_mfa_required: 'true'
117
+ homepage_uri: https://github.com/ksylvest/uhaul
118
+ source_code_uri: https://github.com/ksylvest/uhaul
119
+ changelog_uri: https://github.com/ksylvest/uhaul
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: 3.2.0
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubygems_version: 3.5.22
136
+ signing_key:
137
+ specification_version: 4
138
+ summary: A crawler for UHaul.
139
+ test_files: []