deadlist 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/deadlist +4 -0
- data/lib/deadlist/cli/client.rb +52 -0
- data/lib/deadlist/cli/downloader.rb +19 -0
- data/lib/deadlist/cli.rb +70 -0
- data/lib/deadlist/models/show.rb +58 -0
- data/lib/deadlist/models/track.rb +24 -0
- data/lib/deadlist.rb +34 -0
- data/lib/version.rb +3 -0
- metadata +77 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4b3a7882b393d327f4286af082de84e81d547b29b18000c9c67c29212a9ce918
|
4
|
+
data.tar.gz: 4f658efd33197c0e5f947f1f8a25b77001489781f23a5ca409fc425ee9611b04
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bcb234db17a5a222e0ca5997f74adae2930d121c398e83a449714c72079bb23a425b3e4fcfd1a8b0365edbb49a74903de5f19dfe3396b3a6a492991288f83b93
|
7
|
+
data.tar.gz: 2b2e980f90adc2494b5730af7d7024090fcd3be8467184b419c24a8bd1e37e3bbf75e78ec63c35d2f89855e1ead5bc348cd0000288ea361d9ed6538efe9e457d
|
data/bin/deadlist
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# The Client class manages HTML scraping and parsing for the CLI and other classes above it.
# Any HTML work should be handled here.
class Client
  # Scrapes a show page and returns the data needed to create a new Show.
  #
  # show_link - URL of the show page to fetch.
  #
  # Returns a Hash with :date, :location, :venue, :transferred_by, :duration and :tracks keys,
  # or nil when the page could not be fetched or the extraction raised (the failure is printed).
  def scrape_show_info(show_link)
    doc = get_page_source(show_link)
    # get_page_source has already reported the problem; calling doc.css on nil would only
    # add a second, misleading "undefined method" message.
    return nil if doc.nil?

    track_divs = doc.css('div[itemprop="track"]')

    {
      date: extract_metadata(doc, itemprop: 'datePublished'),
      location: extract_metadata(doc, label: 'Location'),
      venue: extract_metadata(doc, label: 'Venue'),
      transferred_by: extract_metadata(doc, label: 'Transferred by'),
      duration: extract_metadata(doc, label: 'Run time'),
      tracks: extract_track_data(track_divs)
    }
  rescue => e
    puts "\nā Data extraction failed: #{e.message}"
    nil
  end

  private

  # Fetches show_link and returns the parsed (Nokogiri) document used for scraping show info.
  # Returns nil after printing the error when the request or the parse raises.
  def get_page_source(show_link)
    Nokogiri::HTML(HTTParty.get(show_link).body)
  rescue => e
    puts "\nā Scraping failed: #{e.message}"
    nil
  end

  # Looks up a single metadata value, either by the text of a <dt> label (its following <dd>
  # is used) or by an itemprop attribute. Returns the stripped text of the first match, or nil
  # when nothing matches or neither keyword is given.
  def extract_metadata(doc, label: nil, itemprop: nil)
    if label
      # For dt/dd metadata pairs
      doc.xpath("//dt[normalize-space(text())='#{label}']/following-sibling::dd").first&.text&.strip
    elsif itemprop
      # For itemprop attributes
      doc.xpath("//*[@itemprop='#{itemprop}']").first&.content&.strip
    end
  end

  # Hunts through track-divs for the data required to create Tracks.
  # Returns one Hash per div with a 1-based :pos, the track :name and the list of media :links.
  def extract_track_data(track_divs)
    track_divs.each_with_index.map do |div, i|
      {
        pos: i + 1,
        name: div.css('meta[itemprop="name"]').first&.[]('content'),
        links: div.css('link[itemprop="associatedMedia"]').map { |link| link['href'] }
      }
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# A simple class to download files to a given directory. Expects details for the filename and a link.
# One Downloader should be created per show being downloaded. Downloaders can run on separate
# threads for getting many shows at once.
class Downloader
  # path   - directory the downloaded files are written into.
  # format - file extension appended to every saved file (e.g. "mp3").
  def initialize(path, format)
    @path = path
    @format = format
  end

  # Goes to a link (assuming the format is already validated), and gets the file, saving it as
  # "<pos> -- <name>.<format>" inside the configured directory.
  #
  # pos  - track position used as the filename prefix.
  # name - track title; path separators in it are replaced so it stays a single filename.
  # link - HTTP(S) URL of the file.
  #
  # Returns the number of bytes written, or nil when the link is rejected or the download
  # fails (the error message is printed rather than raised).
  def get(pos, name, link)
    uri = URI.parse(link)
    raise ArgumentError, "Only HTTP(S) URLs allowed" unless uri.is_a?(URI::HTTP)

    filename = "#{@path}/#{pos} -- #{safe_name(name)}.#{@format}"
    # Block form so open-uri closes (and cleans up) the downloaded stream once it is copied.
    uri.open { |download| IO.copy_stream(download, filename) }
  rescue => e
    puts "ā Download failed: #{e.message}"
    nil
  end

  private

  # Track titles come from scraped HTML; replace path separators and NUL bytes so a title
  # such as "Scarlet / Fire" cannot point outside (or into a missing subfolder of) @path.
  def safe_name(name)
    name.to_s.gsub(%r{[/\\\x00]}, '_')
  end
end
|
data/lib/deadlist/cli.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require_relative 'cli/client'
|
2
|
+
require_relative 'cli/downloader'
|
3
|
+
require_relative 'models/show'
|
4
|
+
require_relative 'models/track'
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
# The CLI is the 'session' created by the main class, managing arguments passed in and housing
# methods for scraping and downloading shows.
class CLI
  # version - version string of the running program.
  # args    - command line arguments in "--key=value" form (typically ARGV).
  def initialize(version, args)
    @version = version
    @args = {}
    @show = nil

    startup_text
    parse_arguments(args)
  end

  # Reads arguments passed at the command line and maps them into @args, keyed by the
  # argument name as a Symbol (e.g. "--format=mp3" becomes { format: "mp3" }).
  def parse_arguments(args)
    args.each do |arg|
      # Limit of 2 keeps any further '=' inside the value (show URLs may carry query strings).
      key, value = arg.split('=', 2)
      next if key.nil? || key.empty?

      # Strip only the leading dashes; hyphens inside the name are part of the key.
      @args[key.sub(/\A-+/, '').to_sym] = value
    end
  end

  # Creates new show object with link given populated with metadata and track details.
  # Any error raised while building the Show is printed instead of propagated.
  def scrape_links
    @show = Show.new(@args[:show])
    puts "\nšæ #{@show.tracks.length} tracks found!"
  rescue => e
    puts "\nā Scraping failed: #{e.message}"
  end

  # Validates format isn't for test, and passes directory + format arguments to the download
  # method of a Show. Prints a message instead of downloading when no show was scraped or the
  # requested format is not available.
  def download_show
    download_format = @args[:format]

    if download_format == "test"
      puts "Test Download, skipping"
    elsif @show.nil?
      # scrape_links failed (or was never run), so there is nothing to download.
      puts "\nNo show was loaded, nothing to download"
    elsif @show.has_format?(download_format)
      download_path = setup_directories(@show)
      @show.download_tracks(download_path, download_format)
    else
      # A show without tracks has no first track to ask for formats.
      formats = Array(@show.tracks).first&.available_formats || []
      puts "\nā #{download_format} not found for this show! #{formats} available"
    end
  end

  private

  # Deadlist starts with some friendly text
  def startup_text
    puts "\n\n"
    puts '=' * 52
    puts "š¹ā”ļø One man gathers what another man spills... ā”ļøš¹"
    puts '=' * 52
  end

  # Configures directories that will be used by the downloader: "<base_path>/shows/<show name>".
  # Both levels are created when missing. Returns the path of the show directory.
  def setup_directories(show, base_path = Dir.pwd)
    # Create base shows directory
    shows_dir = File.join(base_path, "shows")
    FileUtils.mkdir_p(shows_dir)

    # Create specific show directory
    show_dir = File.join(shows_dir, show.name)
    FileUtils.mkdir_p(show_dir)

    show_dir
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Object to handle Show data and the array of Track objects to be used in downloading.
class Show
  attr_reader :name, :venue, :date, :location, :duration, :transferred_by, :tracks, :available_formats

  # download_url - URL of the show page; it is scraped immediately to populate the show.
  #
  # Raises RuntimeError when no data could be scraped for the link.
  def initialize(download_url)
    @show_link = download_url
    @name = nil
    @date = nil
    @location = nil
    @venue = nil
    @duration = nil
    @transferred_by = nil
    @available_formats = []
    @tracks = []

    set_show_info
  end

  # Returns whether or not a given format is available for this show, judged by its first
  # track. A show without tracks has no formats.
  def has_format?(requested_format)
    first_track = @tracks.first
    return false if first_track.nil?

    first_track.has_format?(requested_format)
  end

  # Initializes a Downloader and passes it the details of every track that has a link for
  # the requested format. Success is only reported for downloads that actually completed.
  def download_tracks(path, format)
    dl = Downloader.new(path, format)

    @tracks.each do |track|
      track_link = track.url_for_format(format)

      if track_link.nil?
        puts "Skipping #{track.pos} - #{track.name}: no #{format} link available"
        next
      end

      # Downloader#get prints its own error and returns nil when the download fails.
      next unless dl.get(track.pos, track.name, track_link)

      puts "ā”ļø #{track.pos} - #{track.name} downloaded successfully"
    end
  end

  private

  # On initialization, show variables are extracted from the HTML data scraped by the Client.
  def set_show_info
    show_data = Client.new.scrape_show_info(@show_link)
    # The Client returns nil when scraping fails; fail with a readable message instead of a
    # NoMethodError from indexing nil.
    raise "No show data could be scraped from #{@show_link}" if show_data.nil?

    @date = show_data[:date]
    @location = show_data[:location]
    @venue = show_data[:venue]
    @duration = show_data[:duration]
    @transferred_by = show_data[:transferred_by]
    @name = "#{show_data[:date]} - #{show_data[:venue]} - #{show_data[:location]}"
    set_tracks(show_data[:tracks])
    # Mirrors has_format?: the first track's formats stand in for the whole show.
    @available_formats = @tracks.first&.available_formats || []

    puts "š¹š Downloading #{name}"
  end

  # Converts track lists to Track objects and stores them in @tracks (empty when no track
  # data was scraped).
  def set_tracks(track_data)
    @tracks = (track_data || []).map { |track| Track.new(track) }
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# A single track of a show: its position in the track list, its name and the download links
# scraped for it (one link per available file format).
class Track
  attr_reader :pos, :name, :links

  # track_data - Hash with :pos, :name and :links keys.
  def initialize(track_data)
    @pos = track_data[:pos]
    @name = track_data[:name]
    # The link list is scraped data: tolerate a missing list and drop nil entries so the
    # format helpers below never call String methods on nil.
    @links = Array(track_data[:links]).compact
  end

  # Returns formats available for a given track via the links, i.e. the file extension of
  # every link without the leading dot. The result is computed once and cached.
  def available_formats
    @available_formats ||= links.map { |url| File.extname(url).delete('.') }
  end

  # Based on the format argument, returns one link containing that format: the first link
  # ending in ".<format>", or nil when there is none.
  def url_for_format(format)
    links.find { |url| url.end_with?(".#{format}") }
  end

  # Returns boolean if a format exists for this Track
  def has_format?(format)
    available_formats.include?(format)
  end
end
|
data/lib/deadlist.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'httparty'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'pry'
|
5
|
+
|
6
|
+
require_relative 'deadlist/cli.rb'
|
7
|
+
|
8
|
+
# Main DeadList class. Starts a CLI session from the command line arguments and drives it
# through scraping and downloading.
class DeadList
  # Frozen so the shared constant cannot be mutated in place.
  HOSTNAME = 'https://www.archive.org/'.freeze

  def initialize
    @current_version = '1.0.0'
    @hostname = HOSTNAME
  end

  # Runs one session: parse ARGV, scrape the requested show, then download it.
  # Returns whatever CLI#download_show returns.
  def run
    # Start a new CLI session
    # In future this could be abstracted to pass the show link vs all args, so a 'session' is started per show.
    session = CLI.new(@current_version, ARGV)

    # Scrape links and metadata for given show
    session.scrape_links

    # Create folder with show date and begin track downloads if format matches
    session.download_show
  end
end
|
29
|
+
|
30
|
+
# Run DeadList only when this file is the program being executed, not when it is required.
if $PROGRAM_NAME == __FILE__
  DeadList.new.run
  puts "\n"
end
|
data/lib/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: deadlist
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- nazwr
|
8
|
+
bindir: bin
|
9
|
+
cert_chain: []
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: httparty
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '0.21'
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '0.21'
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: nokogiri
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - "~>"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '1.10'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '1.10'
|
40
|
+
description: A Ruby gem for downloading Grateful Dead concert recordings from the
|
41
|
+
Internet Archive
|
42
|
+
email: nathan@azotiwright.com
|
43
|
+
executables:
|
44
|
+
- deadlist
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- bin/deadlist
|
49
|
+
- lib/deadlist.rb
|
50
|
+
- lib/deadlist/cli.rb
|
51
|
+
- lib/deadlist/cli/client.rb
|
52
|
+
- lib/deadlist/cli/downloader.rb
|
53
|
+
- lib/deadlist/models/show.rb
|
54
|
+
- lib/deadlist/models/track.rb
|
55
|
+
- lib/version.rb
|
56
|
+
homepage: https://github.com/yourusername/deadlist
|
57
|
+
licenses:
|
58
|
+
- MIT
|
59
|
+
metadata: {}
|
60
|
+
rdoc_options: []
|
61
|
+
require_paths:
|
62
|
+
- lib
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: 2.7.0
|
68
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
73
|
+
requirements: []
|
74
|
+
rubygems_version: 3.6.7
|
75
|
+
specification_version: 4
|
76
|
+
summary: Download Grateful Dead shows from archive.org
|
77
|
+
test_files: []
|