deadlist 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4b3a7882b393d327f4286af082de84e81d547b29b18000c9c67c29212a9ce918
4
+ data.tar.gz: 4f658efd33197c0e5f947f1f8a25b77001489781f23a5ca409fc425ee9611b04
5
+ SHA512:
6
+ metadata.gz: bcb234db17a5a222e0ca5997f74adae2930d121c398e83a449714c72079bb23a425b3e4fcfd1a8b0365edbb49a74903de5f19dfe3396b3a6a492991288f83b93
7
+ data.tar.gz: 2b2e980f90adc2494b5730af7d7024090fcd3be8467184b419c24a8bd1e37e3bbf75e78ec63c35d2f89855e1ead5bc348cd0000288ea361d9ed6538efe9e457d
data/bin/deadlist ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require 'deadlist'
3
+
4
+ DeadList.new.run # Build the application object and start a CLI session (arguments are read from ARGV inside DeadList#run)
@@ -0,0 +1,52 @@
1
# The Client class manages HTML scraping and parsing for the CLI and other classes above it.
# Any HTML work should be handled here.
#
# Failures are reported on stdout and then re-raised, so callers receive the real
# error instead of a nil that only blows up later with an unrelated NoMethodError.
class Client
  # Fetches a show page and returns a show_data Hash used to build a new Show.
  #
  # @param show_link [String] URL of the show page
  # @return [Hash] :date, :location, :venue, :transferred_by and :duration
  #   (String or nil when the page lacks the field) plus :tracks (Array of
  #   Hashes with :pos, :name and :links)
  # @raise [StandardError] whatever the fetch or the extraction raised
  def scrape_show_info(show_link)
    build_show_data(get_page_source(show_link))
  end

  private

  # Returns nokogiri-fied page HTML for use in scraping show info.
  # The failure is printed here and re-raised unchanged.
  def get_page_source(show_link)
    Nokogiri::HTML(HTTParty.get(show_link).body)
  rescue StandardError => e
    puts "\n❌ Scraping failed: #{e.message}"
    raise
  end

  # Collects every metadata field and the track list from a parsed page.
  def build_show_data(doc)
    {
      date: extract_metadata(doc, itemprop: 'datePublished'),
      location: extract_metadata(doc, label: 'Location'),
      venue: extract_metadata(doc, label: 'Venue'),
      transferred_by: extract_metadata(doc, label: 'Transferred by'),
      duration: extract_metadata(doc, label: 'Run time'),
      tracks: extract_track_data(doc.css('div[itemprop="track"]'))
    }
  rescue StandardError => e
    puts "\n❌ Data extraction failed: #{e.message}"
    raise
  end

  # Handles finding of values via 'label' and 'itemprop' XPath lookups.
  # `label` takes precedence when both are given; returns nil when neither is
  # given or when nothing on the page matches.
  def extract_metadata(doc, label: nil, itemprop: nil)
    if label
      # For dt/dd metadata pairs
      doc.xpath("//dt[normalize-space(text())='#{label}']/following-sibling::dd").first&.text&.strip
    elsif itemprop
      # For itemprop attributes
      doc.xpath("//*[@itemprop='#{itemprop}']").first&.content&.strip
    end
  end

  # Hunts through track-divs for the data required to create Tracks.
  # Positions are 1-based and follow the order of the divs.
  def extract_track_data(track_divs)
    track_divs.each_with_index.map do |div, index|
      media_links = div.css('link[itemprop="associatedMedia"]').map { |link| link['href'] }

      {
        pos: index + 1,
        name: div.css('meta[itemprop="name"]').first&.[]('content'),
        # A <link> without an href yields nil; drop it so consumers only see URLs
        links: media_links.compact
      }
    end
  end
end
@@ -0,0 +1,19 @@
1
# A simple class to download files to a given directory. Expects details for the filename and a link.
# One Downloader should be created per show being downloaded. Downloaders can run on separate
# threads for getting many shows at once.
class Downloader
  # Path separators are not allowed inside a single file name.
  PATH_SEPARATORS = %r{[/\\]}.freeze

  # @param path [String] directory the files are written to
  # @param format [String] file extension appended to every saved file
  def initialize(path, format)
    @path = path
    @format = format
  end

  # Goes to a link (assuming the format is already validated), and gets the file,
  # saving it as "<pos> -- <name>.<format>" inside the download directory.
  #
  # @param pos [Integer, String] track position, used as file name prefix
  # @param name [String] track name; path separators are replaced with "_"
  # @param link [String] HTTP(S) URL of the file
  # @return [Integer, nil] number of bytes written, or nil when the download
  #   failed (the failure is reported on stdout, never raised)
  def get(pos, name, link)
    uri = URI.parse(link)
    raise ArgumentError, "Only HTTP(S) URLs allowed" unless uri.is_a?(URI::HTTP)

    # Block form so the remote stream is closed as soon as the copy finishes
    uri.open { |remote| IO.copy_stream(remote, target_path(pos, name)) }
  rescue StandardError => e
    puts "❌ Download failed: #{e.message}"
    nil
  end

  private

  # Builds the destination path. Track names come from scraped HTML, so any
  # path separator is neutralised to keep the file inside @path.
  def target_path(pos, name)
    safe_name = name.to_s.gsub(PATH_SEPARATORS, '_')
    File.join(@path, "#{pos} -- #{safe_name}.#{@format}")
  end
end
@@ -0,0 +1,70 @@
1
+ require_relative 'cli/client'
2
+ require_relative 'cli/downloader'
3
+ require_relative 'models/show'
4
+ require_relative 'models/track'
5
+ require 'fileutils'
6
+
7
# The CLI is the 'session' created by the main class, managing arguments passed in and
# housing methods for scraping and downloading shows.
class CLI
  # Prints the startup banner and parses the command line.
  #
  # @param version [String] application version (stored in @version)
  # @param args [Array<String>] raw command-line arguments in "--key=value" form
  def initialize(version, args)
    @version = version
    @args = {}
    @show = nil

    startup_text
    parse_arguments(args)
  end

  # Reads arguments passed at the command line and maps them to an instance object.
  # Hyphens are removed from the key, so "--format=mp3" is stored as { format: "mp3" }.
  # An argument without "=" is stored with a nil value; empty arguments are ignored.
  def parse_arguments(args)
    args.each do |arg|
      # Split on the first "=" only: values such as URLs may contain "=" themselves
      key, value = arg.split('=', 2)
      name = key.to_s.delete('-')
      next if name.empty?

      @args[name.to_sym] = value
    end
  end

  # Creates new show object with link given populated with metadata and track details.
  # On failure the error is printed and @show stays nil.
  def scrape_links
    @show = Show.new(@args[:show])
    puts "\n💿 #{@show.tracks.length} tracks found!"
  rescue StandardError => e
    puts "\n❌ Scraping failed: #{e.message}"
  end

  # Validates format isn't for test, and passes directory + format arguments to the
  # download method of a Show. Prints a message instead of raising when there is no
  # show (scraping failed) or the show has no tracks.
  def download_show
    download_format = @args[:format]

    if download_format == "test"
      puts "Test Download, skipping"
    elsif @show.nil?
      puts "\n❌ No show loaded, nothing to download"
    elsif Array(@show.tracks).empty?
      puts "\n❌ No tracks found for this show, nothing to download"
    elsif @show.has_format?(download_format)
      download_path = setup_directories(@show)
      @show.download_tracks(download_path, download_format)
    else
      puts "\n❌ #{download_format} not found for this show! #{@show.tracks.first.available_formats} available"
    end
  end

  private

  # Deadlist starts with some friendly text
  def startup_text
    puts "\n\n"
    puts '=' * 52
    puts "🌹⚡️ One man gathers what another man spills... ⚡️🌹"
    puts '=' * 52
  end

  # Configures directories that will be used by the downloader:
  # <base_path>/shows/<show name>. Both levels are created when missing.
  #
  # @return [String] path of the directory for this show
  def setup_directories(show, base_path = Dir.pwd)
    # Create base shows directory
    shows_dir = File.join(base_path, "shows")
    FileUtils.mkdir_p(shows_dir)

    # Create specific show directory
    show_dir = File.join(shows_dir, show.name)
    FileUtils.mkdir_p(show_dir)

    show_dir
  end
end
@@ -0,0 +1,58 @@
1
# Object to handle Show data and the array of Track objects to be used in downloading.
class Show
  attr_reader :name, :venue, :date, :location, :duration, :transferred_by, :tracks, :available_formats

  # Scrapes the given show page immediately and populates every attribute.
  #
  # @param download_url [String] link to the show page
  # @raise [StandardError] when the page could not be scraped
  def initialize(download_url)
    @show_link = download_url
    @name = nil
    @date = nil
    @location = nil
    @venue = nil
    @duration = nil
    @transferred_by = nil
    @available_formats = []
    @tracks = nil

    set_show_info
  end

  # Returns whether or not a given format is available for this show.
  # The first track is used as the reference; a show without tracks has no formats.
  def has_format?(requested_format)
    first_track = @tracks.to_a.first
    first_track ? first_track.has_format?(requested_format) : false
  end

  # Initializes a Downloader and passes track details.
  # Success is only announced when the downloader reports a result; a failed
  # download returns nil (the Downloader prints its own error message).
  def download_tracks(path, format)
    dl = Downloader.new(path, format)

    @tracks.each do |track|
      track_link = track.url_for_format(format)
      next unless dl.get(track.pos, track.name, track_link)

      puts "⚡️ #{track.pos} - #{track.name} downloaded successfully"
    end
  end

  private

  # On initialization, show variables are extracted from the HTML data scraped by the Client.
  def set_show_info
    show_data = Client.new.scrape_show_info(@show_link)
    # Fail with a message that names the link instead of a NoMethodError on nil
    raise "No show data could be scraped from #{@show_link}" if show_data.nil?

    @date = show_data[:date]
    @location = show_data[:location]
    @venue = show_data[:venue]
    @duration = show_data[:duration]
    @transferred_by = show_data[:transferred_by]
    @name = "#{show_data[:date]} - #{show_data[:venue]} - #{show_data[:location]}"
    set_tracks(show_data[:tracks])
    @available_formats = @tracks.flat_map(&:available_formats).uniq

    puts "🌹💀 Downloading #{name}"
  end

  # Converts track lists to Track objects; a missing list becomes an empty one.
  def set_tracks(track_data)
    @tracks = (track_data || []).map { |track| Track.new(track) }
  end
end
@@ -0,0 +1,24 @@
1
# A single track of a show: its position, its name and the download links
# (one URL per available file format).
class Track
  attr_reader :pos, :name, :links

  # @param track_data [Hash] with :pos, :name and :links
  def initialize(track_data)
    @pos = track_data[:pos]
    @name = track_data[:name]
    # Links are scraped hrefs and may be missing altogether or contain nils;
    # normalise so the format helpers never hit a nil URL.
    @links = Array(track_data[:links]).compact
  end

  # Returns formats available for a given track via the links
  # (file extensions without the dot, each listed once).
  def available_formats
    @available_formats ||= links.map { |url| File.extname(url).delete('.') }.uniq
  end

  # Based on the format argument, returns one link containing that format,
  # or nil when no link ends in ".<format>".
  def url_for_format(format)
    links.find { |url| url.end_with?(".#{format}") }
  end

  # Returns boolean if a format exists for this Track
  def has_format?(format)
    available_formats.include?(format)
  end
end
data/lib/deadlist.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'httparty'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+ require 'pry'
5
+
6
+ require_relative 'deadlist/cli.rb'
7
+
8
# Main DeadList class. Wires a CLI session together: parse arguments, scrape
# the requested show, then download it.
class DeadList
  HOSTNAME = 'https://www.archive.org/'.freeze

  def initialize
    @current_version = '1.0.0'
    @hostname = HOSTNAME
  end

  # Runs one CLI session.
  #
  # @param args [Array<String>] command-line style arguments; defaults to the
  #   process arguments so existing callers of `run` behave as before, while
  #   tests and embedding code can pass their own list
  # @return whatever CLI#download_show returns
  def run(args = ARGV)
    # Start a new CLI session
    # In future this could be abstracted to pass the show link vs all args, so a 'session' is started per show.
    session = CLI.new(@current_version, args)

    # Scrape links and metadata for given show
    session.scrape_links

    # Create folder with show date and begin track downloads if format matches
    session.download_show
  end
end
29
+
30
+ # Run DeadList only when this file is executed directly (not when it is required as a library)
31
+ if __FILE__ == $0
32
+ DeadList.new.run
33
+ puts "\n"
34
+ end
data/lib/version.rb ADDED
@@ -0,0 +1,3 @@
1
# Version constant for the gem.
# DeadList is reopened as a class here because lib/deadlist.rb defines
# `class DeadList`; declaring it as a module would raise a TypeError as soon
# as both files are loaded in the same process.
class DeadList
  VERSION = '1.0.0'.freeze
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: deadlist
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - nazwr
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: httparty
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '0.21'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '0.21'
26
+ - !ruby/object:Gem::Dependency
27
+ name: nokogiri
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.10'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.10'
40
+ description: A Ruby gem for downloading Grateful Dead concert recordings from the
41
+ Internet Archive
42
+ email: nathan@azotiwright.com
43
+ executables:
44
+ - deadlist
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - bin/deadlist
49
+ - lib/deadlist.rb
50
+ - lib/deadlist/cli.rb
51
+ - lib/deadlist/cli/client.rb
52
+ - lib/deadlist/cli/downloader.rb
53
+ - lib/deadlist/models/show.rb
54
+ - lib/deadlist/models/track.rb
55
+ - lib/version.rb
56
+ homepage: https://github.com/yourusername/deadlist
57
+ licenses:
58
+ - MIT
59
+ metadata: {}
60
+ rdoc_options: []
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 2.7.0
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubygems_version: 3.6.7
75
+ specification_version: 4
76
+ summary: Download Grateful Dead shows from archive.org
77
+ test_files: []