RubyGems - deadlist - Versions diffs - 1.0.0 - Mend

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +7 -0
data/bin/deadlist +4 -0
data/lib/deadlist/cli/client.rb +52 -0
data/lib/deadlist/cli/downloader.rb +19 -0
data/lib/deadlist/cli.rb +70 -0
data/lib/deadlist/models/show.rb +58 -0
data/lib/deadlist/models/track.rb +24 -0
data/lib/deadlist.rb +34 -0
data/lib/version.rb +3 -0
metadata +77 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 4b3a7882b393d327f4286af082de84e81d547b29b18000c9c67c29212a9ce918
+  data.tar.gz: 4f658efd33197c0e5f947f1f8a25b77001489781f23a5ca409fc425ee9611b04
+SHA512:
+  metadata.gz: bcb234db17a5a222e0ca5997f74adae2930d121c398e83a449714c72079bb23a425b3e4fcfd1a8b0365edbb49a74903de5f19dfe3396b3a6a492991288f83b93
+  data.tar.gz: 2b2e980f90adc2494b5730af7d7024090fcd3be8467184b419c24a8bd1e37e3bbf75e78ec63c35d2f89855e1ead5bc348cd0000288ea361d9ed6538efe9e457d

data/bin/deadlist ADDED Viewed

@@ -0,0 +1,4 @@
+#!/usr/bin/env ruby
+# Command-line entry point for the gem: loads the deadlist library and starts a run.
+require 'deadlist'
+# DeadList#run hands ARGV to a new CLI session.
+DeadList.new.run

data/lib/deadlist/cli/client.rb ADDED Viewed

@@ -0,0 +1,52 @@
+# The Client class manages HTML scraping and parsing for the CLI and other classes above it. Any HTML work should be handled here.
+class Client
+    # Returns a show_data object for helping in the creation of a new Show
+    def scrape_show_info(show_link)
+        doc = get_page_source(show_link)
+        track_divs = doc.css('div[itemprop="track"]')
+        show_data = {
+            date: extract_metadata(doc, itemprop: 'datePublished'),
+            location: extract_metadata(doc, label: 'Location'),
+            venue: extract_metadata(doc, label: 'Venue'),
+            transferred_by: extract_metadata(doc, label: 'Transferred by'),
+            duration: extract_metadata(doc, label: 'Run time'),
+            tracks: extract_track_data(track_divs)
+        }
+        return show_data
+    rescue => e
+        puts "\n❌ Data extraction failed: #{e.message}"
+    end
+    private
+    # Returns nokogiri-fied page HTML for use in scraping show info
+    def get_page_source(show_link)
+        return Nokogiri::HTML(HTTParty.get(show_link).body)
+    rescue => e
+        puts "\n❌ Scraping failed: #{e.message}"
+    end
+    # Handles finding of values via 'label' and 'itemprop' Xpath values
+    def extract_metadata(doc, label: nil, itemprop: nil)
+        if label
+            # For dt/dd metadata pairs
+            doc.xpath("//dt[normalize-space(text())='#{label}']/following-sibling::dd").first&.text&.strip
+        elsif itemprop
+            # For itemprop attributes
+            doc.xpath("//*[@itemprop='#{itemprop}']").first&.content&.strip
+        end
+    end
+    # Hunts through track-divs for data required to create Tracks
+    def extract_track_data(track_divs)
+        track_divs.each_with_index.map do |div, i|
+            {
+                pos: i + 1,
+                name: div.css('meta[itemprop="name"]').first&.[]('content'),
+                links: div.css('link[itemprop="associatedMedia"]').map { |link| link['href'] }
+            }
+        end
+    end
+end

data/lib/deadlist/cli/downloader.rb ADDED Viewed

@@ -0,0 +1,19 @@
+# A simple class to download files to a given directory. Expects details for the filename and a link.
+# One Downloader should be created / show being downloaded. Downloaders can run on seperate threads for getting many hows at once.
+class Downloader
+    def initialize(path, format)
+        @path = path
+        @format = format
+    end
+    # Goes to a link (assuming the format is already validated), and gets the file, saving with argument names.
+    def get(pos, name, link)
+        uri = URI.parse(link); raise ArgumentError, "Only HTTP(S) URLs allowed" unless uri.is_a?(URI::HTTP)
+        download = uri.open
+        filename = "#{@path}/#{pos} -- #{name}.#{@format}"
+        IO.copy_stream(download, filename)
+    rescue => e
+        puts "❌ Download failed: #{e.message}"
+    end
+end

data/lib/deadlist/cli.rb ADDED Viewed

@@ -0,0 +1,70 @@
+require_relative 'cli/client'
+require_relative 'cli/downloader'
+require_relative 'models/show'
+require_relative 'models/track'
+require 'fileutils'
+# The CLI is the 'session' created by the main class, managing arguments passed in and housing methods for scraping and downloading shows.
+class CLI
+    def initialize(version, args)
+        @version = version
+        @args = {}
+        @show = nil
+        startup_text
+        parse_arguments(args)
+    end
+    # Reads arguments passed at the command line and maps them to an instance object
+    def parse_arguments(args)
+        args.each do |arg|
+            key, value = arg.split('=')
+            @args[key.tr('--', '').to_sym] = value
+        end
+    end
+    # Creates new show object with link given populated with metadata and track details
+    def scrape_links
+        @show = Show.new(@args[:show])
+        puts "\n💿 #{@show.tracks.length} tracks found!"
+    rescue => e
+        puts "\n❌ Scraping failed: #{e.message}"
+    end
+    # Validates format isn't for test, and passes directory + format arguments to the download method of a Show
+    def download_show
+        download_format = @args[:format]
+        if download_format == "test"
+          puts "Test Download, skipping"
+        elsif @show.has_format?(download_format)
+            download_path = setup_directories(@show)
+            @show.download_tracks(download_path, download_format)
+        else
+            puts "\n❌ #{download_format} not found for this show! #{@show.tracks[0].available_formats} available"
+        end
+    end
+    private
+    # Deadlist starts with some friendly text
+    def startup_text
+        puts "\n\n"
+        puts '='*52
+        puts "🌹⚡️ One man gathers what another man spills... ⚡️🌹"
+        puts '='*52
+    end
+    # Configures directories that will be used by the downloader
+    def setup_directories(show, base_path = Dir.pwd)
+        # Create base shows directory
+        shows_dir = File.join(base_path, "shows")
+        FileUtils.mkdir_p(shows_dir)
+        # Create specific show directory
+        show_dir = File.join(shows_dir, show.name)
+        FileUtils.mkdir_p(show_dir)
+        return show_dir
+    end
+end

data/lib/deadlist/models/show.rb ADDED Viewed

@@ -0,0 +1,58 @@
+# Object to handle Show data and the array of Track objects to be used in downloading.
+class Show
+    attr_reader :name, :venue, :date, :location, :duration, :transferred_by, :tracks, :available_formats
+    def initialize(download_url)
+        @show_link = download_url
+        @name = nil
+        @date = nil
+        @location = nil
+        @venue = nil
+        @duration = nil
+        @transferred_by = nil
+        @available_formats = []
+        @tracks = nil
+        set_show_info
+    end
+    # Returns whether or not a given format is available for this show
+    def has_format?(requested_format)
+      @tracks[0].has_format?(requested_format)
+    end
+    # Initializes a Downloader and passes track details
+    def download_tracks(path, format)
+        dl = Downloader.new(path, format)
+        @tracks.each do |track|
+            track_link = track.url_for_format(format)
+            dl.get(track.pos, track.name, track_link)
+            puts "⚡️ #{track.pos} - #{track.name} downloaded successfully"
+        end
+    end
+    private
+    # On initialization, show variables are extracted from the HTML data scraped by the Client.
+    def set_show_info
+        show_data = Client.new.scrape_show_info(@show_link)
+        @date = show_data[:date]
+        @location = show_data[:location]
+        @venue = show_data[:venue]
+        @duration = show_data[:duration]
+        @transferred_by = show_data[:transferred_by]
+        @name = "#{show_data[:date]} - #{show_data[:venue]} - #{show_data[:location]}"
+        @tracks = set_tracks(show_data[:tracks])
+        puts "🌹💀 Downloading #{name}"
+    end
+    # Converts track lists to Track objects
+    def set_tracks(track_data)
+        @tracks = track_data.map { |track| Track.new(track) }
+    end
+end

data/lib/deadlist/models/track.rb ADDED Viewed

@@ -0,0 +1,24 @@
+class Track
+  attr_reader :pos, :name, :links
+  def initialize(track_data)
+    @pos = track_data[:pos]
+    @name = track_data[:name]
+    @links = track_data[:links]
+  end
+  # Returns formats available for a given track via the links
+  def available_formats
+    @available_formats ||= links.map { |url| File.extname(url).delete('.') }
+  end
+  # Based on the format argument, returns one link containing that format
+  def url_for_format(format)
+    links.find { |url| url.end_with?(".#{format}") }
+  end
+  # Returns boolean if a format exists for this Track
+  def has_format?(format)
+    available_formats.include?(format)
+  end
+end

data/lib/deadlist.rb ADDED Viewed

@@ -0,0 +1,34 @@
+require 'httparty'
+require 'nokogiri'
+require 'open-uri'
+require 'pry'
+require_relative 'deadlist/cli.rb'
+# Main DeadList class.
+class DeadList
+    HOSTNAME = 'https://www.archive.org/'
+    def initialize
+        @current_version = '1.0.0'
+        @hostname = HOSTNAME
+    end
+    def run
+        # Start a new CLI session
+        # In future this could be abstracted to pass the show link vs all args, so a 'session' is started per show.
+        session = CLI.new(@current_version, ARGV)
+        # Scrape links and metadata for given show
+        session.scrape_links
+        # Create folder with show date and begin track downloads if format matches
+        session.download_show
+    end
+end
+# Run DeadList
+if __FILE__ == $0
+  DeadList.new.run
+  puts "\n"
+end

data/lib/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module DeadList
+  VERSION = '1.0.0'
+end

metadata ADDED Viewed

@@ -0,0 +1,77 @@
+--- !ruby/object:Gem::Specification
+name: deadlist
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+platform: ruby
+authors:
+- nazwr
+bindir: bin
+cert_chain: []
+date: 1980-01-02 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: httparty
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.21'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.21'
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.10'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.10'
+description: A Ruby gem for downloading Grateful Dead concert recordings from the
+  Internet Archive
+email: nathan@azotiwright.com
+executables:
+- deadlist
+extensions: []
+extra_rdoc_files: []
+files:
+- bin/deadlist
+- lib/deadlist.rb
+- lib/deadlist/cli.rb
+- lib/deadlist/cli/client.rb
+- lib/deadlist/cli/downloader.rb
+- lib/deadlist/models/show.rb
+- lib/deadlist/models/track.rb
+- lib/version.rb
+homepage: https://github.com/yourusername/deadlist
+licenses:
+- MIT
+metadata: {}
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: 2.7.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.6.7
+specification_version: 4
+summary: Download Grateful Dead shows from archive.org
+test_files: []

deadlist 1.0.0