ocremix_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7877e9a0233d31be28de858799f82b05b89dd112
4
+ data.tar.gz: 9c1d128a96c48ad365834e9956596889623725f5
5
+ SHA512:
6
+ metadata.gz: 74ababfbc361fb7bf30191518961a8eeabad0c3d0932d7cf634b0ed0b3344bd31f269c6d2810962fa4880db22e2bf4757ded2047560b11ba9bcb7dba931ca01d
7
+ data.tar.gz: 788a06eb92b852b92ba34cfbbd1568887b77f885b67e46b85834961136a3c5e6205cabf1b812b25dded0f51ef97e411b32e5cdcc7b584ee6dd1f333a28a9e9cb
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.4
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in ocremix_parser.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,17 @@
1
+ # OcremixParser
2
+
3
+ This gem downloads the top 10 tracks from ocremix.org to wherever you configure it to via `config/application.yml`.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+     gem 'ocremix_parser'
10
+ Or install it yourself as:
11
+
12
+ $ gem install ocremix_parser
13
+
14
+ ## Usage
15
+
16
+ For now, don't actually install the gem; just run `./bin/console`, which will begin the download process.
17
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Register the `spec` task and make it what a bare `rake` runs.
RSpec::Core::RakeTask.new(:spec)

task default: :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

require "bundler/setup"
require "ocremix_parser"

# Development entry point: instead of opening a console, this script points
# HOME at a scratch directory and immediately starts the download run.

puts "ONLY USING TMP/ BUILT IN CONFIG FILE"

# MixGrabber builds its config path from ENV['HOME'], so overriding it here
# keeps the run away from the real user's config file.
ENV['HOME'] = "/tmp/ocremix_parser_test_home"

OcremixParser::MixGrabber.new.download_ten_latest_mixes_via_web_scrapes
14
+
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,4 @@
1
+ rss_feed: "http://ocremix.org/feeds/ten20/"
2
+ file_mirrors: "http://ocr.blueblue.fr/files/music/remixes http://iterations.org/files/music/remixes http://ocrmirror.org/files/music/remixes"
3
+
4
+ download_directory: "/storage/storage/ocremix"
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby

require 'ocremix_parser'

# TODO: add thor and use interface like: list, start

# Command-line entry point: build a grabber and download the latest mixes
# by scraping each track page for its mirror links.
OcremixParser::MixGrabber.new.download_ten_latest_mixes_via_web_scrapes
10
+
@@ -0,0 +1,118 @@
1
+ require 'i18n'
2
+ require 'figaro'
3
+ require 'open-uri'
4
+ require 'simple-rss'
5
+ require 'nokogiri'
6
+ require 'fileutils'
7
+
8
+ require "ocremix_parser/version"
9
+
10
module OcremixParser

  # Downloads the tracks listed in the ocremix.org RSS feed into the
  # directory named by the `download_directory` config value.
  #
  # Configuration is read through Figaro from ~/.config/ocremix_parser.yml,
  # which is seeded from the gem's bundled config/application.yml on first use.
  class MixGrabber

    # Browser-like user agent sent with every request (the original author's
    # firewall rejected the default wget agent).  Stored WITHOUT surrounding
    # quotes so it is valid both as an HTTP header and as a wget argument.
    USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0'.freeze

    # Loads the configuration and resets the download/skip counters.
    #
    # @raise [KeyError] if the loaded configuration has no `file_mirrors` entry
    def initialize
      prepare_config_file!

      @user_agent = USER_AGENT
      # `file_mirrors` is a space-separated list; the last entry is used.
      @mirror = ENV.fetch('file_mirrors').split(" ").last

      @skips = 0
      @downloads = 0
    end

    # Ensures the per-user config file exists (copying the bundled default
    # when it does not) and loads it into ENV via Figaro.
    def prepare_config_file!
      bundled_config_path = File.expand_path('../../config/application.yml', __FILE__)
      config_path = "#{ENV['HOME']}/.config/ocremix_parser.yml"

      # create the config file unless it exists
      # (File.exist? rather than File.exists?, which was removed in Ruby 3.2)
      unless File.exist?(config_path)
        FileUtils.mkdir_p File.dirname(config_path)
        FileUtils.cp bundled_config_path, config_path
      end

      Figaro.application.path = config_path
      Figaro.load
    end

    # Scrapes the track page of every feed entry for its mirror links and
    # downloads the first link of each page.  Prints a summary when done.
    def download_ten_latest_mixes_via_web_scrapes
      query_from_top_ten_rss_feed(:link).each do |page_url|
        mp3_url = convert_track_page_to_mp3_links(page_url).first

        # A page layout change can leave us with no links; skip the entry
        # instead of crashing on File.basename(nil).
        if mp3_url.nil?
          warn "No download links found on #{page_url}"
          next
        end

        wget_download(mp3_url, destination_path_for(File.basename(mp3_url)))
      end

      puts "#{@downloads} downloads happened, #{@skips} skips"
    end

    # Alternative strategy: derive each file name from the feed entry title
    # and fetch it straight from the configured mirror.
    def download_ten_latest_remixes_via_filename_guessing
      # pull down the latest file_names from the rss feed
      file_names = query_from_top_ten_rss_feed.map { |title| convert_title_to_filename(title) }

      file_names.each do |file_name|
        wget_download("#{@mirror}/#{file_name}", destination_path_for(file_name))
      end
    end

    # Downloads +url+ to +destination_path+ with wget (which shows a progress
    # bar), unless the destination already exists.
    #
    # wget is invoked with an argument list rather than through a shell, so
    # URLs and paths containing spaces or shell metacharacters are safe.
    # Because `wget -O` creates the output file even when the transfer fails,
    # a failed download is removed again so that the next run retries it.
    #
    # @param url [String] file to fetch
    # @param destination_path [String] where to store it
    # @return [Boolean, nil] result of Kernel#system, or nil when skipped
    def wget_download(url, destination_path)
      if File.exist?(destination_path)
        @skips += 1
        return
      end

      # Download with a progress bar...
      # Wget user agent required (I have an odd firewall...)!
      puts "wget #{url} -O #{destination_path} -U \"#{@user_agent}\""
      succeeded = system('wget', url, '-O', destination_path, '-U', @user_agent)

      if succeeded
        @downloads += 1
      else
        warn "wget failed for #{url}"
        FileUtils.rm_f(destination_path)
      end

      succeeded
    end

    # Turns a feed title into the file name convention used by the mirrors:
    # spaces become underscores, every character other than ASCII letters,
    # digits, "_" and "-" is dropped, and "_OC_ReMix.mp3" is appended.
    #
    # @param string [String] track title
    # @return [String] guessed file name
    def convert_title_to_filename(string)
      tag = "_OC_ReMix.mp3"
      ascii_title = convert_foriegn_characters_to_en(string)

      ascii_title.tr(" ", "_").gsub(/[^0-9A-Za-z_\-]/, "") + tag
    end

    # Transliterates +string+ with I18n using the :en locale.
    # Works on a copy, so the caller's string (which may be frozen) keeps
    # its original encoding.
    #
    # @param string [String]
    # @return [String]
    def convert_foriegn_characters_to_en(string)
      I18n.available_locales = [:en]
      I18n.transliterate(string.dup.force_encoding('utf-8'))
    end

    # Fetches the RSS feed named by the `rss_feed` config value and returns
    # one attribute of every entry.
    #
    # @param key_to_query [Symbol] entry attribute to collect (e.g. :title, :link)
    # @return [Array]
    # @raise [KeyError] if `rss_feed` is not configured
    def query_from_top_ten_rss_feed(key_to_query = :title)
      # URI#open comes from open-uri and works on old and new Rubies alike,
      # unlike Kernel#open with a URL (removed in Ruby 3.0).
      feed_xml = URI.parse(ENV.fetch('rss_feed')).open(&:read)
      SimpleRSS.parse(feed_xml).entries.map { |entry| entry[key_to_query] }
    end

    # Not Used... handy if wget doesn't exist though...
    #
    # Downloads without a progress bar, using open-uri.  The remote side is
    # opened first so that a failed request does not leave an empty file.
    #
    # @param destination_path [String] local file to write
    # @param source_path [String] URL to fetch
    # @param user_agent [String] value for the User-Agent header
    # @return [Integer] number of bytes copied
    def download_bad_way(destination_path, source_path, user_agent)
      URI.parse(source_path.to_s).open("rb", 'User-Agent' => user_agent) do |remote_file|
        File.open(destination_path, "wb") do |saved_file|
          IO.copy_stream(remote_file, saved_file)
        end
      end
    end

    # e.g. pass in http://ocremix.org/remix/OCR03341
    # returns [ "http://mirrorname.com/file/path/File_Name.mp3", ... ]
    #
    # List items without a link (or links without an href) are ignored.
    def convert_track_page_to_mp3_links(url)
      html = URI.parse(url).open("rb", 'User-Agent' => @user_agent, &:read)
      doc = Nokogiri.parse(html)

      li_mirrors = doc.css("#panel-download > div:nth-child(1) > ul:nth-child(4) > li")
      anchors = li_mirrors.map { |li| li.at_css("a") }.compact
      anchors.map { |a| a["href"] }.compact
    end

    private

    # Builds the local path for +file_name+ inside the configured
    # download directory.
    def destination_path_for(file_name)
      "#{ENV['download_directory']}/#{file_name}"
    end

  end

end
@@ -0,0 +1,3 @@
1
module OcremixParser
  # Gem version string; the gemspec reads this constant.
  # Frozen so the constant cannot be mutated in place at runtime.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for ocremix_parser.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'ocremix_parser/version'

Gem::Specification.new do |spec|
  spec.name          = "ocremix_parser"
  spec.version       = OcremixParser::VERSION
  spec.authors       = ["TheNotary"]
  spec.email         = ["no@email.plz"]

  # Replaces the unfinished template placeholders with real text.
  spec.summary       = %q{Downloads the latest tracks listed on ocremix.org.}
  spec.description   = %q{Reads the ocremix.org RSS feed and downloads the ten most recent tracks into a configurable directory.}

  # Package every git-tracked file except tests.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "figaro"
  spec.add_dependency "simple-rss"
  spec.add_dependency "i18n"
  spec.add_dependency "nokogiri"

  spec.add_development_dependency "bundler", "~> 1.12"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "pry"
end
metadata ADDED
@@ -0,0 +1,169 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ocremix_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - TheNotary
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-07-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: figaro
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: simple-rss
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: i18n
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.12'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.12'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '10.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '10.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: pry
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description: ": Write a longer description or delete this line."
126
+ email:
127
+ - no@email.plz
128
+ executables:
129
+ - ocremix_parser
130
+ extensions: []
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".gitignore"
134
+ - ".rspec"
135
+ - ".travis.yml"
136
+ - Gemfile
137
+ - README.md
138
+ - Rakefile
139
+ - bin/console
140
+ - bin/setup
141
+ - config/application.yml
142
+ - exe/ocremix_parser
143
+ - lib/ocremix_parser.rb
144
+ - lib/ocremix_parser/version.rb
145
+ - ocremix_parser.gemspec
146
+ homepage:
147
+ licenses: []
148
+ metadata: {}
149
+ post_install_message:
150
+ rdoc_options: []
151
+ require_paths:
152
+ - lib
153
+ required_ruby_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - ">="
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ required_rubygems_version: !ruby/object:Gem::Requirement
159
+ requirements:
160
+ - - ">="
161
+ - !ruby/object:Gem::Version
162
+ version: '0'
163
+ requirements: []
164
+ rubyforge_project:
165
+ rubygems_version: 2.5.1
166
+ signing_key:
167
+ specification_version: 4
168
+ summary: This .
169
+ test_files: []