audio_book_creator 0.0.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +2 -2
- data/CHANGELOG.md +24 -0
- data/Gemfile +3 -3
- data/README.md +9 -4
- data/audio_book_creator.gemspec +3 -3
- data/{bin → exe}/audio_book_creator +3 -0
- data/lib/audio_book_creator.rb +4 -2
- data/lib/audio_book_creator/binder.rb +2 -1
- data/lib/audio_book_creator/book_def.rb +2 -2
- data/lib/audio_book_creator/cached_hash.rb +1 -1
- data/lib/audio_book_creator/cascading_array.rb +8 -8
- data/lib/audio_book_creator/chapter.rb +1 -1
- data/lib/audio_book_creator/cli.rb +36 -29
- data/lib/audio_book_creator/conductor.rb +5 -3
- data/lib/audio_book_creator/defaulter.rb +41 -0
- data/lib/audio_book_creator/editor.rb +2 -3
- data/lib/audio_book_creator/page_db.rb +14 -8
- data/lib/audio_book_creator/page_def.rb +7 -15
- data/lib/audio_book_creator/runner.rb +5 -3
- data/lib/audio_book_creator/speaker.rb +1 -1
- data/lib/audio_book_creator/spider.rb +9 -28
- data/lib/audio_book_creator/surfer_def.rb +1 -5
- data/lib/audio_book_creator/url_filter.rb +1 -1
- data/lib/audio_book_creator/version.rb +1 -1
- data/lib/audio_book_creator/web_page.rb +49 -0
- data/run_mutant +89 -0
- data/spec/audio_book_creator/binder_spec.rb +3 -3
- data/spec/audio_book_creator/book_creator_spec.rb +2 -3
- data/spec/audio_book_creator/book_def_spec.rb +33 -22
- data/spec/audio_book_creator/cached_hash_spec.rb +4 -0
- data/spec/audio_book_creator/cli_spec.rb +189 -122
- data/spec/audio_book_creator/conductor_spec.rb +17 -6
- data/spec/audio_book_creator/defaulter_spec.rb +154 -0
- data/spec/audio_book_creator/editor_spec.rb +7 -7
- data/spec/audio_book_creator/page_db_spec.rb +73 -11
- data/spec/audio_book_creator/page_def_spec.rb +26 -40
- data/spec/audio_book_creator/speaker_spec.rb +2 -2
- data/spec/audio_book_creator/spider_spec.rb +10 -15
- data/spec/audio_book_creator/surfer_def_spec.rb +1 -4
- data/spec/audio_book_creator/url_filter_spec.rb +1 -1
- data/spec/audio_book_creator/web_page_spec.rb +65 -0
- data/spec/audio_book_creator_spec.rb +23 -0
- data/spec/spec_helper.rb +15 -12
- metadata +14 -20
@@ -1,30 +1,36 @@
|
|
1
1
|
require "sqlite3"
|
2
|
+
require "json"
|
2
3
|
|
3
4
|
module AudioBookCreator
|
5
|
+
# a name value store stored in sqlite
|
6
|
+
# this is used for pages and also settings
|
4
7
|
class PageDb
|
5
8
|
include Enumerable
|
6
9
|
|
7
|
-
|
8
|
-
attr_accessor :filename
|
10
|
+
attr_accessor :filename, :table_name, :encode
|
9
11
|
|
10
|
-
def initialize(filename)
|
12
|
+
def initialize(filename, table_name, encode)
|
11
13
|
@filename = filename
|
14
|
+
@table_name = table_name
|
15
|
+
@encode = encode
|
12
16
|
end
|
13
17
|
|
14
18
|
def []=(key, value)
|
15
|
-
|
19
|
+
value = JSON.generate(value) if encode && value
|
20
|
+
db.execute "insert into #{table_name} (name, contents) values (?, ?)", [key, value]
|
16
21
|
end
|
17
22
|
|
18
23
|
def [](key)
|
19
|
-
db.execute("select contents from
|
24
|
+
value = db.execute("select contents from #{table_name} where name = ?", key).map { |row| row.first }.first
|
25
|
+
encode && value ? JSON.parse(value, :symbolize_names => true) : value
|
20
26
|
end
|
21
27
|
|
22
28
|
def include?(key)
|
23
|
-
|
29
|
+
self[key]
|
24
30
|
end
|
25
31
|
|
26
32
|
def each(&block)
|
27
|
-
db.execute "select name, contents from
|
33
|
+
db.execute "select name, contents from #{table_name}", &block
|
28
34
|
end
|
29
35
|
|
30
36
|
private
|
@@ -35,7 +41,7 @@ module AudioBookCreator
|
|
35
41
|
|
36
42
|
def create
|
37
43
|
SQLite3::Database.new(filename).tap do |db|
|
38
|
-
db.execute("create table if not exists
|
44
|
+
db.execute("create table if not exists #{table_name} (name text, contents blob)")
|
39
45
|
end
|
40
46
|
end
|
41
47
|
end
|
@@ -2,30 +2,22 @@ module AudioBookCreator
|
|
2
2
|
# information on the format of the html page that is read
|
3
3
|
class PageDef
|
4
4
|
attr_accessor :title_path, :body_path, :link_path, :chapter_path
|
5
|
+
attr_accessor :invalid_urls
|
5
6
|
|
6
|
-
def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil)
|
7
|
+
def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil, invalid_urls = {})
|
7
8
|
@title_path = title_path
|
8
9
|
@body_path = body_path
|
9
10
|
@link_path = link_path
|
10
11
|
@chapter_path = chapter_path
|
12
|
+
@invalid_urls = invalid_urls
|
11
13
|
end
|
12
14
|
|
13
|
-
def
|
14
|
-
|
15
|
-
title.text if title
|
15
|
+
def page_links(page)
|
16
|
+
page.links(link_path).select { |href| !invalid_urls.include?(href) }
|
16
17
|
end
|
17
18
|
|
18
|
-
def
|
19
|
-
|
20
|
-
# feels like I need .map { |n| n.text }
|
21
|
-
end
|
22
|
-
|
23
|
-
def page_links(dom, &block)
|
24
|
-
dom.css(link_path).map(&block)
|
25
|
-
end
|
26
|
-
|
27
|
-
def chapter_links(dom, &block)
|
28
|
-
dom.css(chapter_path).map(&block)
|
19
|
+
def chapter_links(page)
|
20
|
+
page.links(chapter_path).select { |href| !invalid_urls.include?(href) }
|
29
21
|
end
|
30
22
|
end
|
31
23
|
end
|
@@ -4,11 +4,13 @@ module AudioBookCreator
|
|
4
4
|
include Logging
|
5
5
|
|
6
6
|
def run(cmd, options)
|
7
|
-
params = options
|
7
|
+
params = options.fetch(:params).flatten.flatten.compact
|
8
8
|
|
9
|
-
|
9
|
+
cmdline = [cmd] + params.map(&:to_s)
|
10
|
+
|
11
|
+
logger.info { "run: #{cmdline.join(" ")}" }
|
10
12
|
logger.info ""
|
11
|
-
status = system(
|
13
|
+
status = system(*cmdline)
|
12
14
|
logger.info ""
|
13
15
|
logger.info { status ? "success" : "issue" }
|
14
16
|
|
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'nokogiri'
|
2
|
-
require 'uri'
|
3
|
-
|
4
1
|
module AudioBookCreator
|
5
2
|
class Spider
|
6
3
|
include Logging
|
@@ -8,28 +5,26 @@ module AudioBookCreator
|
|
8
5
|
# @!attribute web
|
9
6
|
# @return Hash access to the world wide web
|
10
7
|
attr_accessor :web
|
11
|
-
attr_accessor :invalid_urls
|
12
8
|
|
13
9
|
attr_accessor :page_def
|
14
10
|
|
15
|
-
def initialize(page_def, web
|
11
|
+
def initialize(page_def, web)
|
16
12
|
@page_def = page_def
|
17
13
|
@web = web
|
18
|
-
@invalid_urls = invalid_urls
|
19
14
|
end
|
20
15
|
|
21
16
|
def run(chapters)
|
22
|
-
outstanding = CascadingArray.new([],
|
17
|
+
outstanding = CascadingArray.new([], WebPage.map_urls(chapters))
|
23
18
|
visited = []
|
24
19
|
|
25
20
|
while (url = outstanding.shift)
|
26
|
-
|
27
|
-
visited <<
|
28
|
-
|
29
|
-
outstanding.add_unique_page(href)
|
21
|
+
wp = visit_page(url)
|
22
|
+
visited << wp
|
23
|
+
page_def.page_links(wp).each do |href|
|
24
|
+
outstanding.add_unique_page(href)
|
30
25
|
end
|
31
|
-
|
32
|
-
outstanding.add_unique_chapter(href)
|
26
|
+
page_def.chapter_links(wp).each do |href|
|
27
|
+
outstanding.add_unique_chapter(href)
|
33
28
|
end
|
34
29
|
end
|
35
30
|
visited
|
@@ -40,21 +35,7 @@ module AudioBookCreator
|
|
40
35
|
# this one hangs on mutations
|
41
36
|
def visit_page(url)
|
42
37
|
logger.info { "visit #{url}" }
|
43
|
-
|
44
|
-
doc = Nokogiri::HTML(page)
|
45
|
-
[
|
46
|
-
page,
|
47
|
-
page_def.page_links(doc) { |a| uri(url, a["href"]) },
|
48
|
-
page_def.chapter_links(doc) { |a| uri(url, a["href"]) }
|
49
|
-
]
|
50
|
-
end
|
51
|
-
|
52
|
-
# raises URI::Error (BadURIError)
|
53
|
-
def uri(url, alt = nil)
|
54
|
-
url = URI.parse(url) unless url.is_a?(URI)
|
55
|
-
url += alt if alt
|
56
|
-
url.fragment = nil # remove #x part of url
|
57
|
-
url
|
38
|
+
WebPage.new(url, web[url.to_s])
|
58
39
|
end
|
59
40
|
end
|
60
41
|
end
|
@@ -1,15 +1,11 @@
|
|
1
1
|
module AudioBookCreator
|
2
2
|
class SurferDef
|
3
|
-
attr_accessor :host
|
4
3
|
attr_accessor :max
|
5
4
|
attr_accessor :regen_html
|
6
|
-
attr_accessor :cache_filename
|
7
5
|
|
8
|
-
def initialize(
|
9
|
-
@host = host
|
6
|
+
def initialize(max = nil, regen_html = nil)
|
10
7
|
@max = max
|
11
8
|
@regen_html = regen_html
|
12
|
-
@cache_filename = cache_filename
|
13
9
|
end
|
14
10
|
end
|
15
11
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module AudioBookCreator
|
5
|
+
class WebPage
|
6
|
+
attr_accessor :url
|
7
|
+
attr_accessor :body
|
8
|
+
#attr_accessor :etag
|
9
|
+
|
10
|
+
def initialize(url, body)
|
11
|
+
@url = url
|
12
|
+
@body = body
|
13
|
+
end
|
14
|
+
|
15
|
+
# def single_css(path) ; css(path).first ; end
|
16
|
+
def css(path)
|
17
|
+
dom.css(path).map {|n| n.text }
|
18
|
+
end
|
19
|
+
|
20
|
+
def links(path)
|
21
|
+
dom.css(path).map { |a| self.class.uri(url, a["href"]) }
|
22
|
+
end
|
23
|
+
|
24
|
+
def dom
|
25
|
+
@dom ||= Nokogiri::HTML(body)
|
26
|
+
end
|
27
|
+
private :dom
|
28
|
+
|
29
|
+
def ==(other)
|
30
|
+
other.kind_of?(WebPage) &&
|
31
|
+
other.url.eql?(url)
|
32
|
+
end
|
33
|
+
alias :eql? :==
|
34
|
+
|
35
|
+
def self.map_urls(url)
|
36
|
+
url.map { |o| uri(o) }
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
# raises URI::Error (BadURIError)
|
42
|
+
def self.uri(url, alt = nil)
|
43
|
+
url = URI.parse(url) unless url.is_a?(URI)
|
44
|
+
url += alt if alt
|
45
|
+
url.fragment = nil # remove #x part of url
|
46
|
+
url
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
data/run_mutant
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
class Mutt
|
6
|
+
FILE_NAME="audio_book_creator"
|
7
|
+
attr_accessor :src
|
8
|
+
attr_accessor :spec
|
9
|
+
attr_accessor :mutant
|
10
|
+
attr_accessor :class_name
|
11
|
+
|
12
|
+
def initialize(src)
|
13
|
+
@src = src
|
14
|
+
@spec = src.sub("lib/","spec/").sub(".rb","_spec.rb")
|
15
|
+
@mutant = src.sub("lib/","muts/").sub(".rb",".txt")
|
16
|
+
@class_name = file_to_class(src.sub("lib/",""))
|
17
|
+
end
|
18
|
+
|
19
|
+
def need_to_run?
|
20
|
+
!File.exist?(mutant) ||
|
21
|
+
File.stat(src).mtime > File.stat(mutant).mtime ||
|
22
|
+
File.stat(spec).mtime > File.stat(mutant).mtime
|
23
|
+
end
|
24
|
+
|
25
|
+
def skip?
|
26
|
+
class_name.include?("::Version")
|
27
|
+
end
|
28
|
+
|
29
|
+
def ensure_mutant_dir
|
30
|
+
FileUtils.mkdir_p(File.dirname(mutant))
|
31
|
+
end
|
32
|
+
|
33
|
+
def mutant_count
|
34
|
+
if File.exist?(mutant)
|
35
|
+
%w(Kills Mutations).map { |field| `awk "/#{field}/ { print \\$2; }" #{mutant}`.chomp.to_i }
|
36
|
+
else
|
37
|
+
["", ""]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def count_in_progress
|
42
|
+
k, m = mutant_count
|
43
|
+
"%s%4s/%4s #{class_name}" % ["?", k, m]
|
44
|
+
end
|
45
|
+
|
46
|
+
def counts
|
47
|
+
k, m = mutant_count
|
48
|
+
"%s%4s/%4s #{class_name}" % [(k == m ? " " : "*"), k, m]
|
49
|
+
end
|
50
|
+
|
51
|
+
def run_mutant
|
52
|
+
# ensure_mutant_dir
|
53
|
+
# --fail-fast
|
54
|
+
system({ "MUTANT" => "true"},
|
55
|
+
"bundle exec mutant --include lib --require #{FILE_NAME} --require audio_book_creator/cli " +
|
56
|
+
"--use rspec #{class_name} --jobs 1 > #{mutant}")
|
57
|
+
rescue SystemExit, Interrupt
|
58
|
+
FileUtils.rm(mutant)
|
59
|
+
raise
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.run(filenames, force)
|
63
|
+
filenames.map { |src| Mutt.new(src) }.select { |mutant| !mutant.skip? }.each do |mutant|
|
64
|
+
if force || mutant.need_to_run?
|
65
|
+
puts "#{mutant.count_in_progress} ..."
|
66
|
+
mutant.run_mutant
|
67
|
+
print back_a_line
|
68
|
+
puts "#{mutant.counts} "
|
69
|
+
else
|
70
|
+
puts mutant.counts
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def self.back_a_line
|
78
|
+
@back_a_line ||= `tput cuu1; tput el`
|
79
|
+
end
|
80
|
+
|
81
|
+
def file_to_class(str)
|
82
|
+
str.sub(".rb","").split("/").collect { |f| f.split("_").collect(&:capitalize).join }.join("::")
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
force = ARGV.delete("-f")
|
87
|
+
filenames = ARGV.empty? ? Dir["lib/**/*.rb"] : ARGV
|
88
|
+
|
89
|
+
Mutt.run(filenames, force)
|
@@ -7,7 +7,7 @@ describe AudioBookCreator::Binder do
|
|
7
7
|
|
8
8
|
it "should require a chapter" do
|
9
9
|
expect_runner.not_to receive(:system)
|
10
|
-
expect {
|
10
|
+
expect {subject.create([]) }.to raise_error("No Chapters")
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should do nothing if m4b exists" do
|
@@ -41,9 +41,9 @@ describe AudioBookCreator::Binder do
|
|
41
41
|
expect(File).to receive(:exist?).with("title.m4b").and_return(false)
|
42
42
|
|
43
43
|
expect_runner.to receive(:system)
|
44
|
-
.with("abbinder", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
|
44
|
+
.with("abbinder", "-A", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
|
45
45
|
"-r", "22050", "-g", "Audiobook", "-l", "7", "-o", "title.m4b",
|
46
|
-
"@the title@", "dir/chapter01.m4a"
|
46
|
+
"@the title@", "dir/chapter01.m4a").and_return(true)
|
47
47
|
subject.create([spoken_chapter])
|
48
48
|
end
|
49
49
|
end
|
@@ -12,9 +12,8 @@ describe AudioBookCreator::BookCreator do
|
|
12
12
|
subject { described_class.new(spider, editor, speaker, binder) }
|
13
13
|
|
14
14
|
it "should call all the constructors and components" do
|
15
|
-
|
16
|
-
|
17
|
-
page_contents = ["site.com contents"]
|
15
|
+
outstanding = [AudioBookCreator::WebPage.new('', "http://site.com/")]
|
16
|
+
page_contents = [page("title1","contents1"), page("title2","contents2")]
|
18
17
|
chapters = [
|
19
18
|
chapter("contents1", "title1", 1),
|
20
19
|
chapter("contents2", "title2", 2)
|
@@ -6,7 +6,7 @@ describe AudioBookCreator::BookDef do
|
|
6
6
|
it { expect(subject.title).to eq(nil) }
|
7
7
|
it { expect(subject.author).to eq("Vicki") }
|
8
8
|
it { expect(subject.urls).to be_nil }
|
9
|
-
it { expect(subject.itunes).to
|
9
|
+
it { expect(subject.itunes).to be_truthy }
|
10
10
|
end
|
11
11
|
|
12
12
|
context "with title" do
|
@@ -16,18 +16,38 @@ describe AudioBookCreator::BookDef do
|
|
16
16
|
end
|
17
17
|
|
18
18
|
context "with all parameters" do
|
19
|
-
subject { described_class.new("the title", "author", "dir", %w(a b),
|
19
|
+
subject { described_class.new("the title", "author", "dir", %w(a b), false) }
|
20
20
|
it { expect(subject.base_dir).to eq("dir") }
|
21
21
|
it { expect(subject.title).to eq("the title") }
|
22
22
|
it { expect(subject.author).to eq("author") }
|
23
23
|
it { expect(subject.filename).to eq("the-title.m4b") }
|
24
24
|
it { expect(subject.urls).to eq(%w(a b)) }
|
25
|
+
it { expect(subject.itunes).to be_falsy }
|
26
|
+
end
|
27
|
+
|
28
|
+
context "with all parameters alt" do
|
29
|
+
subject { described_class.new("the title", "author", "dir", %w(a b), true) }
|
25
30
|
it { expect(subject.itunes).to be_truthy }
|
26
31
|
end
|
27
32
|
|
28
|
-
|
29
|
-
subject { described_class.new
|
30
|
-
|
33
|
+
describe "#base_dir (derived)" do
|
34
|
+
subject { described_class.new }
|
35
|
+
|
36
|
+
it "supports titles with spaces" do
|
37
|
+
subject.title = %{title ((for "you", "Amy", and "John"))}
|
38
|
+
expect(subject.base_dir).to eq("title-for-you-Amy-and-John")
|
39
|
+
end
|
40
|
+
|
41
|
+
it "supports titles with extra stuff" do
|
42
|
+
subject.title = "title,for!"
|
43
|
+
expect(subject.base_dir).to eq("title-for")
|
44
|
+
end
|
45
|
+
|
46
|
+
it "overrides" do
|
47
|
+
subject.base_dir = "dir"
|
48
|
+
subject.title = "title"
|
49
|
+
expect(subject.base_dir).to eq("dir")
|
50
|
+
end
|
31
51
|
end
|
32
52
|
|
33
53
|
context "#unique_urls" do
|
@@ -36,26 +56,17 @@ describe AudioBookCreator::BookDef do
|
|
36
56
|
it { expect(subject.unique_urls).to eq(%w(http://site.com/title http://site.com/title2)) }
|
37
57
|
end
|
38
58
|
|
39
|
-
|
40
|
-
subject { described_class }
|
41
|
-
it "should join strings" do
|
42
|
-
expect(subject.sanitize_filename("title", "jpg")).to eq("title.jpg")
|
43
|
-
end
|
44
|
-
|
45
|
-
it "should handle arrays" do
|
46
|
-
expect(subject.sanitize_filename(%w(title jpg))).to eq("title.jpg")
|
47
|
-
end
|
48
|
-
|
49
|
-
it "should ignore nils" do
|
50
|
-
expect(subject.sanitize_filename("title", nil)).to eq("title")
|
51
|
-
end
|
59
|
+
describe "#filename (derived)" do
|
60
|
+
subject { described_class.new }
|
52
61
|
|
53
|
-
it "
|
54
|
-
|
62
|
+
it "adds extension" do
|
63
|
+
subject.title = "title"
|
64
|
+
expect(subject.filename).to eq("title.m4b")
|
55
65
|
end
|
56
66
|
|
57
|
-
it "
|
58
|
-
|
67
|
+
it "supports spaces" do
|
68
|
+
subject.title = "the title"
|
69
|
+
expect(subject.filename).to eq("the-title.m4b")
|
59
70
|
end
|
60
71
|
end
|
61
72
|
end
|