audio_book_creator 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +2 -2
- data/CHANGELOG.md +24 -0
- data/Gemfile +3 -3
- data/README.md +9 -4
- data/audio_book_creator.gemspec +3 -3
- data/{bin → exe}/audio_book_creator +3 -0
- data/lib/audio_book_creator.rb +4 -2
- data/lib/audio_book_creator/binder.rb +2 -1
- data/lib/audio_book_creator/book_def.rb +2 -2
- data/lib/audio_book_creator/cached_hash.rb +1 -1
- data/lib/audio_book_creator/cascading_array.rb +8 -8
- data/lib/audio_book_creator/chapter.rb +1 -1
- data/lib/audio_book_creator/cli.rb +36 -29
- data/lib/audio_book_creator/conductor.rb +5 -3
- data/lib/audio_book_creator/defaulter.rb +41 -0
- data/lib/audio_book_creator/editor.rb +2 -3
- data/lib/audio_book_creator/page_db.rb +14 -8
- data/lib/audio_book_creator/page_def.rb +7 -15
- data/lib/audio_book_creator/runner.rb +5 -3
- data/lib/audio_book_creator/speaker.rb +1 -1
- data/lib/audio_book_creator/spider.rb +9 -28
- data/lib/audio_book_creator/surfer_def.rb +1 -5
- data/lib/audio_book_creator/url_filter.rb +1 -1
- data/lib/audio_book_creator/version.rb +1 -1
- data/lib/audio_book_creator/web_page.rb +49 -0
- data/run_mutant +89 -0
- data/spec/audio_book_creator/binder_spec.rb +3 -3
- data/spec/audio_book_creator/book_creator_spec.rb +2 -3
- data/spec/audio_book_creator/book_def_spec.rb +33 -22
- data/spec/audio_book_creator/cached_hash_spec.rb +4 -0
- data/spec/audio_book_creator/cli_spec.rb +189 -122
- data/spec/audio_book_creator/conductor_spec.rb +17 -6
- data/spec/audio_book_creator/defaulter_spec.rb +154 -0
- data/spec/audio_book_creator/editor_spec.rb +7 -7
- data/spec/audio_book_creator/page_db_spec.rb +73 -11
- data/spec/audio_book_creator/page_def_spec.rb +26 -40
- data/spec/audio_book_creator/speaker_spec.rb +2 -2
- data/spec/audio_book_creator/spider_spec.rb +10 -15
- data/spec/audio_book_creator/surfer_def_spec.rb +1 -4
- data/spec/audio_book_creator/url_filter_spec.rb +1 -1
- data/spec/audio_book_creator/web_page_spec.rb +65 -0
- data/spec/audio_book_creator_spec.rb +23 -0
- data/spec/spec_helper.rb +15 -12
- metadata +14 -20
@@ -1,30 +1,36 @@
|
|
1
1
|
require "sqlite3"
|
2
|
+
require "json"
|
2
3
|
|
3
4
|
module AudioBookCreator
|
5
|
+
# a name value store stored in sqlite
|
6
|
+
# this is used for pages and also settings
|
4
7
|
class PageDb
|
5
8
|
include Enumerable
|
6
9
|
|
7
|
-
|
8
|
-
attr_accessor :filename
|
10
|
+
attr_accessor :filename, :table_name, :encode
|
9
11
|
|
10
|
-
def initialize(filename)
|
12
|
+
def initialize(filename, table_name, encode)
|
11
13
|
@filename = filename
|
14
|
+
@table_name = table_name
|
15
|
+
@encode = encode
|
12
16
|
end
|
13
17
|
|
14
18
|
def []=(key, value)
|
15
|
-
|
19
|
+
value = JSON.generate(value) if encode && value
|
20
|
+
db.execute "insert into #{table_name} (name, contents) values (?, ?)", [key, value]
|
16
21
|
end
|
17
22
|
|
18
23
|
def [](key)
|
19
|
-
db.execute("select contents from
|
24
|
+
value = db.execute("select contents from #{table_name} where name = ?", key).map { |row| row.first }.first
|
25
|
+
encode && value ? JSON.parse(value, :symbolize_names => true) : value
|
20
26
|
end
|
21
27
|
|
22
28
|
def include?(key)
|
23
|
-
|
29
|
+
self[key]
|
24
30
|
end
|
25
31
|
|
26
32
|
def each(&block)
|
27
|
-
db.execute "select name, contents from
|
33
|
+
db.execute "select name, contents from #{table_name}", &block
|
28
34
|
end
|
29
35
|
|
30
36
|
private
|
@@ -35,7 +41,7 @@ module AudioBookCreator
|
|
35
41
|
|
36
42
|
def create
|
37
43
|
SQLite3::Database.new(filename).tap do |db|
|
38
|
-
db.execute("create table if not exists
|
44
|
+
db.execute("create table if not exists #{table_name} (name text, contents blob)")
|
39
45
|
end
|
40
46
|
end
|
41
47
|
end
|
@@ -2,30 +2,22 @@ module AudioBookCreator
|
|
2
2
|
# information on the format of the html page that is read
|
3
3
|
class PageDef
|
4
4
|
attr_accessor :title_path, :body_path, :link_path, :chapter_path
|
5
|
+
attr_accessor :invalid_urls
|
5
6
|
|
6
|
-
def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil)
|
7
|
+
def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil, invalid_urls = {})
|
7
8
|
@title_path = title_path
|
8
9
|
@body_path = body_path
|
9
10
|
@link_path = link_path
|
10
11
|
@chapter_path = chapter_path
|
12
|
+
@invalid_urls = invalid_urls
|
11
13
|
end
|
12
14
|
|
13
|
-
def
|
14
|
-
|
15
|
-
title.text if title
|
15
|
+
def page_links(page)
|
16
|
+
page.links(link_path).select { |href| !invalid_urls.include?(href) }
|
16
17
|
end
|
17
18
|
|
18
|
-
def
|
19
|
-
|
20
|
-
# feels like I need .map { |n| n.text }
|
21
|
-
end
|
22
|
-
|
23
|
-
def page_links(dom, &block)
|
24
|
-
dom.css(link_path).map(&block)
|
25
|
-
end
|
26
|
-
|
27
|
-
def chapter_links(dom, &block)
|
28
|
-
dom.css(chapter_path).map(&block)
|
19
|
+
def chapter_links(page)
|
20
|
+
page.links(chapter_path).select { |href| !invalid_urls.include?(href) }
|
29
21
|
end
|
30
22
|
end
|
31
23
|
end
|
@@ -4,11 +4,13 @@ module AudioBookCreator
|
|
4
4
|
include Logging
|
5
5
|
|
6
6
|
def run(cmd, options)
|
7
|
-
params = options
|
7
|
+
params = options.fetch(:params).flatten.flatten.compact
|
8
8
|
|
9
|
-
|
9
|
+
cmdline = [cmd] + params.map(&:to_s)
|
10
|
+
|
11
|
+
logger.info { "run: #{cmdline.join(" ")}" }
|
10
12
|
logger.info ""
|
11
|
-
status = system(
|
13
|
+
status = system(*cmdline)
|
12
14
|
logger.info ""
|
13
15
|
logger.info { status ? "success" : "issue" }
|
14
16
|
|
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'nokogiri'
|
2
|
-
require 'uri'
|
3
|
-
|
4
1
|
module AudioBookCreator
|
5
2
|
class Spider
|
6
3
|
include Logging
|
@@ -8,28 +5,26 @@ module AudioBookCreator
|
|
8
5
|
# @!attribute web
|
9
6
|
# @return Hash access to the world wide web
|
10
7
|
attr_accessor :web
|
11
|
-
attr_accessor :invalid_urls
|
12
8
|
|
13
9
|
attr_accessor :page_def
|
14
10
|
|
15
|
-
def initialize(page_def, web
|
11
|
+
def initialize(page_def, web)
|
16
12
|
@page_def = page_def
|
17
13
|
@web = web
|
18
|
-
@invalid_urls = invalid_urls
|
19
14
|
end
|
20
15
|
|
21
16
|
def run(chapters)
|
22
|
-
outstanding = CascadingArray.new([],
|
17
|
+
outstanding = CascadingArray.new([], WebPage.map_urls(chapters))
|
23
18
|
visited = []
|
24
19
|
|
25
20
|
while (url = outstanding.shift)
|
26
|
-
|
27
|
-
visited <<
|
28
|
-
|
29
|
-
outstanding.add_unique_page(href)
|
21
|
+
wp = visit_page(url)
|
22
|
+
visited << wp
|
23
|
+
page_def.page_links(wp).each do |href|
|
24
|
+
outstanding.add_unique_page(href)
|
30
25
|
end
|
31
|
-
|
32
|
-
outstanding.add_unique_chapter(href)
|
26
|
+
page_def.chapter_links(wp).each do |href|
|
27
|
+
outstanding.add_unique_chapter(href)
|
33
28
|
end
|
34
29
|
end
|
35
30
|
visited
|
@@ -40,21 +35,7 @@ module AudioBookCreator
|
|
40
35
|
# this one hangs on mutations
|
41
36
|
def visit_page(url)
|
42
37
|
logger.info { "visit #{url}" }
|
43
|
-
|
44
|
-
doc = Nokogiri::HTML(page)
|
45
|
-
[
|
46
|
-
page,
|
47
|
-
page_def.page_links(doc) { |a| uri(url, a["href"]) },
|
48
|
-
page_def.chapter_links(doc) { |a| uri(url, a["href"]) }
|
49
|
-
]
|
50
|
-
end
|
51
|
-
|
52
|
-
# raises URI::Error (BadURIError)
|
53
|
-
def uri(url, alt = nil)
|
54
|
-
url = URI.parse(url) unless url.is_a?(URI)
|
55
|
-
url += alt if alt
|
56
|
-
url.fragment = nil # remove #x part of url
|
57
|
-
url
|
38
|
+
WebPage.new(url, web[url.to_s])
|
58
39
|
end
|
59
40
|
end
|
60
41
|
end
|
@@ -1,15 +1,11 @@
|
|
1
1
|
module AudioBookCreator
|
2
2
|
class SurferDef
|
3
|
-
attr_accessor :host
|
4
3
|
attr_accessor :max
|
5
4
|
attr_accessor :regen_html
|
6
|
-
attr_accessor :cache_filename
|
7
5
|
|
8
|
-
def initialize(
|
9
|
-
@host = host
|
6
|
+
def initialize(max = nil, regen_html = nil)
|
10
7
|
@max = max
|
11
8
|
@regen_html = regen_html
|
12
|
-
@cache_filename = cache_filename
|
13
9
|
end
|
14
10
|
end
|
15
11
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module AudioBookCreator
  # A fetched web page: the url it was read from plus its raw html body.
  #
  # Two pages are considered equal when their urls are equal; the body is
  # not part of the comparison.
  class WebPage
    # @!attribute url
    #   @return [URI, String] location the page was read from
    attr_accessor :url
    # @!attribute body
    #   @return [String] raw html of the page
    attr_accessor :body
    #attr_accessor :etag

    # @param url [URI, String] location the page was read from
    # @param body [String] raw html of the page
    def initialize(url, body)
      @url = url
      @body = body
    end

    # def single_css(path) ; css(path).first ; end

    # @param path [String] css selector
    # @return [Array<String>] text of every node matching the selector
    def css(path)
      dom.css(path).map { |node| node.text }
    end

    # Resolve the href of every node matching the selector against this
    # page's url.
    #
    # NOTE: a matching node without an href passes nil to {WebPage.uri},
    # which then yields this page's own url (minus fragment).
    #
    # @param path [String] css selector
    # @return [Array<URI>] absolute urls with the fragment removed
    def links(path)
      dom.css(path).map { |a| self.class.uri(url, a["href"]) }
    end

    # Parsed document, built on first use and memoized.
    def dom
      @dom ||= Nokogiri::HTML(body)
    end
    private :dom

    # @param other [Object]
    # @return [Boolean] true when other is a WebPage with an eql? url
    def ==(other)
      other.kind_of?(WebPage) &&
        other.url.eql?(url)
    end
    alias eql? ==

    # Hash consistent with {#eql?}, so pages that compare equal collapse in
    # Array#uniq and can be used as Hash keys.
    #
    # @return [Integer]
    def hash
      [WebPage, url].hash
    end

    # @param url [Enumerable<URI, String>] urls to normalize
    # @return [Array<URI>] each entry parsed and stripped of its fragment
    def self.map_urls(url)
      url.map { |o| uri(o) }
    end

    # Normalize a url, optionally resolving +alt+ relative to it.
    #
    # Kept public: {#links} calls it through +self.class+, which would not be
    # allowed for a private class method.
    #
    # @param url [URI, String] base url
    # @param alt [String, nil] reference to resolve against the base
    # @return [URI] a new URI without the #fragment part
    # @raise [URI::Error] (BadURIError) when a url can not be parsed or merged
    def self.uri(url, alt = nil)
      # work on a copy so a URI handed in by the caller keeps its fragment
      url = url.is_a?(URI) ? url.dup : URI.parse(url)
      url += alt if alt
      url.fragment = nil # remove #x part of url
      url
    end
  end
end
|
data/run_mutant
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
# Runs mutant against a single source file and caches the report.
#
# For a source file lib/x/y.rb the spec is expected at spec/x/y_spec.rb, the
# report is written to muts/x/y.txt and the class under test is X::Y.
class Mutt
  FILE_NAME = "audio_book_creator".freeze

  # path of the ruby source file
  attr_accessor :src
  # path of the matching spec file
  attr_accessor :spec
  # path of the cached mutant report
  attr_accessor :mutant
  # constant name derived from the source path
  attr_accessor :class_name

  # @param src [String] path of a source file, e.g. "lib/foo/bar.rb"
  def initialize(src)
    @src = src
    # only a trailing ".rb" is an extension; leave any other ".rb" in the path alone
    @spec = src.sub("lib/", "spec/").sub(/\.rb\z/, "_spec.rb")
    @mutant = src.sub("lib/", "muts/").sub(/\.rb\z/, ".txt")
    @class_name = file_to_class(src.sub("lib/", ""))
  end

  # @return [Boolean] true when there is no report yet, or the source or the
  #   spec was modified after the report was written
  def need_to_run?
    return true unless File.exist?(mutant)

    report_time = File.mtime(mutant)
    # a source file without a spec has nothing that could be newer
    [src, spec].any? { |path| File.exist?(path) && File.mtime(path) > report_time }
  end

  # @return [Boolean] true for the version constant, which is not mutated
  def skip?
    class_name.include?("::Version")
  end

  # Create the directory that holds the report (no-op when it exists).
  def ensure_mutant_dir
    FileUtils.mkdir_p(File.dirname(mutant))
  end

  # Read the kill and mutation totals out of the cached report.
  #
  # For each label the first report line containing it is used, and its
  # second whitespace separated field is converted with to_i (0 when absent).
  #
  # @return [Array] [kills, mutations] as integers, or ["", ""] when there is
  #   no report
  def mutant_count
    return ["", ""] unless File.exist?(mutant)

    lines = File.readlines(mutant)
    %w(Kills Mutations).map do |field|
      lines.find { |line| line.include?(field) }.to_s.split[1].to_i
    end
  end

  # @return [String] status line shown while mutant is still running
  def count_in_progress
    k, m = mutant_count
    format("%s%4s/%4s %s", "?", k, m, class_name)
  end

  # @return [String] status line; prefixed with "*" when kills and mutations differ
  def counts
    k, m = mutant_count
    format("%s%4s/%4s %s", (k == m ? " " : "*"), k, m, class_name)
  end

  # Run mutant for this class with stdout redirected into the report file.
  #
  # The command is passed as an argument list (no shell), so paths and class
  # names need no quoting. An interrupted run removes the partial report so
  # the next invocation runs it again.
  #
  # @return [Boolean, nil] result of Kernel#system
  def run_mutant
    ensure_mutant_dir
    # --fail-fast
    system({ "MUTANT" => "true" },
           "bundle", "exec", "mutant", "--include", "lib",
           "--require", FILE_NAME, "--require", "audio_book_creator/cli",
           "--use", "rspec", class_name, "--jobs", "1",
           :out => mutant)
  rescue SystemExit, Interrupt
    # rm_f: the report may not exist yet, and a failure here must not mask the interrupt
    FileUtils.rm_f(mutant)
    raise
  end

  # Print one status line per source file, running mutant where needed.
  #
  # @param filenames [Array<String>] source files to check
  # @param force [Object] truthy to run mutant even when the report is current
  def self.run(filenames, force)
    filenames.map { |src| new(src) }.reject(&:skip?).each do |mutant|
      if force || mutant.need_to_run?
        puts "#{mutant.count_in_progress} ..."
        mutant.run_mutant
        print back_a_line
        puts "#{mutant.counts} "
      else
        puts mutant.counts
      end
    end
  end

  # Terminal control sequence emitted by `tput cuu1; tput el`; memoized.
  def self.back_a_line
    @back_a_line ||= `tput cuu1; tput el`
  end
  # a bare `private` does not apply to `def self.` methods
  private_class_method :back_a_line

  private

  # "foo_bar/baz_qux.rb" => "FooBar::BazQux"
  def file_to_class(str)
    str.sub(/\.rb\z/, "").split("/").map { |part| part.split("_").map(&:capitalize).join }.join("::")
  end
end
|
85
|
+
|
86
|
+
# Command line entry point: `run_mutant [-f] [file ...]`.
# "-f" forces a run even when a report is current; with no files given,
# every ruby file under lib/ is checked.
# Guarded so the file can be loaded without triggering a run.
if __FILE__ == $PROGRAM_NAME
  force = ARGV.delete("-f")
  filenames = ARGV.empty? ? Dir["lib/**/*.rb"] : ARGV

  Mutt.run(filenames, force)
end
|
@@ -7,7 +7,7 @@ describe AudioBookCreator::Binder do
|
|
7
7
|
|
8
8
|
it "should require a chapter" do
|
9
9
|
expect_runner.not_to receive(:system)
|
10
|
-
expect {
|
10
|
+
expect {subject.create([]) }.to raise_error("No Chapters")
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should do nothing if m4b exists" do
|
@@ -41,9 +41,9 @@ describe AudioBookCreator::Binder do
|
|
41
41
|
expect(File).to receive(:exist?).with("title.m4b").and_return(false)
|
42
42
|
|
43
43
|
expect_runner.to receive(:system)
|
44
|
-
.with("abbinder", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
|
44
|
+
.with("abbinder", "-A", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
|
45
45
|
"-r", "22050", "-g", "Audiobook", "-l", "7", "-o", "title.m4b",
|
46
|
-
"@the title@", "dir/chapter01.m4a"
|
46
|
+
"@the title@", "dir/chapter01.m4a").and_return(true)
|
47
47
|
subject.create([spoken_chapter])
|
48
48
|
end
|
49
49
|
end
|
@@ -12,9 +12,8 @@ describe AudioBookCreator::BookCreator do
|
|
12
12
|
subject { described_class.new(spider, editor, speaker, binder) }
|
13
13
|
|
14
14
|
it "should call all the constructors and components" do
|
15
|
-
|
16
|
-
|
17
|
-
page_contents = ["site.com contents"]
|
15
|
+
outstanding = [AudioBookCreator::WebPage.new('', "http://site.com/")]
|
16
|
+
page_contents = [page("title1","contents1"), page("title2","contents2")]
|
18
17
|
chapters = [
|
19
18
|
chapter("contents1", "title1", 1),
|
20
19
|
chapter("contents2", "title2", 2)
|
@@ -6,7 +6,7 @@ describe AudioBookCreator::BookDef do
|
|
6
6
|
it { expect(subject.title).to eq(nil) }
|
7
7
|
it { expect(subject.author).to eq("Vicki") }
|
8
8
|
it { expect(subject.urls).to be_nil }
|
9
|
-
it { expect(subject.itunes).to
|
9
|
+
it { expect(subject.itunes).to be_truthy }
|
10
10
|
end
|
11
11
|
|
12
12
|
context "with title" do
|
@@ -16,18 +16,38 @@ describe AudioBookCreator::BookDef do
|
|
16
16
|
end
|
17
17
|
|
18
18
|
context "with all parameters" do
|
19
|
-
subject { described_class.new("the title", "author", "dir", %w(a b),
|
19
|
+
subject { described_class.new("the title", "author", "dir", %w(a b), false) }
|
20
20
|
it { expect(subject.base_dir).to eq("dir") }
|
21
21
|
it { expect(subject.title).to eq("the title") }
|
22
22
|
it { expect(subject.author).to eq("author") }
|
23
23
|
it { expect(subject.filename).to eq("the-title.m4b") }
|
24
24
|
it { expect(subject.urls).to eq(%w(a b)) }
|
25
|
+
it { expect(subject.itunes).to be_falsy }
|
26
|
+
end
|
27
|
+
|
28
|
+
context "with all parameters alt" do
|
29
|
+
subject { described_class.new("the title", "author", "dir", %w(a b), true) }
|
25
30
|
it { expect(subject.itunes).to be_truthy }
|
26
31
|
end
|
27
32
|
|
28
|
-
|
29
|
-
subject { described_class.new
|
30
|
-
|
33
|
+
describe "#base_dir (derived)" do
|
34
|
+
subject { described_class.new }
|
35
|
+
|
36
|
+
it "supports titles with spaces" do
|
37
|
+
subject.title = %{title ((for "you", "Amy", and "John"))}
|
38
|
+
expect(subject.base_dir).to eq("title-for-you-Amy-and-John")
|
39
|
+
end
|
40
|
+
|
41
|
+
it "supports titles with extra stuff" do
|
42
|
+
subject.title = "title,for!"
|
43
|
+
expect(subject.base_dir).to eq("title-for")
|
44
|
+
end
|
45
|
+
|
46
|
+
it "overrides" do
|
47
|
+
subject.base_dir = "dir"
|
48
|
+
subject.title = "title"
|
49
|
+
expect(subject.base_dir).to eq("dir")
|
50
|
+
end
|
31
51
|
end
|
32
52
|
|
33
53
|
context "#unique_urls" do
|
@@ -36,26 +56,17 @@ describe AudioBookCreator::BookDef do
|
|
36
56
|
it { expect(subject.unique_urls).to eq(%w(http://site.com/title http://site.com/title2)) }
|
37
57
|
end
|
38
58
|
|
39
|
-
|
40
|
-
subject { described_class }
|
41
|
-
it "should join strings" do
|
42
|
-
expect(subject.sanitize_filename("title", "jpg")).to eq("title.jpg")
|
43
|
-
end
|
44
|
-
|
45
|
-
it "should handle arrays" do
|
46
|
-
expect(subject.sanitize_filename(%w(title jpg))).to eq("title.jpg")
|
47
|
-
end
|
48
|
-
|
49
|
-
it "should ignore nils" do
|
50
|
-
expect(subject.sanitize_filename("title", nil)).to eq("title")
|
51
|
-
end
|
59
|
+
describe "#filename (derived)" do
|
60
|
+
subject { described_class.new }
|
52
61
|
|
53
|
-
it "
|
54
|
-
|
62
|
+
it "adds extension" do
|
63
|
+
subject.title = "title"
|
64
|
+
expect(subject.filename).to eq("title.m4b")
|
55
65
|
end
|
56
66
|
|
57
|
-
it "
|
58
|
-
|
67
|
+
it "supports spaces" do
|
68
|
+
subject.title = "the title"
|
69
|
+
expect(subject.filename).to eq("the-title.m4b")
|
59
70
|
end
|
60
71
|
end
|
61
72
|
end
|