audio_book_creator 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.travis.yml +2 -2
  4. data/CHANGELOG.md +24 -0
  5. data/Gemfile +3 -3
  6. data/README.md +9 -4
  7. data/audio_book_creator.gemspec +3 -3
  8. data/{bin → exe}/audio_book_creator +3 -0
  9. data/lib/audio_book_creator.rb +4 -2
  10. data/lib/audio_book_creator/binder.rb +2 -1
  11. data/lib/audio_book_creator/book_def.rb +2 -2
  12. data/lib/audio_book_creator/cached_hash.rb +1 -1
  13. data/lib/audio_book_creator/cascading_array.rb +8 -8
  14. data/lib/audio_book_creator/chapter.rb +1 -1
  15. data/lib/audio_book_creator/cli.rb +36 -29
  16. data/lib/audio_book_creator/conductor.rb +5 -3
  17. data/lib/audio_book_creator/defaulter.rb +41 -0
  18. data/lib/audio_book_creator/editor.rb +2 -3
  19. data/lib/audio_book_creator/page_db.rb +14 -8
  20. data/lib/audio_book_creator/page_def.rb +7 -15
  21. data/lib/audio_book_creator/runner.rb +5 -3
  22. data/lib/audio_book_creator/speaker.rb +1 -1
  23. data/lib/audio_book_creator/spider.rb +9 -28
  24. data/lib/audio_book_creator/surfer_def.rb +1 -5
  25. data/lib/audio_book_creator/url_filter.rb +1 -1
  26. data/lib/audio_book_creator/version.rb +1 -1
  27. data/lib/audio_book_creator/web_page.rb +49 -0
  28. data/run_mutant +89 -0
  29. data/spec/audio_book_creator/binder_spec.rb +3 -3
  30. data/spec/audio_book_creator/book_creator_spec.rb +2 -3
  31. data/spec/audio_book_creator/book_def_spec.rb +33 -22
  32. data/spec/audio_book_creator/cached_hash_spec.rb +4 -0
  33. data/spec/audio_book_creator/cli_spec.rb +189 -122
  34. data/spec/audio_book_creator/conductor_spec.rb +17 -6
  35. data/spec/audio_book_creator/defaulter_spec.rb +154 -0
  36. data/spec/audio_book_creator/editor_spec.rb +7 -7
  37. data/spec/audio_book_creator/page_db_spec.rb +73 -11
  38. data/spec/audio_book_creator/page_def_spec.rb +26 -40
  39. data/spec/audio_book_creator/speaker_spec.rb +2 -2
  40. data/spec/audio_book_creator/spider_spec.rb +10 -15
  41. data/spec/audio_book_creator/surfer_def_spec.rb +1 -4
  42. data/spec/audio_book_creator/url_filter_spec.rb +1 -1
  43. data/spec/audio_book_creator/web_page_spec.rb +65 -0
  44. data/spec/audio_book_creator_spec.rb +23 -0
  45. data/spec/spec_helper.rb +15 -12
  46. metadata +14 -20
@@ -1,30 +1,36 @@
1
1
  require "sqlite3"
2
+ require "json"
2
3
 
3
4
  module AudioBookCreator
5
+ # a name value store stored in sqlite
6
+ # this is used for pages and also settings
4
7
  class PageDb
5
8
  include Enumerable
6
9
 
7
- # this is for tests - get out of here
8
- attr_accessor :filename
10
+ attr_accessor :filename, :table_name, :encode
9
11
 
10
- def initialize(filename)
12
+ def initialize(filename, table_name, encode)
11
13
  @filename = filename
14
+ @table_name = table_name
15
+ @encode = encode
12
16
  end
13
17
 
14
18
  def []=(key, value)
15
- db.execute "insert into pages (name, contents) values ( ?, ?)", [key, value]
19
+ value = JSON.generate(value) if encode && value
20
+ db.execute "insert into #{table_name} (name, contents) values (?, ?)", [key, value]
16
21
  end
17
22
 
18
23
  def [](key)
19
- db.execute("select contents from pages where name = ?", key).map { |row| row.first }.first
24
+ value = db.execute("select contents from #{table_name} where name = ?", key).map { |row| row.first }.first
25
+ encode && value ? JSON.parse(value, :symbolize_names => true) : value
20
26
  end
21
27
 
22
28
  def include?(key)
23
- !!self[key]
29
+ self[key]
24
30
  end
25
31
 
26
32
  def each(&block)
27
- db.execute "select name, contents from pages order by rowid", &block
33
+ db.execute "select name, contents from #{table_name}", &block
28
34
  end
29
35
 
30
36
  private
@@ -35,7 +41,7 @@ module AudioBookCreator
35
41
 
36
42
  def create
37
43
  SQLite3::Database.new(filename).tap do |db|
38
- db.execute("create table if not exists pages (name text, contents blob)")
44
+ db.execute("create table if not exists #{table_name} (name text, contents blob)")
39
45
  end
40
46
  end
41
47
  end
@@ -2,30 +2,22 @@ module AudioBookCreator
2
2
  # information on the format of the html page that is read
3
3
  class PageDef
4
4
  attr_accessor :title_path, :body_path, :link_path, :chapter_path
5
+ attr_accessor :invalid_urls
5
6
 
6
- def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil)
7
+ def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil, invalid_urls = {})
7
8
  @title_path = title_path
8
9
  @body_path = body_path
9
10
  @link_path = link_path
10
11
  @chapter_path = chapter_path
12
+ @invalid_urls = invalid_urls
11
13
  end
12
14
 
13
- def title(dom)
14
- title = dom.css(title_path).first
15
- title.text if title
15
+ def page_links(page)
16
+ page.links(link_path).select { |href| !invalid_urls.include?(href) }
16
17
  end
17
18
 
18
- def body(dom)
19
- dom.css(body_path)
20
- # feels like I need .map { |n| n.text }
21
- end
22
-
23
- def page_links(dom, &block)
24
- dom.css(link_path).map(&block)
25
- end
26
-
27
- def chapter_links(dom, &block)
28
- dom.css(chapter_path).map(&block)
19
+ def chapter_links(page)
20
+ page.links(chapter_path).select { |href| !invalid_urls.include?(href) }
29
21
  end
30
22
  end
31
23
  end
@@ -4,11 +4,13 @@ module AudioBookCreator
4
4
  include Logging
5
5
 
6
6
  def run(cmd, options)
7
- params = options[:params].flatten.flatten.compact
7
+ params = options.fetch(:params).flatten.flatten.compact
8
8
 
9
- logger.info { "run: #{cmd} #{params.join(" ")}" }
9
+ cmdline = [cmd] + params.map(&:to_s)
10
+
11
+ logger.info { "run: #{cmdline.join(" ")}" }
10
12
  logger.info ""
11
- status = system(cmd, *params.map { |x| x.to_s })
13
+ status = system(*cmdline)
12
14
  logger.info ""
13
15
  logger.info { status ? "success" : "issue" }
14
16
 
@@ -13,7 +13,7 @@ module AudioBookCreator
13
13
  end
14
14
 
15
15
  def say(chapter)
16
- raise "Empty chapter" if chapter.empty?
16
+ raise "Empty Chapter" if chapter.empty?
17
17
  text_filename = chapter_text_filename(chapter)
18
18
  sound_filename = chapter_sound_filename(chapter)
19
19
 
@@ -1,6 +1,3 @@
1
- require 'nokogiri'
2
- require 'uri'
3
-
4
1
  module AudioBookCreator
5
2
  class Spider
6
3
  include Logging
@@ -8,28 +5,26 @@ module AudioBookCreator
8
5
  # @!attribute web
9
6
  # @return Hash access to the world wide web
10
7
  attr_accessor :web
11
- attr_accessor :invalid_urls
12
8
 
13
9
  attr_accessor :page_def
14
10
 
15
- def initialize(page_def, web = {}, invalid_urls = {})
11
+ def initialize(page_def, web)
16
12
  @page_def = page_def
17
13
  @web = web
18
- @invalid_urls = invalid_urls
19
14
  end
20
15
 
21
16
  def run(chapters)
22
- outstanding = CascadingArray.new([], chapters.map { |o| uri(o) })
17
+ outstanding = CascadingArray.new([], WebPage.map_urls(chapters))
23
18
  visited = []
24
19
 
25
20
  while (url = outstanding.shift)
26
- contents, new_pages, new_chapters = visit_page(url)
27
- visited << contents
28
- new_pages.each do |href|
29
- outstanding.add_unique_page(href) unless invalid_urls.include?(href)
21
+ wp = visit_page(url)
22
+ visited << wp
23
+ page_def.page_links(wp).each do |href|
24
+ outstanding.add_unique_page(href)
30
25
  end
31
- new_chapters.each do |href|
32
- outstanding.add_unique_chapter(href) unless invalid_urls.include?(href)
26
+ page_def.chapter_links(wp).each do |href|
27
+ outstanding.add_unique_chapter(href)
33
28
  end
34
29
  end
35
30
  visited
@@ -40,21 +35,7 @@ module AudioBookCreator
40
35
  # this one hangs on mutations
41
36
  def visit_page(url)
42
37
  logger.info { "visit #{url}" }
43
- page = web[url.to_s]
44
- doc = Nokogiri::HTML(page)
45
- [
46
- page,
47
- page_def.page_links(doc) { |a| uri(url, a["href"]) },
48
- page_def.chapter_links(doc) { |a| uri(url, a["href"]) }
49
- ]
50
- end
51
-
52
- # raises URI::Error (BadURIError)
53
- def uri(url, alt = nil)
54
- url = URI.parse(url) unless url.is_a?(URI)
55
- url += alt if alt
56
- url.fragment = nil # remove #x part of url
57
- url
38
+ WebPage.new(url, web[url.to_s])
58
39
  end
59
40
  end
60
41
  end
@@ -1,15 +1,11 @@
1
1
  module AudioBookCreator
2
2
  class SurferDef
3
- attr_accessor :host
4
3
  attr_accessor :max
5
4
  attr_accessor :regen_html
6
- attr_accessor :cache_filename
7
5
 
8
- def initialize(host = nil, max = nil, regen_html = nil, cache_filename = nil)
9
- @host = host
6
+ def initialize(max = nil, regen_html = nil)
10
7
  @max = max
11
8
  @regen_html = regen_html
12
- @cache_filename = cache_filename
13
9
  end
14
10
  end
15
11
  end
@@ -27,7 +27,7 @@ module AudioBookCreator
27
27
  private
28
28
 
29
29
  def valid_extensions
30
- ["", '.html', '.htm', '.php', '.jsp']
30
+ ["", '.html', '.htm', '.php', '.jsp', '.cfm']
31
31
  end
32
32
  end
33
33
  end
@@ -1,3 +1,3 @@
1
1
  module AudioBookCreator
2
- VERSION = "0.0.1"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -0,0 +1,49 @@
1
+ require 'nokogiri'
2
+ require 'uri'
3
+
4
module AudioBookCreator
  # A web page: the url it is identified by plus its html body.
  #
  # The body is parsed with Nokogiri the first time a query method
  # (+css+ or +links+) is called, and the parsed document is memoized.
  #
  # Two pages are considered equal when their urls are equal; the body
  # is not part of the comparison.
  class WebPage
    attr_accessor :url
    attr_accessor :body
    #attr_accessor :etag

    # @param url the page's url (a URI or a String; +links+ passes it to +uri+)
    # @param body the html contents of the page
    def initialize(url, body)
      @url = url
      @body = body
    end

    # def single_css(path) ; css(path).first ; end

    # @param path css selector
    # @return [Array] the text of every node matching +path+
    def css(path)
      dom.css(path).map(&:text)
    end

    # @param path css selector for the link nodes
    # @return [Array<URI>] the "href" attribute of every node matching +path+,
    #   resolved against this page's url, with the fragment removed
    def links(path)
      dom.css(path).map { |a| self.class.uri(url, a["href"]) }
    end

    # Pages are equal when +other+ is a WebPage with an equal url.
    def ==(other)
      other.kind_of?(WebPage) && other.url.eql?(url)
    end
    alias eql? ==

    # Kept consistent with +eql?+ so that equal pages behave as the same
    # Hash key and are de-duplicated by Array#uniq and friends.
    def hash
      url.hash
    end

    # @param url [Enumerable] urls (Strings or URIs)
    # @return [Array<URI>] each url converted by +uri+
    def self.map_urls(url)
      url.map { |o| uri(o) }
    end

    # Convert +url+ to a URI, optionally resolve +alt+ against it, and drop
    # the fragment (the "#x" part).
    #
    # This has to stay public: +links+ calls it through +self.class+, and a
    # bare +private+ keyword would not apply to a singleton method anyway.
    #
    # @param url [String, URI] base url
    # @param alt [String, nil] reference to resolve against +url+
    # @return [URI] a URI without a fragment; the +url+ argument is never modified
    # @raise [URI::Error] (BadURIError) when a url can not be parsed or joined
    def self.uri(url, alt = nil)
      # dup so that clearing the fragment does not alter the caller's object
      url = url.is_a?(URI) ? url.dup : URI.parse(url)
      url += alt if alt
      url.fragment = nil # remove #x part of url
      url
    end

    private

    # The parsed html document, built on first use.
    def dom
      @dom ||= Nokogiri::HTML(body)
    end
  end
end
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'fileutils'
4
+
5
# Runs the mutant tool against the class defined by a single file under lib/
# and keeps the captured report in a parallel file under muts/.
#
# From a source path such as "lib/a/b_c.rb" it derives:
#   spec       - "spec/a/b_c_spec.rb"
#   mutant     - "muts/a/b_c.txt" (where the report is written)
#   class_name - "A::BC"
class Mutt
  FILE_NAME = "audio_book_creator".freeze
  attr_accessor :src
  attr_accessor :spec
  attr_accessor :mutant
  attr_accessor :class_name

  # @param src [String] path of the source file, relative to the project root
  def initialize(src)
    @src = src
    @spec = src.sub("lib/", "spec/").sub(".rb", "_spec.rb")
    @mutant = src.sub("lib/", "muts/").sub(".rb", ".txt")
    @class_name = file_to_class(src.sub("lib/", ""))
  end

  # @return [Boolean] true when there is no report yet, or when the source
  #   or the spec was modified after the report was written
  # @raise [Errno::ENOENT] when the report exists but the source or spec does not
  def need_to_run?
    !File.exist?(mutant) ||
      File.mtime(src) > File.mtime(mutant) ||
      File.mtime(spec) > File.mtime(mutant)
  end

  # @return [Boolean] true for the Version file, which is not run
  def skip?
    class_name.include?("::Version")
  end

  # Create the directory that will hold the report (no-op when it exists).
  def ensure_mutant_dir
    FileUtils.mkdir_p(File.dirname(mutant))
  end

  # Read the kill and mutation counts out of the report.
  #
  # For each of "Kills" and "Mutations" the first report line containing that
  # word is used, and its second whitespace separated field is converted with
  # to_i (so a missing line or field counts as 0).
  #
  # @return [Array] [kills, mutations] as Integers, or ["", ""] when there is
  #   no report file
  def mutant_count
    return ["", ""] unless File.exist?(mutant)

    # parsed in ruby rather than via a shell command so that a report path
    # containing spaces or shell characters can not break (or alter) the command
    report = File.readlines(mutant)
    %w(Kills Mutations).map do |field|
      line = report.find { |text| text.include?(field) }
      line.to_s.split[1].to_i
    end
  end

  # @return [String] the counts line shown while a run is under way ("?" marker)
  def count_in_progress
    k, m = mutant_count
    "%s%4s/%4s #{class_name}" % ["?", k, m]
  end

  # @return [String] the counts line: blank marker when kills equal mutations,
  #   "*" otherwise
  def counts
    k, m = mutant_count
    "%s%4s/%4s #{class_name}" % [(k == m ? " " : "*"), k, m]
  end

  # Run mutant for this class with MUTANT=true in the environment, sending
  # its standard output to the report file.
  #
  # @return [Boolean, nil] the result of Kernel#system
  def run_mutant
    # the report can not be opened for writing unless its directory exists
    ensure_mutant_dir
    # --fail-fast
    # arguments are passed as a list (no shell), and the redirect is done by
    # ruby, so names with spaces or shell characters are handled literally
    system({ "MUTANT" => "true" },
           "bundle", "exec", "mutant", "--include", "lib",
           "--require", FILE_NAME, "--require", "audio_book_creator/cli",
           "--use", "rspec", class_name, "--jobs", "1",
           :out => mutant)
  rescue SystemExit, Interrupt
    # an interrupted run leaves a partial report; drop it so the next run redoes
    # it. rm_f: a missing report must not replace the exception being re-raised
    FileUtils.rm_f(mutant)
    raise
  end

  # Print one counts line per file, running mutant first for the files that
  # need it (or for all of them when +force+ is truthy).
  #
  # @param filenames [Array<String>] source files under lib/
  # @param force run mutant even when the report looks up to date
  def self.run(filenames, force)
    filenames.map { |src| new(src) }.reject(&:skip?).each do |mutt|
      if force || mutt.need_to_run?
        puts "#{mutt.count_in_progress} ..."
        mutt.run_mutant
        # move back over the in progress line so it is replaced by the result
        print back_a_line
        puts "#{mutt.counts} "
      else
        puts mutt.counts
      end
    end
  end

  # Terminal control sequence emitted by `tput cuu1; tput el`, fetched once.
  def self.back_a_line
    @back_a_line ||= `tput cuu1; tput el`
  end
  # a bare `private` does not apply to methods defined with `def self.`
  private_class_method :back_a_line

  private

  # "audio_book_creator/page_db.rb" => "AudioBookCreator::PageDb"
  def file_to_class(str)
    str.sub(".rb", "").split("/").map { |f| f.split("_").map(&:capitalize).join }.join("::")
  end
end
85
+
86
# "-f" forces a run for every file. ARGV.delete also removes the flag from
# ARGV (so it is not treated as a filename) and returns nil when it is absent.
force = ARGV.delete("-f")

# Any remaining arguments are the files to process; with none given,
# fall back to every ruby file under lib/.
filenames =
  if ARGV.empty?
    Dir["lib/**/*.rb"]
  else
    ARGV
  end

Mutt.run(filenames, force)
@@ -7,7 +7,7 @@ describe AudioBookCreator::Binder do
7
7
 
8
8
  it "should require a chapter" do
9
9
  expect_runner.not_to receive(:system)
10
- expect { subject.create([]) }.to raise_error
10
+ expect {subject.create([]) }.to raise_error("No Chapters")
11
11
  end
12
12
 
13
13
  it "should do nothing if m4b exists" do
@@ -41,9 +41,9 @@ describe AudioBookCreator::Binder do
41
41
  expect(File).to receive(:exist?).with("title.m4b").and_return(false)
42
42
 
43
43
  expect_runner.to receive(:system)
44
- .with("abbinder", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
44
+ .with("abbinder", "-A", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
45
45
  "-r", "22050", "-g", "Audiobook", "-l", "7", "-o", "title.m4b",
46
- "@the title@", "dir/chapter01.m4a", "-A").and_return(true)
46
+ "@the title@", "dir/chapter01.m4a").and_return(true)
47
47
  subject.create([spoken_chapter])
48
48
  end
49
49
  end
@@ -12,9 +12,8 @@ describe AudioBookCreator::BookCreator do
12
12
  subject { described_class.new(spider, editor, speaker, binder) }
13
13
 
14
14
  it "should call all the constructors and components" do
15
-
16
- outstanding = ["http://site.com/"]
17
- page_contents = ["site.com contents"]
15
+ outstanding = [AudioBookCreator::WebPage.new('', "http://site.com/")]
16
+ page_contents = [page("title1","contents1"), page("title2","contents2")]
18
17
  chapters = [
19
18
  chapter("contents1", "title1", 1),
20
19
  chapter("contents2", "title2", 2)
@@ -6,7 +6,7 @@ describe AudioBookCreator::BookDef do
6
6
  it { expect(subject.title).to eq(nil) }
7
7
  it { expect(subject.author).to eq("Vicki") }
8
8
  it { expect(subject.urls).to be_nil }
9
- it { expect(subject.itunes).to be_falsy }
9
+ it { expect(subject.itunes).to be_truthy }
10
10
  end
11
11
 
12
12
  context "with title" do
@@ -16,18 +16,38 @@ describe AudioBookCreator::BookDef do
16
16
  end
17
17
 
18
18
  context "with all parameters" do
19
- subject { described_class.new("the title", "author", "dir", %w(a b), true) }
19
+ subject { described_class.new("the title", "author", "dir", %w(a b), false) }
20
20
  it { expect(subject.base_dir).to eq("dir") }
21
21
  it { expect(subject.title).to eq("the title") }
22
22
  it { expect(subject.author).to eq("author") }
23
23
  it { expect(subject.filename).to eq("the-title.m4b") }
24
24
  it { expect(subject.urls).to eq(%w(a b)) }
25
+ it { expect(subject.itunes).to be_falsy }
26
+ end
27
+
28
+ context "with all parameters alt" do
29
+ subject { described_class.new("the title", "author", "dir", %w(a b), true) }
25
30
  it { expect(subject.itunes).to be_truthy }
26
31
  end
27
32
 
28
- context "with derived title" do
29
- subject { described_class.new("the title", "author", nil, nil) }
30
- it { expect(subject.base_dir).to eq("the-title") }
33
+ describe "#base_dir (derived)" do
34
+ subject { described_class.new }
35
+
36
+ it "supports titles with spaces" do
37
+ subject.title = %{title ((for "you", "Amy", and "John"))}
38
+ expect(subject.base_dir).to eq("title-for-you-Amy-and-John")
39
+ end
40
+
41
+ it "supports titles with extra stuff" do
42
+ subject.title = "title,for!"
43
+ expect(subject.base_dir).to eq("title-for")
44
+ end
45
+
46
+ it "overrides" do
47
+ subject.base_dir = "dir"
48
+ subject.title = "title"
49
+ expect(subject.base_dir).to eq("dir")
50
+ end
31
51
  end
32
52
 
33
53
  context "#unique_urls" do
@@ -36,26 +56,17 @@ describe AudioBookCreator::BookDef do
36
56
  it { expect(subject.unique_urls).to eq(%w(http://site.com/title http://site.com/title2)) }
37
57
  end
38
58
 
39
- context ".sanitize_filename" do
40
- subject { described_class }
41
- it "should join strings" do
42
- expect(subject.sanitize_filename("title", "jpg")).to eq("title.jpg")
43
- end
44
-
45
- it "should handle arrays" do
46
- expect(subject.sanitize_filename(%w(title jpg))).to eq("title.jpg")
47
- end
48
-
49
- it "should ignore nils" do
50
- expect(subject.sanitize_filename("title", nil)).to eq("title")
51
- end
59
+ describe "#filename (derived)" do
60
+ subject { described_class.new }
52
61
 
53
- it "should support titles with spaces" do
54
- expect(subject.sanitize_filename(%{title ((for "you", "Amy", and "John"))})).to eq("title-for-you-Amy-and-John")
62
+ it "adds extension" do
63
+ subject.title = "title"
64
+ expect(subject.filename).to eq("title.m4b")
55
65
  end
56
66
 
57
- it "should support titles with extra stuff" do
58
- expect(subject.sanitize_filename("title,for!")).to eq("title-for")
67
+ it "supports spaces" do
68
+ subject.title = "the title"
69
+ expect(subject.filename).to eq("the-title.m4b")
59
70
  end
60
71
  end
61
72
  end