audio_book_creator 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.travis.yml +2 -2
  4. data/CHANGELOG.md +24 -0
  5. data/Gemfile +3 -3
  6. data/README.md +9 -4
  7. data/audio_book_creator.gemspec +3 -3
  8. data/{bin → exe}/audio_book_creator +3 -0
  9. data/lib/audio_book_creator.rb +4 -2
  10. data/lib/audio_book_creator/binder.rb +2 -1
  11. data/lib/audio_book_creator/book_def.rb +2 -2
  12. data/lib/audio_book_creator/cached_hash.rb +1 -1
  13. data/lib/audio_book_creator/cascading_array.rb +8 -8
  14. data/lib/audio_book_creator/chapter.rb +1 -1
  15. data/lib/audio_book_creator/cli.rb +36 -29
  16. data/lib/audio_book_creator/conductor.rb +5 -3
  17. data/lib/audio_book_creator/defaulter.rb +41 -0
  18. data/lib/audio_book_creator/editor.rb +2 -3
  19. data/lib/audio_book_creator/page_db.rb +14 -8
  20. data/lib/audio_book_creator/page_def.rb +7 -15
  21. data/lib/audio_book_creator/runner.rb +5 -3
  22. data/lib/audio_book_creator/speaker.rb +1 -1
  23. data/lib/audio_book_creator/spider.rb +9 -28
  24. data/lib/audio_book_creator/surfer_def.rb +1 -5
  25. data/lib/audio_book_creator/url_filter.rb +1 -1
  26. data/lib/audio_book_creator/version.rb +1 -1
  27. data/lib/audio_book_creator/web_page.rb +49 -0
  28. data/run_mutant +89 -0
  29. data/spec/audio_book_creator/binder_spec.rb +3 -3
  30. data/spec/audio_book_creator/book_creator_spec.rb +2 -3
  31. data/spec/audio_book_creator/book_def_spec.rb +33 -22
  32. data/spec/audio_book_creator/cached_hash_spec.rb +4 -0
  33. data/spec/audio_book_creator/cli_spec.rb +189 -122
  34. data/spec/audio_book_creator/conductor_spec.rb +17 -6
  35. data/spec/audio_book_creator/defaulter_spec.rb +154 -0
  36. data/spec/audio_book_creator/editor_spec.rb +7 -7
  37. data/spec/audio_book_creator/page_db_spec.rb +73 -11
  38. data/spec/audio_book_creator/page_def_spec.rb +26 -40
  39. data/spec/audio_book_creator/speaker_spec.rb +2 -2
  40. data/spec/audio_book_creator/spider_spec.rb +10 -15
  41. data/spec/audio_book_creator/surfer_def_spec.rb +1 -4
  42. data/spec/audio_book_creator/url_filter_spec.rb +1 -1
  43. data/spec/audio_book_creator/web_page_spec.rb +65 -0
  44. data/spec/audio_book_creator_spec.rb +23 -0
  45. data/spec/spec_helper.rb +15 -12
  46. metadata +14 -20
@@ -1,30 +1,36 @@
1
1
  require "sqlite3"
2
+ require "json"
2
3
 
3
4
  module AudioBookCreator
5
+ # a name value store stored in sqlite
6
+ # this is used for pages and also settings
4
7
  class PageDb
5
8
  include Enumerable
6
9
 
7
- # this is for tests - get out of here
8
- attr_accessor :filename
10
+ attr_accessor :filename, :table_name, :encode
9
11
 
10
- def initialize(filename)
12
+ def initialize(filename, table_name, encode)
11
13
  @filename = filename
14
+ @table_name = table_name
15
+ @encode = encode
12
16
  end
13
17
 
14
18
  def []=(key, value)
15
- db.execute "insert into pages (name, contents) values ( ?, ?)", [key, value]
19
+ value = JSON.generate(value) if encode && value
20
+ db.execute "insert into #{table_name} (name, contents) values (?, ?)", [key, value]
16
21
  end
17
22
 
18
23
  def [](key)
19
- db.execute("select contents from pages where name = ?", key).map { |row| row.first }.first
24
+ value = db.execute("select contents from #{table_name} where name = ?", key).map { |row| row.first }.first
25
+ encode && value ? JSON.parse(value, :symbolize_names => true) : value
20
26
  end
21
27
 
22
28
  def include?(key)
23
- !!self[key]
29
+ self[key]
24
30
  end
25
31
 
26
32
  def each(&block)
27
- db.execute "select name, contents from pages order by rowid", &block
33
+ db.execute "select name, contents from #{table_name}", &block
28
34
  end
29
35
 
30
36
  private
@@ -35,7 +41,7 @@ module AudioBookCreator
35
41
 
36
42
  def create
37
43
  SQLite3::Database.new(filename).tap do |db|
38
- db.execute("create table if not exists pages (name text, contents blob)")
44
+ db.execute("create table if not exists #{table_name} (name text, contents blob)")
39
45
  end
40
46
  end
41
47
  end
@@ -2,30 +2,22 @@ module AudioBookCreator
2
2
  # information on the format of the html page that is read
3
3
  class PageDef
4
4
  attr_accessor :title_path, :body_path, :link_path, :chapter_path
5
+ attr_accessor :invalid_urls
5
6
 
6
- def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil)
7
+ def initialize(title_path = "h1", body_path = "p", link_path = "a", chapter_path = nil, invalid_urls = {})
7
8
  @title_path = title_path
8
9
  @body_path = body_path
9
10
  @link_path = link_path
10
11
  @chapter_path = chapter_path
12
+ @invalid_urls = invalid_urls
11
13
  end
12
14
 
13
- def title(dom)
14
- title = dom.css(title_path).first
15
- title.text if title
15
+ def page_links(page)
16
+ page.links(link_path).select { |href| !invalid_urls.include?(href) }
16
17
  end
17
18
 
18
- def body(dom)
19
- dom.css(body_path)
20
- # feels like I need .map { |n| n.text }
21
- end
22
-
23
- def page_links(dom, &block)
24
- dom.css(link_path).map(&block)
25
- end
26
-
27
- def chapter_links(dom, &block)
28
- dom.css(chapter_path).map(&block)
19
+ def chapter_links(page)
20
+ page.links(chapter_path).select { |href| !invalid_urls.include?(href) }
29
21
  end
30
22
  end
31
23
  end
@@ -4,11 +4,13 @@ module AudioBookCreator
4
4
  include Logging
5
5
 
6
6
  def run(cmd, options)
7
- params = options[:params].flatten.flatten.compact
7
+ params = options.fetch(:params).flatten.flatten.compact
8
8
 
9
- logger.info { "run: #{cmd} #{params.join(" ")}" }
9
+ cmdline = [cmd] + params.map(&:to_s)
10
+
11
+ logger.info { "run: #{cmdline.join(" ")}" }
10
12
  logger.info ""
11
- status = system(cmd, *params.map { |x| x.to_s })
13
+ status = system(*cmdline)
12
14
  logger.info ""
13
15
  logger.info { status ? "success" : "issue" }
14
16
 
@@ -13,7 +13,7 @@ module AudioBookCreator
13
13
  end
14
14
 
15
15
  def say(chapter)
16
- raise "Empty chapter" if chapter.empty?
16
+ raise "Empty Chapter" if chapter.empty?
17
17
  text_filename = chapter_text_filename(chapter)
18
18
  sound_filename = chapter_sound_filename(chapter)
19
19
 
@@ -1,6 +1,3 @@
1
- require 'nokogiri'
2
- require 'uri'
3
-
4
1
  module AudioBookCreator
5
2
  class Spider
6
3
  include Logging
@@ -8,28 +5,26 @@ module AudioBookCreator
8
5
  # @!attribute web
9
6
  # @return Hash access to the world wide web
10
7
  attr_accessor :web
11
- attr_accessor :invalid_urls
12
8
 
13
9
  attr_accessor :page_def
14
10
 
15
- def initialize(page_def, web = {}, invalid_urls = {})
11
+ def initialize(page_def, web)
16
12
  @page_def = page_def
17
13
  @web = web
18
- @invalid_urls = invalid_urls
19
14
  end
20
15
 
21
16
  def run(chapters)
22
- outstanding = CascadingArray.new([], chapters.map { |o| uri(o) })
17
+ outstanding = CascadingArray.new([], WebPage.map_urls(chapters))
23
18
  visited = []
24
19
 
25
20
  while (url = outstanding.shift)
26
- contents, new_pages, new_chapters = visit_page(url)
27
- visited << contents
28
- new_pages.each do |href|
29
- outstanding.add_unique_page(href) unless invalid_urls.include?(href)
21
+ wp = visit_page(url)
22
+ visited << wp
23
+ page_def.page_links(wp).each do |href|
24
+ outstanding.add_unique_page(href)
30
25
  end
31
- new_chapters.each do |href|
32
- outstanding.add_unique_chapter(href) unless invalid_urls.include?(href)
26
+ page_def.chapter_links(wp).each do |href|
27
+ outstanding.add_unique_chapter(href)
33
28
  end
34
29
  end
35
30
  visited
@@ -40,21 +35,7 @@ module AudioBookCreator
40
35
  # this one hangs on mutations
41
36
  def visit_page(url)
42
37
  logger.info { "visit #{url}" }
43
- page = web[url.to_s]
44
- doc = Nokogiri::HTML(page)
45
- [
46
- page,
47
- page_def.page_links(doc) { |a| uri(url, a["href"]) },
48
- page_def.chapter_links(doc) { |a| uri(url, a["href"]) }
49
- ]
50
- end
51
-
52
- # raises URI::Error (BadURIError)
53
- def uri(url, alt = nil)
54
- url = URI.parse(url) unless url.is_a?(URI)
55
- url += alt if alt
56
- url.fragment = nil # remove #x part of url
57
- url
38
+ WebPage.new(url, web[url.to_s])
58
39
  end
59
40
  end
60
41
  end
@@ -1,15 +1,11 @@
1
1
  module AudioBookCreator
2
2
  class SurferDef
3
- attr_accessor :host
4
3
  attr_accessor :max
5
4
  attr_accessor :regen_html
6
- attr_accessor :cache_filename
7
5
 
8
- def initialize(host = nil, max = nil, regen_html = nil, cache_filename = nil)
9
- @host = host
6
+ def initialize(max = nil, regen_html = nil)
10
7
  @max = max
11
8
  @regen_html = regen_html
12
- @cache_filename = cache_filename
13
9
  end
14
10
  end
15
11
  end
@@ -27,7 +27,7 @@ module AudioBookCreator
27
27
  private
28
28
 
29
29
  def valid_extensions
30
- ["", '.html', '.htm', '.php', '.jsp']
30
+ ["", '.html', '.htm', '.php', '.jsp', '.cfm']
31
31
  end
32
32
  end
33
33
  end
@@ -1,3 +1,3 @@
1
1
  module AudioBookCreator
2
- VERSION = "0.0.1"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -0,0 +1,49 @@
1
+ require 'nokogiri'
2
+ require 'uri'
3
+
4
+ module AudioBookCreator
5
+ class WebPage
6
+ attr_accessor :url
7
+ attr_accessor :body
8
+ #attr_accessor :etag
9
+
10
+ def initialize(url, body)
11
+ @url = url
12
+ @body = body
13
+ end
14
+
15
+ # def single_css(path) ; css(path).first ; end
16
+ def css(path)
17
+ dom.css(path).map {|n| n.text }
18
+ end
19
+
20
+ def links(path)
21
+ dom.css(path).map { |a| self.class.uri(url, a["href"]) }
22
+ end
23
+
24
+ def dom
25
+ @dom ||= Nokogiri::HTML(body)
26
+ end
27
+ private :dom
28
+
29
+ def ==(other)
30
+ other.kind_of?(WebPage) &&
31
+ other.url.eql?(url)
32
+ end
33
+ alias :eql? :==
34
+
35
+ def self.map_urls(url)
36
+ url.map { |o| uri(o) }
37
+ end
38
+
39
+ private
40
+
41
+ # raises URI::Error (BadURIError)
42
+ def self.uri(url, alt = nil)
43
+ url = URI.parse(url) unless url.is_a?(URI)
44
+ url += alt if alt
45
+ url.fragment = nil # remove #x part of url
46
+ url
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'fileutils'
4
+
5
+ class Mutt
6
+ FILE_NAME="audio_book_creator"
7
+ attr_accessor :src
8
+ attr_accessor :spec
9
+ attr_accessor :mutant
10
+ attr_accessor :class_name
11
+
12
+ def initialize(src)
13
+ @src = src
14
+ @spec = src.sub("lib/","spec/").sub(".rb","_spec.rb")
15
+ @mutant = src.sub("lib/","muts/").sub(".rb",".txt")
16
+ @class_name = file_to_class(src.sub("lib/",""))
17
+ end
18
+
19
+ def need_to_run?
20
+ !File.exist?(mutant) ||
21
+ File.stat(src).mtime > File.stat(mutant).mtime ||
22
+ File.stat(spec).mtime > File.stat(mutant).mtime
23
+ end
24
+
25
+ def skip?
26
+ class_name.include?("::Version")
27
+ end
28
+
29
+ def ensure_mutant_dir
30
+ FileUtils.mkdir_p(File.dirname(mutant))
31
+ end
32
+
33
+ def mutant_count
34
+ if File.exist?(mutant)
35
+ %w(Kills Mutations).map { |field| `awk "/#{field}/ { print \\$2; }" #{mutant}`.chomp.to_i }
36
+ else
37
+ ["", ""]
38
+ end
39
+ end
40
+
41
+ def count_in_progress
42
+ k, m = mutant_count
43
+ "%s%4s/%4s #{class_name}" % ["?", k, m]
44
+ end
45
+
46
+ def counts
47
+ k, m = mutant_count
48
+ "%s%4s/%4s #{class_name}" % [(k == m ? " " : "*"), k, m]
49
+ end
50
+
51
+ def run_mutant
52
+ # ensure_mutant_dir
53
+ # --fail-fast
54
+ system({ "MUTANT" => "true"},
55
+ "bundle exec mutant --include lib --require #{FILE_NAME} --require audio_book_creator/cli " +
56
+ "--use rspec #{class_name} --jobs 1 > #{mutant}")
57
+ rescue SystemExit, Interrupt
58
+ FileUtils.rm(mutant)
59
+ raise
60
+ end
61
+
62
+ def self.run(filenames, force)
63
+ filenames.map { |src| Mutt.new(src) }.select { |mutant| !mutant.skip? }.each do |mutant|
64
+ if force || mutant.need_to_run?
65
+ puts "#{mutant.count_in_progress} ..."
66
+ mutant.run_mutant
67
+ print back_a_line
68
+ puts "#{mutant.counts} "
69
+ else
70
+ puts mutant.counts
71
+ end
72
+ end
73
+ end
74
+
75
+ private
76
+
77
+ def self.back_a_line
78
+ @back_a_line ||= `tput cuu1; tput el`
79
+ end
80
+
81
+ def file_to_class(str)
82
+ str.sub(".rb","").split("/").collect { |f| f.split("_").collect(&:capitalize).join }.join("::")
83
+ end
84
+ end
85
+
86
+ force = ARGV.delete("-f")
87
+ filenames = ARGV.empty? ? Dir["lib/**/*.rb"] : ARGV
88
+
89
+ Mutt.run(filenames, force)
@@ -7,7 +7,7 @@ describe AudioBookCreator::Binder do
7
7
 
8
8
  it "should require a chapter" do
9
9
  expect_runner.not_to receive(:system)
10
- expect { subject.create([]) }.to raise_error
10
+ expect {subject.create([]) }.to raise_error("No Chapters")
11
11
  end
12
12
 
13
13
  it "should do nothing if m4b exists" do
@@ -41,9 +41,9 @@ describe AudioBookCreator::Binder do
41
41
  expect(File).to receive(:exist?).with("title.m4b").and_return(false)
42
42
 
43
43
  expect_runner.to receive(:system)
44
- .with("abbinder", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
44
+ .with("abbinder", "-A", "-a", "Vicki", "-t", "title", "-b", "32", "-c", "1",
45
45
  "-r", "22050", "-g", "Audiobook", "-l", "7", "-o", "title.m4b",
46
- "@the title@", "dir/chapter01.m4a", "-A").and_return(true)
46
+ "@the title@", "dir/chapter01.m4a").and_return(true)
47
47
  subject.create([spoken_chapter])
48
48
  end
49
49
  end
@@ -12,9 +12,8 @@ describe AudioBookCreator::BookCreator do
12
12
  subject { described_class.new(spider, editor, speaker, binder) }
13
13
 
14
14
  it "should call all the constructors and components" do
15
-
16
- outstanding = ["http://site.com/"]
17
- page_contents = ["site.com contents"]
15
+ outstanding = [AudioBookCreator::WebPage.new('', "http://site.com/")]
16
+ page_contents = [page("title1","contents1"), page("title2","contents2")]
18
17
  chapters = [
19
18
  chapter("contents1", "title1", 1),
20
19
  chapter("contents2", "title2", 2)
@@ -6,7 +6,7 @@ describe AudioBookCreator::BookDef do
6
6
  it { expect(subject.title).to eq(nil) }
7
7
  it { expect(subject.author).to eq("Vicki") }
8
8
  it { expect(subject.urls).to be_nil }
9
- it { expect(subject.itunes).to be_falsy }
9
+ it { expect(subject.itunes).to be_truthy }
10
10
  end
11
11
 
12
12
  context "with title" do
@@ -16,18 +16,38 @@ describe AudioBookCreator::BookDef do
16
16
  end
17
17
 
18
18
  context "with all parameters" do
19
- subject { described_class.new("the title", "author", "dir", %w(a b), true) }
19
+ subject { described_class.new("the title", "author", "dir", %w(a b), false) }
20
20
  it { expect(subject.base_dir).to eq("dir") }
21
21
  it { expect(subject.title).to eq("the title") }
22
22
  it { expect(subject.author).to eq("author") }
23
23
  it { expect(subject.filename).to eq("the-title.m4b") }
24
24
  it { expect(subject.urls).to eq(%w(a b)) }
25
+ it { expect(subject.itunes).to be_falsy }
26
+ end
27
+
28
+ context "with all parameters alt" do
29
+ subject { described_class.new("the title", "author", "dir", %w(a b), true) }
25
30
  it { expect(subject.itunes).to be_truthy }
26
31
  end
27
32
 
28
- context "with derived title" do
29
- subject { described_class.new("the title", "author", nil, nil) }
30
- it { expect(subject.base_dir).to eq("the-title") }
33
+ describe "#base_dir (derived)" do
34
+ subject { described_class.new }
35
+
36
+ it "supports titles with spaces" do
37
+ subject.title = %{title ((for "you", "Amy", and "John"))}
38
+ expect(subject.base_dir).to eq("title-for-you-Amy-and-John")
39
+ end
40
+
41
+ it "supports titles with extra stuff" do
42
+ subject.title = "title,for!"
43
+ expect(subject.base_dir).to eq("title-for")
44
+ end
45
+
46
+ it "overrides" do
47
+ subject.base_dir = "dir"
48
+ subject.title = "title"
49
+ expect(subject.base_dir).to eq("dir")
50
+ end
31
51
  end
32
52
 
33
53
  context "#unique_urls" do
@@ -36,26 +56,17 @@ describe AudioBookCreator::BookDef do
36
56
  it { expect(subject.unique_urls).to eq(%w(http://site.com/title http://site.com/title2)) }
37
57
  end
38
58
 
39
- context ".sanitize_filename" do
40
- subject { described_class }
41
- it "should join strings" do
42
- expect(subject.sanitize_filename("title", "jpg")).to eq("title.jpg")
43
- end
44
-
45
- it "should handle arrays" do
46
- expect(subject.sanitize_filename(%w(title jpg))).to eq("title.jpg")
47
- end
48
-
49
- it "should ignore nils" do
50
- expect(subject.sanitize_filename("title", nil)).to eq("title")
51
- end
59
+ describe "#filename (derived)" do
60
+ subject { described_class.new }
52
61
 
53
- it "should support titles with spaces" do
54
- expect(subject.sanitize_filename(%{title ((for "you", "Amy", and "John"))})).to eq("title-for-you-Amy-and-John")
62
+ it "adds extension" do
63
+ subject.title = "title"
64
+ expect(subject.filename).to eq("title.m4b")
55
65
  end
56
66
 
57
- it "should support titles with extra stuff" do
58
- expect(subject.sanitize_filename("title,for!")).to eq("title-for")
67
+ it "supports spaces" do
68
+ subject.title = "the title"
69
+ expect(subject.filename).to eq("the-title.m4b")
59
70
  end
60
71
  end
61
72
  end