epub-parser-io 0.1.6a

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/.gemtest +0 -0
  2. data/.gitignore +12 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +4 -0
  5. data/.yardopts +10 -0
  6. data/CHANGELOG.markdown +61 -0
  7. data/Gemfile +2 -0
  8. data/MIT-LICENSE +7 -0
  9. data/README.markdown +174 -0
  10. data/Rakefile +68 -0
  11. data/bin/epub-open +25 -0
  12. data/bin/epubinfo +64 -0
  13. data/docs/EpubOpen.markdown +43 -0
  14. data/docs/Epubinfo.markdown +37 -0
  15. data/docs/FixedLayout.markdown +96 -0
  16. data/docs/Home.markdown +128 -0
  17. data/docs/Item.markdown +80 -0
  18. data/docs/Navigation.markdown +58 -0
  19. data/docs/Publication.markdown +54 -0
  20. data/epub-parser.gemspec +49 -0
  21. data/features/epubinfo.feature +6 -0
  22. data/features/step_definitions/epubinfo_steps.rb +5 -0
  23. data/features/support/env.rb +1 -0
  24. data/lib/epub/book/features.rb +85 -0
  25. data/lib/epub/book.rb +7 -0
  26. data/lib/epub/constants.rb +48 -0
  27. data/lib/epub/content_document/navigation.rb +104 -0
  28. data/lib/epub/content_document/xhtml.rb +41 -0
  29. data/lib/epub/content_document.rb +2 -0
  30. data/lib/epub/inspector.rb +45 -0
  31. data/lib/epub/ocf/container.rb +28 -0
  32. data/lib/epub/ocf/encryption.rb +7 -0
  33. data/lib/epub/ocf/manifest.rb +6 -0
  34. data/lib/epub/ocf/metadata.rb +6 -0
  35. data/lib/epub/ocf/rights.rb +6 -0
  36. data/lib/epub/ocf/signatures.rb +6 -0
  37. data/lib/epub/ocf.rb +8 -0
  38. data/lib/epub/parser/content_document.rb +111 -0
  39. data/lib/epub/parser/ocf.rb +73 -0
  40. data/lib/epub/parser/publication.rb +200 -0
  41. data/lib/epub/parser/utils.rb +20 -0
  42. data/lib/epub/parser/version.rb +5 -0
  43. data/lib/epub/parser.rb +103 -0
  44. data/lib/epub/publication/fixed_layout.rb +208 -0
  45. data/lib/epub/publication/package/bindings.rb +31 -0
  46. data/lib/epub/publication/package/guide.rb +51 -0
  47. data/lib/epub/publication/package/manifest.rb +180 -0
  48. data/lib/epub/publication/package/metadata.rb +170 -0
  49. data/lib/epub/publication/package/spine.rb +106 -0
  50. data/lib/epub/publication/package.rb +68 -0
  51. data/lib/epub/publication.rb +2 -0
  52. data/lib/epub.rb +14 -0
  53. data/man/epubinfo.1.ronn +19 -0
  54. data/schemas/epub-nav-30.rnc +10 -0
  55. data/schemas/epub-nav-30.sch +72 -0
  56. data/schemas/epub-xhtml-30.sch +377 -0
  57. data/schemas/ocf-container-30.rnc +16 -0
  58. data/test/fixtures/book/META-INF/container.xml +6 -0
  59. data/test/fixtures/book/OPS/%E6%97%A5%E6%9C%AC%E8%AA%9E.xhtml +10 -0
  60. data/test/fixtures/book/OPS/case-sensitive.xhtml +9 -0
  61. data/test/fixtures/book/OPS/containing space.xhtml +10 -0
  62. data/test/fixtures/book/OPS/containing%20space.xhtml +10 -0
  63. data/test/fixtures/book/OPS/nav.xhtml +28 -0
  64. data/test/fixtures/book/OPS//343/203/253/343/203/274/343/203/210/343/203/225/343/202/241/343/202/244/343/203/253.opf +119 -0
  65. data/test/fixtures/book/OPS//346/227/245/346/234/254/350/252/236.xhtml +10 -0
  66. data/test/fixtures/book/mimetype +1 -0
  67. data/test/helper.rb +9 -0
  68. data/test/test_content_document.rb +92 -0
  69. data/test/test_epub.rb +21 -0
  70. data/test/test_fixed_layout.rb +257 -0
  71. data/test/test_inspect.rb +121 -0
  72. data/test/test_parser.rb +60 -0
  73. data/test/test_parser_content_document.rb +36 -0
  74. data/test/test_parser_fixed_layout.rb +16 -0
  75. data/test/test_parser_ocf.rb +38 -0
  76. data/test/test_parser_publication.rb +247 -0
  77. data/test/test_publication.rb +324 -0
  78. metadata +445 -0
data/.gemtest ADDED
File without changes
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ vendor/*
6
+ .yardoc/
7
+ coverage/*
8
+ samples/*
9
+ doc/*
10
+ NOTE
11
+ test/fixtures/book.epub
12
+ *~
data/.gitmodules ADDED
@@ -0,0 +1,3 @@
1
+ [submodule "wiki"]
2
+ path = wiki
3
+ url = git://github.com/KitaitiMakoto/epub-parser.wiki.git
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ rvm:
2
+ - "1.9.3"
3
+ - "2.0.0"
4
+ - "2.1.0"
data/.yardopts ADDED
@@ -0,0 +1,10 @@
1
+ -
2
+ CHANGELOG.markdown
3
+ MIT-LICENSE
4
+ docs/Home.markdown
5
+ docs/Publication.markdown
6
+ docs/Item.markdown
7
+ docs/FixedLayout.markdown
8
+ docs/Epubinfo.markdown
9
+ docs/EpubOpen.markdown
10
+ docs/Navigation.markdown
@@ -0,0 +1,61 @@
1
+ CHANGELOG
2
+ =========
3
+ 0.1.6
4
+ -----
5
+ * Remove `EPUB.parse` method
6
+ * Remove `EPUB::Publication::Package::Metadata#to_hash`
7
+ * Add `EPUB::Publication::Package::Metadata::Identifier` for ad-hoc `scheme` attribute and `#isbn?` method
8
+ * Remove `MethodDecorators::Deprecated`
9
+ * Make `EPUB::Parser::OCF::CONTAINER_FILE` and other constants deprecated
10
+ * Make `EPUB::Publication::Package::Metadata::Link#rel` a `Set`
11
+ * Add exception class `EPUB::Constants::MediaType::UnsupportedMediaType`
12
+ * Make `EPUB::Constants::MediaType::UnsupportedError` deprecated. Use `UnsupportedMediaType` instead
13
+ * Add `EPUB::Publication::Package::Item#cover_image?`
14
+ * Add `EPUB::Book::Features` module and move methods of `EPUB` module to it(Thanks, [takahashim][]!)
15
+ * Make including `EPUB` deprecated
16
+ * Parse `hidden` attribute of `nav` elements
17
+ * [Experimental]Add `EPUB::ContentDocument::Navigation::Item#traverse`
18
+
19
+ [takahashim]: https://github.com/takahashim
20
+
21
+ 0.1.5
22
+ -----
23
+ * Add `ContentDocument::XHTML#title`
24
+ * Add `Manifest::Item#xhtml?`
25
+ * Add `--words` and `--chars` options to `epubinfo` command which count words and characters of XHTMLs in EPUB file
26
+ * API change: `OCF::Container::Rootfile#full_path` became Addressable::URI object rather than `String`. `EPUB#rootfile_path` still returns `String`
27
+ * Add `ContentDocument::XHTML#rexml` which returns document as `REXML::Document` object
28
+ * Add `ContentDocument::XHTML#nokogiri` which returns document as `Nokogiri::XML::Document` object
29
+ * Inspect more readably
30
+
31
+ 0.1.4
32
+ -----
33
+ * [Fixed-Layout Documents][fixed-layout] support
34
+ * Define `ContentDocument::XHTML#top_level?`
35
+ * Define `Spine::Itemref#page_spread` and `#page_spread=`
36
+ * Define some utility methods around `Manifest::Item` and `Spine::Itemref`
37
+ * `Manifest::Item#itemref`
38
+ * `Spine::Itemref#item=`
39
+
40
+ [fixed-layout]: http://www.idpf.org/epub/fxl/
41
+
42
+ 0.1.3
43
+ -----
44
+ * Add `EPUB::Parser::Utils` module
45
+ * Add a command-line tool `epub-open`
46
+ * Add support for XHTML Navigation Document
47
+ * Make `EPUB::Publication::Package::Metadata#to_hash` obsolete. Use `#to_h` instead
48
+ * Add utility methods `EPUB#description`, `EPUB#date` and `EPUB#unique_identifier`
49
+
50
+ 0.1.2
51
+ -----
52
+ * Fix a bug that `Item#read` couldn't read file when `href` is percent-encoded(Thanks, [gambhiro][]!)
53
+
54
+ [gambhiro]: https://github.com/gambhiro
55
+
56
+ 0.1.1
57
+ -----
58
+ * Parse package@prefix and attach it as `Package#prefix`
59
+ * `Manifest::Item#iri` was removed. It existed for files in unzipped EPUB books, but EPUB Parser now retrieves files from the zip archive directly. `#href` now returns an `Addressable::URI` object.
60
+ * `Metadata::Link#iri`: ditto.
61
+ * `Guide::Reference#iri`: ditto.
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "https://rubygems.org"
2
+ gemspec
data/MIT-LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright (c) 2011, 2012, 2013 KITAITIMAKOTO <KitaitiMakoto@gmail.com>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,174 @@
1
+ EPUB Parser
2
+ ===========
3
+ [![Build Status](https://secure.travis-ci.org/KitaitiMakoto/epub-parser.png?branch=master)](http://travis-ci.org/KitaitiMakoto/epub-parser)
4
+ [![Dependency Status](https://gemnasium.com/KitaitiMakoto/epub-parser.png)](https://gemnasium.com/KitaitiMakoto/epub-parser)
5
+
6
+ INSTALLATION
7
+ -------
8
+
9
+ gem install epub-parser
10
+
11
+ USAGE
12
+ -----
13
+
14
+ ### As a library
15
+
16
+ require 'epub/parser'
17
+
18
+ book = EPUB::Parser.parse('book.epub')
19
+ book.metadata.titles # => Array of EPUB::Publication::Package::Metadata::Title. Main title, subtitle, etc...
20
+ book.metadata.title # => Title string including all titles
21
+ book.metadata.creators # => Creators(authors)
22
+ book.each_page_on_spine do |page|
23
+ page.media_type # => "application/xhtml+xml"
24
+ page.entry_name # => "OPS/nav.xhtml" entry name in EPUB package(zip archive)
25
+ page.read # => raw content document
26
+ page.content_document.nokogiri # => Nokogiri::XML::Document. The same to Nokogiri.XML(page.read)
27
+ # do something more
28
+ # :
29
+ end
30
+
31
+ See {file:docs/Home.markdown} or the [API Documentation][rubydoc] for more info.
32
+
33
+ [rubydoc]: http://rubydoc.info/gems/epub-parser/frames
34
+
35
+ ### `epubinfo` command-line tool
36
+
37
+ `epubinfo` tool extracts and shows the metadata of specified EPUB book.
38
+
39
+ $ epubinfo ~/Documents/Books/build_awesome_command_line_applications_in_ruby.epub
40
+ Title: Build Awesome Command-Line Applications in Ruby (for KITAITI MAKOTO)
41
+ Identifiers: 978-1-934356-91-3
42
+ Titles: Build Awesome Command-Line Applications in Ruby (for KITAITI MAKOTO)
43
+ Languages: en
44
+ Contributors:
45
+ Coverages:
46
+ Creators: David Bryant Copeland
47
+ Dates:
48
+ Descriptions:
49
+ Formats:
50
+ Publishers: The Pragmatic Bookshelf, LLC (338304)
51
+ Relations:
52
+ Rights: Copyright © 2012 Pragmatic Programmers, LLC
53
+ Sources:
54
+ Subjects: Pragmatic Bookshelf
55
+ Types:
56
+ Unique identifier: 978-1-934356-91-3
57
+ Epub version: 2.0
58
+
59
+ See {file:docs/Epubinfo} for more info.
60
+
61
+ ### `epub-open` command-line tool
62
+
63
+ `epub-open` tool provides an interactive shell (IRB) which helps you investigate an EPUB book.
64
+
65
+ epub-open path/to/book.epub
66
+
67
+ IRB starts. `self` becomes the EPUB book, so you can call the methods of `EPUB` directly.
68
+
69
+ title
70
+ => "Title of the book"
71
+ metadata.creators
72
+ => [Author 1, Author2, ...]
73
+ resources.first.properties
74
+ => #<Set: {"nav"}> # You know that first resource of this book is nav document
75
+ nav = resources.first
76
+ => ...
77
+ nav.href
78
+ => #<Addressable::URI:0x15ce350 URI:nav.xhtml>
79
+ nav.media_type
80
+ => "application/xhtml+xml"
81
+ puts nav.read
82
+ <?xml version="1.0"?>
83
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
84
+ :
85
+ :
86
+ :
87
+ </html>
88
+ => nil
89
+ exit # Enter "exit" to exit the session
90
+
91
+ See {file:docs/EpubOpen} for more info.
92
+
93
+ REQUIREMENTS
94
+ ------------
95
+ * Ruby 1.9.3 or later
96
+ * C compiler to compile Zip/Ruby and Nokogiri
97
+
98
+ Related Gems
99
+ ------------
100
+ * [gepub](https://github.com/skoji/gepub) - a generic EPUB library for Ruby
101
+ * [epubinfo](https://github.com/chdorner/epubinfo) - Extracts metadata information from EPUB files. Supports EPUB2 and EPUB3 formats.
102
+ * [ReVIEW](https://github.com/kmuto/review) - ReVIEW is an easy-to-use digital publishing system for books and ebooks.
103
+ * [epzip](https://github.com/takahashim/epzip) - epzip is EPUB packing tool. It's just only doing 'zip.' :)
104
+ * [eeepub](https://github.com/jugyo/eeepub) - EeePub is a Ruby ePub generator
105
+ * [epub-maker](https://github.com/KitaitiMakoto/epub-maker) - This library supports making and editing EPUB books based on this EPUB Parser library
106
+
107
+ If you find other gems, please tell me or send a pull request.
108
+
109
+ RECENT CHANGES
110
+ --------------
111
+ ### 0.1.6
112
+ * Remove `EPUB.parse` method
113
+ * Remove `EPUB::Publication::Package::Metadata#to_hash`
114
+ * Add `EPUB::Publication::Package::Metadata::Identifier`
115
+ * Remove `MethodDecorators::Deprecated`
116
+ * Make `EPUB::Parser::OCF::CONTAINER_FILE` and other constants deprecated
117
+ * Make `EPUB::Publication::Package::Metadata::Link#rel` a `Set`
118
+ * Add exception class `EPUB::Constants::MediaType::UnsupportedMediaType`
119
+ * Make `EPUB::Constants::MediaType::UnsupportedError` deprecated
120
+ * Add `EPUB::Publication::Package::Item#find_item_by_relative_iri`
121
+ * Add `EPUB::Publication::Package::Item#cover_image?`
122
+ * Add `EPUB::Book::Features` module and move methods of `EPUB` module to it.(Thanks, [takahashim][]!)
123
+ * Make including `EPUB` deprecated
124
+ * Parse `hidden` attribute of `nav` elements
125
+ * [Experimental]Add `EPUB::ContentDocument::Navigation::Item#traverse`
126
+
127
+ [takahashim]: https://github.com/takahashim
128
+
129
+ ### 0.1.5
130
+ * Add `ContentDocument::XHTML#title`
131
+ * Add `Manifest::Item#xhtml?`
132
+ * Add `--words` and `--chars` options to `epubinfo` command
133
+ * API change: `OCF::Container::Rootfile#full_path` became Addressable::URI object rather than `String`
134
+ * Add `ContentDocument::XHTML#rexml` and `#nokogiri`
135
+ * Inspect more readably
136
+
137
+ ### 0.1.4
138
+ * [Fixed-Layout Documents][fixed-layout] support
139
+ * Define `ContentDocument::XHTML#top_level?`
140
+ * Define `Spine::Itemref#page_spread` and `#page_spread=`
141
+ * Define some utility methods around `Manifest::Item` and `Spine::Itemref`
142
+
143
+ [fixed-layout]: http://www.idpf.org/epub/fxl/
144
+
145
+ See {file:CHANGELOG.markdown} for older changelogs and details.
146
+
147
+ TODOS
148
+ -----
149
+ * EPUB 3.0.1
150
+ * Multiple rootfiles
151
+ * Help features for `epub-open` tool
152
+ * Vocabulary Association Mechanisms
153
+ * Implementing navigation document and so on
154
+ * Media Overlays
155
+ * Content Document
156
+ * Digital Signature
157
+ * Using SAX on parsing
158
+ * Extracting and organizing common behavior from some classes to modules
159
+ * Abstraction of XML parser(making it possible to use REXML, standard bundled XML library of Ruby)
160
+ * Handle encodings other than UTF-8
161
+
162
+ DONE
163
+ ----
164
+ * Simple inspect for `epub-open` tool
165
+ * Using zip library instead of `unzip` command, which has a security issue
166
+ * Modify methods around fallback to see `bindings` element in the package
167
+ * Content Document(only for Navigation Documents)
168
+ * Fixed Layout
169
+ * Vocabulary Association Mechanisms(only for itemref)
170
+
171
+ LICENSE
172
+ -------
173
+ This library is distributed under the terms of the MIT License.
174
+ See MIT-LICENSE file for more info.
data/Rakefile ADDED
@@ -0,0 +1,68 @@
1
+ require 'bundler/gem_helper'
2
+ require 'rake/clean'
3
+ require 'rake/testtask'
4
+ require 'yard'
5
+ require 'rdoc/task'
6
+ require 'cucumber'
7
+ require 'cucumber/rake/task'
8
+ require 'epub/parser/version'
9
+ require 'zipruby'
10
+
11
+ task :default => :test
12
+ task :test => 'test:default'
13
+
14
+ namespace :test do
15
+ task :default => [:build, :test]
16
+
17
+ desc 'Run all tests'
18
+ task :all => [:build, :test, :cucumber]
19
+
20
+ desc 'Build test fixture EPUB file'
21
+ task :build => :clean do
22
+ input_dir = 'test/fixtures/book'
23
+ sh "epzip #{input_dir}"
24
+ small_file = File.read("#{input_dir}/OPS/case-sensitive.xhtml")
25
+ Zip::Archive.open "#{input_dir}.epub" do |archive|
26
+ archive.add_buffer 'OPS/CASE-SENSITIVE.xhtml', small_file.sub('small file name', 'LARGE FILE NAME')
27
+ end
28
+ end
29
+
30
+ Rake::TestTask.new do |task|
31
+ task.test_files = FileList['test/**/test_*.rb']
32
+ task.warning = true
33
+ task.options = '--no-show-detail-immediately --verbose'
34
+ end
35
+
36
+ Cucumber::Rake::Task.new
37
+ end
38
+
39
+ task :doc => 'doc:default'
40
+
41
+ namespace :doc do
42
+ task :default => [:yard, :rdoc]
43
+
44
+ YARD::Rake::YardocTask.new
45
+ Rake::RDocTask.new do |rdoc|
46
+ rdoc.rdoc_files = FileList['lib/**/*.rb']
47
+ rdoc.rdoc_files.include 'README.markdown'
48
+ rdoc.rdoc_files.include 'MIT-LICENSE'
49
+ rdoc.rdoc_files.include 'docs/**/*.md'
50
+ end
51
+ end
52
+
53
+ namespace :gem do
54
+ desc "Build epub-parser-#{EPUB::Parser::VERSION}.gem into the pkg directory."
55
+ task :build do
56
+ Bundler::GemHelper.new.build_gem
57
+ end
58
+
59
+ desc "Build and install epub-parser-#{EPUB::Parser::VERSION}.gem into system gems."
60
+ task :install do
61
+ Bundler::GemHelper.new.install_gem
62
+ end
63
+
64
+ desc "Create tag v#{EPUB::Parser::VERSION} and build and push epub-parser-#{EPUB::Parser::VERSION}.gem to Rubygems"
65
+ task :release => :test do
66
+ Bundler::GemHelper.new.release_gem
67
+ end
68
+ end
data/bin/epub-open ADDED
@@ -0,0 +1,25 @@
1
+ require 'English'
2
+ require 'optparse'
3
+ require 'irb'
4
+ require 'epub/parser'
5
+
6
+ shell = IRB
7
+
8
+ OptionParser.new {|opt|
9
+ opt.banner = <<EOB
10
+ Open EPUB file in IRB
11
+
12
+ Usage: #{File.basename($PROGRAM_NAME)} EPUBFILE
13
+
14
+ EOB
15
+ opt.on '--pry', 'Use Pry instead of IRB as shell' do
16
+ require 'pry'
17
+ shell = Pry
18
+ end
19
+ }.parse!
20
+
21
+ $0 = File.basename($PROGRAM_NAME)
22
+ include EPUB::Book::Features
23
+ EPUB::Parser.parse(ARGV.shift, :book => self)
24
+ $stderr.puts "Enter \"exit\" to exit #{shell}"
25
+ shell.start
data/bin/epubinfo ADDED
@@ -0,0 +1,64 @@
1
+ require 'optparse'
2
+ require 'epub/parser'
3
+
4
+ options = {:format => :line}
5
+ opt = OptionParser.new do |opt|
6
+ opt.banner = <<EOB
7
+ Show metadata of an EPUB file
8
+
9
+ Usage: epubinfo [options] EPUBFILE
10
+
11
+ EOB
12
+ opt.version = EPUB::Parser::VERSION
13
+ formats = [:line, :json, :yaml]
14
+ opt.on '-f', '--format=FORMAT', formats, "format of output(#{formats[0..-2].join(', ')} or #{formats.last}), defaults to line(for console)" do |format|
15
+ options[:format] = format
16
+ end
17
+ opt.on '--words', 'count words of content documents' do
18
+ options[:words] = true
19
+ end
20
+ opt.on '--chars', 'count charactors of content documents' do
21
+ options[:chars] = true
22
+ end
23
+ end
24
+ opt.parse!(ARGV)
25
+
26
+ file = ARGV.shift
27
+ unless file
28
+ $stdout.puts "error: you must supply an EPUB file name"
29
+ $stdout.puts opt.help
30
+ abort
31
+ end
32
+
33
+ book = EPUB::Parser.parse(file)
34
+ data = {'Title' => [book.title]}
35
+ data.merge!(book.metadata.to_h)
36
+ data['Unique identifier'] = [book.metadata.unique_identifier]
37
+ data['EPUB Version'] = [book.package.version]
38
+ counts = {:chars => 0, :words => 0}
39
+ if options[:words] or options[:chars]
40
+ book.resources.select(&:xhtml?).each do |xhtml|
41
+ begin
42
+ doc = xhtml.content_document.nokogiri
43
+ body = doc.search('body').first
44
+ content = body.content
45
+ if body
46
+ counts[:words] += content.scan(/\S+/).length
47
+ counts[:chars] += content.gsub(/\r|\n/, '').length
48
+ end
49
+ rescue => error
50
+ warn "#{xhtml.href}: #{error}"
51
+ end
52
+ end
53
+ end
54
+ data['Words'] = [counts[:words]] if options[:words]
55
+ data['Characters'] = [counts[:chars]] if options[:chars]
56
+ if options[:format] == :line
57
+ key_width = data.keys.map {|k| k.length}.max + 3
58
+ data.each_pair do |k, v|
59
+ puts (k.to_s.capitalize + ':').ljust(key_width) + v.join(', ')
60
+ end
61
+ else
62
+ require options[:format].to_s
63
+ puts data.__send__("to_#{options[:format]}")
64
+ end