RubyGems - epub-parser - Versions diffs - 0.3.7 → 0.3.8 - Mend

epub-parser 0.3.7 → 0.3.8

Files changed (26) hide show

checksums.yaml +4 -4
data/.gitlab-ci.yml +1 -1
data/CHANGELOG.adoc +7 -0
data/README.adoc +9 -11
data/Rakefile +30 -7
data/docs/Home.adoc +8 -2
data/epub-parser.gemspec +2 -2
data/lib/epub/content_document/xhtml.rb +2 -2
data/lib/epub/parser.rb +1 -2
data/lib/epub/parser/content_document.rb +28 -24
data/lib/epub/parser/metadata.rb +9 -8
data/lib/epub/parser/ocf.rb +9 -9
data/lib/epub/parser/publication.rb +24 -25
data/lib/epub/parser/version.rb +1 -1
data/lib/epub/parser/xml_document.rb +77 -0
data/lib/epub/publication/package.rb +1 -0
data/lib/epub/publication/package/manifest.rb +2 -4
data/lib/epub/searcher/publication.rb +17 -8
data/lib/epub/searcher/xhtml.rb +9 -7
data/test/helper.rb +1 -0
data/test/test_ocf_physical_container.rb +12 -8
data/test/test_parser_ocf.rb +2 -7
data/test/test_parser_publication.rb +1 -9
data/test/test_searcher.rb +1 -1
metadata +16 -16
data/lib/epub/parser/utils.rb +0 -20

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 4ac4cf6d94841545616b908ef95b0028d8ba6e6ad266b48b1a616c67f18c881b
-  data.tar.gz: 630f88dbacaba24a28364a5cb87d98b6778fdb1504eba785b470495cb8823bda
+  metadata.gz: 02fa97ea55de70030b58276b77bfbba26d43f5f99a77c7bffb86aec8b6afaf9e
+  data.tar.gz: c07fe68a8715101082628bb10fee8f3869d3874467aeba8598df89e79a72e442
 SHA512:
-  metadata.gz: 827f7b42fedec851ee5f3146bba19477cb80b457fdb2c98c69dff872772175bb70a877d686e851eb90744b1cf14974de46300494c5a810fcf7d9c873e2b537b1
-  data.tar.gz: 6607c5d2a5cbd63c4d9bca7eecaa7b71dfc5be49c828f0d8fdc44fd3cb4e3ee825bbb5e2adb5b06023b54ff2900ef9624cd2a3625a3f77376db12c762f3b42a0
+  metadata.gz: 7c03dcea199c65a84f1c184bbfa07c6d42583167ab26398e3159ec421e2c3205b57e39200d006f22f7f3fbf6dbb7567a7b9424d309123e724dc59ce584ee790e
+  data.tar.gz: a04003670af41618c26ecb7acd1ec0cda0063961b40c3b5fa0c9d6de21e2c7462eaac7205d6a577df69a05056f04f85ec8cf9044262e2d6bd9c24933d2ca4c04

data/.gitlab-ci.yml CHANGED

@@ -53,7 +53,7 @@ pages:
   dependencies:
     - test:2.5
   script:
-    - rake doc:yard
+    - bundle exec rake doc:yard
     - mv doc public
     - mv coverage public/
   artifacts:

data/CHANGELOG.adoc CHANGED

@@ -1,5 +1,12 @@
 = CHANGELOG
+== 0.3.8
+* [REFACTORING]Add {EPUB::Parser::NokogiriAttributeWithPrefix} and use `Nokogiri::XML::Node#attribute_with_prefix` instead of `EPUB::Parser::Utils#extract_attribute`
+* Set default value for detect_encoding argument for {EPUB::Publication::Package::Manifest::Item#read} to false
+* Make XML library switchable between REXML and Nokogiri
+* Make REXML a default XML backend
 == 0.3.7
 * Strip leading and trailing white spaces from identifiers. See http://www.idpf.org/epub/31/spec/epub-packages.html#sec-opf-dcidentifier for details.

data/README.adoc CHANGED

@@ -3,7 +3,6 @@
 = {doctitle}
 image:https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/build.svg[link="https://gitlab.com/KitaitiMakoto/epub-parser/commits/master", title="pipeline status"]
-image:https://gemnasium.com/KitaitiMakoto/epub-parser.png[link="https://gitlab.com/KitaitiMakoto/epub-parser/commits/master",title="Dependency Status"]
 image:https://badge.fury.io/rb/epub-parser.svg[link="https://gemnasium.com/KitaitiMakoto/epub-parser",title="Gem Version"]
 image:https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/coverage.svg[link="https://kitaitimakoto.gitlab.io/epub-parser/coverage/",title="coverage report"]
@@ -160,8 +159,6 @@ Then documentation will be available in `doc` directory.
 == REQUIREMENTS
 * Ruby 2.3.0 or later
-* `patch` command to install Nokogiri
-* C compiler to compile Nokogiri
 == SIMILAR EFFORTS
@@ -177,6 +174,13 @@ If you find other gems, please tell me or request a pull request.
 == RECENT CHANGES
+=== 0.3.8
+* [REFACTORING]Add {EPUB::Parser::NokogiriAttributeWithPrefix} and use `Nokogiri::XML::Node#attribute_with_prefix` instead of `EPUB::Parser::Utils#extract_attribute`
+* Set default value for detect_encoding argument for {EPUB::Publication::Package::Manifest::Item#read} to false
+* Make XML library switchable between REXML and Nokogiri
+* Make REXML a default XML backend
 === 0.3.7
 * Strip leading and trailing white spaces from identifiers
@@ -189,25 +193,18 @@ If you find other gems, please tell me or request a pull request.
 * [BUG FIX]Ignore fragment when find item by relative IRI
 * Disable https://github.com/ko1/pretty_backtrace[PrettyBacktrace] by default
-=== 0.3.5
-* [BUG FIX]Fix a bug that {EPUB::ContentDocument::Navigation::Item#item} is `nil` when `href` includes double dots(`..`)(Thanks https://gitlab.com/aelkiss[aelkiss]!)
 See {file:CHANGELOG.adoc} for older changelogs and details.
 == TODOS
 * Consider implementing an IRI feature instead of using Addressable
-* EPUB 3.0.1
-* EPUB 3.1
+* EPUB 3.2
 * Help features for `epub-open` tool
 * Vocabulary Association Mechanisms
 * Implementing navigation document and so on
 * Media Overlays
 * Content Document
 * Digital Signature
-* Using SAX on parsing
-* Abstraction of XML parser(making it possible to use REXML, standard bundled XML library of Ruby)
 * Handle with encodings other than UTF-8
 == DONE
@@ -221,6 +218,7 @@ See {file:CHANGELOG.adoc} for older changelogs and details.
 * Archive library abstraction
 * Extracting and organizing common behavior from some classes to modules
 * Multiple rootfiles
+* Abstraction of XML parser(making it possible to use REXML, standard bundled XML library of Ruby)
 == LICENSE

data/Rakefile CHANGED

@@ -4,7 +4,8 @@ require 'rubygems/tasks'
 require 'yard'
 require 'rdoc/task'
 require 'epub/parser/version'
-require 'zipruby'
+require 'archive/zip'
+require 'stringio'
 require 'epub/maker'
 task :default => :test
@@ -21,16 +22,38 @@ namespace :test do
     input_dir  = 'test/fixtures/book'
     EPUB::Maker.archive input_dir
     small_file = File.read("#{input_dir}/OPS/case-sensitive.xhtml")
-    Zip::Archive.open "#{input_dir}.epub" do |archive|
-      archive.add_buffer 'OPS/CASE-SENSITIVE.xhtml', small_file.sub('small file name', 'LARGE FILE NAME')
+    File.open "#{input_dir}.epub" do |archive_in|
+      File.open "#{input_dir}.epub.tmp", "w" do |archive_out|
+        Archive::Zip.open archive_in, :r do |z_in|
+          Archive::Zip.open archive_out, :w do |z_out|
+            z_in.each do |entry|
+              z_out << entry
+            end
+            entry = Archive::Zip::Entry::File.new("OPS/CASE-SENSITIVE.xhtml")
+            entry.file_data = StringIO.new(small_file.sub('small file name', 'LARGE FILE NAME'))
+            z_out << entry
+          end
+        end
+      end
     end
+    File.rename "#{input_dir}.epub.tmp", "#{input_dir}.epub"
   end
-  Rake::TestTask.new do |task|
-    task.test_files = FileList['test/**/test_*.rb']
-    task.warning = true
-    task.options = '--no-show-detail-immediately --verbose'
+  # TODO: Test with both REXML and Nokogiri in testing framework
+  %w[REXML Nokogiri].each do |xml_backend|
+    task "set_xml_backend_#{xml_backend.downcase}" do
+      ENV["EPUB_PARSER_XML_BACKEND"] = xml_backend
+    end
+    Rake::TestTask.new "test_with_#{xml_backend.downcase}" do |task|
+      task.test_files = FileList['test/**/test_*.rb']
+      task.warning = true
+      task.options = '--no-show-detail-immediately --verbose'
+      EPUB::Parser::XMLDocument.backend = xml_backend
+    end
+    task "test_with_#{xml_backend.downcase}" => "set_xml_backend_#{xml_backend.downcase}"
   end
+  task :test => [:test_with_rexml, :test_with_nokogiri]
 end
 task :doc => 'doc:default'

data/docs/Home.adoc CHANGED

@@ -117,6 +117,14 @@ ret == book # => true; this API is not good I feel... Welcome suggestion!
 # do something with your book
 ----
+==== Switching XML Library
+EPUB Parser uses https://ruby-doc.org/stdlib-2.5.3/libdoc/rexml/rdoc/index.html[REXML], a standard-bundled library, by default. You can use https://www.nokogiri.org/[Nokogiri], a Ruby binding for http://xmlsoft.org/[Libxml2], http://xmlsoft.org/XSLT/[Libxslt] and more, if you have already installed the Nokogiri gem via RubyGems or Bundler.
+----
+EPUB::Parser::XMLDocument.backend = :Nokogiri
+----
 ==== Switching ZIP library
 EPUB Parser uses https://github.com/javanthropus/archive-zip[Archive::Zip], a pure Ruby ZIP library, by default. You can use https://bitbucket.org/winebarrel/zip-ruby/wiki/Home[Zip/Ruby], a Ruby binding for https://libzip.org/[libzip], if you have already installed the Zip/Ruby gem via RubyGems or Bundler.
@@ -197,8 +205,6 @@ Then documentation will be available in `doc` directory.
 == Requirements
 * Ruby 2.2.0 or later
-* `patch` command to install Nokogiri
-* C compiler to compile Zip/Ruby and Nokogiri
 == History

data/epub-parser.gemspec CHANGED

@@ -26,7 +26,7 @@ Gem::Specification.new do |s|
   s.add_development_dependency 'rake'
   s.add_development_dependency 'rubygems-tasks'
-  s.add_development_dependency 'zipruby'
+  s.add_development_dependency 'zipruby' unless RUBY_PLATFORM.match /mingw/
   s.add_development_dependency 'pry'
   s.add_development_dependency 'pry-doc'
   s.add_development_dependency 'test-unit'
@@ -39,9 +39,9 @@ Gem::Specification.new do |s|
   s.add_development_dependency 'pretty_backtrace'
   s.add_development_dependency 'epub-maker'
   s.add_development_dependency 'asciidoctor'
+  s.add_development_dependency 'nokogiri', '>= 1.6.0', '< 1.9'
   s.add_runtime_dependency 'archive-zip'
-  s.add_runtime_dependency 'nokogiri', '>= 1.6.0', '< 1.9'
   s.add_runtime_dependency 'addressable', '>= 2.3.5'
   s.add_runtime_dependency 'rchardet', '>= 1.6.1'
   s.add_runtime_dependency 'epub-cfi'

data/lib/epub/content_document/xhtml.rb CHANGED

@@ -18,7 +18,7 @@ module EPUB
       # @return [String] Returns the value of title element.
       #                  If none, returns empty string
       def title
-        title_elem = nokogiri.search('title').first
+        title_elem = rexml.get_elements('.//title').first
         if title_elem
           title_elem.text
         else
@@ -29,12 +29,12 @@ module EPUB
       # @return [REXML::Document] content as REXML::Document object
       def rexml
-        require 'rexml/document'
         @rexml ||= REXML::Document.new(raw_document)
       end
       # @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
       def nokogiri
+        require 'nokogiri'
         @nokogiri ||= Nokogiri.XML(raw_document)
       end
     end

data/lib/epub/parser.rb CHANGED

@@ -1,7 +1,6 @@
 require 'epub'
 require 'epub/constants'
 require 'epub/book'
-require 'nokogiri'
 module EPUB
   class Parser
@@ -96,7 +95,7 @@ module EPUB
 end
 require 'epub/parser/version'
-require 'epub/parser/utils'
+require 'epub/parser/xml_document'
 require 'epub/parser/ocf'
 require 'epub/parser/publication'
 require 'epub/parser/content_document'

data/lib/epub/parser/content_document.rb CHANGED

@@ -1,12 +1,11 @@
 require 'epub/content_document'
 require 'epub/constants'
-require 'epub/parser/utils'
-require 'nokogiri'
+require 'epub/parser/xml_document'
 module EPUB
   class Parser
     class ContentDocument
-      include Utils
+      using XMLDocument::Refinements
       # @param [EPUB::Publication::Package::Manifest::Item] item
       def initialize(item)
@@ -28,7 +27,7 @@ module EPUB
                            end
         return content_document if content_document.nil?
         content_document.item = @item
-        document = Nokogiri.XML(@item.read)
+        document = XMLDocument.new(@item.read)
         # parse_content_document(document)
         if @item.nav?
           content_document.navigations = parse_navigations(document)
@@ -36,70 +35,75 @@ module EPUB
         content_document
       end
-      # @param [Nokogiri::HTML::Document] document HTML document or element including nav
+      # @param [XMLDocument, REXML::Document, Nokogiri::HTML::Document] document HTML document or element including nav
       # @return [Array<EPUB::ContentDocument::Navigation::Nav>] navs array of Nav object
       def parse_navigations(document)
-        document.search('/xhtml:html/xhtml:body//xhtml:nav', EPUB::NAMESPACES).collect {|elem| parse_navigation elem}
+        document.each_element_by_xpath('/xhtml:html/xhtml:body//xhtml:nav', EPUB::NAMESPACES).collect {|elem| parse_navigation elem}
       end
-      # @param [Nokogiri::XML::Element] element nav element
+      # @param [REXML::Element, Nokogiri::XML::Element] element nav element
       # @return [EPUB::ContentDocument::Navigation::Nav] nav Nav object
       def parse_navigation(element)
         nav = EPUB::ContentDocument::Navigation::Navigation.new
         nav.text = find_heading(element)
-        hidden = extract_attribute(element, 'hidden')
+        hidden = element.attribute_with_prefix('hidden')
         nav.hidden = hidden.nil? ? nil : true
-        nav.type = extract_attribute(element, 'type', 'epub')
-        element.xpath('./xhtml:ol/xhtml:li', EPUB::NAMESPACES).map do |elem|
+        nav.type = element.attribute_with_prefix('type', 'epub')
+        element.each_element_by_xpath('./xhtml:ol/xhtml:li', EPUB::NAMESPACES).map do |elem|
           nav.items << parse_navigation_item(elem)
         end
         nav
       end
-      # @param [Nokogiri::XML::Element] element li element
+      # @param [REXML::Element, Nokogiri::XML::Element] element li element
       def parse_navigation_item(element)
         item = EPUB::ContentDocument::Navigation::Item.new
-        a_or_span = element.xpath('./xhtml:a[1]|xhtml:span[1]', EPUB::NAMESPACES).first
+        a_or_span = element.each_element_by_xpath('./xhtml:a[1]|xhtml:span[1]', EPUB::NAMESPACES).first
         return a_or_span if a_or_span.nil?
-        item.text = a_or_span.text
+        item.text = a_or_span.content
         if a_or_span.name == 'a'
           if item.text.empty?
-            embedded_content = a_or_span.xpath('./xhtml:audio[1]|xhtml:canvas[1]|xhtml:embed[1]|xhtml:iframe[1]|xhtml:img[1]|xhtml:math[1]|xhtml:object[1]|xhtml:svg[1]|xhtml:video[1]', EPUB::NAMESPACES).first
+            embedded_content = a_or_span.each_element_by_xpath('./xhtml:audio[1]|xhtml:canvas[1]|xhtml:embed[1]|xhtml:iframe[1]|xhtml:img[1]|xhtml:math[1]|xhtml:object[1]|xhtml:svg[1]|xhtml:video[1]', EPUB::NAMESPACES).first
             unless embedded_content.nil?
               case embedded_content.name
               when 'audio', 'canvas', 'embed', 'iframe'
-                item.text = extract_attribute(embedded_content, 'name') || extract_attribute(embedded_content, 'srcdoc')
+                item.text = embedded_content.attribute_with_prefix('name') || embedded_content.attribute_with_prefix('srcdoc')
               when 'img'
-                item.text = extract_attribute(embedded_content, 'alt')
+                item.text = embedded_content.attribute_with_prefix('alt')
               when 'math', 'object'
-                item.text = extract_attribute(embedded_content, 'name')
+                item.text = embedded_content.attribute_with_prefix('name')
               when 'svg', 'video'
               else
               end
             end
-            item.text = extract_attribute(a_or_span, 'title').to_s if item.text.nil? || item.text.empty?
+            item.text = a_or_span.attribute_with_prefix('title').to_s if item.text.nil? || item.text.empty?
           end
-          item.href = extract_attribute(a_or_span, 'href')
+          item.href = a_or_span.attribute_with_prefix('href')
           item.item = @item.find_item_by_relative_iri(item.href)
         end
-        item.items = element.xpath('./xhtml:ol[1]/xhtml:li', EPUB::NAMESPACES).map {|li| parse_navigation_item(li)}
+        item.items = element.each_element_by_xpath('./xhtml:ol[1]/xhtml:li', EPUB::NAMESPACES).map {|li| parse_navigation_item(li)}
         item
       end
       private
-      # @param [Nokogiri::XML::Element] element nav element
+      # @param [REXML::Element, Nokogiri::XML::Element] element nav element
       # @return [String] heading heading text
       def find_heading(element)
-        heading = element.xpath('./xhtml:h1|xhtml:h2|xhtml:h3|xhtml:h4|xhtml:h5|xhtml:h6|xhtml:hgroup', EPUB::NAMESPACES).first
+        heading = element.each_element_by_xpath('./xhtml:h1|xhtml:h2|xhtml:h3|xhtml:h4|xhtml:h5|xhtml:h6|xhtml:hgroup', EPUB::NAMESPACES).first
         return nil if heading.nil?
-        return heading.text unless heading.name == 'hgroup'
+        return heading.content unless heading.name == 'hgroup'
-        (heading/'h1' || heading/'h2' || heading/'h3' || heading/'h4' || heading/'h5' || heading/'h6').first.text
+        (heading.each_element_by_xpath(".//xhtml:h1", EPUB::NAMESPACES) ||
+         heading.each_element_by_xpath(".//xhtml:h2", EPUB::NAMESPACES) ||
+         heading.each_element_by_xpath(".//xhtml:h3", EPUB::NAMESPACES) ||
+         heading.each_element_by_xpath(".//xhtml:h4", EPUB::NAMESPACES) ||
+         heading.each_element_by_xpath(".//xhtml:h5", EPUB::NAMESPACES) ||
+         heading.each_element_by_xpath(".//xhtml:h6", EPUB::NAMESPACES)).first.content
       end
     end
   end

data/lib/epub/parser/metadata.rb CHANGED

@@ -1,23 +1,24 @@
 module EPUB
   class Parser
     module Metadata
+      using XMLDocument::Refinements
       def parse_metadata(elem, unique_identifier_id, default_namespace)
         metadata = EPUB::Publication::Package::Metadata.new
         id_map = {}
         default_namespace_uri = EPUB::NAMESPACES[default_namespace]
-        elem.element_children.each do |child|
-          namespace_uri = child.namespace && child.namespace.href
+        elem.each_element do |child|
           elem_name = child.name
           model =
-            case namespace_uri
+            case child.namespace_uri
             when EPUB::NAMESPACES['dc']
               case elem_name
               when 'identifier'
                 identifier = build_model(child, :Identifier, ['id'])
                 metadata.identifiers << identifier
-                identifier.scheme = extract_attribute(child, 'scheme', 'opf')
+                identifier.scheme = child.attribute_with_prefix('scheme', 'opf')
                 identifier
               when 'title'
                 title = build_model(child, :Title)
@@ -44,8 +45,8 @@ module EPUB
               when 'link'
                 link = build_model(child, :Link, %w[id media-type])
                 metadata.links << link
-                link.href = extract_attribute(child, 'href')
-                link.rel = Set.new(extract_attribute(child, 'rel').split(/\s+/))
+                link.href = child.attribute_with_prefix('href')
+                link.rel = Set.new(child.attribute_with_prefix('rel').split(/\s+/))
                 link
               else
                 build_unsupported_model(child)
@@ -65,7 +66,7 @@ module EPUB
             id_map[model.id] = {refinee: model}
           end
-          refines = extract_attribute(child, 'refines')
+          refines = child.attribute_with_prefix('refines')
           if refines && refines.start_with?('#')
             id = refines[1..-1]
             id_map[id] ||= {}
@@ -87,7 +88,7 @@ module EPUB
         model = EPUB::Metadata.const_get(klass).new
         attributes.each do |attr|
           writer_name = (attr == "content") ? "meta_content=" : "#{attr.gsub('-', '_')}="
-          model.__send__ writer_name, extract_attribute(elem, attr)
+          model.__send__ writer_name, elem.attribute_with_prefix(attr)
         end
         model.content = elem.content unless klass == :Link
         model.content.strip! if klass == :Identifier

data/lib/epub/parser/ocf.rb CHANGED

@@ -2,12 +2,12 @@ require 'epub/constants'
 require 'epub/ocf'
 require 'epub/ocf/physical_container'
 require 'epub/parser/metadata'
-require 'nokogiri'
+require "epub/parser/xml_document"
 module EPUB
   class Parser
     class OCF
-      include Utils
+      using XMLDocument::Refinements
       include Metadata
       DIRECTORY = 'META-INF'
@@ -37,11 +37,11 @@ module EPUB
       def parse_container(xml)
         container = EPUB::OCF::Container.new
-        doc = Nokogiri.XML(xml)
-        doc.xpath('/ocf:container/ocf:rootfiles/ocf:rootfile', EPUB::NAMESPACES).each do |elem|
+        doc = XMLDocument.new(xml)
+        doc.each_element_by_xpath "/ocf:container/ocf:rootfiles/ocf:rootfile", EPUB::NAMESPACES do |elem|
           rootfile = EPUB::OCF::Container::Rootfile.new
-          rootfile.full_path = Addressable::URI.parse(extract_attribute(elem, 'full-path'))
-          rootfile.media_type = extract_attribute(elem, 'media-type')
+          rootfile.full_path = Addressable::URI.parse(elem.attribute_with_prefix('full-path'))
+          rootfile.media_type = elem.attribute_with_prefix('media-type')
           container.rootfiles << rootfile
         end
@@ -59,14 +59,14 @@ module EPUB
       end
       def parse_metadata(content)
-        doc = Nokogiri.XML(content)
+        doc = XMLDocument.new(content)
         unless multiple_rendition_metadata?(doc)
           warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
           metadata = EPUB::OCF::UnknownFormatMetadata.new
           metadata.content = content
           return metadata
         end
-        super(doc.root, doc.root['unique-identifier'], 'metadata')
+        super(doc.root, doc.root.attribute_with_prefix('unique-identifier'), 'metadata')
       end
       def parse_rights(content)
@@ -82,7 +82,7 @@ module EPUB
       def multiple_rendition_metadata?(doc)
         doc.root &&
           doc.root.name == 'metadata' &&
-          doc.namespaces['xmlns'] == EPUB::NAMESPACES['metadata']
+          doc.root.namespaces['xmlns'] == EPUB::NAMESPACES['metadata']
       end
     end
   end

data/lib/epub/parser/publication.rb CHANGED

@@ -1,5 +1,4 @@
 require 'strscan'
-require 'nokogiri'
 require 'epub/publication'
 require 'epub/constants'
 require 'epub/parser/metadata'
@@ -7,7 +6,7 @@ require 'epub/parser/metadata'
 module EPUB
   class Parser
     class Publication
-      include Utils
+      using XMLDocument::Refinements
       include Metadata
       class << self
@@ -19,7 +18,7 @@ module EPUB
       end
       def initialize(opf)
-        @doc = Nokogiri.XML(opf)
+        @doc = XMLDocument.new(opf)
       end
       def parse
@@ -36,33 +35,33 @@ module EPUB
         package = EPUB::Publication::Package.new
         elem = doc.root
         %w[version xml:lang dir id].each do |attr|
-          package.__send__ "#{attr.gsub(/\:/, '_')}=", extract_attribute(elem, attr)
+          package.__send__ "#{attr.gsub(/\:/, '_')}=", elem.attribute_with_prefix(attr)
         end
-        package.prefix = parse_prefix(extract_attribute(elem, 'prefix'))
+        package.prefix = parse_prefix(elem.attribute_with_prefix('prefix'))
         EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
         package
       end
       def parse_metadata(doc)
-        super(doc.xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first, doc.root['unique-identifier'], 'opf')
+        super(doc.each_element_by_xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first, doc.root.attribute_with_prefix('unique-identifier'), 'opf')
       end
       def parse_manifest(doc)
         manifest = EPUB::Publication::Package::Manifest.new
-        elem = doc.xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
-        manifest.id = extract_attribute(elem, 'id')
+        elem = doc.each_element_by_xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
+        manifest.id = elem.attribute_with_prefix('id')
         fallback_map = {}
-        elem.xpath('./opf:item', EPUB::NAMESPACES).each do |e|
+        elem.each_element_by_xpath('./opf:item', EPUB::NAMESPACES).each do |e|
           item = EPUB::Publication::Package::Manifest::Item.new
           %w[id media-type media-overlay].each do |attr|
-            item.__send__ "#{attr.gsub(/-/, '_')}=", extract_attribute(e, attr)
+            item.__send__ "#{attr.gsub(/-/, '_')}=", e.attribute_with_prefix(attr)
           end
-          item.href = extract_attribute(e, 'href')
-          fallback = extract_attribute(e, 'fallback')
+          item.href = e.attribute_with_prefix('href')
+          fallback = e.attribute_with_prefix('fallback')
           fallback_map[fallback] = item if fallback
-          properties = extract_attribute(e, 'properties')
+          properties = e.attribute_with_prefix('properties')
           item.properties = properties.split(' ') if properties
           manifest << item
         end
@@ -75,18 +74,18 @@ module EPUB
       def parse_spine(doc)
         spine = EPUB::Publication::Package::Spine.new
-        elem = doc.xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
+        elem = doc.each_element_by_xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
         %w[id toc page-progression-direction].each do |attr|
-          spine.__send__ "#{attr.gsub(/-/, '_')}=", extract_attribute(elem, attr)
+          spine.__send__ "#{attr.gsub(/-/, '_')}=", elem.attribute_with_prefix(attr)
         end
-        elem.xpath('./opf:itemref', EPUB::NAMESPACES).each do |e|
+        elem.each_element_by_xpath('./opf:itemref', EPUB::NAMESPACES).each do |e|
           itemref = EPUB::Publication::Package::Spine::Itemref.new
           %w[idref id].each do |attr|
-            itemref.__send__ "#{attr}=", extract_attribute(e, attr)
+            itemref.__send__ "#{attr}=", e.attribute_with_prefix(attr)
           end
-          itemref.linear = (extract_attribute(e, 'linear') != 'no')
-          properties = extract_attribute(e, 'properties')
+          itemref.linear = (e.attribute_with_prefix('linear') != 'no')
+          properties = e.attribute_with_prefix('properties')
           itemref.properties = properties.split(' ') if properties
           spine << itemref
         end
@@ -96,12 +95,12 @@ module EPUB
       def parse_guide(doc)
         guide = EPUB::Publication::Package::Guide.new
-        doc.xpath('/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES).each do |ref|
+        doc.each_element_by_xpath '/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES do |ref|
           reference = EPUB::Publication::Package::Guide::Reference.new
           %w[type title].each do |attr|
-            reference.__send__ "#{attr}=", extract_attribute(ref, attr)
+            reference.__send__ "#{attr}=", ref.attribute_with_prefix(attr)
           end
-          reference.href = extract_attribute(ref, 'href')
+          reference.href = ref.attribute_with_prefix('href')
           guide << reference
         end
@@ -110,10 +109,10 @@ module EPUB
       def parse_bindings(doc, handler_map)
         bindings = EPUB::Publication::Package::Bindings.new
-        doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES).each do |elem|
+        doc.each_element_by_xpath '/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES do |elem|
           media_type = EPUB::Publication::Package::Bindings::MediaType.new
-          media_type.media_type = extract_attribute(elem, 'media-type')
-          media_type.handler = handler_map[extract_attribute(elem, 'handler')]
+          media_type.media_type = elem.attribute_with_prefix('media-type')
+          media_type.handler = handler_map[elem.attribute_with_prefix('handler')]
           bindings << media_type
         end

data/lib/epub/parser/version.rb CHANGED

@@ -1,5 +1,5 @@
 module EPUB
   class Parser
-    VERSION = "0.3.7"
+    VERSION = "0.3.8"
   end
 end

data/lib/epub/parser/xml_document.rb ADDED

@@ -0,0 +1,77 @@
+require "rexml/document"
+begin
+  require "nokogiri"
+rescue LoadError
+end
+module EPUB
+  class Parser
+    class XMLDocument
+      class << self
+        attr_accessor :backend
+        def new(xml)
+          if backend == :Nokogiri
+            Nokogiri.XML(xml)
+          else
+            REXML::Document.new(xml)
+          end
+        end
+      end
+      module Refinements
+        [REXML::Element, REXML::Text].each do |klass|
+          refine klass do
+            %i[document element text].each do |type|
+              define_method "#{type}?" do
+                node_type == type
+              end
+            end
+          end
+        end
+        refine REXML::Element do
+          def each_element_by_xpath(xpath, namespaces = nil, &block)
+            REXML::XPath.each self, xpath, namespaces, &block
+          end
+          def attribute_with_prefix(name, prefix = nil)
+            attribute(name, EPUB::NAMESPACES[prefix])&.value
+          end
+          alias namespace_uri namespace
+          def content
+            texts.join
+          end
+        end
+        refine REXML::Text do
+          alias content value
+        end
+        if const_defined? :Nokogiri
+          refine Nokogiri::XML::Node do
+            def each_element_by_xpath(xpath, namespaces = nil, &block)
+              xpath(xpath, namespaces).each &block
+            end
+            def attribute_with_prefix(name, prefix = nil)
+              attribute_with_ns(name, EPUB::NAMESPACES[prefix])&.value
+            end
+            def each_element(xpath = nil, &block)
+              element_children.each(&block)
+            end
+            alias elements element_children
+            def namespace_uri
+              namespace.href
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/epub/publication/package.rb CHANGED

@@ -39,6 +39,7 @@ module EPUB
         @prefix = {}
       end
+      # @return [EPUB::Metadata::Identifier] Unique Identifier
       def unique_identifier
         @metadata.unique_identifier
       end

data/lib/epub/publication/package/manifest.rb CHANGED

@@ -136,14 +136,12 @@ module EPUB
           # Read content from EPUB archive
           #
-          # @param detect_encoding [Boolean] Whether #read tries auto-detection of character encoding. The default value will become +false+ in the near future.
+          # @param detect_encoding [Boolean] Whether #read tries auto-detection of character encoding. The default value is +false+.
           # @return [String] Content with encoding:
           #   US-ASCII when the content is not in text format, such as images.
           #   UTF-8 when the content is in text format and +detect_encoding+ is +false+.
           #   auto-detected encoding when the content is in text format and +detect_encoding+ is +true+.
-          def read(detect_encoding: true)
-            warn "[#{self.class}##{__method__}]Autodetection of character encoding is deprecated. Pass keyword argument detect_encoding with true explicitly." if detect_encoding
+          def read(detect_encoding: false)
             raw_content = manifest.package.book.container_adapter.read(manifest.package.book.epub_file, entry_name)
             unless media_type.start_with?('text/') or

data/lib/epub/searcher/publication.rb CHANGED

@@ -1,8 +1,11 @@
 require 'epub/publication'
+require "epub/parser/xml_document"
 module EPUB
   module Searcher
     class Publication
+      using Parser::XMLDocument::Refinements
       class << self
         def search_text(package, word, **options)
           new(package).search_text(word, options)
@@ -28,7 +31,7 @@ module EPUB
         spine_step = Result::Step.new(:element, 2, {:name => 'spine', :id => spine.id})
         spine.each_itemref.with_index do |itemref, index|
           itemref_step = Result::Step.new(:itemref, index, {:id => itemref.id})
-          XHTML::ALGORITHMS[algorithm].search_text(Nokogiri.XML(itemref.item.read), word).each do |sub_result|
+          XHTML::ALGORITHMS[algorithm].search_text(Parser::XMLDocument.new(itemref.item.read), word).each do |sub_result|
             results << Result.new([spine_step, itemref_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
           end
         end
@@ -38,7 +41,7 @@ module EPUB
       # @todo: Refactoring
       # @return [Array<Hash>] An array of search results. Each result is composed of:
-      #   :element: [Nokogiri::XML::ELement] Found element
+      #   :element: [REXML::Element, Nokogiri::XML::Element] Found element
       #   :itemref: [EPUB::Publication::Package::Spine::Itemref] Itemref that element's document belongs to
       #   :location: [EPUB::CFI::Location] CFI that indicates the element
       #   :package: [EPUB::Publication::Package] Package that the element belongs to
@@ -55,10 +58,15 @@ module EPUB
           path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
           content_document = itemref.item.content_document
           next unless content_document
-          doc = content_document.nokogiri
           elems = if xpath
-                    doc.xpath(xpath, namespaces)
+                    doc = Parser::XMLDocument.new(content_document.read)
+                    doc.each_element_by_xpath(xpath, namespaces)
                   else
+                    begin
+                      doc = content_document.nokogiri
+                    rescue LoadError
+                      raise "#{self.class}##{__method__} with `css` argument requires Nokogiri gem for now. Install Nokogiri and then try again."
+                    end
                     doc.css(css)
                   end
           elems.each do |elem|
@@ -96,13 +104,13 @@ module EPUB
         current_node = doc.root
         path_in_doc.steps.each do |step|
           if step.element?
-            current_node = current_node.element_children[step.value / 2 - 1]
+            current_node = current_node.elements.to_a[step.value / 2 - 1]
           else
             element_index = (step.value - 1) / 2 - 1
             if element_index == -1
               current_node = current_node.children.first
             else
-              prev = current_node.element_children[element_index]
+              prev = current_node.elements.to_a[element_index]
               break unless prev
               current_node = prev.next_sibling
               break unless current_node
@@ -120,8 +128,9 @@ module EPUB
       def find_path(elem)
         steps = []
         until elem.parent.document?
-          index = elem.parent.element_children.index(elem)
-          assertion = elem["id"] ? EPUB::CFI::IDAssertion.new(elem["id"]) : nil
+          index = elem.parent.elements.to_a.index(elem)
+          id_attr = elem.attribute_with_prefix("id")
+          assertion = id_attr ? EPUB::CFI::IDAssertion.new(id_attr) : nil
           steps.unshift EPUB::CFI::Step.new((index + 1) * 2, assertion)
           elem = elem.parent
         end

data/lib/epub/searcher/xhtml.rb CHANGED

@@ -1,13 +1,15 @@
 require 'epub'
-require 'epub/parser/utils'
+require 'epub/parser/xml_document'
 module EPUB
   module Searcher
     class XHTML
+      using Parser::XMLDocument::Refinements
       ALGORITHMS = {}
       class << self
-        # @param element [Nokogiri::XML::Element, Nokogiri::XML::Document]
+        # @param element [REXML::Element, REXML::Document, Nokogiri::XML::Element, Nokogiri::XML::Document]
         # @param word [String]
         # @return [Array<Result>]
         def search_text(element, word)
@@ -21,7 +23,7 @@ module EPUB
       end
       class Restricted < self
-        # @param element [Nokogiri::XML::Element]
+        # @param element [REXML::Element, Nokogiri::XML::Element]
         # @return [Array<Result>]
         def search_text(word, element=nil)
           results = []
@@ -29,9 +31,9 @@ module EPUB
           elem_index = 0
           (element || @element).children.each do |child|
             if child.element?
-              child_step = Result::Step.new(:element, elem_index, {:name => child.name, :id => Parser::Utils.extract_attribute(child, 'id')})
+              child_step = Result::Step.new(:element, elem_index, {:name => child.name, :id => child.attribute_with_prefix('id')})
               if child.name == 'img'
-                if Parser::Utils.extract_attribute(child, 'alt').index(word)
+                if child.attribute_with_prefix('alt').index(word)
                   results << Result.new([child_step], nil, nil)
                 end
               else
@@ -76,10 +78,10 @@ module EPUB
           elem_index = 0
           element.children.each do |child|
             if child.element?
-              child_step = [:element, elem_index, {:name => child.name, :id => Parser::Utils.extract_attribute(child, 'id')}]
+              child_step = [:element, elem_index, {:name => child.name, :id => child.attribute_with_prefix('id')}]
               elem_index += 1
               if child.name == 'img'
-                alt = Parser::Utils.extract_attribute(child, 'alt')
+                alt = child.attribute_with_prefix('alt')
                 next if alt.nil? || alt.empty?
                 indices[content.length] = [child_step]
                 content << alt

data/test/helper.rb CHANGED

@@ -15,3 +15,4 @@ if ENV["PRETTY_BACKTRACE"]
 end
 require 'epub/parser'
+EPUB::Parser::XMLDocument.backend = ENV["EPUB_PARSER_XML_BACKEND"].to_sym

data/test/test_ocf_physical_container.rb CHANGED

@@ -43,15 +43,19 @@ class TestOCFPhysicalContainer < Test::Unit::TestCase
     end
   end
-  require 'epub/ocf/physical_container/zipruby'
-  class TestZipruby < self
-    include ConcreteContainer
-    def setup
-      super
-      @class = EPUB::OCF::PhysicalContainer::Zipruby
-      @container = @class.new(@container_path)
+  begin
+    require 'epub/ocf/physical_container/zipruby'
+    class TestZipruby < self
+      include ConcreteContainer
+      def setup
+        super
+        @class = EPUB::OCF::PhysicalContainer::Zipruby
+        @container = @class.new(@container_path)
+      end
     end
+  rescue LoadError
+    warn "Skip TestOPFPhysicalContainer::TestZipRuby"
   end
   class TestUnpackedDirectory < self

data/test/test_parser_ocf.rb CHANGED

@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 require File.expand_path 'helper', File.dirname(__FILE__)
-require 'zipruby'
 class TestParserOCF < Test::Unit::TestCase
   def setup
@@ -8,12 +7,8 @@ class TestParserOCF < Test::Unit::TestCase
     EPUB::OCF::PhysicalContainer.open(file) {|container|
       @parser = EPUB::Parser::OCF.new(container)
     }
-    @container_xml = Zip::Archive.open(file) {|archive|
-      archive.fopen('META-INF/container.xml').read
-    }
-    @metadata_xml = Zip::Archive.open(file) {|archive|
-      archive.fopen('META-INF/metadata.xml').read
-    }
+    @container_xml = File.read("test/fixtures/book/META-INF/container.xml")
+    @metadata_xml = File.read("test/fixtures/book/META-INF/metadata.xml")
   end
   def test_parsed_container_has_two_rootfiles

data/test/test_parser_publication.rb CHANGED

@@ -1,22 +1,14 @@
 # -*- coding: utf-8 -*-
 require File.expand_path 'helper', File.dirname(__FILE__)
-require 'zipruby'
 class TestParserPublication < Test::Unit::TestCase
   def setup
-    file = 'test/fixtures/book.epub'
-    rootfile = 'OPS/ルートファイル.opf'
-    @zip = Zip::Archive.open(file)
-    opf = @zip.fopen(rootfile).read
+    opf = File.read("test/fixtures/book/OPS/ルートファイル.opf")
     @opf = Nokogiri.XML(opf)
     @parser = EPUB::Parser::Publication.new(opf)
     @package = @parser.parse_package(@opf)
   end
-  def teardown
-    @zip.close
-  end
   def test_parse_package
     assert_equal '3.0', @package.version
   end

data/test/test_searcher.rb CHANGED

@@ -9,7 +9,7 @@ class TestSearcher < Test::Unit::TestCase
       super
       opf_path = File.expand_path('../fixtures/book/OPS/ルートファイル.opf', __FILE__)
       nav_path = File.expand_path('../fixtures/book/OPS/nav.xhtml', __FILE__)
-      @package = EPUB::Parser::Publication.new(open(opf_path)).parse
+      @package = EPUB::Parser::Publication.new(File.read(opf_path)).parse
       @package.spine.each_itemref do |itemref|
         stub(itemref.item).read {
           itemref.idref == 'nav' ? File.read(nav_path) : '<html></html>'

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: epub-parser
 version: !ruby/object:Gem::Version
-  version: 0.3.7
+  version: 0.3.8
 platform: ruby
 authors:
 - KITAITI Makoto
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-05-06 00:00:00.000000000 Z
+date: 2018-10-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -221,39 +221,39 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: archive-zip
+  name: nokogiri
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
+        version: 1.6.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '1.9'
+  type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 1.6.0
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '1.9'
 - !ruby/object:Gem::Dependency
-  name: nokogiri
+  name: archive-zip
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.6.0
-    - - "<"
-      - !ruby/object:Gem::Version
-        version: '1.9'
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.6.0
-    - - "<"
-      - !ruby/object:Gem::Version
-        version: '1.9'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: addressable
   requirement: !ruby/object:Gem::Requirement
@@ -362,8 +362,8 @@ files:
 - lib/epub/parser/metadata.rb
 - lib/epub/parser/ocf.rb
 - lib/epub/parser/publication.rb
-- lib/epub/parser/utils.rb
 - lib/epub/parser/version.rb
+- lib/epub/parser/xml_document.rb
 - lib/epub/publication.rb
 - lib/epub/publication/fixed_layout.rb
 - lib/epub/publication/package.rb

data/lib/epub/parser/utils.rb DELETED

@@ -1,20 +0,0 @@
-module EPUB
-  class Parser
-    module Utils
-      # Extract the value of attribute of element
-      #
-      # @todo Refinement Nokogiri::XML::Node instead of use this method after Ruby 2.0 becomes popular
-      #
-      # @param [Nokogiri::XML::Element] element
-      # @param [String] name name of attribute excluding namespace prefix
-      # @param [String, nil] prefix XML namespace prefix in {EPUB::NAMESPACES} keys
-      # @return [String] value of attribute when the attribute exists
-      # @return nil when the attribute doesn't exist
-      def extract_attribute(element, name, prefix=nil)
-        attr = element.attribute_with_ns(name, EPUB::NAMESPACES[prefix])
-        attr.nil? ? nil : attr.value
-      end
-      module_function :extract_attribute
-    end
-  end
-end