RubyGems - cdb-crawlr - Versions diffs - 0.0.3 → 0.0.4 - Mend

cdb-crawlr 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/lib/cdb-crawlr.rb CHANGED

@@ -10,7 +10,7 @@ require 'cdb/issue'
 require 'cdb/title'
 module CDB
-  VERSION = '0.0.3'
+  VERSION = '0.0.4'
   BASE_URL = 'http://www.comicbookdb.com'
   REQUEST_HEADERS = {'Connection' => 'keep-alive'}
@@ -24,9 +24,7 @@ module CDB
         form_search: query
       )
       url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
-      content = open(url, REQUEST_HEADERS).read
-      content.force_encoding('ISO-8859-1').encode!('UTF-8')
-      doc = Nokogiri::HTML(content)
+      doc = read_page(url)
       node = doc.css('h2:contains("Search Results")').first.parent
       {
         :titles => CDB::Title.parse_results(node),
@@ -34,5 +32,20 @@ module CDB
       }
     end
+    def show(id, type)
+      data = URI.encode_www_form('ID' => id)
+      url = "#{BASE_URL}/#{type::WEB_PATH}?#{data}"
+      page = read_page(url)
+      type.parse_data(id, page)
+    end
+  private
+    def read_page(url)
+      content = open(url, REQUEST_HEADERS).read
+      content.force_encoding('ISO-8859-1').encode!('UTF-8')
+      Nokogiri::HTML(content)
+    end
   end
 end

data/lib/cdb/cli.rb CHANGED

@@ -14,15 +14,15 @@ module CDB
     end
     def []=(k, v)
+      v = v.to_s.strip
       case k
       when :command
-        v = v.to_s.strip.downcase
+        v = v.downcase
         raise unless COMMANDS.include?(v)
       when :scope
-        v = v.to_s.strip.downcase.gsub(/^=|s$/, '')
+        v = v.downcase.gsub(/^=|s$/, '')
         raise unless SCOPES.include?(v)
       when :args
-        v = v.to_s.strip
         if self[:command] == 'search'
           raise "invalid search query" if v.empty?
         end
@@ -40,8 +40,8 @@ module CDB
       case self[:scope] || 'all'
       when 'all'
         CDB.search(self[:args]).each do |key, res|
-          puts key.to_s.capitalize
-          res.each{|r| puts r.to_json}
+          puts key.to_s.capitalize+':'
+          res.each{|r| puts '  '+r.to_json}
         end
       when 'title'
         CDB::Title.search(self[:args]).each{|r| puts r.to_json}

data/lib/cdb/issue.rb CHANGED

@@ -1,5 +1,5 @@
 module CDB
-  class Issue < Struct.new(:cdb_id, :title, :num, :name, :cover_date)
+  class Issue < Struct.new(:cdb_id, :title, :num, :name, :story_arc, :cover_date)
     FORM_SEARCHTYPE = 'IssueName'
     WEB_PATH = 'issue.php'
@@ -15,12 +15,24 @@ module CDB
           id = link.attr('href').split('=').last.to_i
           text = link.child.text.strip
           match = text.match(/^(.* \(\d{4}\)) (.*)$/)
-          title, num = match[1..2]
+          title = match[1]
+          num = match[2].gsub(/^#/, '')
           name = link.next_sibling.text.strip.gsub(/^-\s*"|"$/, '').strip
           new(:cdb_id => id, :title => title, :num => num, :name => name)
         end.sort_by(&:cdb_id)
       end
+      def from_tr(node, title)
+        tds = node.css('td')
+        link = tds[0].css("a[href^=\"#{WEB_PATH}\"]").first
+        new(:cdb_id => link['href'].split('=').last.strip,
+            :title => title,
+            :num => link.text.strip,
+            :name => tds[2].text.strip,
+            :story_arc => tds[4].text.strip,
+            :cover_date => tds[6].text.strip)
+      end
     end
   end
 end

data/lib/cdb/struct.rb CHANGED

@@ -8,7 +8,7 @@ module CDB
       h.each{|k,v| send("#{k}=", v)}
     end
-    def as_json
+    def as_json(*)
       members.inject({}){|map, m|
         map[m] = self[m]; map
       }

data/lib/cdb/title.rb CHANGED

@@ -1,5 +1,5 @@
 module CDB
-  class Title < Struct.new(:cdb_id, :name, :publisher, :begin_date, :end_date)
+  class Title < Struct.new(:cdb_id, :name, :issues, :publisher, :imprint, :begin_date, :end_date, :country, :language)
     FORM_SEARCHTYPE = 'Title'
     WEB_PATH = 'title.php'
@@ -10,6 +10,10 @@ module CDB
         results[:titles]
       end
+      def show(id)
+        CDB.show(id, self)
+      end
       def parse_results(node)
         node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
           id = link.attr('href').split('=').last.to_i
@@ -21,6 +25,26 @@ module CDB
         end.sort_by(&:cdb_id)
       end
+      def parse_data(id, page)
+        dates = page.css('strong:contains("Publication Date: ")').first.next_sibling.text.strip
+        start_d, end_d = dates.split('-').map(&:strip)
+        title = new(
+          :cdb_id => id,
+          :name => page.css('.page_headline').first.text.strip,
+          :publisher => page.css('a[href^="publisher.php"]').first.text.strip,
+          :imprint => page.css('a[href^="imprint.php"]').first.text.strip,
+          :begin_date => start_d,
+          :end_date => end_d,
+          :country => page.css('strong:contains("Country: ")').first.next_sibling.text.strip,
+          :language => page.css('strong:contains("Language: ")').first.next_sibling.text.strip
+        )
+        title.issues = page.css("td[width='726'] a.page_link[href^=\"#{CDB::Issue::WEB_PATH}\"]").map do |link|
+          tr = link.parent.parent
+          CDB::Issue.from_tr(tr, title)
+        end
+        title
+      end
     end
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cdb-crawlr
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-11-02 00:00:00.000000000Z
+date: 2012-11-03 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
-  requirement: &18435060 !ruby/object:Gem::Requirement
+  requirement: &23392700 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *18435060
+  version_requirements: *23392700
 description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
 email:
 - sgt.floydpepper@gmail.com