RubyGems - cdb-crawlr - Versions diffs - 0.0.2 - Mend

cdb-crawlr 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

data/lib/cdb-crawlr.rb ADDED

@@ -0,0 +1,36 @@
+require 'nokogiri'
+require 'open-uri'
+$:.unshift(File.dirname(__FILE__))
+require 'cdb/struct'
+require 'cdb/issue'
+require 'cdb/title'
# Top-level namespace of the cdb-crawlr gem. Holds the site constants and the
# shared search entry point used by CDB::Title and CDB::Issue.
module CDB
  VERSION = '0.0.2'
  BASE_URL = 'http://www.comicbookdb.com'
  REQUEST_HEADERS = {'Connection' => 'keep-alive'}
  SEARCH_PATH = 'search.php'

  class << self
    # Run a search against the site and parse the result page.
    #
    # @param query [String] text to search for
    # @param type [String] value sent as the +form_searchtype+ form field
    # @return [Hash] +:titles+ and +:issues+ keys, each holding whatever the
    #   corresponding +parse_results+ returns; both are empty arrays when the
    #   page contains no "Search Results" header
    def search(query, type='FullSite')
      doc = Nokogiri::HTML(fetch_page(search_url(query, type)))
      header = doc.css('h2:contains("Search Results")').first
      # Without the header there is no results container to hand to the
      # parsers; report "nothing found" instead of failing on nil.
      return {:titles => [], :issues => []} unless header

      node = header.parent
      {
        :titles => CDB::Title.parse_results(node),
        :issues => CDB::Issue.parse_results(node)
      }
    end

    private

    # Build the full search URL for the given query and search type.
    def search_url(query, type)
      data = URI.encode_www_form(
        form_searchtype: type,
        form_search: query
      )
      "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
    end

    # Download +url+ and return its body transcoded to UTF-8. The raw bytes
    # are interpreted as ISO-8859-1.
    def fetch_page(url)
      # Block form so open-uri closes the underlying IO/Tempfile for us.
      reader = lambda { |io| io.read }
      content =
        if URI.respond_to?(:open)
          # Public URI.open is the supported entry point on modern Rubies,
          # where open-uri no longer patches Kernel#open.
          URI.open(url, REQUEST_HEADERS, &reader)
        else
          # Older Rubies only expose open-uri through Kernel#open.
          open(url, REQUEST_HEADERS, &reader)
        end
      content.force_encoding('ISO-8859-1').encode('UTF-8')
    end
  end
end

data/lib/cdb/issue.rb ADDED

@@ -0,0 +1,26 @@
module CDB
  # A single comic issue as listed on a search result page.
  #
  # Members: cdb_id, title, num, name, cover_date (cover_date is not filled in
  # by the search-result parser).
  class Issue < Struct.new(:cdb_id, :title, :num, :name, :cover_date)
    FORM_SEARCHTYPE = 'IssueName'
    WEB_PATH = 'issue.php'

    class << self
      # Search the site by issue name.
      #
      # @param query [String] text to search for
      # @return [Array<CDB::Issue>] the +:issues+ part of CDB.search's result
      def search(query)
        results = CDB.search(query, FORM_SEARCHTYPE)
        results[:issues]
      end

      # Build an Issue for every issue link found under +node+.
      #
      # @param node [#css] parsed HTML node containing the search results
      # @return [Array<CDB::Issue>] one entry per link whose href starts with
      #   WEB_PATH
      def parse_results(node)
        node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
          id = link.attr('href').split('=').last
          label = link.child
          text = label ? label.text.strip : ''

          # Expected link text: "<title> (<year>) <issue number>". When the
          # text has another shape, keep it whole as the title rather than
          # failing on a nil match.
          match = text.match(/^(.* \(\d{4}\)) (.*)$/)
          title, num = match ? match.captures : [text, nil]

          # The issue name follows the link as a sibling text node shaped
          # like: - "Name". It may be absent.
          sibling = link.next_sibling
          name = sibling ? sibling.text.strip.gsub(/^-\s*"|"$/, '').strip : nil

          new(:cdb_id => id, :title => title, :num => num, :name => name)
        end
      end
    end
  end
end

data/lib/cdb/struct.rb ADDED

@@ -0,0 +1,18 @@
# Hash#to_json (used by #to_json below) is only defined once json is loaded.
require 'json'

module CDB
  # Modifications to Ruby's Struct class for use within the CDB module.
  # Must be called 'Struct' to play nice with YARD's @attr documentation.
  #
  # The superclass is written as ::Struct on purpose: a bare `Struct` would
  # resolve to CDB::Struct itself if this file were ever evaluated a second
  # time, raising a superclass mismatch.
  class Struct < ::Struct
    # Override Struct's initialize method to accept a hash of members instead.
    #
    # @param h [Hash] member name => value; members not listed stay nil
    # @raise [NoMethodError] if a key does not name a member
    def initialize(h={})
      h.each { |member, value| public_send("#{member}=", value) }
    end

    # Serialize as a JSON object keyed by member name.
    #
    # @param a [Array] arguments forwarded unchanged to Hash#to_json
    # @return [String] JSON text
    def to_json(*a)
      Hash[each_pair.to_a].to_json(*a)
    end
  end
end

data/lib/cdb/title.rb ADDED

@@ -0,0 +1,26 @@
module CDB
  # A comic title (series) as listed on a search result page.
  #
  # Members: cdb_id, name, publisher, begin_date, end_date (end_date is not
  # filled in by the search-result parser).
  class Title < Struct.new(:cdb_id, :name, :publisher, :begin_date, :end_date)
    FORM_SEARCHTYPE = 'Title'
    WEB_PATH = 'title.php'

    class << self
      # Search the site by title name.
      #
      # @param query [String] text to search for
      # @return [Array<CDB::Title>] the +:titles+ part of CDB.search's result
      def search(query)
        results = CDB.search(query, FORM_SEARCHTYPE)
        results[:titles]
      end

      # Build a Title for every title link found under +node+.
      #
      # @param node [#css] parsed HTML node containing the search results
      # @return [Array<CDB::Title>] one entry per link whose href starts with
      #   WEB_PATH
      def parse_results(node)
        node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
          id = link.attr('href').split('=').last
          label = link.child
          text = label ? label.text.strip : ''

          # Expected link text: "<name> (<year>)". Split on the trailing
          # parenthesised group instead of fixed character offsets, so text
          # in another shape is kept whole as the name with no begin date.
          match = text.match(/\A(.*?)\s*\(([^()]*)\)\z/m)
          name, year = match ? match.captures : [text, nil]

          # The publisher follows the link as a sibling text node wrapped in
          # parentheses. It may be absent.
          sibling = link.next_sibling
          pub = sibling ? sibling.text.gsub(/^\s*\(|\)\s*$/, '') : nil

          new(:cdb_id => id, :name => name, :publisher => pub, :begin_date => year)
        end
      end
    end
  end
end

metadata ADDED

@@ -0,0 +1,61 @@
+--- !ruby/object:Gem::Specification
+name: cdb-crawlr
+version: !ruby/object:Gem::Version
+  version: 0.0.2
+  prerelease:
+platform: ruby
+authors:
+- Gabe Smith
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2012-11-01 00:00:00.000000000Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: &4367990 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: *4367990
+description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
+email:
+- sgt.floydpepper@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/cdb/issue.rb
+- lib/cdb/struct.rb
+- lib/cdb/title.rb
+- lib/cdb-crawlr.rb
+homepage: https://github.com/sgtFloyd/cdb-crawlr
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.10
+signing_key:
+specification_version: 3
+summary: Ruby gem and command-line tool for querying ComicBookDB.com
+test_files: []
+has_rdoc: