cdb-crawlr 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,36 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
+ $:.unshift(File.dirname(__FILE__))
5
+
6
+ require 'cdb/struct'
7
+ require 'cdb/issue'
8
+ require 'cdb/title'
9
+
10
# Top-level namespace of the gem: site constants plus the generic search
# entry point that CDB::Title.search and CDB::Issue.search delegate to.
module CDB
  VERSION = '0.0.2'

  BASE_URL = 'http://www.comicbookdb.com'
  REQUEST_HEADERS = {'Connection' => 'keep-alive'}
  SEARCH_PATH = 'search.php'

  class << self

    # Queries the site's search page and parses the returned HTML.
    #
    # @param query [String] text sent as the `form_search` parameter
    # @param type [String] value sent as the `form_searchtype` parameter
    # @return [Hash] `:titles` => result of CDB::Title.parse_results and
    #   `:issues` => result of CDB::Issue.parse_results; both are empty
    #   arrays when the page contains no "Search Results" heading
    def search(query, type='FullSite')
      data = URI.encode_www_form(
        form_searchtype: type,
        form_search: query
      )
      url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
      content = read_url(url)
      # The response bytes are treated as ISO-8859-1 and transcoded so the
      # parser receives UTF-8.
      content.force_encoding('ISO-8859-1').encode!('UTF-8')
      doc = Nokogiri::HTML(content)
      heading = doc.css('h2:contains("Search Results")').first
      # Without the heading there is no results container to hand to the
      # parsers; report "nothing found" instead of failing on nil.
      return {:titles => [], :issues => []} unless heading
      node = heading.parent
      {
        :titles => CDB::Title.parse_results(node),
        :issues => CDB::Issue.parse_results(node)
      }
    end

    private

    # Fetches the body of +url+ with REQUEST_HEADERS. The block form closes
    # the underlying stream once it has been read. URI.open is preferred
    # when open-uri provides it; Kernel#open is only a fallback for Rubies
    # whose open-uri does not define URI.open.
    def read_url(url)
      if URI.respond_to?(:open)
        URI.open(url, REQUEST_HEADERS) { |io| io.read }
      else
        open(url, REQUEST_HEADERS) { |io| io.read }
      end
    end

  end
end
@@ -0,0 +1,26 @@
1
module CDB
  # One issue entry taken from a search results page.
  #
  # Members: cdb_id, title, num, name, cover_date. parse_results fills every
  # member except cover_date.
  class Issue < Struct.new(:cdb_id, :title, :num, :name, :cover_date)
    FORM_SEARCHTYPE = 'IssueName'
    WEB_PATH = 'issue.php'

    class << self

      # Searches by issue name.
      #
      # @param query [String] search text
      # @return [Array<CDB::Issue>] the `:issues` part of CDB.search
      def search(query)
        results = CDB.search(query, FORM_SEARCHTYPE)
        results[:issues]
      end

      # Builds an Issue for every link under +node+ whose href starts with
      # WEB_PATH.
      #
      # @param node [#css] container of the search results
      # @return [Array<CDB::Issue>]
      def parse_results(node)
        node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
          id = link.attr('href').split('=').last
          child = link.child
          text = child ? child.text.strip : ''
          # Expected link text: "<title> (<4-digit year>) <issue number>".
          # Text of any other shape is kept whole as the title, with no
          # number, instead of raising on a nil match.
          match = text.match(/^(.* \(\d{4}\)) (.*)$/)
          title, num = match ? match[1..2] : [text, nil]
          # The node following the link carries the issue name as
          # ` - "Name"`; the leading dash and surrounding quotes are removed.
          sibling = link.next_sibling
          name = sibling ? sibling.text.strip.gsub(/^-\s*"|"$/, '').strip : ''
          new(:cdb_id => id, :title => title, :num => num, :name => name)
        end
      end

    end
  end
end
@@ -0,0 +1,18 @@
1
require 'json' # to_json below relies on Hash#to_json

module CDB
  # Modifications to Ruby's Struct class for use within the CDB module.
  # Must be called 'Struct' to play nice with YARD's @attr documentation.
  class Struct < Struct

    # Override Struct's initialize method to accept a hash of members instead.
    #
    # @param h [Hash] member name => value; members not listed stay nil
    # @raise [NoMethodError] when a key has no matching public setter
    def initialize(h={})
      h.each { |k, v| public_send("#{k}=", v) }
    end

    # Serializes the struct as a JSON object keyed by member name.
    #
    # @param a [Array] arguments forwarded unchanged to Hash#to_json
    # @return [String] JSON text
    def to_json(*a)
      members.each_with_object({}) { |m, map| map[m] = self[m] }.to_json(*a)
    end

  end
end
@@ -0,0 +1,26 @@
1
module CDB
  # One title (series) entry taken from a search results page.
  #
  # Members: cdb_id, name, publisher, begin_date, end_date. parse_results
  # fills every member except end_date.
  class Title < Struct.new(:cdb_id, :name, :publisher, :begin_date, :end_date)
    FORM_SEARCHTYPE = 'Title'
    WEB_PATH = 'title.php'

    class << self

      # Searches by title name.
      #
      # @param query [String] search text
      # @return [Array<CDB::Title>] the `:titles` part of CDB.search
      def search(query)
        results = CDB.search(query, FORM_SEARCHTYPE)
        results[:titles]
      end

      # Builds a Title for every link under +node+ whose href starts with
      # WEB_PATH.
      #
      # @param node [#css] container of the search results
      # @return [Array<CDB::Title>]
      def parse_results(node)
        node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
          id = link.attr('href').split('=').last
          child = link.child
          text = child ? child.text.strip : ''
          # Expected link text: "<name> (<4 characters>)". Splitting with a
          # pattern rather than fixed offsets keeps text without that suffix
          # intact as the name and leaves the year unset.
          match = text.match(/\A(.*) \((.{4})\)\z/m)
          name, year = match ? match[1..2] : [text, nil]
          # The node following the link carries the publisher wrapped in
          # parentheses; the wrapping is removed.
          sibling = link.next_sibling
          pub = sibling ? sibling.text.gsub(/^\s*\(|\)\s*$/, '') : ''
          new(:cdb_id => id, :name => name, :publisher => pub, :begin_date => year)
        end
      end

    end
  end
end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cdb-crawlr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Gabe Smith
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-01 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &4367990 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *4367990
25
+ description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
26
+ email:
27
+ - sgt.floydpepper@gmail.com
28
+ executables: []
29
+ extensions: []
30
+ extra_rdoc_files: []
31
+ files:
32
+ - lib/cdb/issue.rb
33
+ - lib/cdb/struct.rb
34
+ - lib/cdb/title.rb
35
+ - lib/cdb-crawlr.rb
36
+ homepage: https://github.com/sgtFloyd/cdb-crawlr
37
+ licenses: []
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 1.8.10
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Ruby gem and command-line tool for querying ComicBookDB.com
60
+ test_files: []
61
+ has_rdoc: