cdb-crawlr 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cdb-crawlr.rb +36 -0
- data/lib/cdb/issue.rb +26 -0
- data/lib/cdb/struct.rb +18 -0
- data/lib/cdb/title.rb +26 -0
- metadata +61 -0
data/lib/cdb-crawlr.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
$:.unshift(File.dirname(__FILE__))
|
5
|
+
|
6
|
+
require 'cdb/struct'
|
7
|
+
require 'cdb/issue'
|
8
|
+
require 'cdb/title'
|
9
|
+
|
10
|
+
# Top-level namespace of the cdb-crawlr gem. Holds the site constants and the
# shared search entry point that CDB::Title and CDB::Issue delegate to.
module CDB
  VERSION = '0.0.2'

  BASE_URL = 'http://www.comicbookdb.com'
  REQUEST_HEADERS = {'Connection' => 'keep-alive'}
  SEARCH_PATH = 'search.php'

  class << self

    # Queries the site's search page and parses the result links.
    #
    # @param query [String] text to search for (sent as `form_search`)
    # @param type [String] value sent as `form_searchtype`
    # @return [Hash] `:titles` => results of CDB::Title.parse_results,
    #   `:issues` => results of CDB::Issue.parse_results. Both are empty
    #   arrays when the page has no "Search Results" heading.
    def search(query, type='FullSite')
      data = URI.encode_www_form(
        form_searchtype: type,
        form_search: query
      )
      doc = Nokogiri::HTML(fetch_page("#{BASE_URL}/#{SEARCH_PATH}?#{data}"))

      # Without the heading there is no result container to parse; report
      # "nothing found" instead of failing with NoMethodError on nil.
      heading = doc.css('h2:contains("Search Results")').first
      return {:titles => [], :issues => []} unless heading

      node = heading.parent
      {
        :titles => CDB::Title.parse_results(node),
        :issues => CDB::Issue.parse_results(node)
      }
    end

    private

    # Downloads +url+ and returns its body re-encoded as UTF-8. The raw bytes
    # are interpreted as ISO-8859-1 before transcoding.
    #
    # The block form is used so the underlying handle is closed after reading.
    # URI.open is preferred because open-uri no longer patches Kernel#open on
    # Ruby 3.0+; older Rubies without URI.open fall back to Kernel#open.
    def fetch_page(url)
      content =
        if URI.respond_to?(:open)
          URI.open(url, REQUEST_HEADERS, &:read)
        else
          open(url, REQUEST_HEADERS, &:read)
        end
      content.force_encoding('ISO-8859-1').encode('UTF-8')
    end

  end
end
|
data/lib/cdb/issue.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module CDB
  # A single issue as listed in the site's search results.
  # Only cdb_id, title, num and name are filled in by parse_results;
  # cover_date is left unset there.
  class Issue < Struct.new(:cdb_id, :title, :num, :name, :cover_date)
    FORM_SEARCHTYPE = 'IssueName'
    WEB_PATH = 'issue.php'

    # Link text is expected to look like "Title (YYYY) num".
    TITLE_AND_NUM = /^(.* \(\d{4}\)) (.*)$/

    class << self

      # Searches the site by issue name.
      #
      # @param query [String] text to search for
      # @return [Array<CDB::Issue>] the :issues part of CDB.search
      def search(query)
        CDB.search(query, FORM_SEARCHTYPE)[:issues]
      end

      # Builds one Issue per link whose href starts with WEB_PATH.
      #
      # @param node [#css] element containing the result links
      # @return [Array<CDB::Issue>]
      def parse_results(node)
        node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
          # The id is whatever follows the last '=' in the href.
          id = link.attr('href').to_s.split('=').last
          text = text_of(link.child).strip

          # When the text does not follow the expected layout, keep it whole
          # as the title rather than failing on a nil match.
          match = text.match(TITLE_AND_NUM)
          title, num = match ? match[1..2] : [text, nil]

          # The node after the link carries the name as: - "Name"
          name = text_of(link.next_sibling).strip.gsub(/^-\s*"|"$/, '').strip
          new(:cdb_id => id, :title => title, :num => num, :name => name)
        end
      end

      private

      # Text of +n+, or an empty string when the node is missing.
      def text_of(n)
        n ? n.text.to_s : ''
      end

    end
  end
end
|
data/lib/cdb/struct.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Hash#to_json (used by CDB::Struct#to_json) is only defined once json is loaded.
require 'json'

module CDB
  # Modifications to Ruby's Struct class for use within the CDB module.
  # Must be called 'Struct' to play nice with YARD's @attr documentation.
  class Struct < Struct

    # Override Struct's initialize method to accept a hash of members instead.
    #
    # @param h [Hash] member name => value; members not listed stay nil
    # @raise [NoMethodError] if a key has no matching public setter
    def initialize(h={})
      # public_send keeps the hash from reaching private methods.
      h.each { |k, v| public_send("#{k}=", v) }
    end

    # Serializes the struct as a JSON object keyed by member name.
    #
    # @param a arguments forwarded unchanged to Hash#to_json
    # @return [String] JSON text
    def to_json(*a)
      Hash[members.zip(values)].to_json(*a)
    end

  end
end
|
data/lib/cdb/title.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module CDB
  # A title (series) as listed in the site's search results.
  # Only cdb_id, name, publisher and begin_date are filled in by
  # parse_results; end_date is left unset there.
  class Title < Struct.new(:cdb_id, :name, :publisher, :begin_date, :end_date)
    FORM_SEARCHTYPE = 'Title'
    WEB_PATH = 'title.php'

    # Link text is expected to look like "Name (YYYY)".
    NAME_AND_YEAR = /\A(.*) \((\d{4})\)\z/m

    class << self

      # Searches the site by title name.
      #
      # @param query [String] text to search for
      # @return [Array<CDB::Title>] the :titles part of CDB.search
      def search(query)
        CDB.search(query, FORM_SEARCHTYPE)[:titles]
      end

      # Builds one Title per link whose href starts with WEB_PATH.
      #
      # @param node [#css] element containing the result links
      # @return [Array<CDB::Title>]
      def parse_results(node)
        node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
          # The id is whatever follows the last '=' in the href.
          id = link.attr('href').to_s.split('=').last
          text = text_of(link.child).strip

          # Match the layout explicitly instead of slicing at fixed offsets,
          # which returned garbage for text not ending in " (YYYY)". Text
          # without a trailing year is kept whole as the name.
          match = NAME_AND_YEAR.match(text)
          name, year = match ? match.captures : [text, nil]

          # The node after the link carries the publisher as: (Publisher)
          pub = text_of(link.next_sibling).gsub(/^\s*\(|\)\s*$/, '')
          new(:cdb_id => id, :name => name, :publisher => pub, :begin_date => year)
        end
      end

      private

      # Text of +n+, or an empty string when the node is missing.
      def text_of(n)
        n ? n.text.to_s : ''
      end

    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cdb-crawlr
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Gabe Smith
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-11-01 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: &4367990 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *4367990
|
25
|
+
description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
|
26
|
+
email:
|
27
|
+
- sgt.floydpepper@gmail.com
|
28
|
+
executables: []
|
29
|
+
extensions: []
|
30
|
+
extra_rdoc_files: []
|
31
|
+
files:
|
32
|
+
- lib/cdb/issue.rb
|
33
|
+
- lib/cdb/struct.rb
|
34
|
+
- lib/cdb/title.rb
|
35
|
+
- lib/cdb-crawlr.rb
|
36
|
+
homepage: https://github.com/sgtFloyd/cdb-crawlr
|
37
|
+
licenses: []
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
requirements: []
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 1.8.10
|
57
|
+
signing_key:
|
58
|
+
specification_version: 3
|
59
|
+
summary: Ruby gem and command-line tool for querying ComicBookDB.com
|
60
|
+
test_files: []
|
61
|
+
has_rdoc:
|