cdb-crawlr 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/cdb-crawlr.rb +17 -4
- data/lib/cdb/cli.rb +5 -5
- data/lib/cdb/issue.rb +14 -2
- data/lib/cdb/struct.rb +1 -1
- data/lib/cdb/title.rb +25 -1
- metadata +4 -4
data/lib/cdb-crawlr.rb
CHANGED
@@ -10,7 +10,7 @@ require 'cdb/issue'
|
|
10
10
|
require 'cdb/title'
|
11
11
|
|
12
12
|
module CDB
|
13
|
-
VERSION = '0.0.3'
|
13
|
+
VERSION = '0.0.4'
|
14
14
|
|
15
15
|
BASE_URL = 'http://www.comicbookdb.com'
|
16
16
|
REQUEST_HEADERS = {'Connection' => 'keep-alive'}
|
@@ -24,9 +24,7 @@ module CDB
|
|
24
24
|
form_search: query
|
25
25
|
)
|
26
26
|
url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
|
27
|
-
content = open(url, REQUEST_HEADERS).read
|
28
|
-
content.force_encoding('ISO-8859-1').encode!('UTF-8')
|
29
|
-
doc = Nokogiri::HTML(content)
|
27
|
+
doc = read_page(url)
|
30
28
|
node = doc.css('h2:contains("Search Results")').first.parent
|
31
29
|
{
|
32
30
|
:titles => CDB::Title.parse_results(node),
|
@@ -34,5 +32,20 @@ module CDB
|
|
34
32
|
}
|
35
33
|
end
|
36
34
|
|
35
|
+
def show(id, type)
|
36
|
+
data = URI.encode_www_form('ID' => id)
|
37
|
+
url = "#{BASE_URL}/#{type::WEB_PATH}?#{data}"
|
38
|
+
page = read_page(url)
|
39
|
+
type.parse_data(id, page)
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def read_page(url)
|
45
|
+
content = open(url, REQUEST_HEADERS).read
|
46
|
+
content.force_encoding('ISO-8859-1').encode!('UTF-8')
|
47
|
+
Nokogiri::HTML(content)
|
48
|
+
end
|
49
|
+
|
37
50
|
end
|
38
51
|
end
|
data/lib/cdb/cli.rb
CHANGED
@@ -14,15 +14,15 @@ module CDB
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def []=(k, v)
|
17
|
+
v = v.to_s.strip
|
17
18
|
case k
|
18
19
|
when :command
|
19
|
-
v = v.
|
20
|
+
v = v.downcase
|
20
21
|
raise unless COMMANDS.include?(v)
|
21
22
|
when :scope
|
22
|
-
v = v.
|
23
|
+
v = v.downcase.gsub(/^=|s$/, '')
|
23
24
|
raise unless SCOPES.include?(v)
|
24
25
|
when :args
|
25
|
-
v = v.to_s.strip
|
26
26
|
if self[:command] == 'search'
|
27
27
|
raise "invalid search query" if v.empty?
|
28
28
|
end
|
@@ -40,8 +40,8 @@ module CDB
|
|
40
40
|
case self[:scope] || 'all'
|
41
41
|
when 'all'
|
42
42
|
CDB.search(self[:args]).each do |key, res|
|
43
|
-
puts key.to_s.capitalize
|
44
|
-
res.each{|r| puts r.to_json}
|
43
|
+
puts key.to_s.capitalize+':'
|
44
|
+
res.each{|r| puts ' '+r.to_json}
|
45
45
|
end
|
46
46
|
when 'title'
|
47
47
|
CDB::Title.search(self[:args]).each{|r| puts r.to_json}
|
data/lib/cdb/issue.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module CDB
|
2
|
-
class Issue < Struct.new(:cdb_id, :title, :num, :name, :cover_date)
|
2
|
+
class Issue < Struct.new(:cdb_id, :title, :num, :name, :story_arc, :cover_date)
|
3
3
|
FORM_SEARCHTYPE = 'IssueName'
|
4
4
|
WEB_PATH = 'issue.php'
|
5
5
|
|
@@ -15,12 +15,24 @@ module CDB
|
|
15
15
|
id = link.attr('href').split('=').last.to_i
|
16
16
|
text = link.child.text.strip
|
17
17
|
match = text.match(/^(.* \(\d{4}\)) (.*)$/)
|
18
|
-
title
|
18
|
+
title = match[1]
|
19
|
+
num = match[2].gsub(/^#/, '')
|
19
20
|
name = link.next_sibling.text.strip.gsub(/^-\s*"|"$/, '').strip
|
20
21
|
new(:cdb_id => id, :title => title, :num => num, :name => name)
|
21
22
|
end.sort_by(&:cdb_id)
|
22
23
|
end
|
23
24
|
|
25
|
+
def from_tr(node, title)
|
26
|
+
tds = node.css('td')
|
27
|
+
link = tds[0].css("a[href^=\"#{WEB_PATH}\"]").first
|
28
|
+
new(:cdb_id => link['href'].split('=').last.strip,
|
29
|
+
:title => title,
|
30
|
+
:num => link.text.strip,
|
31
|
+
:name => tds[2].text.strip,
|
32
|
+
:story_arc => tds[4].text.strip,
|
33
|
+
:cover_date => tds[6].text.strip)
|
34
|
+
end
|
35
|
+
|
24
36
|
end
|
25
37
|
end
|
26
38
|
end
|
data/lib/cdb/struct.rb
CHANGED
data/lib/cdb/title.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module CDB
|
2
|
-
class Title < Struct.new(:cdb_id, :name, :publisher, :begin_date, :end_date)
|
2
|
+
class Title < Struct.new(:cdb_id, :name, :issues, :publisher, :imprint, :begin_date, :end_date, :country, :language)
|
3
3
|
FORM_SEARCHTYPE = 'Title'
|
4
4
|
WEB_PATH = 'title.php'
|
5
5
|
|
@@ -10,6 +10,10 @@ module CDB
|
|
10
10
|
results[:titles]
|
11
11
|
end
|
12
12
|
|
13
|
+
def show(id)
|
14
|
+
CDB.show(id, self)
|
15
|
+
end
|
16
|
+
|
13
17
|
def parse_results(node)
|
14
18
|
node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
|
15
19
|
id = link.attr('href').split('=').last.to_i
|
@@ -21,6 +25,26 @@ module CDB
|
|
21
25
|
end.sort_by(&:cdb_id)
|
22
26
|
end
|
23
27
|
|
28
|
+
def parse_data(id, page)
|
29
|
+
dates = page.css('strong:contains("Publication Date: ")').first.next_sibling.text.strip
|
30
|
+
start_d, end_d = dates.split('-').map(&:strip)
|
31
|
+
|
32
|
+
title = new(
|
33
|
+
:cdb_id => id,
|
34
|
+
:name => page.css('.page_headline').first.text.strip,
|
35
|
+
:publisher => page.css('a[href^="publisher.php"]').first.text.strip,
|
36
|
+
:imprint => page.css('a[href^="imprint.php"]').first.text.strip,
|
37
|
+
:begin_date => start_d,
|
38
|
+
:end_date => end_d,
|
39
|
+
:country => page.css('strong:contains("Country: ")').first.next_sibling.text.strip,
|
40
|
+
:language => page.css('strong:contains("Language: ")').first.next_sibling.text.strip
|
41
|
+
)
|
42
|
+
title.issues = page.css("td[width='726'] a.page_link[href^=\"#{CDB::Issue::WEB_PATH}\"]").map do |link|
|
43
|
+
tr = link.parent.parent
|
44
|
+
CDB::Issue.from_tr(tr, title)
|
45
|
+
end
|
46
|
+
title
|
47
|
+
end
|
24
48
|
end
|
25
49
|
end
|
26
50
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdb-crawlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-03 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &23392700 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *23392700
|
25
25
|
description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
|
26
26
|
email:
|
27
27
|
- sgt.floydpepper@gmail.com
|