cdb-crawlr 0.0.3 → 0.0.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/lib/cdb-crawlr.rb +17 -4
- data/lib/cdb/cli.rb +5 -5
- data/lib/cdb/issue.rb +14 -2
- data/lib/cdb/struct.rb +1 -1
- data/lib/cdb/title.rb +25 -1
- metadata +4 -4
data/lib/cdb-crawlr.rb
CHANGED
@@ -10,7 +10,7 @@ require 'cdb/issue'
|
|
10
10
|
require 'cdb/title'
|
11
11
|
|
12
12
|
module CDB
|
13
|
-
VERSION = '0.0.3'
|
13
|
+
VERSION = '0.0.4'
|
14
14
|
|
15
15
|
BASE_URL = 'http://www.comicbookdb.com'
|
16
16
|
REQUEST_HEADERS = {'Connection' => 'keep-alive'}
|
@@ -24,9 +24,7 @@ module CDB
|
|
24
24
|
form_search: query
|
25
25
|
)
|
26
26
|
url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
|
27
|
-
|
28
|
-
content.force_encoding('ISO-8859-1').encode!('UTF-8')
|
29
|
-
doc = Nokogiri::HTML(content)
|
27
|
+
doc = read_page(url)
|
30
28
|
node = doc.css('h2:contains("Search Results")').first.parent
|
31
29
|
{
|
32
30
|
:titles => CDB::Title.parse_results(node),
|
@@ -34,5 +32,20 @@ module CDB
|
|
34
32
|
}
|
35
33
|
end
|
36
34
|
|
35
|
+
def show(id, type)
|
36
|
+
data = URI.encode_www_form('ID' => id)
|
37
|
+
url = "#{BASE_URL}/#{type::WEB_PATH}?#{data}"
|
38
|
+
page = read_page(url)
|
39
|
+
type.parse_data(id, page)
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def read_page(url)
|
45
|
+
content = open(url, REQUEST_HEADERS).read
|
46
|
+
content.force_encoding('ISO-8859-1').encode!('UTF-8')
|
47
|
+
Nokogiri::HTML(content)
|
48
|
+
end
|
49
|
+
|
37
50
|
end
|
38
51
|
end
|
data/lib/cdb/cli.rb
CHANGED
@@ -14,15 +14,15 @@ module CDB
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def []=(k, v)
|
17
|
+
v = v.to_s.strip
|
17
18
|
case k
|
18
19
|
when :command
|
19
|
-
v = v.
|
20
|
+
v = v.downcase
|
20
21
|
raise unless COMMANDS.include?(v)
|
21
22
|
when :scope
|
22
|
-
v = v.
|
23
|
+
v = v.downcase.gsub(/^=|s$/, '')
|
23
24
|
raise unless SCOPES.include?(v)
|
24
25
|
when :args
|
25
|
-
v = v.to_s.strip
|
26
26
|
if self[:command] == 'search'
|
27
27
|
raise "invalid search query" if v.empty?
|
28
28
|
end
|
@@ -40,8 +40,8 @@ module CDB
|
|
40
40
|
case self[:scope] || 'all'
|
41
41
|
when 'all'
|
42
42
|
CDB.search(self[:args]).each do |key, res|
|
43
|
-
puts key.to_s.capitalize
|
44
|
-
res.each{|r| puts r.to_json}
|
43
|
+
puts key.to_s.capitalize+':'
|
44
|
+
res.each{|r| puts ' '+r.to_json}
|
45
45
|
end
|
46
46
|
when 'title'
|
47
47
|
CDB::Title.search(self[:args]).each{|r| puts r.to_json}
|
data/lib/cdb/issue.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module CDB
|
2
|
-
class Issue < Struct.new(:cdb_id, :title, :num, :name, :cover_date)
|
2
|
+
class Issue < Struct.new(:cdb_id, :title, :num, :name, :story_arc, :cover_date)
|
3
3
|
FORM_SEARCHTYPE = 'IssueName'
|
4
4
|
WEB_PATH = 'issue.php'
|
5
5
|
|
@@ -15,12 +15,24 @@ module CDB
|
|
15
15
|
id = link.attr('href').split('=').last.to_i
|
16
16
|
text = link.child.text.strip
|
17
17
|
match = text.match(/^(.* \(\d{4}\)) (.*)$/)
|
18
|
-
title
|
18
|
+
title = match[1]
|
19
|
+
num = match[2].gsub(/^#/, '')
|
19
20
|
name = link.next_sibling.text.strip.gsub(/^-\s*"|"$/, '').strip
|
20
21
|
new(:cdb_id => id, :title => title, :num => num, :name => name)
|
21
22
|
end.sort_by(&:cdb_id)
|
22
23
|
end
|
23
24
|
|
25
|
+
def from_tr(node, title)
|
26
|
+
tds = node.css('td')
|
27
|
+
link = tds[0].css("a[href^=\"#{WEB_PATH}\"]").first
|
28
|
+
new(:cdb_id => link['href'].split('=').last.strip,
|
29
|
+
:title => title,
|
30
|
+
:num => link.text.strip,
|
31
|
+
:name => tds[2].text.strip,
|
32
|
+
:story_arc => tds[4].text.strip,
|
33
|
+
:cover_date => tds[6].text.strip)
|
34
|
+
end
|
35
|
+
|
24
36
|
end
|
25
37
|
end
|
26
38
|
end
|
data/lib/cdb/struct.rb
CHANGED
data/lib/cdb/title.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module CDB
|
2
|
-
class Title < Struct.new(:cdb_id, :name, :publisher, :begin_date, :end_date)
|
2
|
+
class Title < Struct.new(:cdb_id, :name, :issues, :publisher, :imprint, :begin_date, :end_date, :country, :language)
|
3
3
|
FORM_SEARCHTYPE = 'Title'
|
4
4
|
WEB_PATH = 'title.php'
|
5
5
|
|
@@ -10,6 +10,10 @@ module CDB
|
|
10
10
|
results[:titles]
|
11
11
|
end
|
12
12
|
|
13
|
+
def show(id)
|
14
|
+
CDB.show(id, self)
|
15
|
+
end
|
16
|
+
|
13
17
|
def parse_results(node)
|
14
18
|
node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
|
15
19
|
id = link.attr('href').split('=').last.to_i
|
@@ -21,6 +25,26 @@ module CDB
|
|
21
25
|
end.sort_by(&:cdb_id)
|
22
26
|
end
|
23
27
|
|
28
|
+
def parse_data(id, page)
|
29
|
+
dates = page.css('strong:contains("Publication Date: ")').first.next_sibling.text.strip
|
30
|
+
start_d, end_d = dates.split('-').map(&:strip)
|
31
|
+
|
32
|
+
title = new(
|
33
|
+
:cdb_id => id,
|
34
|
+
:name => page.css('.page_headline').first.text.strip,
|
35
|
+
:publisher => page.css('a[href^="publisher.php"]').first.text.strip,
|
36
|
+
:imprint => page.css('a[href^="imprint.php"]').first.text.strip,
|
37
|
+
:begin_date => start_d,
|
38
|
+
:end_date => end_d,
|
39
|
+
:country => page.css('strong:contains("Country: ")').first.next_sibling.text.strip,
|
40
|
+
:language => page.css('strong:contains("Language: ")').first.next_sibling.text.strip
|
41
|
+
)
|
42
|
+
title.issues = page.css("td[width='726'] a.page_link[href^=\"#{CDB::Issue::WEB_PATH}\"]").map do |link|
|
43
|
+
tr = link.parent.parent
|
44
|
+
CDB::Issue.from_tr(tr, title)
|
45
|
+
end
|
46
|
+
title
|
47
|
+
end
|
24
48
|
end
|
25
49
|
end
|
26
50
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdb-crawlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-03 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &23392700 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *23392700
|
25
25
|
description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
|
26
26
|
email:
|
27
27
|
- sgt.floydpepper@gmail.com
|