cdb-crawlr 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/cdb-crawlr.rb +3 -3
- data/lib/cdb/cli.rb +4 -4
- data/lib/cdb/issue.rb +6 -6
- data/lib/cdb/{title.rb → series.rb} +7 -7
- metadata +4 -4
data/lib/cdb-crawlr.rb
CHANGED
@@ -7,10 +7,10 @@ $:.unshift(File.dirname(__FILE__))
|
|
7
7
|
require 'cdb/cli'
|
8
8
|
require 'cdb/struct'
|
9
9
|
require 'cdb/issue'
|
10
|
-
require 'cdb/title'
|
10
|
+
require 'cdb/series'
|
11
11
|
|
12
12
|
module CDB
|
13
|
-
VERSION = '0.0.4'
|
13
|
+
VERSION = '0.1.0'
|
14
14
|
|
15
15
|
BASE_URL = 'http://www.comicbookdb.com'
|
16
16
|
REQUEST_HEADERS = {'Connection' => 'keep-alive'}
|
@@ -27,7 +27,7 @@ module CDB
|
|
27
27
|
doc = read_page(url)
|
28
28
|
node = doc.css('h2:contains("Search Results")').first.parent
|
29
29
|
{
|
30
|
-
:
|
30
|
+
:series => CDB::Series.parse_results(node),
|
31
31
|
:issues => CDB::Issue.parse_results(node)
|
32
32
|
}
|
33
33
|
end
|
data/lib/cdb/cli.rb
CHANGED
@@ -3,7 +3,7 @@ require 'pp'
|
|
3
3
|
module CDB
|
4
4
|
class CLI
|
5
5
|
COMMANDS = %w[search]
|
6
|
-
SCOPES = %w[all title issue]
|
6
|
+
SCOPES = %w[all series issue]
|
7
7
|
|
8
8
|
def initialize(options={})
|
9
9
|
@options = options
|
@@ -20,7 +20,7 @@ module CDB
|
|
20
20
|
v = v.downcase
|
21
21
|
raise unless COMMANDS.include?(v)
|
22
22
|
when :scope
|
23
|
-
v = v.downcase.gsub(
|
23
|
+
v = v.downcase.gsub(/^=/, '')
|
24
24
|
raise unless SCOPES.include?(v)
|
25
25
|
when :args
|
26
26
|
if self[:command] == 'search'
|
@@ -43,8 +43,8 @@ module CDB
|
|
43
43
|
puts key.to_s.capitalize+':'
|
44
44
|
res.each{|r| puts ' '+r.to_json}
|
45
45
|
end
|
46
|
-
when 'title'
|
47
|
-
CDB::Title.search(self[:args]).each{|r| puts r.to_json}
|
46
|
+
when 'series'
|
47
|
+
CDB::Series.search(self[:args]).each{|r| puts r.to_json}
|
48
48
|
when 'issue'
|
49
49
|
CDB::Issue.search(self[:args]).each{|r| puts r.to_json}
|
50
50
|
end
|
data/lib/cdb/issue.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module CDB
|
2
|
-
class Issue < Struct.new(:cdb_id, :
|
2
|
+
class Issue < Struct.new(:cdb_id, :series, :num, :name, :story_arc, :cover_date)
|
3
3
|
FORM_SEARCHTYPE = 'IssueName'
|
4
4
|
WEB_PATH = 'issue.php'
|
5
5
|
|
@@ -15,18 +15,18 @@ module CDB
|
|
15
15
|
id = link.attr('href').split('=').last.to_i
|
16
16
|
text = link.child.text.strip
|
17
17
|
match = text.match(/^(.* \(\d{4}\)) (.*)$/)
|
18
|
-
|
18
|
+
series = match[1]
|
19
19
|
num = match[2].gsub(/^#/, '')
|
20
20
|
name = link.next_sibling.text.strip.gsub(/^-\s*"|"$/, '').strip
|
21
|
-
new(:cdb_id => id, :
|
21
|
+
new(:cdb_id => id, :series => series, :num => num, :name => name)
|
22
22
|
end.sort_by(&:cdb_id)
|
23
23
|
end
|
24
24
|
|
25
|
-
def from_tr(node,
|
25
|
+
def from_tr(node, series)
|
26
26
|
tds = node.css('td')
|
27
27
|
link = tds[0].css("a[href^=\"#{WEB_PATH}\"]").first
|
28
|
-
new(:cdb_id => link['href'].split('=').last.strip,
|
29
|
-
:
|
28
|
+
new(:cdb_id => link['href'].split('=').last.strip.to_i,
|
29
|
+
:series => series,
|
30
30
|
:num => link.text.strip,
|
31
31
|
:name => tds[2].text.strip,
|
32
32
|
:story_arc => tds[4].text.strip,
|
data/lib/cdb/{title.rb → series.rb}
@@ -1,5 +1,5 @@
|
|
1
1
|
module CDB
|
2
|
-
class
|
2
|
+
class Series < Struct.new(:cdb_id, :name, :issues, :publisher, :imprint, :begin_date, :end_date, :country, :language)
|
3
3
|
FORM_SEARCHTYPE = 'Title'
|
4
4
|
WEB_PATH = 'title.php'
|
5
5
|
|
@@ -7,7 +7,7 @@ module CDB
|
|
7
7
|
|
8
8
|
def search(query)
|
9
9
|
results = CDB.search(query, FORM_SEARCHTYPE)
|
10
|
-
results[:
|
10
|
+
results[:series]
|
11
11
|
end
|
12
12
|
|
13
13
|
def show(id)
|
@@ -29,21 +29,21 @@ module CDB
|
|
29
29
|
dates = page.css('strong:contains("Publication Date: ")').first.next_sibling.text.strip
|
30
30
|
start_d, end_d = dates.split('-').map(&:strip)
|
31
31
|
|
32
|
-
|
32
|
+
series = new(
|
33
33
|
:cdb_id => id,
|
34
34
|
:name => page.css('.page_headline').first.text.strip,
|
35
35
|
:publisher => page.css('a[href^="publisher.php"]').first.text.strip,
|
36
|
-
:imprint => page.css('a[href^="imprint.php"]').first.text.strip,
|
36
|
+
:imprint => (page.css('a[href^="imprint.php"]').first.text.strip rescue nil),
|
37
37
|
:begin_date => start_d,
|
38
38
|
:end_date => end_d,
|
39
39
|
:country => page.css('strong:contains("Country: ")').first.next_sibling.text.strip,
|
40
40
|
:language => page.css('strong:contains("Language: ")').first.next_sibling.text.strip
|
41
41
|
)
|
42
|
-
|
42
|
+
series.issues = page.css("td[width='726'] a.page_link[href^=\"#{CDB::Issue::WEB_PATH}\"]").map do |link|
|
43
43
|
tr = link.parent.parent
|
44
|
-
CDB::Issue.from_tr(tr,
|
44
|
+
CDB::Issue.from_tr(tr, series)
|
45
45
|
end
|
46
|
-
|
46
|
+
series
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdb-crawlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-11-03 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &21186450 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *21186450
|
25
25
|
description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
|
26
26
|
email:
|
27
27
|
- sgt.floydpepper@gmail.com
|
@@ -32,8 +32,8 @@ extra_rdoc_files: []
|
|
32
32
|
files:
|
33
33
|
- lib/cdb/cli.rb
|
34
34
|
- lib/cdb/issue.rb
|
35
|
+
- lib/cdb/series.rb
|
35
36
|
- lib/cdb/struct.rb
|
36
|
-
- lib/cdb/title.rb
|
37
37
|
- lib/cdb-crawlr.rb
|
38
38
|
- bin/cdb
|
39
39
|
homepage: https://github.com/sgtFloyd/cdb-crawlr
|