cdb-crawlr 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cdb-crawlr.rb +3 -3
- data/lib/cdb/cli.rb +4 -4
- data/lib/cdb/issue.rb +6 -6
- data/lib/cdb/{title.rb → series.rb} +7 -7
- metadata +4 -4
data/lib/cdb-crawlr.rb
CHANGED
@@ -7,10 +7,10 @@ $:.unshift(File.dirname(__FILE__))
|
|
7
7
|
require 'cdb/cli'
|
8
8
|
require 'cdb/struct'
|
9
9
|
require 'cdb/issue'
|
10
|
-
require 'cdb/title'
|
10
|
+
require 'cdb/series'
|
11
11
|
|
12
12
|
module CDB
|
13
|
-
VERSION = '0.0.4'
|
13
|
+
VERSION = '0.1.0'
|
14
14
|
|
15
15
|
BASE_URL = 'http://www.comicbookdb.com'
|
16
16
|
REQUEST_HEADERS = {'Connection' => 'keep-alive'}
|
@@ -27,7 +27,7 @@ module CDB
|
|
27
27
|
doc = read_page(url)
|
28
28
|
node = doc.css('h2:contains("Search Results")').first.parent
|
29
29
|
{
|
30
|
-
:
|
30
|
+
:series => CDB::Series.parse_results(node),
|
31
31
|
:issues => CDB::Issue.parse_results(node)
|
32
32
|
}
|
33
33
|
end
|
data/lib/cdb/cli.rb
CHANGED
@@ -3,7 +3,7 @@ require 'pp'
|
|
3
3
|
module CDB
|
4
4
|
class CLI
|
5
5
|
COMMANDS = %w[search]
|
6
|
-
SCOPES = %w[all
|
6
|
+
SCOPES = %w[all series issue]
|
7
7
|
|
8
8
|
def initialize(options={})
|
9
9
|
@options = options
|
@@ -20,7 +20,7 @@ module CDB
|
|
20
20
|
v = v.downcase
|
21
21
|
raise unless COMMANDS.include?(v)
|
22
22
|
when :scope
|
23
|
-
v = v.downcase.gsub(
|
23
|
+
v = v.downcase.gsub(/^=/, '')
|
24
24
|
raise unless SCOPES.include?(v)
|
25
25
|
when :args
|
26
26
|
if self[:command] == 'search'
|
@@ -43,8 +43,8 @@ module CDB
|
|
43
43
|
puts key.to_s.capitalize+':'
|
44
44
|
res.each{|r| puts ' '+r.to_json}
|
45
45
|
end
|
46
|
-
when '
|
47
|
-
CDB::
|
46
|
+
when 'series'
|
47
|
+
CDB::Series.search(self[:args]).each{|r| puts r.to_json}
|
48
48
|
when 'issue'
|
49
49
|
CDB::Issue.search(self[:args]).each{|r| puts r.to_json}
|
50
50
|
end
|
data/lib/cdb/issue.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module CDB
|
2
|
-
class Issue < Struct.new(:cdb_id, :
|
2
|
+
class Issue < Struct.new(:cdb_id, :series, :num, :name, :story_arc, :cover_date)
|
3
3
|
FORM_SEARCHTYPE = 'IssueName'
|
4
4
|
WEB_PATH = 'issue.php'
|
5
5
|
|
@@ -15,18 +15,18 @@ module CDB
|
|
15
15
|
id = link.attr('href').split('=').last.to_i
|
16
16
|
text = link.child.text.strip
|
17
17
|
match = text.match(/^(.* \(\d{4}\)) (.*)$/)
|
18
|
-
|
18
|
+
series = match[1]
|
19
19
|
num = match[2].gsub(/^#/, '')
|
20
20
|
name = link.next_sibling.text.strip.gsub(/^-\s*"|"$/, '').strip
|
21
|
-
new(:cdb_id => id, :
|
21
|
+
new(:cdb_id => id, :series => series, :num => num, :name => name)
|
22
22
|
end.sort_by(&:cdb_id)
|
23
23
|
end
|
24
24
|
|
25
|
-
def from_tr(node,
|
25
|
+
def from_tr(node, series)
|
26
26
|
tds = node.css('td')
|
27
27
|
link = tds[0].css("a[href^=\"#{WEB_PATH}\"]").first
|
28
|
-
new(:cdb_id => link['href'].split('=').last.strip,
|
29
|
-
:
|
28
|
+
new(:cdb_id => link['href'].split('=').last.strip.to_i,
|
29
|
+
:series => series,
|
30
30
|
:num => link.text.strip,
|
31
31
|
:name => tds[2].text.strip,
|
32
32
|
:story_arc => tds[4].text.strip,
|
data/lib/cdb/{title.rb → series.rb}
@@ -1,5 +1,5 @@
|
|
1
1
|
module CDB
|
2
|
-
class
|
2
|
+
class Series < Struct.new(:cdb_id, :name, :issues, :publisher, :imprint, :begin_date, :end_date, :country, :language)
|
3
3
|
FORM_SEARCHTYPE = 'Title'
|
4
4
|
WEB_PATH = 'title.php'
|
5
5
|
|
@@ -7,7 +7,7 @@ module CDB
|
|
7
7
|
|
8
8
|
def search(query)
|
9
9
|
results = CDB.search(query, FORM_SEARCHTYPE)
|
10
|
-
results[:
|
10
|
+
results[:series]
|
11
11
|
end
|
12
12
|
|
13
13
|
def show(id)
|
@@ -29,21 +29,21 @@ module CDB
|
|
29
29
|
dates = page.css('strong:contains("Publication Date: ")').first.next_sibling.text.strip
|
30
30
|
start_d, end_d = dates.split('-').map(&:strip)
|
31
31
|
|
32
|
-
|
32
|
+
series = new(
|
33
33
|
:cdb_id => id,
|
34
34
|
:name => page.css('.page_headline').first.text.strip,
|
35
35
|
:publisher => page.css('a[href^="publisher.php"]').first.text.strip,
|
36
|
-
:imprint => page.css('a[href^="imprint.php"]').first.text.strip,
|
36
|
+
:imprint => (page.css('a[href^="imprint.php"]').first.text.strip rescue nil),
|
37
37
|
:begin_date => start_d,
|
38
38
|
:end_date => end_d,
|
39
39
|
:country => page.css('strong:contains("Country: ")').first.next_sibling.text.strip,
|
40
40
|
:language => page.css('strong:contains("Language: ")').first.next_sibling.text.strip
|
41
41
|
)
|
42
|
-
|
42
|
+
series.issues = page.css("td[width='726'] a.page_link[href^=\"#{CDB::Issue::WEB_PATH}\"]").map do |link|
|
43
43
|
tr = link.parent.parent
|
44
|
-
CDB::Issue.from_tr(tr,
|
44
|
+
CDB::Issue.from_tr(tr, series)
|
45
45
|
end
|
46
|
-
|
46
|
+
series
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdb-crawlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-11-03 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &21186450 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *21186450
|
25
25
|
description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
|
26
26
|
email:
|
27
27
|
- sgt.floydpepper@gmail.com
|
@@ -32,8 +32,8 @@ extra_rdoc_files: []
|
|
32
32
|
files:
|
33
33
|
- lib/cdb/cli.rb
|
34
34
|
- lib/cdb/issue.rb
|
35
|
+
- lib/cdb/series.rb
|
35
36
|
- lib/cdb/struct.rb
|
36
|
-
- lib/cdb/title.rb
|
37
37
|
- lib/cdb-crawlr.rb
|
38
38
|
- bin/cdb
|
39
39
|
homepage: https://github.com/sgtFloyd/cdb-crawlr
|