cdb-crawlr 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@ require 'cdb/issue'
10
10
  require 'cdb/title'
11
11
 
12
12
  module CDB
13
- VERSION = '0.0.3'
13
+ VERSION = '0.0.4'
14
14
 
15
15
  BASE_URL = 'http://www.comicbookdb.com'
16
16
  REQUEST_HEADERS = {'Connection' => 'keep-alive'}
@@ -24,9 +24,7 @@ module CDB
24
24
  form_search: query
25
25
  )
26
26
  url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
27
- content = open(url, REQUEST_HEADERS).read
28
- content.force_encoding('ISO-8859-1').encode!('UTF-8')
29
- doc = Nokogiri::HTML(content)
27
+ doc = read_page(url)
30
28
  node = doc.css('h2:contains("Search Results")').first.parent
31
29
  {
32
30
  :titles => CDB::Title.parse_results(node),
@@ -34,5 +32,20 @@ module CDB
34
32
  }
35
33
  end
36
34
 
35
+ def show(id, type)
36
+ data = URI.encode_www_form('ID' => id)
37
+ url = "#{BASE_URL}/#{type::WEB_PATH}?#{data}"
38
+ page = read_page(url)
39
+ type.parse_data(id, page)
40
+ end
41
+
42
+ private
43
+
44
+ def read_page(url)
45
+ content = open(url, REQUEST_HEADERS).read
46
+ content.force_encoding('ISO-8859-1').encode!('UTF-8')
47
+ Nokogiri::HTML(content)
48
+ end
49
+
37
50
  end
38
51
  end
@@ -14,15 +14,15 @@ module CDB
14
14
  end
15
15
 
16
16
  def []=(k, v)
17
+ v = v.to_s.strip
17
18
  case k
18
19
  when :command
19
- v = v.to_s.strip.downcase
20
+ v = v.downcase
20
21
  raise unless COMMANDS.include?(v)
21
22
  when :scope
22
- v = v.to_s.strip.downcase.gsub(/^=|s$/, '')
23
+ v = v.downcase.gsub(/^=|s$/, '')
23
24
  raise unless SCOPES.include?(v)
24
25
  when :args
25
- v = v.to_s.strip
26
26
  if self[:command] == 'search'
27
27
  raise "invalid search query" if v.empty?
28
28
  end
@@ -40,8 +40,8 @@ module CDB
40
40
  case self[:scope] || 'all'
41
41
  when 'all'
42
42
  CDB.search(self[:args]).each do |key, res|
43
- puts key.to_s.capitalize
44
- res.each{|r| puts r.to_json}
43
+ puts key.to_s.capitalize+':'
44
+ res.each{|r| puts ' '+r.to_json}
45
45
  end
46
46
  when 'title'
47
47
  CDB::Title.search(self[:args]).each{|r| puts r.to_json}
@@ -1,5 +1,5 @@
1
1
  module CDB
2
- class Issue < Struct.new(:cdb_id, :title, :num, :name, :cover_date)
2
+ class Issue < Struct.new(:cdb_id, :title, :num, :name, :story_arc, :cover_date)
3
3
  FORM_SEARCHTYPE = 'IssueName'
4
4
  WEB_PATH = 'issue.php'
5
5
 
@@ -15,12 +15,24 @@ module CDB
15
15
  id = link.attr('href').split('=').last.to_i
16
16
  text = link.child.text.strip
17
17
  match = text.match(/^(.* \(\d{4}\)) (.*)$/)
18
- title, num = match[1..2]
18
+ title = match[1]
19
+ num = match[2].gsub(/^#/, '')
19
20
  name = link.next_sibling.text.strip.gsub(/^-\s*"|"$/, '').strip
20
21
  new(:cdb_id => id, :title => title, :num => num, :name => name)
21
22
  end.sort_by(&:cdb_id)
22
23
  end
23
24
 
25
+ def from_tr(node, title)
26
+ tds = node.css('td')
27
+ link = tds[0].css("a[href^=\"#{WEB_PATH}\"]").first
28
+ new(:cdb_id => link['href'].split('=').last.strip,
29
+ :title => title,
30
+ :num => link.text.strip,
31
+ :name => tds[2].text.strip,
32
+ :story_arc => tds[4].text.strip,
33
+ :cover_date => tds[6].text.strip)
34
+ end
35
+
24
36
  end
25
37
  end
26
38
  end
@@ -8,7 +8,7 @@ module CDB
8
8
  h.each{|k,v| send("#{k}=", v)}
9
9
  end
10
10
 
11
- def as_json
11
+ def as_json(*)
12
12
  members.inject({}){|map, m|
13
13
  map[m] = self[m]; map
14
14
  }
@@ -1,5 +1,5 @@
1
1
  module CDB
2
- class Title < Struct.new(:cdb_id, :name, :publisher, :begin_date, :end_date)
2
+ class Title < Struct.new(:cdb_id, :name, :issues, :publisher, :imprint, :begin_date, :end_date, :country, :language)
3
3
  FORM_SEARCHTYPE = 'Title'
4
4
  WEB_PATH = 'title.php'
5
5
 
@@ -10,6 +10,10 @@ module CDB
10
10
  results[:titles]
11
11
  end
12
12
 
13
+ def show(id)
14
+ CDB.show(id, self)
15
+ end
16
+
13
17
  def parse_results(node)
14
18
  node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
15
19
  id = link.attr('href').split('=').last.to_i
@@ -21,6 +25,26 @@ module CDB
21
25
  end.sort_by(&:cdb_id)
22
26
  end
23
27
 
28
+ def parse_data(id, page)
29
+ dates = page.css('strong:contains("Publication Date: ")').first.next_sibling.text.strip
30
+ start_d, end_d = dates.split('-').map(&:strip)
31
+
32
+ title = new(
33
+ :cdb_id => id,
34
+ :name => page.css('.page_headline').first.text.strip,
35
+ :publisher => page.css('a[href^="publisher.php"]').first.text.strip,
36
+ :imprint => page.css('a[href^="imprint.php"]').first.text.strip,
37
+ :begin_date => start_d,
38
+ :end_date => end_d,
39
+ :country => page.css('strong:contains("Country: ")').first.next_sibling.text.strip,
40
+ :language => page.css('strong:contains("Language: ")').first.next_sibling.text.strip
41
+ )
42
+ title.issues = page.css("td[width='726'] a.page_link[href^=\"#{CDB::Issue::WEB_PATH}\"]").map do |link|
43
+ tr = link.parent.parent
44
+ CDB::Issue.from_tr(tr, title)
45
+ end
46
+ title
47
+ end
24
48
  end
25
49
  end
26
50
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdb-crawlr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-02 00:00:00.000000000Z
12
+ date: 2012-11-03 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &18435060 !ruby/object:Gem::Requirement
16
+ requirement: &23392700 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *18435060
24
+ version_requirements: *23392700
25
25
  description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
26
26
  email:
27
27
  - sgt.floydpepper@gmail.com