cdb-crawlr 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,7 +10,7 @@ require 'cdb/issue'
10
10
  require 'cdb/title'
11
11
 
12
12
  module CDB
13
- VERSION = '0.0.3'
13
+ VERSION = '0.0.4'
14
14
 
15
15
  BASE_URL = 'http://www.comicbookdb.com'
16
16
  REQUEST_HEADERS = {'Connection' => 'keep-alive'}
@@ -24,9 +24,7 @@ module CDB
24
24
  form_search: query
25
25
  )
26
26
  url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
27
- content = open(url, REQUEST_HEADERS).read
28
- content.force_encoding('ISO-8859-1').encode!('UTF-8')
29
- doc = Nokogiri::HTML(content)
27
+ doc = read_page(url)
30
28
  node = doc.css('h2:contains("Search Results")').first.parent
31
29
  {
32
30
  :titles => CDB::Title.parse_results(node),
@@ -34,5 +32,20 @@ module CDB
34
32
  }
35
33
  end
36
34
 
35
+ def show(id, type)
36
+ data = URI.encode_www_form('ID' => id)
37
+ url = "#{BASE_URL}/#{type::WEB_PATH}?#{data}"
38
+ page = read_page(url)
39
+ type.parse_data(id, page)
40
+ end
41
+
42
+ private
43
+
44
+ def read_page(url)
45
+ content = open(url, REQUEST_HEADERS).read
46
+ content.force_encoding('ISO-8859-1').encode!('UTF-8')
47
+ Nokogiri::HTML(content)
48
+ end
49
+
37
50
  end
38
51
  end
@@ -14,15 +14,15 @@ module CDB
14
14
  end
15
15
 
16
16
  def []=(k, v)
17
+ v = v.to_s.strip
17
18
  case k
18
19
  when :command
19
- v = v.to_s.strip.downcase
20
+ v = v.downcase
20
21
  raise unless COMMANDS.include?(v)
21
22
  when :scope
22
- v = v.to_s.strip.downcase.gsub(/^=|s$/, '')
23
+ v = v.downcase.gsub(/^=|s$/, '')
23
24
  raise unless SCOPES.include?(v)
24
25
  when :args
25
- v = v.to_s.strip
26
26
  if self[:command] == 'search'
27
27
  raise "invalid search query" if v.empty?
28
28
  end
@@ -40,8 +40,8 @@ module CDB
40
40
  case self[:scope] || 'all'
41
41
  when 'all'
42
42
  CDB.search(self[:args]).each do |key, res|
43
- puts key.to_s.capitalize
44
- res.each{|r| puts r.to_json}
43
+ puts key.to_s.capitalize+':'
44
+ res.each{|r| puts ' '+r.to_json}
45
45
  end
46
46
  when 'title'
47
47
  CDB::Title.search(self[:args]).each{|r| puts r.to_json}
@@ -1,5 +1,5 @@
1
1
  module CDB
2
- class Issue < Struct.new(:cdb_id, :title, :num, :name, :cover_date)
2
+ class Issue < Struct.new(:cdb_id, :title, :num, :name, :story_arc, :cover_date)
3
3
  FORM_SEARCHTYPE = 'IssueName'
4
4
  WEB_PATH = 'issue.php'
5
5
 
@@ -15,12 +15,24 @@ module CDB
15
15
  id = link.attr('href').split('=').last.to_i
16
16
  text = link.child.text.strip
17
17
  match = text.match(/^(.* \(\d{4}\)) (.*)$/)
18
- title, num = match[1..2]
18
+ title = match[1]
19
+ num = match[2].gsub(/^#/, '')
19
20
  name = link.next_sibling.text.strip.gsub(/^-\s*"|"$/, '').strip
20
21
  new(:cdb_id => id, :title => title, :num => num, :name => name)
21
22
  end.sort_by(&:cdb_id)
22
23
  end
23
24
 
25
+ def from_tr(node, title)
26
+ tds = node.css('td')
27
+ link = tds[0].css("a[href^=\"#{WEB_PATH}\"]").first
28
+ new(:cdb_id => link['href'].split('=').last.strip,
29
+ :title => title,
30
+ :num => link.text.strip,
31
+ :name => tds[2].text.strip,
32
+ :story_arc => tds[4].text.strip,
33
+ :cover_date => tds[6].text.strip)
34
+ end
35
+
24
36
  end
25
37
  end
26
38
  end
@@ -8,7 +8,7 @@ module CDB
8
8
  h.each{|k,v| send("#{k}=", v)}
9
9
  end
10
10
 
11
- def as_json
11
+ def as_json(*)
12
12
  members.inject({}){|map, m|
13
13
  map[m] = self[m]; map
14
14
  }
@@ -1,5 +1,5 @@
1
1
  module CDB
2
- class Title < Struct.new(:cdb_id, :name, :publisher, :begin_date, :end_date)
2
+ class Title < Struct.new(:cdb_id, :name, :issues, :publisher, :imprint, :begin_date, :end_date, :country, :language)
3
3
  FORM_SEARCHTYPE = 'Title'
4
4
  WEB_PATH = 'title.php'
5
5
 
@@ -10,6 +10,10 @@ module CDB
10
10
  results[:titles]
11
11
  end
12
12
 
13
+ def show(id)
14
+ CDB.show(id, self)
15
+ end
16
+
13
17
  def parse_results(node)
14
18
  node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
15
19
  id = link.attr('href').split('=').last.to_i
@@ -21,6 +25,26 @@ module CDB
21
25
  end.sort_by(&:cdb_id)
22
26
  end
23
27
 
28
+ def parse_data(id, page)
29
+ dates = page.css('strong:contains("Publication Date: ")').first.next_sibling.text.strip
30
+ start_d, end_d = dates.split('-').map(&:strip)
31
+
32
+ title = new(
33
+ :cdb_id => id,
34
+ :name => page.css('.page_headline').first.text.strip,
35
+ :publisher => page.css('a[href^="publisher.php"]').first.text.strip,
36
+ :imprint => page.css('a[href^="imprint.php"]').first.text.strip,
37
+ :begin_date => start_d,
38
+ :end_date => end_d,
39
+ :country => page.css('strong:contains("Country: ")').first.next_sibling.text.strip,
40
+ :language => page.css('strong:contains("Language: ")').first.next_sibling.text.strip
41
+ )
42
+ title.issues = page.css("td[width='726'] a.page_link[href^=\"#{CDB::Issue::WEB_PATH}\"]").map do |link|
43
+ tr = link.parent.parent
44
+ CDB::Issue.from_tr(tr, title)
45
+ end
46
+ title
47
+ end
24
48
  end
25
49
  end
26
50
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdb-crawlr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-02 00:00:00.000000000Z
12
+ date: 2012-11-03 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &18435060 !ruby/object:Gem::Requirement
16
+ requirement: &23392700 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *18435060
24
+ version_requirements: *23392700
25
25
  description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
26
26
  email:
27
27
  - sgt.floydpepper@gmail.com