cdb-crawlr 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
+ $:.unshift(File.dirname(__FILE__))
5
+
6
+ require 'cdb/struct'
7
+ require 'cdb/issue'
8
+ require 'cdb/title'
9
+
10
# Top-level namespace of the cdb-crawlr gem. Holds the gem version, the
# ComicBookDB endpoint settings and the shared search entry point used by
# CDB::Title and CDB::Issue.
module CDB
  VERSION = '0.0.2'

  BASE_URL = 'http://www.comicbookdb.com'
  REQUEST_HEADERS = {'Connection' => 'keep-alive'}
  SEARCH_PATH = 'search.php'

  class << self

    # Fetches the search page for +query+ and hands the results container to
    # the Title and Issue parsers.
    #
    # @param query [String] text sent as the +form_search+ parameter
    # @param type [String] value sent as the +form_searchtype+ parameter
    # @return [Hash] +:titles+ and +:issues+, each holding whatever the
    #   matching +parse_results+ returned; both are empty arrays when the page
    #   contains no "Search Results" heading
    def search(query, type='FullSite')
      doc = Nokogiri::HTML(fetch(search_url(query, type)))
      heading = doc.css('h2:contains("Search Results")').first
      # A page without the heading has nothing to parse; report "no results"
      # instead of failing with NoMethodError on nil.
      return {:titles => [], :issues => []} unless heading

      node = heading.parent
      {
        :titles => CDB::Title.parse_results(node),
        :issues => CDB::Issue.parse_results(node)
      }
    end

    private

    # Builds the absolute search URL with the form-encoded query string.
    def search_url(query, type)
      data = URI.encode_www_form(
        form_searchtype: type,
        form_search: query
      )
      "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
    end

    # Downloads +url+ and returns its body transcoded from ISO-8859-1 to UTF-8.
    def fetch(url)
      # Kernel#open no longer opens URLs on Ruby 3.0+, so prefer URI.open.
      # Older Rubies do not expose a public URI.open; fall back to Kernel there.
      opener = URI.respond_to?(:open) ? URI : Kernel
      # Block form so the underlying IO is closed once the body is read.
      body = opener.open(url, REQUEST_HEADERS) { |io| io.read }
      body.force_encoding('ISO-8859-1').encode('UTF-8')
    end

  end
end
@@ -0,0 +1,26 @@
1
module CDB
  # A single comic issue as listed on a ComicBookDB search results page.
  class Issue < Struct.new(:cdb_id, :title, :num, :name, :cover_date)
    FORM_SEARCHTYPE = 'IssueName'
    WEB_PATH = 'issue.php'

    class << self

      # Searches by issue name and returns only the issue results.
      #
      # @param query [String] text to search for
      # @return the +:issues+ entry of the hash returned by CDB.search
      def search(query)
        CDB.search(query, FORM_SEARCHTYPE)[:issues]
      end

      # Builds one Issue per link under +node+ whose href starts with WEB_PATH.
      #
      # The link text is expected to look like "Title (YYYY) num". When it
      # does not, the whole text becomes the title and +num+ stays nil rather
      # than raising on the failed match. A link without a following sibling
      # gets an empty name.
      #
      # @param node [#css] container of the search result links
      # @return [Array<CDB::Issue>] issues with cdb_id, title, num and name set
      def parse_results(node)
        node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
          id = link.attr('href').split('=').last
          text = node_text(link.child)
          match = text.match(/^(.* \(\d{4}\)) (.*)$/)
          title, num = match ? match[1..2] : [text, nil]
          # The sibling text carries the issue name as: - "Name"
          name = node_text(link.next_sibling).gsub(/^-\s*"|"$/, '').strip
          new(:cdb_id => id, :title => title, :num => num, :name => name)
        end
      end

      private

      # Stripped text of +node+, or an empty string when the node is missing.
      def node_text(node)
        node ? node.text.strip : ''
      end

    end
  end
end
@@ -0,0 +1,18 @@
1
# Hash#to_json (used by CDB::Struct#to_json) only exists once json is loaded.
require 'json'

module CDB
  # Modifications to Ruby's Struct class for use within the CDB module.
  # Must be called 'Struct' to play nice with YARD's @attr documentation.
  #
  # The superclass is written as ::Struct so that loading this file a second
  # time reopens the class instead of raising a superclass mismatch.
  class Struct < ::Struct

    # Override Struct's initialize method to accept a hash of members instead.
    #
    # @param h [Hash] member name => value; members left out stay nil
    # @raise [NoMethodError] when a key has no matching public writer
    def initialize(h={})
      h.each{|k,v| public_send("#{k}=", v)}
    end

    # Serializes the struct as a JSON object keyed by member name.
    #
    # @param a arguments forwarded unchanged to Hash#to_json
    # @return [String] JSON text
    def to_json(*a)
      Hash[each_pair.to_a].to_json(*a)
    end

  end
end
@@ -0,0 +1,26 @@
1
module CDB
  # A comic title (series) as listed on a ComicBookDB search results page.
  class Title < Struct.new(:cdb_id, :name, :publisher, :begin_date, :end_date)
    FORM_SEARCHTYPE = 'Title'
    WEB_PATH = 'title.php'

    class << self

      # Searches by title and returns only the title results.
      #
      # @param query [String] text to search for
      # @return the +:titles+ entry of the hash returned by CDB.search
      def search(query)
        CDB.search(query, FORM_SEARCHTYPE)[:titles]
      end

      # Builds one Title per link under +node+ whose href starts with WEB_PATH.
      #
      # The link text is expected to look like "Name (YYYY)". Text in any
      # other shape is kept whole as the name with a nil begin_date, instead
      # of being cut at fixed offsets. A link without a following sibling
      # gets an empty publisher.
      #
      # @param node [#css] container of the search result links
      # @return [Array<CDB::Title>] titles with cdb_id, name, publisher and
      #   begin_date set
      def parse_results(node)
        node.css("a[href^=\"#{WEB_PATH}\"]").map do |link|
          id = link.attr('href').split('=').last
          label = link.child
          text = label ? label.text.strip : ''
          match = text.match(/\A(.*) \((\d{4})\)\z/m)
          name, year = match ? match[1..2] : [text, nil]
          # The sibling text carries the publisher wrapped in parentheses.
          sibling = link.next_sibling
          pub = sibling ? sibling.text.gsub(/^\s*\(|\)\s*$/, '') : ''
          new(:cdb_id => id, :name => name, :publisher => pub, :begin_date => year)
        end
      end

    end
  end
end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cdb-crawlr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Gabe Smith
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-01 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &4367990 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *4367990
25
+ description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
26
+ email:
27
+ - sgt.floydpepper@gmail.com
28
+ executables: []
29
+ extensions: []
30
+ extra_rdoc_files: []
31
+ files:
32
+ - lib/cdb/issue.rb
33
+ - lib/cdb/struct.rb
34
+ - lib/cdb/title.rb
35
+ - lib/cdb-crawlr.rb
36
+ homepage: https://github.com/sgtFloyd/cdb-crawlr
37
+ licenses: []
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 1.8.10
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Ruby gem and command-line tool for querying ComicBookDB.com
60
+ test_files: []
61
+ has_rdoc: