cdb-crawlr 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/cdb CHANGED
@@ -1,94 +1,106 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'cdb-crawlr'
4
- # load 'lib/cdb-crawlr.rb'
5
- require 'optparse'
6
-
7
- $cli = CDB::CLI.new
8
-
9
- def print_help(opt = @global, error=nil)
10
- puts(error.to_s+"\n\n") if error
11
- puts opt
12
- exit 1
13
- end
14
-
15
- @global = OptionParser.new do |opts|
16
- opts.banner = "Usage: cdb [-h|--help] [-v|--version] <COMMAND> <TYPE> [<ARGS>]"
17
-
18
- opts.on("-h", "--help", "Display this screen"){ print_help }
19
- opts.on("-v", "--version", "Show version information") do
20
- puts "cdb #{CDB::VERSION}"; exit
21
- end
22
-
23
- opts.separator "\nCOMMANDS:"
24
- opts.separator " search Search for entries of a given TYPE matching QUERY"
25
- opts.separator " show Show details of an entry using a CDB_ID obtained from search"
26
- end
27
-
28
- @search = OptionParser.new do |opts|
29
- opts.banner = "Search for entries of a given TYPE matching QUERY\n"+
30
- "Usage: cdb search [-h|--help] <TYPE> <QUERY>"
31
-
32
- opts.on("-h", "--help", "Display this screen"){ print_help opts }
33
-
34
- opts.separator "\nTYPES:"
35
- opts.separator " issue Search comic issue names for given QUERY"
36
- opts.separator " series Search comic series names for given QUERY"
37
- end
38
-
39
- @show = OptionParser.new do |opts|
40
- opts.banner = "Show details of an entry using a CDB_ID obtained from search\n"+
41
- "Usage: cdb show [-h|--help] <TYPE> <CDB_ID>"
42
-
43
- opts.on("-h", "--help", "Display this screen"){ print_help opts }
44
-
45
- opts.separator "\nTYPES:"
46
- opts.separator " series Get all available details of a comic series"
47
- end
48
-
49
- @command_opts = {
50
- 'search' => @search,
51
- 'show' => @show
52
- }
53
-
54
- # Parse global flags
55
- begin
56
- @global.order!
57
- rescue OptionParser::InvalidOption => e
58
- puts e; print_help
59
- end
60
-
61
- # Pop and verify command
62
- begin
63
- command = ARGV.shift
64
- $cli[:command] = command
65
- command_opt = @command_opts[$cli[:command]]
66
- rescue
67
- error = "invalid COMMAND: #{command}" unless command.to_s.empty?
68
- print_help @global, error
69
- end
70
-
71
- # Parse command flags
72
- begin
73
- command_opt.order!
74
- rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
75
- print_help command_opt, e
76
- end
77
-
78
- # Pop and verify type
79
- begin
80
- type = ARGV.shift
81
- $cli[:type] = type
82
- rescue
83
- error = "invalid TYPE: #{type}" unless type.to_s.empty?
84
- print_help command_opt, error
85
- end
86
-
87
- # Verify args
88
- begin
89
- $cli[:args] = ARGV.join(' ')
90
- rescue => e
91
- print_help command_opt, e
92
- end
93
-
94
- $cli.execute
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'cdb-crawlr'
4
+ # load 'lib/cdb-crawlr.rb'
5
+ require 'optparse'
6
+
7
+ $cli = CDB::CLI.new
8
+
9
+ def print_help(opt = @global, error=nil)
10
+ puts "cdb: #{error}\n" if error && !error.to_s.empty?
11
+ puts opt
12
+ exit 1
13
+ end
14
+
15
+ @global = OptionParser.new do |opts|
16
+ opts.banner = "Usage: cdb [-v|--version] <COMMAND> [<ARGS>]"
17
+
18
+ opts.on("-h", "--help", "Display this screen"){ print_help }
19
+ opts.on("-v", "--version", "Show version information") do
20
+ puts "cdb #{CDB::VERSION}"; exit
21
+ end
22
+
23
+ opts.separator "\nCOMMANDS:"
24
+ opts.separator " rename Rename a directory of comics according to series data"
25
+ opts.separator " search Search for entries of a given TYPE matching QUERY"
26
+ opts.separator " show Show details of an entry using a CDB_ID obtained from search"
27
+ end
28
+
29
+ @search = OptionParser.new do |opts|
30
+ opts.banner = "Usage: cdb search <TYPE> <QUERY>"
31
+
32
+ opts.on("-h", "--help", "Display this screen"){ print_help opts }
33
+
34
+ opts.separator "\nTYPES:"
35
+ opts.separator " issue Search comic issue names for given QUERY"
36
+ opts.separator " series Search comic series names for given QUERY"
37
+ end
38
+
39
+ @show = OptionParser.new do |opts|
40
+ opts.banner = "Usage: cdb show <TYPE> <CDB_ID>"
41
+
42
+ opts.on("-h", "--help", "Display this screen"){ print_help opts }
43
+
44
+ opts.separator "\nTYPES:"
45
+ opts.separator " series Get all available details of a comic series"
46
+ end
47
+
48
+ @rename = OptionParser.new do |opts|
49
+ opts.banner = "Usage: cdb rename [-f|--force] <PATH> <CDB_ID>"
50
+
51
+ opts.on("-h", "--help", "Display this screen"){ print_help opts }
52
+ opts.on("-f", "--force", "Perform the rename without any confirmations"){ $cli[:force] = true }
53
+ opts.on("-i", "--ignore", "Ignore warnings about unknown and misformatted issue numbers"){ $cli[:ignore] = true }
54
+ end
55
+
56
+ @command_opts = {
57
+ 'search' => @search,
58
+ 'show' => @show,
59
+ 'rename' => @rename
60
+ }
61
+
62
+ # Parse global flags
63
+ begin
64
+ @global.order!
65
+ rescue OptionParser::InvalidOption => e
66
+ puts e; print_help
67
+ end
68
+
69
+ # Pop and verify command
70
+ begin
71
+ command = ARGV.shift
72
+ $cli[:command] = command
73
+ command_opt = @command_opts[$cli[:command]]
74
+ rescue
75
+ error = "invalid COMMAND: #{command}" unless command.to_s.empty?
76
+ print_help @global, error
77
+ end
78
+
79
+ # Parse command flags
80
+ begin
81
+ command_opt.order!
82
+ rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
83
+ print_help command_opt, e
84
+ end
85
+
86
+ # Pop and verify third argument
87
+ begin
88
+ next_arg = ARGV.shift
89
+ case $cli[:command]
90
+ when 'search, show'
91
+ $cli[:type] = next_arg
92
+ when 'rename'
93
+ $cli[:path] = next_arg
94
+ end
95
+ rescue => e
96
+ print_help command_opt, e
97
+ end
98
+
99
+ # Verify args
100
+ begin
101
+ $cli[:args] = ARGV.join(' ')
102
+ rescue => e
103
+ print_help command_opt, e
104
+ end
105
+
106
+ $cli.execute
@@ -1,51 +1,52 @@
1
- require 'json'
2
- require 'nokogiri'
3
- require 'open-uri'
4
-
5
- $:.unshift(File.dirname(__FILE__))
6
-
7
- require 'cdb/cli'
8
- require 'cdb/struct'
9
- require 'cdb/issue'
10
- require 'cdb/series'
11
-
12
- module CDB
13
- VERSION = '0.2.1'
14
-
15
- BASE_URL = 'http://www.comicbookdb.com'
16
- REQUEST_HEADERS = {'Connection' => 'keep-alive'}
17
- SEARCH_PATH = 'search.php'
18
-
19
- class << self; attr
20
-
21
- def search(query, type='FullSite')
22
- data = URI.encode_www_form(
23
- form_searchtype: type,
24
- form_search: query
25
- )
26
- url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
27
- doc = read_page(url)
28
- node = doc.css('h2:contains("Search Results")').first.parent
29
- {
30
- :series => CDB::Series.parse_results(node),
31
- :issues => CDB::Issue.parse_results(node)
32
- }
33
- end
34
-
35
- def show(id, type)
36
- data = URI.encode_www_form('ID' => id)
37
- url = "#{BASE_URL}/#{type::WEB_PATH}?#{data}"
38
- page = read_page(url)
39
- type.parse_data(id, page)
40
- end
41
-
42
- private
43
-
44
- def read_page(url)
45
- content = open(url, REQUEST_HEADERS).read
46
- content.force_encoding('ISO-8859-1').encode!('UTF-8')
47
- Nokogiri::HTML(content)
48
- end
49
-
50
- end
51
- end
1
+ require 'json'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+
5
+ $:.unshift(File.dirname(__FILE__))
6
+
7
+ require 'cdb/cli'
8
+ require 'cdb/renamer'
9
+ require 'cdb/struct'
10
+ require 'cdb/issue'
11
+ require 'cdb/series'
12
+
13
+ module CDB
14
+ VERSION = '0.3.0'
15
+
16
+ BASE_URL = 'http://www.comicbookdb.com'
17
+ REQUEST_HEADERS = {'Connection' => 'keep-alive'}
18
+ SEARCH_PATH = 'search.php'
19
+
20
+ class << self; attr
21
+
22
+ def search(query, type='FullSite')
23
+ data = URI.encode_www_form(
24
+ form_searchtype: type,
25
+ form_search: query
26
+ )
27
+ url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
28
+ doc = read_page(url)
29
+ node = doc.css('h2:contains("Search Results")').first.parent
30
+ {
31
+ :series => CDB::Series.parse_results(node),
32
+ :issues => CDB::Issue.parse_results(node)
33
+ }
34
+ end
35
+
36
+ def show(id, type)
37
+ data = URI.encode_www_form('ID' => id)
38
+ url = "#{BASE_URL}/#{type::WEB_PATH}?#{data}"
39
+ page = read_page(url)
40
+ type.parse_data(id, page)
41
+ end
42
+
43
+ private
44
+
45
+ def read_page(url)
46
+ content = open(url, REQUEST_HEADERS).read
47
+ content.force_encoding('ISO-8859-1').encode!('UTF-8')
48
+ Nokogiri::HTML(content)
49
+ end
50
+
51
+ end
52
+ end
@@ -1,61 +1,84 @@
1
- require 'pp'
2
-
3
- module CDB
4
- class CLI
5
- COMMANDS = %w[search show]
6
- TYPES = %w[series issue issues]
7
-
8
- def initialize(options={})
9
- @options = options
10
- end
11
-
12
- def [](k)
13
- @options[k]
14
- end
15
-
16
- def []=(k, v)
17
- v = v.to_s.strip
18
- case k
19
- when :command
20
- v = v.downcase
21
- raise unless COMMANDS.include?(v)
22
- when :type
23
- v = v.downcase
24
- if self[:command] == 'show'
25
- # remove when "show issue" is supported
26
- raise unless v == 'series'
27
- else
28
- raise unless TYPES.include?(v)
29
- end
30
- when :args
31
- raise "invalid args" if v.empty?
32
- end
33
- @options[k] = v
34
- end
35
-
36
- def execute
37
- send self[:command]
38
- end
39
-
40
- private
41
-
42
- def search
43
- case self[:type]
44
- when 'series'
45
- CDB::Series.search(self[:args]).each{|r| puts r.to_json}
46
- when 'issue', 'issues'
47
- CDB::Issue.search(self[:args]).each{|r| puts r.to_json}
48
- end
49
- end
50
-
51
- def show
52
- case self[:type]
53
- when 'series'
54
- res = CDB::Series.show(self[:args])
55
- res.issues.each{|i| i.series=nil}
56
- puts res.to_json(array_nl:"\n", object_nl:"\n", indent:' ')
57
- end
58
- end
59
-
60
- end
61
- end
1
+ require 'pp'
2
+
3
+ module CDB
4
+ class CLI
5
+ COMMANDS = %w[search show rename]
6
+ TYPES = %w[series issue issues]
7
+
8
+ def initialize(options={})
9
+ @options = options
10
+ end
11
+
12
+ def [](k)
13
+ @options[k]
14
+ end
15
+
16
+ def []=(k, v)
17
+ v = v.to_s.strip
18
+ begin
19
+ send("#{k}=", v)
20
+ rescue NoMethodError
21
+ @options[k] = v
22
+ end
23
+ end
24
+
25
+ def execute
26
+ send self[:command]
27
+ end
28
+
29
+ private
30
+
31
+ def search
32
+ case self[:type]
33
+ when 'series'
34
+ CDB::Series.search(self[:args]).each{|r| puts r.to_json}
35
+ when 'issue', 'issues'
36
+ CDB::Issue.search(self[:args]).each{|r| puts r.to_json}
37
+ end
38
+ end
39
+
40
+ def show
41
+ case self[:type]
42
+ when 'series'
43
+ res = CDB::Series.show(self[:args])
44
+ res.issues.each{|i| i.series=nil}
45
+ puts res.to_json(array_nl:"\n", object_nl:"\n", indent:' ')
46
+ end
47
+ end
48
+
49
+ def rename
50
+ renamer = CDB::Renamer.new(@options)
51
+ renamer.execute
52
+ end
53
+
54
+ def args=(v)
55
+ raise "invalid args" if v.empty?
56
+ @options[:args] = v
57
+ end
58
+
59
+ def command=(v)
60
+ v = v.downcase
61
+ raise unless COMMANDS.include?(v)
62
+ @options[:command] = v
63
+ end
64
+
65
+ def type=(v)
66
+ v = v.downcase
67
+ error = "invalid TYPE: #{v}" unless v.empty?
68
+ if @options[:command] == 'show'
69
+ # remove when "show issue" is supported
70
+ raise error.to_s unless v == 'series'
71
+ else
72
+ raise error.to_s unless TYPES.include?(v)
73
+ end
74
+ @options[:type] = v
75
+ end
76
+
77
+ def path=(v)
78
+ error = "#{v}: No such directory" unless v.empty?
79
+ raise error.to_s unless File.directory?(v)
80
+ @options[:path] = v
81
+ end
82
+
83
+ end
84
+ end
File without changes
@@ -0,0 +1,117 @@
1
+ module CDB
2
+ class Renamer
3
+ EXTENSIONS = %w[cbz cbr]
4
+ ISSUE_NUM = '[\d\.]+\w?'
5
+ INPUT_FORMAT = /#(#{ISSUE_NUM})/
6
+ OUTPUT_FORMAT = "%{series} #%{padded_num} (%{cover_date})"
7
+
8
+ def initialize(options)
9
+ @path = options[:path]
10
+ @cdb_id = options[:args]
11
+ @force = options[:force]
12
+ @ignore = options[:ignore]
13
+ end
14
+
15
+ def execute
16
+ @rename_map =
17
+ files.each_with_object({}) do |filename, map|
18
+ map[filename]= transform(filename)
19
+ end.select{|k,v| v}
20
+
21
+ do_rename if verify_map
22
+ end
23
+
24
+ private
25
+
26
+ def do_rename
27
+ Dir.chdir(@path) do
28
+ @rename_map.each do |source, destination|
29
+ next if source == destination
30
+ puts "#{pad(source)} => #{destination}"
31
+ if @force
32
+ %x[ mv "#{source}" "#{destination}" ]
33
+ end
34
+ end
35
+ end
36
+ end
37
+
38
+ def verify_map
39
+ dups = @rename_map.select do |k,v|
40
+ @rename_map.values.count(v) > 1
41
+ end
42
+ dups.keys.uniq.each do |k|
43
+ padded = pad(k, dups.keys.map(&:length).max)
44
+ puts "ERROR: output name clash: #{padded} => #{dups[k]}"
45
+ end
46
+ dups.empty?
47
+ end
48
+
49
+ def transform(filename)
50
+ return unless num = parse_issue_num(filename)
51
+ if issue = issues[num]
52
+ generate_output(filename, issue)
53
+ else
54
+ puts "WARNING: #{filename}: unknown issue: #{num}"
55
+ end
56
+ end
57
+
58
+ def parse_issue_num(filename)
59
+ if match = filename.match(INPUT_FORMAT)
60
+ num = match[1].gsub(/^0+|\.$/,'')
61
+ num = '0' if num == ''
62
+ num
63
+ else
64
+ puts "WARNING: #{filename}: invalid input format" unless @ignore
65
+ end
66
+ end
67
+
68
+ def generate_output(filename, issue)
69
+ json = issue.as_json
70
+ json[:series] = issue.series.name
71
+ json[:padded_num] = pad_num(issue.num)
72
+ output = OUTPUT_FORMAT % json
73
+ sanitize(output + File.extname(filename))
74
+ end
75
+
76
+ def sanitize(filename)
77
+ filename.gsub(/[:]/, ' -')
78
+ .gsub(/[\/\\<>]/, '-')
79
+ .gsub(/[\?\*|"]/, '_')
80
+ end
81
+
82
+ def pad(file, max=nil)
83
+ max ||= files.map(&:length).max
84
+ file + (' '*(max-file.length))
85
+ end
86
+
87
+ def pad_num(num, max=nil)
88
+ max ||= max_num_length
89
+ '0'*(max-num.to_s.length)+num.to_s
90
+ end
91
+
92
+ def files
93
+ Dir.chdir(@path) do
94
+ @files ||= EXTENSIONS
95
+ .map{|e| Dir["*.#{e}"]}.flatten
96
+ .select{|f| File.file?(f)}.sort
97
+ end
98
+ @files
99
+ end
100
+
101
+ def max_num_length
102
+ @max_num ||= files.map{|f| parse_issue_num(f).length}.max
103
+ end
104
+
105
+ def series
106
+ @series ||= CDB::Series.show(@cdb_id)
107
+ end
108
+
109
+ def issues
110
+ # Only act on issues - not TPB, HC, or anything else
111
+ @issues ||= Hash[series.issues
112
+ .select{|i| i.num.match(/^#{ISSUE_NUM}$/)}
113
+ .map{|i| [i.num.to_s, i]}]
114
+ end
115
+
116
+ end
117
+ end
@@ -30,7 +30,7 @@ module CDB
30
30
  start_d, end_d = dates.split('-').map(&:strip)
31
31
 
32
32
  series = new(
33
- :cdb_id => id,
33
+ :cdb_id => id.to_i,
34
34
  :name => page.css('.page_headline').first.text.strip,
35
35
  :publisher => page.css('a[href^="publisher.php"]').first.text.strip,
36
36
  :imprint => (page.css('a[href^="imprint.php"]').first.text.strip rescue nil),
File without changes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdb-crawlr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-04 00:00:00.000000000Z
12
+ date: 2012-11-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &25288420 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,12 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *25288420
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
25
30
  description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
26
31
  email:
27
32
  - sgt.floydpepper@gmail.com
@@ -32,6 +37,7 @@ extra_rdoc_files: []
32
37
  files:
33
38
  - lib/cdb/cli.rb
34
39
  - lib/cdb/issue.rb
40
+ - lib/cdb/renamer.rb
35
41
  - lib/cdb/series.rb
36
42
  - lib/cdb/struct.rb
37
43
  - lib/cdb-crawlr.rb
@@ -56,9 +62,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
62
  version: '0'
57
63
  requirements: []
58
64
  rubyforge_project:
59
- rubygems_version: 1.8.10
65
+ rubygems_version: 1.8.24
60
66
  signing_key:
61
67
  specification_version: 3
62
68
  summary: Ruby gem and command-line tool for querying ComicBookDB.com
63
69
  test_files: []
64
- has_rdoc: