cdb-crawlr 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/cdb CHANGED
@@ -1,94 +1,106 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'cdb-crawlr'
4
- # load 'lib/cdb-crawlr.rb'
5
- require 'optparse'
6
-
7
- $cli = CDB::CLI.new
8
-
9
- def print_help(opt = @global, error=nil)
10
- puts(error.to_s+"\n\n") if error
11
- puts opt
12
- exit 1
13
- end
14
-
15
- @global = OptionParser.new do |opts|
16
- opts.banner = "Usage: cdb [-h|--help] [-v|--version] <COMMAND> <TYPE> [<ARGS>]"
17
-
18
- opts.on("-h", "--help", "Display this screen"){ print_help }
19
- opts.on("-v", "--version", "Show version information") do
20
- puts "cdb #{CDB::VERSION}"; exit
21
- end
22
-
23
- opts.separator "\nCOMMANDS:"
24
- opts.separator " search Search for entries of a given TYPE matching QUERY"
25
- opts.separator " show Show details of an entry using a CDB_ID obtained from search"
26
- end
27
-
28
- @search = OptionParser.new do |opts|
29
- opts.banner = "Search for entries of a given TYPE matching QUERY\n"+
30
- "Usage: cdb search [-h|--help] <TYPE> <QUERY>"
31
-
32
- opts.on("-h", "--help", "Display this screen"){ print_help opts }
33
-
34
- opts.separator "\nTYPES:"
35
- opts.separator " issue Search comic issue names for given QUERY"
36
- opts.separator " series Search comic series names for given QUERY"
37
- end
38
-
39
- @show = OptionParser.new do |opts|
40
- opts.banner = "Show details of an entry using a CDB_ID obtained from search\n"+
41
- "Usage: cdb show [-h|--help] <TYPE> <CDB_ID>"
42
-
43
- opts.on("-h", "--help", "Display this screen"){ print_help opts }
44
-
45
- opts.separator "\nTYPES:"
46
- opts.separator " series Get all available details of a comic series"
47
- end
48
-
49
- @command_opts = {
50
- 'search' => @search,
51
- 'show' => @show
52
- }
53
-
54
- # Parse global flags
55
- begin
56
- @global.order!
57
- rescue OptionParser::InvalidOption => e
58
- puts e; print_help
59
- end
60
-
61
- # Pop and verify command
62
- begin
63
- command = ARGV.shift
64
- $cli[:command] = command
65
- command_opt = @command_opts[$cli[:command]]
66
- rescue
67
- error = "invalid COMMAND: #{command}" unless command.to_s.empty?
68
- print_help @global, error
69
- end
70
-
71
- # Parse command flags
72
- begin
73
- command_opt.order!
74
- rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
75
- print_help command_opt, e
76
- end
77
-
78
- # Pop and verify type
79
- begin
80
- type = ARGV.shift
81
- $cli[:type] = type
82
- rescue
83
- error = "invalid TYPE: #{type}" unless type.to_s.empty?
84
- print_help command_opt, error
85
- end
86
-
87
- # Verify args
88
- begin
89
- $cli[:args] = ARGV.join(' ')
90
- rescue => e
91
- print_help command_opt, e
92
- end
93
-
94
- $cli.execute
1
#!/usr/bin/env ruby

# Command-line entry point for cdb-crawlr.
#
# Argument handling happens in stages, each consuming from ARGV:
#   1. global flags            (-h, -v)
#   2. the COMMAND word        (search | show | rename)
#   3. the command's own flags (e.g. rename -f / -i)
#   4. the command's first positional (TYPE for search/show, PATH for rename)
#   5. everything left over, joined with spaces (QUERY or CDB_ID)
# Validation is done by the CDB::CLI setters, which raise on bad input; every
# stage rescues that and prints the matching usage text before exiting.

require 'cdb-crawlr'
# load 'lib/cdb-crawlr.rb'
require 'optparse'

$cli = CDB::CLI.new

# Prints +error+ (when present and non-empty) followed by the usage text of
# +opt+, then exits with status 1.
def print_help(opt = @global, error = nil)
  puts "cdb: #{error}\n" if error && !error.to_s.empty?
  puts opt
  exit 1
end

@global = OptionParser.new do |opts|
  opts.banner = "Usage: cdb [-v|--version] <COMMAND> [<ARGS>]"

  opts.on("-h", "--help", "Display this screen") { print_help }
  opts.on("-v", "--version", "Show version information") do
    puts "cdb #{CDB::VERSION}"
    exit
  end

  opts.separator "\nCOMMANDS:"
  opts.separator " rename Rename a directory of comics according to series data"
  opts.separator " search Search for entries of a given TYPE matching QUERY"
  opts.separator " show Show details of an entry using a CDB_ID obtained from search"
end

@search = OptionParser.new do |opts|
  opts.banner = "Usage: cdb search <TYPE> <QUERY>"

  opts.on("-h", "--help", "Display this screen") { print_help opts }

  opts.separator "\nTYPES:"
  opts.separator " issue Search comic issue names for given QUERY"
  opts.separator " series Search comic series names for given QUERY"
end

@show = OptionParser.new do |opts|
  opts.banner = "Usage: cdb show <TYPE> <CDB_ID>"

  opts.on("-h", "--help", "Display this screen") { print_help opts }

  opts.separator "\nTYPES:"
  opts.separator " series Get all available details of a comic series"
end

@rename = OptionParser.new do |opts|
  opts.banner = "Usage: cdb rename [-f|--force] <PATH> <CDB_ID>"

  opts.on("-h", "--help", "Display this screen") { print_help opts }
  opts.on("-f", "--force", "Perform the rename without any confirmations") { $cli[:force] = true }
  opts.on("-i", "--ignore", "Ignore warnings about unknown and misformatted issue numbers") { $cli[:ignore] = true }
end

# Maps each COMMAND word to the parser that handles its flags and usage text.
@command_opts = {
  'search' => @search,
  'show'   => @show,
  'rename' => @rename
}

# Parse global flags
begin
  @global.order!
rescue OptionParser::InvalidOption => e
  puts e
  print_help
end

# Pop and verify command
begin
  command = ARGV.shift
  $cli[:command] = command
  command_opt = @command_opts[$cli[:command]]
rescue
  error = "invalid COMMAND: #{command}" unless command.to_s.empty?
  print_help @global, error
end

# Parse command flags
begin
  command_opt.order!
rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
  print_help command_opt, e
end

# Pop and verify third argument
begin
  next_arg = ARGV.shift
  case $cli[:command]
  # Two separate patterns: a single 'search, show' string would never equal
  # either command name, leaving :type unset and the command a silent no-op.
  when 'search', 'show'
    $cli[:type] = next_arg
  when 'rename'
    $cli[:path] = next_arg
  end
rescue => e
  print_help command_opt, e
end

# Verify args
begin
  $cli[:args] = ARGV.join(' ')
rescue => e
  print_help command_opt, e
end

$cli.execute
@@ -1,51 +1,52 @@
1
- require 'json'
2
- require 'nokogiri'
3
- require 'open-uri'
4
-
5
- $:.unshift(File.dirname(__FILE__))
6
-
7
- require 'cdb/cli'
8
- require 'cdb/struct'
9
- require 'cdb/issue'
10
- require 'cdb/series'
11
-
12
- module CDB
13
- VERSION = '0.2.1'
14
-
15
- BASE_URL = 'http://www.comicbookdb.com'
16
- REQUEST_HEADERS = {'Connection' => 'keep-alive'}
17
- SEARCH_PATH = 'search.php'
18
-
19
- class << self; attr
20
-
21
- def search(query, type='FullSite')
22
- data = URI.encode_www_form(
23
- form_searchtype: type,
24
- form_search: query
25
- )
26
- url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
27
- doc = read_page(url)
28
- node = doc.css('h2:contains("Search Results")').first.parent
29
- {
30
- :series => CDB::Series.parse_results(node),
31
- :issues => CDB::Issue.parse_results(node)
32
- }
33
- end
34
-
35
- def show(id, type)
36
- data = URI.encode_www_form('ID' => id)
37
- url = "#{BASE_URL}/#{type::WEB_PATH}?#{data}"
38
- page = read_page(url)
39
- type.parse_data(id, page)
40
- end
41
-
42
- private
43
-
44
- def read_page(url)
45
- content = open(url, REQUEST_HEADERS).read
46
- content.force_encoding('ISO-8859-1').encode!('UTF-8')
47
- Nokogiri::HTML(content)
48
- end
49
-
50
- end
51
- end
1
+ require 'json'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+
5
+ $:.unshift(File.dirname(__FILE__))
6
+
7
+ require 'cdb/cli'
8
+ require 'cdb/renamer'
9
+ require 'cdb/struct'
10
+ require 'cdb/issue'
11
+ require 'cdb/series'
12
+
13
# Top-level namespace of the gem: version constant, site constants and the
# module-level helpers that fetch and parse ComicBookDB pages.
module CDB
  VERSION = '0.3.0'

  BASE_URL = 'http://www.comicbookdb.com'
  REQUEST_HEADERS = {'Connection' => 'keep-alive'}
  SEARCH_PATH = 'search.php'

  class << self

    # Runs a site search and parses the result page.
    #
    # query - search text
    # type  - value sent as the form_searchtype parameter
    #
    # Returns a Hash with :series and :issues, each being whatever the
    # respective class's parse_results returns for the results node.
    def search(query, type='FullSite')
      data = URI.encode_www_form(
        form_searchtype: type,
        form_search: query
      )
      url = "#{BASE_URL}/#{SEARCH_PATH}?#{data}"
      doc = read_page(url)
      node = doc.css('h2:contains("Search Results")').first.parent
      {
        :series => CDB::Series.parse_results(node),
        :issues => CDB::Issue.parse_results(node)
      }
    end

    # Fetches the detail page of entry +id+ and hands it to +type+ for parsing.
    #
    # type must expose a WEB_PATH constant and a parse_data(id, page) method.
    def show(id, type)
      data = URI.encode_www_form('ID' => id)
      url = "#{BASE_URL}/#{type::WEB_PATH}?#{data}"
      page = read_page(url)
      type.parse_data(id, page)
    end

    private

    # Downloads +url+ and returns it as a parsed Nokogiri HTML document.
    #
    # The request goes through URI#open (provided by open-uri) rather than
    # Kernel#open: Kernel#open stopped accepting URLs in Ruby 3.0, while
    # URI#open works on old and new Rubies alike. The block form makes sure
    # the underlying IO is closed once the body has been read.
    def read_page(url)
      content = URI.parse(url).open(REQUEST_HEADERS) { |io| io.read }
      # The body is reinterpreted as ISO-8859-1 and transcoded to UTF-8.
      content.force_encoding('ISO-8859-1').encode!('UTF-8')
      Nokogiri::HTML(content)
    end

  end
end
@@ -1,61 +1,84 @@
1
- require 'pp'
2
-
3
- module CDB
4
- class CLI
5
- COMMANDS = %w[search show]
6
- TYPES = %w[series issue issues]
7
-
8
- def initialize(options={})
9
- @options = options
10
- end
11
-
12
- def [](k)
13
- @options[k]
14
- end
15
-
16
- def []=(k, v)
17
- v = v.to_s.strip
18
- case k
19
- when :command
20
- v = v.downcase
21
- raise unless COMMANDS.include?(v)
22
- when :type
23
- v = v.downcase
24
- if self[:command] == 'show'
25
- # remove when "show issue" is supported
26
- raise unless v == 'series'
27
- else
28
- raise unless TYPES.include?(v)
29
- end
30
- when :args
31
- raise "invalid args" if v.empty?
32
- end
33
- @options[k] = v
34
- end
35
-
36
- def execute
37
- send self[:command]
38
- end
39
-
40
- private
41
-
42
- def search
43
- case self[:type]
44
- when 'series'
45
- CDB::Series.search(self[:args]).each{|r| puts r.to_json}
46
- when 'issue', 'issues'
47
- CDB::Issue.search(self[:args]).each{|r| puts r.to_json}
48
- end
49
- end
50
-
51
- def show
52
- case self[:type]
53
- when 'series'
54
- res = CDB::Series.show(self[:args])
55
- res.issues.each{|i| i.series=nil}
56
- puts res.to_json(array_nl:"\n", object_nl:"\n", indent:' ')
57
- end
58
- end
59
-
60
- end
61
- end
1
+ require 'pp'
2
+
3
module CDB
  # Option store and command dispatcher behind the `cdb` executable.
  #
  # Options are written through #[]=. Keys that have a private validating
  # setter (command, type, path, args) are checked on assignment and raise a
  # RuntimeError on bad input; all other keys are stored without validation.
  class CLI
    COMMANDS = %w[search show rename].freeze
    TYPES = %w[series issue issues].freeze

    # options - initial option Hash (symbol keys)
    def initialize(options={})
      @options = options
    end

    # Returns the stored value for option +k+, or nil when unset.
    def [](k)
      @options[k]
    end

    # Stores option +k+.
    #
    # When a setter named "<k>=" exists it receives the stripped string form
    # of +v+ and may raise RuntimeError to reject it. Otherwise the value is
    # stored directly: booleans are kept as booleans (so a false flag stays
    # falsy), anything else is converted to a stripped string.
    #
    # The setter lookup uses respond_to? instead of rescuing NoMethodError,
    # so a NoMethodError raised *inside* a setter is not mistaken for
    # "no such setter" and silently swallowed.
    def []=(k, v)
      setter = "#{k}="
      if respond_to?(setter, true) # true: the setters are private
        send(setter, v.to_s.strip)
      else
        @options[k] = (v == true || v == false) ? v : v.to_s.strip
      end
    end

    # Runs the method named by the stored :command option.
    def execute
      send self[:command]
    end

    private

    # Prints each search hit as one JSON line.
    def search
      case self[:type]
      when 'series'
        CDB::Series.search(self[:args]).each { |r| puts r.to_json }
      when 'issue', 'issues'
        CDB::Issue.search(self[:args]).each { |r| puts r.to_json }
      end
    end

    # Prints the details of one series as multi-line JSON.
    def show
      case self[:type]
      when 'series'
        res = CDB::Series.show(self[:args])
        # Clear each issue's series reference before serialising.
        # NOTE(review): presumably avoids nesting the series inside every issue - confirm.
        res.issues.each { |i| i.series = nil }
        puts res.to_json(array_nl:"\n", object_nl:"\n", indent:' ')
      end
    end

    # Hands all collected options to the renamer.
    def rename
      CDB::Renamer.new(@options).execute
    end

    # Raises RuntimeError when no trailing arguments were given.
    def args=(v)
      raise "invalid args" if v.empty?
      @options[:args] = v
    end

    # Accepts only the names listed in COMMANDS (case-insensitive).
    def command=(v)
      v = v.downcase
      raise "invalid COMMAND: #{v}" unless COMMANDS.include?(v)
      @options[:command] = v
    end

    # Accepts the names in TYPES, or only 'series' for the show command.
    # An empty value raises with an empty message.
    def type=(v)
      v = v.downcase
      # remove when "show issue" is supported
      allowed = @options[:command] == 'show' ? %w[series] : TYPES
      unless allowed.include?(v)
        raise(v.empty? ? '' : "invalid TYPE: #{v}")
      end
      @options[:type] = v
    end

    # Accepts only an existing directory. An empty value raises with an
    # empty message.
    def path=(v)
      unless File.directory?(v)
        raise(v.empty? ? '' : "#{v}: No such directory")
      end
      @options[:path] = v
    end

  end
end
File without changes
@@ -0,0 +1,117 @@
1
module CDB
  # Renames the comic archives in a directory to a uniform
  # "<series> #<padded issue number> (<cover date>).<ext>" scheme, using the
  # issue list of one ComicBookDB series.
  #
  # Without the :force option the planned renames are only printed.
  class Renamer
    EXTENSIONS = %w[cbz cbr].freeze
    ISSUE_NUM = '[\d\.]+\w?'
    INPUT_FORMAT = /#(#{ISSUE_NUM})/
    OUTPUT_FORMAT = "%{series} #%{padded_num} (%{cover_date})"

    # options - Hash with :path (directory), :args (series id passed to
    #           CDB::Series.show), :force (actually rename) and :ignore
    #           (suppress warnings).
    def initialize(options)
      @path = options[:path]
      @cdb_id = options[:args]
      @force = options[:force]
      @ignore = options[:ignore]
    end

    # Builds the source => destination map for every archive whose issue
    # could be resolved, then prints (and with :force performs) the renames
    # unless two sources would end up with the same name.
    def execute
      @rename_map = files.each_with_object({}) do |filename, map|
        target = transform(filename)
        map[filename] = target if target
      end

      do_rename if verify_map
    end

    private

    # Prints every pending rename and, when forced, carries it out.
    #
    # Paths are joined onto @path instead of chdir-ing into it, so helpers
    # that resolve @path again keep working when it is relative. File.rename
    # is used rather than a shell `mv` so names containing quotes, `$` or
    # backticks are handled literally.
    def do_rename
      width = files.map(&:length).max
      @rename_map.each do |source, destination|
        next if source == destination
        puts "#{pad(source, width)} => #{destination}"
        next unless @force
        File.rename(File.join(@path, source), File.join(@path, destination))
      end
    end

    # Reports every source whose destination is shared with another source.
    # Returns true when the map is free of clashes.
    def verify_map
      counts = Hash.new(0)
      @rename_map.each_value { |destination| counts[destination] += 1 }
      dups = @rename_map.select { |_, destination| counts[destination] > 1 }

      width = dups.keys.map(&:length).max
      dups.each do |source, destination|
        puts "ERROR: output name clash: #{pad(source, width)} => #{destination}"
      end
      dups.empty?
    end

    # Returns the new name for +filename+, or nil when its issue number
    # cannot be parsed or is not part of the series.
    def transform(filename)
      num = issue_num_for(filename)
      return unless num

      issue = issues[num]
      return generate_output(filename, issue) if issue

      warning "#{filename}: unknown issue: #{num}"
    end

    # Memoised parse_issue_num, so a file is parsed (and warned about) once
    # even though both transform and max_num_length need its number.
    def issue_num_for(filename)
      @issue_nums ||= {}
      unless @issue_nums.key?(filename)
        @issue_nums[filename] = parse_issue_num(filename)
      end
      @issue_nums[filename]
    end

    # Extracts the normalised issue number following '#' in +filename+.
    #
    # The archive extension is removed first; otherwise "#001.cbz" would be
    # captured as "001.c". Leading zeros are dropped only when another digit
    # follows ("001" => "1", "0.5" stays "0.5"), a trailing dot is dropped,
    # and an empty result becomes "0". Returns nil (after a warning) when no
    # '#<number>' is present.
    def parse_issue_num(filename)
      base = filename.sub(/\.(?:#{EXTENSIONS.join('|')})\z/i, '')
      match = base.match(INPUT_FORMAT)
      return warning("#{filename}: invalid input format") unless match

      num = match[1].sub(/\A0+(?=\d)/, '').sub(/\.\z/, '')
      num.empty? ? '0' : num
    end

    # Fills OUTPUT_FORMAT from the issue's as_json fields plus the series
    # name and zero-padded number, keeps the original extension and replaces
    # characters that are unsafe in file names.
    def generate_output(filename, issue)
      fields = issue.as_json.merge(
        :series => issue.series.name,
        :padded_num => pad_num(issue.num)
      )
      sanitize((OUTPUT_FORMAT % fields) + File.extname(filename))
    end

    # Replaces characters that are problematic in file names.
    def sanitize(filename)
      filename.gsub(/[:]/, ' -')
        .gsub(/[\/\\<>]/, '-')
        .gsub(/[\?\*|"]/, '_')
    end

    # Right-pads +file+ with spaces to +max+ columns (default: the longest
    # file name in the directory).
    def pad(file, max=nil)
      max ||= files.map(&:length).max
      file.ljust(max.to_i)
    end

    # Left-pads +num+ with zeros to +max+ digits (default: the longest issue
    # number found among the files).
    def pad_num(num, max=nil)
      max ||= max_num_length
      num.to_s.rjust(max, '0')
    end

    # Sorted names of the regular files in @path that carry one of the
    # EXTENSIONS. The listing is memoised before any chdir happens, so later
    # calls never change directory again.
    def files
      @files ||= Dir.chdir(@path) do
        EXTENSIONS
          .map { |ext| Dir["*.#{ext}"] }.flatten
          .select { |f| File.file?(f) }.sort
      end
    end

    # Length of the longest parsed issue number; files without a number are
    # skipped rather than crashing on nil. 0 when no file has a number.
    def max_num_length
      @max_num ||= files.map { |f| issue_num_for(f) }.compact.map(&:length).max || 0
    end

    def series
      @series ||= CDB::Series.show(@cdb_id)
    end

    # Issues of the series keyed by their number string.
    def issues
      # Only act on issues - not TPB, HC, or anything else
      @issues ||= series.issues.each_with_object({}) do |issue, map|
        num = issue.num.to_s
        map[num] = issue if num.match(/\A#{ISSUE_NUM}\z/)
      end
    end

    # Prints a warning unless warnings are suppressed. Always returns nil so
    # callers can use it as their "nothing found" result.
    def warning(message)
      puts "WARNING: #{message}" unless @ignore
      nil
    end

  end
end
@@ -30,7 +30,7 @@ module CDB
30
30
  start_d, end_d = dates.split('-').map(&:strip)
31
31
 
32
32
  series = new(
33
- :cdb_id => id,
33
+ :cdb_id => id.to_i,
34
34
  :name => page.css('.page_headline').first.text.strip,
35
35
  :publisher => page.css('a[href^="publisher.php"]').first.text.strip,
36
36
  :imprint => (page.css('a[href^="imprint.php"]').first.text.strip rescue nil),
File without changes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdb-crawlr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-04 00:00:00.000000000Z
12
+ date: 2012-11-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &25288420 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,12 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *25288420
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
25
30
  description: cdb-crawlr is a Ruby gem and command-line tool for querying ComicBookDB.com
26
31
  email:
27
32
  - sgt.floydpepper@gmail.com
@@ -32,6 +37,7 @@ extra_rdoc_files: []
32
37
  files:
33
38
  - lib/cdb/cli.rb
34
39
  - lib/cdb/issue.rb
40
+ - lib/cdb/renamer.rb
35
41
  - lib/cdb/series.rb
36
42
  - lib/cdb/struct.rb
37
43
  - lib/cdb-crawlr.rb
@@ -56,9 +62,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
62
  version: '0'
57
63
  requirements: []
58
64
  rubyforge_project:
59
- rubygems_version: 1.8.10
65
+ rubygems_version: 1.8.24
60
66
  signing_key:
61
67
  specification_version: 3
62
68
  summary: Ruby gem and command-line tool for querying ComicBookDB.com
63
69
  test_files: []
64
- has_rdoc: