sq 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 12b002f2a938d9096d85360736c3949d8ca60935
4
- data.tar.gz: 023f25cd005f28611644a9b6cecc89f703a50b61
3
+ metadata.gz: 8dc704917f0cdb8d045e5307a0962f84e5e4d8e8
4
+ data.tar.gz: 7cd4146a0f2216b369c463f871c842689f0cdcb5
5
5
  SHA512:
6
- metadata.gz: 34ad6b35d57baa6b039ccca9a245f1675205b50a82317645414fafc91742c43c5a14ac230f36cdffe4103d8e06a6e452f04d77e19a54f3b1363a996e405f5d99
7
- data.tar.gz: deda0c2bf3f2168db8ea07b52b348b310a4b6d810f4d1605ac41f3506f545648de6f21cc85882742ec2fcab273417c42486b082fb290ef6488e3c64bf7244566
6
+ metadata.gz: 595313fac368b5b7dd90dfd490e71b308f6e923086383e250b5d46b580cc52a20a02c1ebaf4dcb0f438f7582409fff7ea9a2e672ecd6ae03c78a72ed881701c6
7
+ data.tar.gz: 8f7d036a2b02df7931543eef834e2f340672b12905b3d4cefd555e2ac67d8448e7fc32464b77bc0cc77d92d2e256494ef4ab4fc829ca222167591f3c66c9a7be
data/bin/sq CHANGED
@@ -16,6 +16,7 @@ EOS
16
16
 
17
17
  opt :directory, 'Choose the output directory', :short => '-o', :type => :string, :default => '.'
18
18
  opt :verbose, 'Print more info', :short => '-V', :type => :bool, :default => false
19
+ opt :format, 'Filename format', :short => '-F', :type => :string, :default => '%s.pdf'
19
20
  end
20
21
 
21
22
  if ARGV.empty?
data/lib/sq.rb CHANGED
@@ -14,8 +14,9 @@ module SQ
14
14
  "SQ/#{version} +github.com/bfontaine/sq"
15
15
  end
16
16
 
17
- # query an URI and return a list of PDFs. Each PDF is an hash with two
18
- # keys: :uri is its absolute URI, :name is its name (last part of its URI).
17
+ # query a URI and return a list of PDFs. Each PDF is a hash with three
18
+ # keys: +:uri+ is its absolute URI, +:name+ is its name (last part of its
19
+ # URI), and +:text+ is the text of the link pointing to it.
19
20
  # @uri [String]
20
21
  # @regex [Regexp]
21
22
  def query(uri, regex=/./)
@@ -24,23 +25,49 @@ module SQ
24
25
  doc = Nokogiri::HTML(open(uri, 'User-Agent' => user_agent))
25
26
  links = doc.css('a[href]')
26
27
 
27
- uris = links.map { |a| URI.join(uri, a.attr('href')) }
28
- uris.select! { |u| u.path =~ /\.pdf$/i && u.to_s =~ regex }
28
+ uris = links.map { |a| [a.text, URI.join(uri, a.attr('href'))] }
29
+ uris.select! { |_,u| u.path =~ /\.pdf$/i && u.to_s =~ regex }
29
30
 
30
- uris.map do |u|
31
+ uris.map do |text,u|
31
32
  {
32
33
  :uri => u.to_s,
33
- :name => u.path.split('/').last
34
+ :name => u.path.split('/').last,
35
+ :text => text
34
36
  }
35
37
  end
36
38
  end
37
39
 
40
+ # Output a formatted filename.
41
+ # @doc [Hash] as returned from +SQ.query+.
42
+ # @fmt [String]
43
+ # @opts [Hash] additional info.
44
+ def format(doc, fmt='%s.pdf', opts={})
45
+ opts[:number] ||= 0
46
+ opts[:count] ||= 0
47
+
48
+ fmt.gsub(/%./) do |f|
49
+ case f
50
+ when '%n' then opts[:number]
51
+ when '%N' then opts[:number]+1
52
+ when '%c' then opts[:count]
53
+ when '%s' then doc[:name].sub(/\.pdf$/i, '')
54
+ when '%S' then doc[:text]
55
+ when '%_' then doc[:text].gsub(/\s+/, '_')
56
+ when '%-' then doc[:text].gsub(/\s+/, '-')
57
+ when '%%' then '%'
58
+ end
59
+ end
60
+ end
61
+
38
62
  # query a URI and download all PDFs which match the regex. It returns the
39
63
  # number of downloaded PDFs.
40
64
  # @uri [String]
41
65
  # @regex [Regexp] Regex to use to match PDF URIs
42
- # @opts [Hash] Supported options: :verbose, :directory (specify the
43
- # directory to use for output instead of the current one)
66
+ # @opts [Hash] Supported options: +:verbose+, +:directory+ (specify the
67
+ # directory to use for output instead of the current one),
68
+ # and +:format+ (the output filename format). See the README for
69
+ # details.
70
+ #
44
71
  def process(uri, regex=/./, opts={})
45
72
  uris = self.query(uri, regex)
46
73
  count = uris.count
@@ -50,6 +77,7 @@ module SQ
50
77
  return 0 if uris.empty?
51
78
 
52
79
  out = File.expand_path(opts[:directory] || '.')
80
+ fmt = opts[:format] || '%s.pdf'
53
81
 
54
82
  unless Dir.exists?(out)
55
83
  puts "-> mkdir #{out}" if opts[:verbose]
@@ -57,12 +85,15 @@ module SQ
57
85
  end
58
86
 
59
87
  p = ProgressBar.create(:title => "PDFs", :total => count)
88
+ i = 0
60
89
 
61
90
  uris.each do |u|
62
- open("#{out}/#{u[:name]}", 'wb') do |f|
91
+ name = format(u, fmt, {:number => i, :count => count})
92
+ i += 1
93
+ open("#{out}/#{name}", 'wb') do |f|
63
94
  open(u[:uri], 'rb') do |resp|
64
95
  f.write(resp.read)
65
- p.log u[:name] if opts[:verbose]
96
+ p.log name if opts[:verbose]
66
97
  p.increment
67
98
  end
68
99
  end
data/lib/version.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  module SQ
4
4
  class << self
5
5
  def version
6
- '0.1.0'
6
+ '0.1.1'
7
7
  end
8
8
  end
9
9
  end
@@ -0,0 +1,63 @@
1
+ # -*- coding: UTF-8 -*-
2
+
3
+ class SQ_format_test < Test::Unit::TestCase
4
+
5
+ def setup
6
+ @foo = {
7
+ :text => 'Foo Bar',
8
+ :url => 'http://example.com/foo.pdf',
9
+ :name => 'foo.pdf'
10
+ }
11
+
12
+ @opts = { :count => 42, :number => 0 }
13
+ end
14
+
15
+ def test_empty_format
16
+ assert_equal('', SQ.format(@foo, '', @opts))
17
+ end
18
+
19
+ def test_format_litteral
20
+ assert_equal('%', SQ.format(@foo, '%%', @opts))
21
+ end
22
+
23
+ def test_format_pdf_number0
24
+ assert_equal('0', SQ.format(@foo, '%n', @opts))
25
+ end
26
+
27
+ def test_format_pdf_number1
28
+ assert_equal('1', SQ.format(@foo, '%N', @opts))
29
+ end
30
+
31
+ def test_format_pdf_count
32
+ assert_equal('42', SQ.format(@foo, '%c', @opts))
33
+ end
34
+
35
+ def test_format_pdf_name
36
+ assert_equal('foo', SQ.format(@foo, '%s', @opts))
37
+ end
38
+
39
+ def test_format_link_text
40
+ assert_equal(@foo[:text], SQ.format(@foo, '%S', @opts))
41
+ end
42
+
43
+ def test_format_link_text_underscores
44
+ assert_equal('Foo_Bar', SQ.format(@foo, '%_', @opts))
45
+ end
46
+
47
+ def test_format_link_text_hyphens
48
+ assert_equal('Foo-Bar', SQ.format(@foo, '%-', @opts))
49
+ end
50
+
51
+ def test_format_no_special
52
+ assert_equal('foo-qux', SQ.format(@foo, 'foo-qux', @opts))
53
+ end
54
+
55
+ def test_format_multiple_percentsigns
56
+ assert_equal('%%%', SQ.format(@foo, '%%%%%%', @opts))
57
+ end
58
+
59
+ def test_format_multiple_placeholders
60
+ assert_equal('0-1-Foo-Bar', SQ.format(@foo, '%n-%N-%-', @opts))
61
+ end
62
+
63
+ end
data/tests/query_tests.rb CHANGED
@@ -27,8 +27,8 @@ class SQ_query_test < Test::Unit::TestCase
27
27
 
28
28
  def test_full_match
29
29
  pdfs = [
30
- {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'},
31
- {:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf'}
30
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar1'},
31
+ {:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf', :text => 'bar2'}
32
32
  ]
33
33
  assert_equal(pdfs, SQ.query("#{@url}/bar", /./))
34
34
  assert_equal(pdfs, SQ.query("#{@http}/bar", /./))
@@ -36,14 +36,14 @@ class SQ_query_test < Test::Unit::TestCase
36
36
 
37
37
  def test_absolute_path
38
38
  pdfs = [
39
- {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
39
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar'}
40
40
  ]
41
41
  assert_equal(pdfs, SQ.query("#{@url}/ab/so/lu/te", /./))
42
42
  end
43
43
 
44
44
  def test_malformed_html
45
45
  pdfs = [
46
- {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
46
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar'}
47
47
  ]
48
48
  assert_equal(pdfs, SQ.query("#{@url}/malformed1", /./))
49
49
  assert_equal(pdfs, SQ.query("#{@url}/malformed2", /./))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sq
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Baptiste Fontaine
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: '2.0'
41
- - !ruby/object:Gem::Dependency
42
- name: colored
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ~>
46
- - !ruby/object:Gem::Version
47
- version: '1.2'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ~>
53
- - !ruby/object:Gem::Version
54
- version: '1.2'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: ruby-progressbar
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -131,6 +117,7 @@ extra_rdoc_files: []
131
117
  files:
132
118
  - lib/sq.rb
133
119
  - lib/version.rb
120
+ - tests/format_tests.rb
134
121
  - tests/process_tests.rb
135
122
  - tests/query_tests.rb
136
123
  - tests/tests.rb
@@ -160,6 +147,7 @@ signing_key:
160
147
  specification_version: 4
161
148
  summary: Bulk PDFs downloader
162
149
  test_files:
150
+ - tests/format_tests.rb
163
151
  - tests/process_tests.rb
164
152
  - tests/query_tests.rb
165
153
  - tests/tests.rb