sq 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 12b002f2a938d9096d85360736c3949d8ca60935
4
- data.tar.gz: 023f25cd005f28611644a9b6cecc89f703a50b61
3
+ metadata.gz: 8dc704917f0cdb8d045e5307a0962f84e5e4d8e8
4
+ data.tar.gz: 7cd4146a0f2216b369c463f871c842689f0cdcb5
5
5
  SHA512:
6
- metadata.gz: 34ad6b35d57baa6b039ccca9a245f1675205b50a82317645414fafc91742c43c5a14ac230f36cdffe4103d8e06a6e452f04d77e19a54f3b1363a996e405f5d99
7
- data.tar.gz: deda0c2bf3f2168db8ea07b52b348b310a4b6d810f4d1605ac41f3506f545648de6f21cc85882742ec2fcab273417c42486b082fb290ef6488e3c64bf7244566
6
+ metadata.gz: 595313fac368b5b7dd90dfd490e71b308f6e923086383e250b5d46b580cc52a20a02c1ebaf4dcb0f438f7582409fff7ea9a2e672ecd6ae03c78a72ed881701c6
7
+ data.tar.gz: 8f7d036a2b02df7931543eef834e2f340672b12905b3d4cefd555e2ac67d8448e7fc32464b77bc0cc77d92d2e256494ef4ab4fc829ca222167591f3c66c9a7be
data/bin/sq CHANGED
@@ -16,6 +16,7 @@ EOS
16
16
 
17
17
  opt :directory, 'Choose the output directory', :short => '-o', :type => :string, :default => '.'
18
18
  opt :verbose, 'Print more info', :short => '-V', :type => :bool, :default => false
19
+ opt :format, 'Filename format', :short => '-F', :type => :string, :default => '%s.pdf'
19
20
  end
20
21
 
21
22
  if ARGV.empty?
data/lib/sq.rb CHANGED
@@ -14,8 +14,9 @@ module SQ
14
14
  "SQ/#{version} +github.com/bfontaine/sq"
15
15
  end
16
16
 
17
- # query an URI and return a list of PDFs. Each PDF is an hash with two
18
- # keys: :uri is its absolute URI, :name is its name (last part of its URI).
17
+ # query a URI and return a list of PDFs. Each PDF is a hash with three
18
+ # keys: +:uri+ is its absolute URI, +:name+ is its name (last part of its
19
+ # URI), and +:text+ is the text of its link.
19
20
  # @uri [String]
20
21
  # @regex [Regexp]
21
22
  def query(uri, regex=/./)
@@ -24,23 +25,49 @@ module SQ
24
25
  doc = Nokogiri::HTML(open(uri, 'User-Agent' => user_agent))
25
26
  links = doc.css('a[href]')
26
27
 
27
- uris = links.map { |a| URI.join(uri, a.attr('href')) }
28
- uris.select! { |u| u.path =~ /\.pdf$/i && u.to_s =~ regex }
28
+ uris = links.map { |a| [a.text, URI.join(uri, a.attr('href'))] }
29
+ uris.select! { |_,u| u.path =~ /\.pdf$/i && u.to_s =~ regex }
29
30
 
30
- uris.map do |u|
31
+ uris.map do |text,u|
31
32
  {
32
33
  :uri => u.to_s,
33
- :name => u.path.split('/').last
34
+ :name => u.path.split('/').last,
35
+ :text => text
34
36
  }
35
37
  end
36
38
  end
37
39
 
40
+ # Return a formatted filename.
41
+ # @doc [Hash] one PDF entry from the list returned by +SQ.query+.
42
+ # @fmt [String]
43
+ # @opts [Hash] additional info.
44
+ def format(doc, fmt='%s.pdf', opts={})
45
+ opts[:number] ||= 0
46
+ opts[:count] ||= 0
47
+
48
+ fmt.gsub(/%./) do |f|
49
+ case f
50
+ when '%n' then opts[:number]
51
+ when '%N' then opts[:number]+1
52
+ when '%c' then opts[:count]
53
+ when '%s' then doc[:name].sub(/\.pdf$/i, '')
54
+ when '%S' then doc[:text]
55
+ when '%_' then doc[:text].gsub(/\s+/, '_')
56
+ when '%-' then doc[:text].gsub(/\s+/, '-')
57
+ when '%%' then '%'
58
+ end
59
+ end
60
+ end
61
+
38
62
  # query a URI and download all PDFs which match the regex. It returns the
39
63
  # number of downloaded PDFs.
40
64
  # @uri [String]
41
65
  # @regex [Regexp] Regex to use to match PDF URIs
42
- # @opts [Hash] Supported options: :verbose, :directory (specify the
43
- # directory to use for output instead of the current one)
66
+ # @opts [Hash] Supported options: +:verbose+, +:directory+ (specify the
67
+ # directory to use for output instead of the current one),
68
+ # and +:format+ (the output filename format). See the README for
69
+ # details.
70
+ #
44
71
  def process(uri, regex=/./, opts={})
45
72
  uris = self.query(uri, regex)
46
73
  count = uris.count
@@ -50,6 +77,7 @@ module SQ
50
77
  return 0 if uris.empty?
51
78
 
52
79
  out = File.expand_path(opts[:directory] || '.')
80
+ fmt = opts[:format] || '%s.pdf'
53
81
 
54
82
  unless Dir.exists?(out)
55
83
  puts "-> mkdir #{out}" if opts[:verbose]
@@ -57,12 +85,15 @@ module SQ
57
85
  end
58
86
 
59
87
  p = ProgressBar.create(:title => "PDFs", :total => count)
88
+ i = 0
60
89
 
61
90
  uris.each do |u|
62
- open("#{out}/#{u[:name]}", 'wb') do |f|
91
+ name = format(u, fmt, {:number => i, :count => count})
92
+ i += 1
93
+ open("#{out}/#{name}", 'wb') do |f|
63
94
  open(u[:uri], 'rb') do |resp|
64
95
  f.write(resp.read)
65
- p.log u[:name] if opts[:verbose]
96
+ p.log name if opts[:verbose]
66
97
  p.increment
67
98
  end
68
99
  end
data/lib/version.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  module SQ
4
4
  class << self
5
5
  def version
6
- '0.1.0'
6
+ '0.1.1'
7
7
  end
8
8
  end
9
9
  end
@@ -0,0 +1,63 @@
1
+ # -*- coding: UTF-8 -*-
2
+
3
+ class SQ_format_test < Test::Unit::TestCase
4
+
5
+ def setup
6
+ @foo = {
7
+ :text => 'Foo Bar',
8
+ :url => 'http://example.com/foo.pdf',
9
+ :name => 'foo.pdf'
10
+ }
11
+
12
+ @opts = { :count => 42, :number => 0 }
13
+ end
14
+
15
+ def test_empty_format
16
+ assert_equal('', SQ.format(@foo, '', @opts))
17
+ end
18
+
19
+ def test_format_litteral
20
+ assert_equal('%', SQ.format(@foo, '%%', @opts))
21
+ end
22
+
23
+ def test_format_pdf_number0
24
+ assert_equal('0', SQ.format(@foo, '%n', @opts))
25
+ end
26
+
27
+ def test_format_pdf_number1
28
+ assert_equal('1', SQ.format(@foo, '%N', @opts))
29
+ end
30
+
31
+ def test_format_pdf_count
32
+ assert_equal('42', SQ.format(@foo, '%c', @opts))
33
+ end
34
+
35
+ def test_format_pdf_name
36
+ assert_equal('foo', SQ.format(@foo, '%s', @opts))
37
+ end
38
+
39
+ def test_format_link_text
40
+ assert_equal(@foo[:text], SQ.format(@foo, '%S', @opts))
41
+ end
42
+
43
+ def test_format_link_text_underscores
44
+ assert_equal('Foo_Bar', SQ.format(@foo, '%_', @opts))
45
+ end
46
+
47
+ def test_format_link_text_hyphens
48
+ assert_equal('Foo-Bar', SQ.format(@foo, '%-', @opts))
49
+ end
50
+
51
+ def test_format_no_special
52
+ assert_equal('foo-qux', SQ.format(@foo, 'foo-qux', @opts))
53
+ end
54
+
55
+ def test_format_multiple_percentsigns
56
+ assert_equal('%%%', SQ.format(@foo, '%%%%%%', @opts))
57
+ end
58
+
59
+ def test_format_multiple_placeholders
60
+ assert_equal('0-1-Foo-Bar', SQ.format(@foo, '%n-%N-%-', @opts))
61
+ end
62
+
63
+ end
data/tests/query_tests.rb CHANGED
@@ -27,8 +27,8 @@ class SQ_query_test < Test::Unit::TestCase
27
27
 
28
28
  def test_full_match
29
29
  pdfs = [
30
- {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'},
31
- {:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf'}
30
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar1'},
31
+ {:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf', :text => 'bar2'}
32
32
  ]
33
33
  assert_equal(pdfs, SQ.query("#{@url}/bar", /./))
34
34
  assert_equal(pdfs, SQ.query("#{@http}/bar", /./))
@@ -36,14 +36,14 @@ class SQ_query_test < Test::Unit::TestCase
36
36
 
37
37
  def test_absolute_path
38
38
  pdfs = [
39
- {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
39
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar'}
40
40
  ]
41
41
  assert_equal(pdfs, SQ.query("#{@url}/ab/so/lu/te", /./))
42
42
  end
43
43
 
44
44
  def test_malformed_html
45
45
  pdfs = [
46
- {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
46
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar'}
47
47
  ]
48
48
  assert_equal(pdfs, SQ.query("#{@url}/malformed1", /./))
49
49
  assert_equal(pdfs, SQ.query("#{@url}/malformed2", /./))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sq
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Baptiste Fontaine
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: '2.0'
41
- - !ruby/object:Gem::Dependency
42
- name: colored
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ~>
46
- - !ruby/object:Gem::Version
47
- version: '1.2'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ~>
53
- - !ruby/object:Gem::Version
54
- version: '1.2'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: ruby-progressbar
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -131,6 +117,7 @@ extra_rdoc_files: []
131
117
  files:
132
118
  - lib/sq.rb
133
119
  - lib/version.rb
120
+ - tests/format_tests.rb
134
121
  - tests/process_tests.rb
135
122
  - tests/query_tests.rb
136
123
  - tests/tests.rb
@@ -160,6 +147,7 @@ signing_key:
160
147
  specification_version: 4
161
148
  summary: Bulk PDFs downloader
162
149
  test_files:
150
+ - tests/format_tests.rb
163
151
  - tests/process_tests.rb
164
152
  - tests/query_tests.rb
165
153
  - tests/tests.rb