sq 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/sq +1 -0
- data/lib/sq.rb +41 -10
- data/lib/version.rb +1 -1
- data/tests/format_tests.rb +63 -0
- data/tests/query_tests.rb +4 -4
- metadata +3 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8dc704917f0cdb8d045e5307a0962f84e5e4d8e8
|
4
|
+
data.tar.gz: 7cd4146a0f2216b369c463f871c842689f0cdcb5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 595313fac368b5b7dd90dfd490e71b308f6e923086383e250b5d46b580cc52a20a02c1ebaf4dcb0f438f7582409fff7ea9a2e672ecd6ae03c78a72ed881701c6
|
7
|
+
data.tar.gz: 8f7d036a2b02df7931543eef834e2f340672b12905b3d4cefd555e2ac67d8448e7fc32464b77bc0cc77d92d2e256494ef4ab4fc829ca222167591f3c66c9a7be
|
data/bin/sq
CHANGED
@@ -16,6 +16,7 @@ EOS
|
|
16
16
|
|
17
17
|
opt :directory, 'Choose the output directory', :short => '-o', :type => :string, :default => '.'
|
18
18
|
opt :verbose, 'Print more info', :short => '-V', :type => :bool, :default => false
|
19
|
+
opt :format, 'Filename format', :short => '-F', :type => :string, :default => '%s.pdf'
|
19
20
|
end
|
20
21
|
|
21
22
|
if ARGV.empty?
|
data/lib/sq.rb
CHANGED
@@ -14,8 +14,9 @@ module SQ
|
|
14
14
|
"SQ/#{version} +github.com/bfontaine/sq"
|
15
15
|
end
|
16
16
|
|
17
|
-
# query an URI and return a list of PDFs. Each PDF is an hash with
|
18
|
-
# keys:
|
17
|
+
# query an URI and return a list of PDFs. Each PDF is an hash with three
|
18
|
+
# keys: +:uri+ is its absolute URI, +:name+ is its name (last part of its
|
19
|
+
# URI), and +:text+ is each link text.
|
19
20
|
# @uri [String]
|
20
21
|
# @regex [Regexp]
|
21
22
|
def query(uri, regex=/./)
|
@@ -24,23 +25,49 @@ module SQ
|
|
24
25
|
doc = Nokogiri::HTML(open(uri, 'User-Agent' => user_agent))
|
25
26
|
links = doc.css('a[href]')
|
26
27
|
|
27
|
-
uris = links.map { |a| URI.join(uri, a.attr('href')) }
|
28
|
-
uris.select! { |u| u.path =~ /\.pdf$/i && u.to_s =~ regex }
|
28
|
+
uris = links.map { |a| [a.text, URI.join(uri, a.attr('href'))] }
|
29
|
+
uris.select! { |_,u| u.path =~ /\.pdf$/i && u.to_s =~ regex }
|
29
30
|
|
30
|
-
uris.map do |u|
|
31
|
+
uris.map do |text,u|
|
31
32
|
{
|
32
33
|
:uri => u.to_s,
|
33
|
-
:name => u.path.split('/').last
|
34
|
+
:name => u.path.split('/').last,
|
35
|
+
:text => text
|
34
36
|
}
|
35
37
|
end
|
36
38
|
end
|
37
39
|
|
40
|
+
# Output a formatted filename.
|
41
|
+
# @doc [Hash] as returned from +SQ.query+.
|
42
|
+
# @fmt [String]
|
43
|
+
# @opts [Hash] additional info.
|
44
|
+
def format(doc, fmt='%s.pdf', opts={})
|
45
|
+
opts[:number] ||= 0
|
46
|
+
opts[:count] ||= 0
|
47
|
+
|
48
|
+
fmt.gsub(/%./) do |f|
|
49
|
+
case f
|
50
|
+
when '%n' then opts[:number]
|
51
|
+
when '%N' then opts[:number]+1
|
52
|
+
when '%c' then opts[:count]
|
53
|
+
when '%s' then doc[:name].sub(/\.pdf$/i, '')
|
54
|
+
when '%S' then doc[:text]
|
55
|
+
when '%_' then doc[:text].gsub(/\s+/, '_')
|
56
|
+
when '%-' then doc[:text].gsub(/\s+/, '-')
|
57
|
+
when '%%' then '%'
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
38
62
|
# query an URI and download all PDFs which match the regex. It returns the
|
39
63
|
# number of downloaded PDFs.
|
40
64
|
# @uri [String]
|
41
65
|
# @regex [Regexp] Regex to use to match PDF URIs
|
42
|
-
# @opts [Hash] Supported options:
|
43
|
-
# directory to use for output instead of the current one)
|
66
|
+
# @opts [Hash] Supported options: +:verbose+, +:directory+ (specify the
|
67
|
+
# directory to use for output instead of the current one),
|
68
|
+
# and +:format+ the output format. See the README for
|
69
|
+
# details.
|
70
|
+
#
|
44
71
|
def process(uri, regex=/./, opts={})
|
45
72
|
uris = self.query(uri, regex)
|
46
73
|
count = uris.count
|
@@ -50,6 +77,7 @@ module SQ
|
|
50
77
|
return 0 if uris.empty?
|
51
78
|
|
52
79
|
out = File.expand_path(opts[:directory] || '.')
|
80
|
+
fmt = opts[:format] || '%s.pdf'
|
53
81
|
|
54
82
|
unless Dir.exists?(out)
|
55
83
|
puts "-> mkdir #{out}" if opts[:verbose]
|
@@ -57,12 +85,15 @@ module SQ
|
|
57
85
|
end
|
58
86
|
|
59
87
|
p = ProgressBar.create(:title => "PDFs", :total => count)
|
88
|
+
i = 0
|
60
89
|
|
61
90
|
uris.each do |u|
|
62
|
-
|
91
|
+
name = format(u, fmt, {:number => i, :count => count})
|
92
|
+
i += 1
|
93
|
+
open("#{out}/#{name}", 'wb') do |f|
|
63
94
|
open(u[:uri], 'rb') do |resp|
|
64
95
|
f.write(resp.read)
|
65
|
-
p.log
|
96
|
+
p.log name if opts[:verbose]
|
66
97
|
p.increment
|
67
98
|
end
|
68
99
|
end
|
data/lib/version.rb
CHANGED
data/tests/format_tests.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
class SQ_format_test < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def setup
|
6
|
+
@foo = {
|
7
|
+
:text => 'Foo Bar',
|
8
|
+
:url => 'http://example.com/foo.pdf',
|
9
|
+
:name => 'foo.pdf'
|
10
|
+
}
|
11
|
+
|
12
|
+
@opts = { :count => 42, :number => 0 }
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_empty_format
|
16
|
+
assert_equal('', SQ.format(@foo, '', @opts))
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_format_litteral
|
20
|
+
assert_equal('%', SQ.format(@foo, '%%', @opts))
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_format_pdf_number0
|
24
|
+
assert_equal('0', SQ.format(@foo, '%n', @opts))
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_format_pdf_number1
|
28
|
+
assert_equal('1', SQ.format(@foo, '%N', @opts))
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_format_pdf_count
|
32
|
+
assert_equal('42', SQ.format(@foo, '%c', @opts))
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_format_pdf_name
|
36
|
+
assert_equal('foo', SQ.format(@foo, '%s', @opts))
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_format_link_text
|
40
|
+
assert_equal(@foo[:text], SQ.format(@foo, '%S', @opts))
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_format_link_text_underscores
|
44
|
+
assert_equal('Foo_Bar', SQ.format(@foo, '%_', @opts))
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_format_link_text_hyphens
|
48
|
+
assert_equal('Foo-Bar', SQ.format(@foo, '%-', @opts))
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_format_no_special
|
52
|
+
assert_equal('foo-qux', SQ.format(@foo, 'foo-qux', @opts))
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_format_multiple_percentsigns
|
56
|
+
assert_equal('%%%', SQ.format(@foo, '%%%%%%', @opts))
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_format_multiple_placeholders
|
60
|
+
assert_equal('0-1-Foo-Bar', SQ.format(@foo, '%n-%N-%-', @opts))
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
data/tests/query_tests.rb
CHANGED
@@ -27,8 +27,8 @@ class SQ_query_test < Test::Unit::TestCase
|
|
27
27
|
|
28
28
|
def test_full_match
|
29
29
|
pdfs = [
|
30
|
-
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'},
|
31
|
-
{:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf'}
|
30
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar1'},
|
31
|
+
{:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf', :text => 'bar2'}
|
32
32
|
]
|
33
33
|
assert_equal(pdfs, SQ.query("#{@url}/bar", /./))
|
34
34
|
assert_equal(pdfs, SQ.query("#{@http}/bar", /./))
|
@@ -36,14 +36,14 @@ class SQ_query_test < Test::Unit::TestCase
|
|
36
36
|
|
37
37
|
def test_absolute_path
|
38
38
|
pdfs = [
|
39
|
-
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
|
39
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar'}
|
40
40
|
]
|
41
41
|
assert_equal(pdfs, SQ.query("#{@url}/ab/so/lu/te", /./))
|
42
42
|
end
|
43
43
|
|
44
44
|
def test_malformed_html
|
45
45
|
pdfs = [
|
46
|
-
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
|
46
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar'}
|
47
47
|
]
|
48
48
|
assert_equal(pdfs, SQ.query("#{@url}/malformed1", /./))
|
49
49
|
assert_equal(pdfs, SQ.query("#{@url}/malformed2", /./))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Baptiste Fontaine
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '2.0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: colored
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ~>
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '1.2'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ~>
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '1.2'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: ruby-progressbar
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -131,6 +117,7 @@ extra_rdoc_files: []
|
|
131
117
|
files:
|
132
118
|
- lib/sq.rb
|
133
119
|
- lib/version.rb
|
120
|
+
- tests/format_tests.rb
|
134
121
|
- tests/process_tests.rb
|
135
122
|
- tests/query_tests.rb
|
136
123
|
- tests/tests.rb
|
@@ -160,6 +147,7 @@ signing_key:
|
|
160
147
|
specification_version: 4
|
161
148
|
summary: Bulk PDFs downloader
|
162
149
|
test_files:
|
150
|
+
- tests/format_tests.rb
|
163
151
|
- tests/process_tests.rb
|
164
152
|
- tests/query_tests.rb
|
165
153
|
- tests/tests.rb
|