sq 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/sq +1 -0
- data/lib/sq.rb +41 -10
- data/lib/version.rb +1 -1
- data/tests/format_tests.rb +63 -0
- data/tests/query_tests.rb +4 -4
- metadata +3 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8dc704917f0cdb8d045e5307a0962f84e5e4d8e8
|
4
|
+
data.tar.gz: 7cd4146a0f2216b369c463f871c842689f0cdcb5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 595313fac368b5b7dd90dfd490e71b308f6e923086383e250b5d46b580cc52a20a02c1ebaf4dcb0f438f7582409fff7ea9a2e672ecd6ae03c78a72ed881701c6
|
7
|
+
data.tar.gz: 8f7d036a2b02df7931543eef834e2f340672b12905b3d4cefd555e2ac67d8448e7fc32464b77bc0cc77d92d2e256494ef4ab4fc829ca222167591f3c66c9a7be
|
data/bin/sq
CHANGED
@@ -16,6 +16,7 @@ EOS
|
|
16
16
|
|
17
17
|
opt :directory, 'Choose the output directory', :short => '-o', :type => :string, :default => '.'
|
18
18
|
opt :verbose, 'Print more info', :short => '-V', :type => :bool, :default => false
|
19
|
+
opt :format, 'Filename format', :short => '-F', :type => :string, :default => '%s.pdf'
|
19
20
|
end
|
20
21
|
|
21
22
|
if ARGV.empty?
|
data/lib/sq.rb
CHANGED
@@ -14,8 +14,9 @@ module SQ
|
|
14
14
|
"SQ/#{version} +github.com/bfontaine/sq"
|
15
15
|
end
|
16
16
|
|
17
|
-
# query an URI and return a list of PDFs. Each PDF is an hash with
|
18
|
-
# keys:
|
17
|
+
# query an URI and return a list of PDFs. Each PDF is an hash with three
|
18
|
+
# keys: +:uri+ is its absolute URI, +:name+ is its name (last part of its
|
19
|
+
# URI), and +:text+ is each link text.
|
19
20
|
# @uri [String]
|
20
21
|
# @regex [Regexp]
|
21
22
|
def query(uri, regex=/./)
|
@@ -24,23 +25,49 @@ module SQ
|
|
24
25
|
doc = Nokogiri::HTML(open(uri, 'User-Agent' => user_agent))
|
25
26
|
links = doc.css('a[href]')
|
26
27
|
|
27
|
-
uris = links.map { |a| URI.join(uri, a.attr('href')) }
|
28
|
-
uris.select! { |u| u.path =~ /\.pdf$/i && u.to_s =~ regex }
|
28
|
+
uris = links.map { |a| [a.text, URI.join(uri, a.attr('href'))] }
|
29
|
+
uris.select! { |_,u| u.path =~ /\.pdf$/i && u.to_s =~ regex }
|
29
30
|
|
30
|
-
uris.map do |u|
|
31
|
+
uris.map do |text,u|
|
31
32
|
{
|
32
33
|
:uri => u.to_s,
|
33
|
-
:name => u.path.split('/').last
|
34
|
+
:name => u.path.split('/').last,
|
35
|
+
:text => text
|
34
36
|
}
|
35
37
|
end
|
36
38
|
end
|
37
39
|
|
40
|
+
# Output a formatted filename.
|
41
|
+
# @doc [Hash] as returned from +SQ.query+.
|
42
|
+
# @fmt [String]
|
43
|
+
# @opts [Hash] additional info.
|
44
|
+
def format(doc, fmt='%s.pdf', opts={})
|
45
|
+
opts[:number] ||= 0
|
46
|
+
opts[:count] ||= 0
|
47
|
+
|
48
|
+
fmt.gsub(/%./) do |f|
|
49
|
+
case f
|
50
|
+
when '%n' then opts[:number]
|
51
|
+
when '%N' then opts[:number]+1
|
52
|
+
when '%c' then opts[:count]
|
53
|
+
when '%s' then doc[:name].sub(/\.pdf$/i, '')
|
54
|
+
when '%S' then doc[:text]
|
55
|
+
when '%_' then doc[:text].gsub(/\s+/, '_')
|
56
|
+
when '%-' then doc[:text].gsub(/\s+/, '-')
|
57
|
+
when '%%' then '%'
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
38
62
|
# query an URI and download all PDFs which match the regex. It returns the
|
39
63
|
# number of downloaded PDFs.
|
40
64
|
# @uri [String]
|
41
65
|
# @regex [Regexp] Regex to use to match PDF URIs
|
42
|
-
# @opts [Hash] Supported options:
|
43
|
-
# directory to use for output instead of the current one)
|
66
|
+
# @opts [Hash] Supported options: +:verbose+, +:directory+ (specify the
|
67
|
+
# directory to use for output instead of the current one),
|
68
|
+
# and +:format+ the output format. See the README for
|
69
|
+
# details.
|
70
|
+
#
|
44
71
|
def process(uri, regex=/./, opts={})
|
45
72
|
uris = self.query(uri, regex)
|
46
73
|
count = uris.count
|
@@ -50,6 +77,7 @@ module SQ
|
|
50
77
|
return 0 if uris.empty?
|
51
78
|
|
52
79
|
out = File.expand_path(opts[:directory] || '.')
|
80
|
+
fmt = opts[:format] || '%s.pdf'
|
53
81
|
|
54
82
|
unless Dir.exists?(out)
|
55
83
|
puts "-> mkdir #{out}" if opts[:verbose]
|
@@ -57,12 +85,15 @@ module SQ
|
|
57
85
|
end
|
58
86
|
|
59
87
|
p = ProgressBar.create(:title => "PDFs", :total => count)
|
88
|
+
i = 0
|
60
89
|
|
61
90
|
uris.each do |u|
|
62
|
-
|
91
|
+
name = format(u, fmt, {:number => i, :count => count})
|
92
|
+
i += 1
|
93
|
+
open("#{out}/#{name}", 'wb') do |f|
|
63
94
|
open(u[:uri], 'rb') do |resp|
|
64
95
|
f.write(resp.read)
|
65
|
-
p.log
|
96
|
+
p.log name if opts[:verbose]
|
66
97
|
p.increment
|
67
98
|
end
|
68
99
|
end
|
data/lib/version.rb
CHANGED
data/tests/format_tests.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
class SQ_format_test < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def setup
|
6
|
+
@foo = {
|
7
|
+
:text => 'Foo Bar',
|
8
|
+
:url => 'http://example.com/foo.pdf',
|
9
|
+
:name => 'foo.pdf'
|
10
|
+
}
|
11
|
+
|
12
|
+
@opts = { :count => 42, :number => 0 }
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_empty_format
|
16
|
+
assert_equal('', SQ.format(@foo, '', @opts))
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_format_litteral
|
20
|
+
assert_equal('%', SQ.format(@foo, '%%', @opts))
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_format_pdf_number0
|
24
|
+
assert_equal('0', SQ.format(@foo, '%n', @opts))
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_format_pdf_number1
|
28
|
+
assert_equal('1', SQ.format(@foo, '%N', @opts))
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_format_pdf_count
|
32
|
+
assert_equal('42', SQ.format(@foo, '%c', @opts))
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_format_pdf_name
|
36
|
+
assert_equal('foo', SQ.format(@foo, '%s', @opts))
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_format_link_text
|
40
|
+
assert_equal(@foo[:text], SQ.format(@foo, '%S', @opts))
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_format_link_text_underscores
|
44
|
+
assert_equal('Foo_Bar', SQ.format(@foo, '%_', @opts))
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_format_link_text_hyphens
|
48
|
+
assert_equal('Foo-Bar', SQ.format(@foo, '%-', @opts))
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_format_no_special
|
52
|
+
assert_equal('foo-qux', SQ.format(@foo, 'foo-qux', @opts))
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_format_multiple_percentsigns
|
56
|
+
assert_equal('%%%', SQ.format(@foo, '%%%%%%', @opts))
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_format_multiple_placeholders
|
60
|
+
assert_equal('0-1-Foo-Bar', SQ.format(@foo, '%n-%N-%-', @opts))
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
data/tests/query_tests.rb
CHANGED
@@ -27,8 +27,8 @@ class SQ_query_test < Test::Unit::TestCase
|
|
27
27
|
|
28
28
|
def test_full_match
|
29
29
|
pdfs = [
|
30
|
-
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'},
|
31
|
-
{:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf'}
|
30
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar1'},
|
31
|
+
{:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf', :text => 'bar2'}
|
32
32
|
]
|
33
33
|
assert_equal(pdfs, SQ.query("#{@url}/bar", /./))
|
34
34
|
assert_equal(pdfs, SQ.query("#{@http}/bar", /./))
|
@@ -36,14 +36,14 @@ class SQ_query_test < Test::Unit::TestCase
|
|
36
36
|
|
37
37
|
def test_absolute_path
|
38
38
|
pdfs = [
|
39
|
-
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
|
39
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar'}
|
40
40
|
]
|
41
41
|
assert_equal(pdfs, SQ.query("#{@url}/ab/so/lu/te", /./))
|
42
42
|
end
|
43
43
|
|
44
44
|
def test_malformed_html
|
45
45
|
pdfs = [
|
46
|
-
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
|
46
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf', :text => 'bar'}
|
47
47
|
]
|
48
48
|
assert_equal(pdfs, SQ.query("#{@url}/malformed1", /./))
|
49
49
|
assert_equal(pdfs, SQ.query("#{@url}/malformed2", /./))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Baptiste Fontaine
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '2.0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: colored
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ~>
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '1.2'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ~>
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '1.2'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: ruby-progressbar
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -131,6 +117,7 @@ extra_rdoc_files: []
|
|
131
117
|
files:
|
132
118
|
- lib/sq.rb
|
133
119
|
- lib/version.rb
|
120
|
+
- tests/format_tests.rb
|
134
121
|
- tests/process_tests.rb
|
135
122
|
- tests/query_tests.rb
|
136
123
|
- tests/tests.rb
|
@@ -160,6 +147,7 @@ signing_key:
|
|
160
147
|
specification_version: 4
|
161
148
|
summary: Bulk PDFs downloader
|
162
149
|
test_files:
|
150
|
+
- tests/format_tests.rb
|
163
151
|
- tests/process_tests.rb
|
164
152
|
- tests/query_tests.rb
|
165
153
|
- tests/tests.rb
|