sq 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/sq.rb +20 -4
- data/lib/version.rb +1 -1
- data/tests/process_tests.rb +69 -0
- data/tests/query_tests.rb +53 -0
- data/tests/tests.rb +6 -1
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 12b002f2a938d9096d85360736c3949d8ca60935
|
4
|
+
data.tar.gz: 023f25cd005f28611644a9b6cecc89f703a50b61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34ad6b35d57baa6b039ccca9a245f1675205b50a82317645414fafc91742c43c5a14ac230f36cdffe4103d8e06a6e452f04d77e19a54f3b1363a996e405f5d99
|
7
|
+
data.tar.gz: deda0c2bf3f2168db8ea07b52b348b310a4b6d810f4d1605ac41f3506f545648de6f21cc85882742ec2fcab273417c42486b082fb290ef6488e3c64bf7244566
|
data/lib/sq.rb
CHANGED
@@ -4,14 +4,20 @@ require 'uri'
|
|
4
4
|
require 'open-uri'
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'fileutils'
|
7
|
+
require 'ruby-progressbar'
|
7
8
|
require File.expand_path(File.dirname __FILE__) + '/version'
|
8
9
|
|
9
10
|
module SQ
|
10
11
|
class << self
|
12
|
+
# return the user-agent used by SQ
|
11
13
|
def user_agent
|
12
14
|
"SQ/#{version} +github.com/bfontaine/sq"
|
13
15
|
end
|
14
16
|
|
17
|
+
# query an URI and return a list of PDFs. Each PDF is an hash with two
|
18
|
+
# keys: :uri is its absolute URI, :name is its name (last part of its URI).
|
19
|
+
# @uri [String]
|
20
|
+
# @regex [Regexp]
|
15
21
|
def query(uri, regex=/./)
|
16
22
|
uri = 'http://' + uri unless uri =~ /^https?:\/\//
|
17
23
|
|
@@ -29,10 +35,17 @@ module SQ
|
|
29
35
|
end
|
30
36
|
end
|
31
37
|
|
38
|
+
# query an URI and download all PDFs which match the regex. It returns the
|
39
|
+
# number of downloaded PDFs.
|
40
|
+
# @uri [String]
|
41
|
+
# @regex [Regexp] Regex to use to match PDF URIs
|
42
|
+
# @opts [Hash] Supported options: :verbose, :directory (specify the
|
43
|
+
# directory to use for output instead of the current one)
|
32
44
|
def process(uri, regex=/./, opts={})
|
33
45
|
uris = self.query(uri, regex)
|
46
|
+
count = uris.count
|
34
47
|
|
35
|
-
puts "Found #{
|
48
|
+
puts "Found #{count} PDFs:" if opts[:verbose]
|
36
49
|
|
37
50
|
return 0 if uris.empty?
|
38
51
|
|
@@ -40,17 +53,20 @@ module SQ
|
|
40
53
|
|
41
54
|
unless Dir.exists?(out)
|
42
55
|
puts "-> mkdir #{out}" if opts[:verbose]
|
43
|
-
|
56
|
+
FileUtils.mkdir_p(out)
|
44
57
|
end
|
45
58
|
|
59
|
+
p = ProgressBar.create(:title => "PDFs", :total => count)
|
60
|
+
|
46
61
|
uris.each do |u|
|
47
|
-
puts "Downloading #{u[:name]}..." if opts[:verbose]
|
48
62
|
open("#{out}/#{u[:name]}", 'wb') do |f|
|
49
63
|
open(u[:uri], 'rb') do |resp|
|
50
64
|
f.write(resp.read)
|
65
|
+
p.log u[:name] if opts[:verbose]
|
66
|
+
p.increment
|
51
67
|
end
|
52
68
|
end
|
53
|
-
end
|
69
|
+
end.count
|
54
70
|
end
|
55
71
|
end
|
56
72
|
end
|
data/lib/version.rb
CHANGED
data/tests/process_tests.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
require 'tmpdir'
|
4
|
+
require 'fileutils'
|
5
|
+
require File.dirname(__FILE__) + '/fake_responses'
|
6
|
+
|
7
|
+
class SQ_process_test < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def setup
|
10
|
+
@url = 'example.com'
|
11
|
+
@http = "http://#{@url}"
|
12
|
+
|
13
|
+
@prev_path = Dir.pwd
|
14
|
+
@test_path = Dir.mktmpdir('sq-tests')
|
15
|
+
Dir.chdir @test_path
|
16
|
+
end
|
17
|
+
|
18
|
+
def teardown
|
19
|
+
Dir.chdir @prev_path
|
20
|
+
FileUtils.rm_rf @test_path
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_no_links
|
24
|
+
assert_equal(0, SQ.process("#{@url}/no-links", /./))
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_one_link_no_dir
|
28
|
+
assert_equal(1, SQ.process("#{@url}/one", /./))
|
29
|
+
assert(File.exists?('bar.pdf'), 'bar.pdf exists')
|
30
|
+
assert_equal('%PDFbar', File.read('bar.pdf'))
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_one_link_existing_dir
|
34
|
+
dir = 'foo'
|
35
|
+
Dir.mkdir dir
|
36
|
+
assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
|
37
|
+
assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
|
38
|
+
assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_one_link_existing_subdir
|
42
|
+
dir = 'foo/bar/qux'
|
43
|
+
FileUtils.mkdir_p dir
|
44
|
+
assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
|
45
|
+
assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
|
46
|
+
assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_one_link_unexisting_subdir
|
50
|
+
dir = 'foo/bar/qux'
|
51
|
+
assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
|
52
|
+
assert(Dir.exists?(dir), "#{dir} exists")
|
53
|
+
assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
|
54
|
+
assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_two_links_unexisting_subdir
|
58
|
+
dir = 'foo/bar/qux'
|
59
|
+
assert_equal(2, SQ.process("#{@url}/two", /./, :directory => dir))
|
60
|
+
assert(Dir.exists?(dir), "#{dir} exists")
|
61
|
+
|
62
|
+
assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
|
63
|
+
assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
|
64
|
+
|
65
|
+
assert(File.exists?("#{dir}/foo.pdf"), "#{dir}/foo.pdf exists")
|
66
|
+
assert_equal('%PDFfoo', File.read("#{dir}/foo.pdf"))
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
data/tests/query_tests.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/fake_responses'
|
4
|
+
|
5
|
+
class SQ_query_test < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def setup
|
8
|
+
@url = 'example.com'
|
9
|
+
@http = "http://#{@url}"
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_no_links
|
13
|
+
assert_equal([], SQ.query("#{@url}/no-links", /./))
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_no_href
|
17
|
+
assert_equal([], SQ.query("#{@url}/no-href", /./))
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_no_pdfs
|
21
|
+
assert_equal([], SQ.query("#{@url}/no-pdf", /./))
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_no_match
|
25
|
+
assert_equal([], SQ.query("#{@url}/bar", /foo/))
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_full_match
|
29
|
+
pdfs = [
|
30
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'},
|
31
|
+
{:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf'}
|
32
|
+
]
|
33
|
+
assert_equal(pdfs, SQ.query("#{@url}/bar", /./))
|
34
|
+
assert_equal(pdfs, SQ.query("#{@http}/bar", /./))
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_absolute_path
|
38
|
+
pdfs = [
|
39
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
|
40
|
+
]
|
41
|
+
assert_equal(pdfs, SQ.query("#{@url}/ab/so/lu/te", /./))
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_malformed_html
|
45
|
+
pdfs = [
|
46
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
|
47
|
+
]
|
48
|
+
assert_equal(pdfs, SQ.query("#{@url}/malformed1", /./))
|
49
|
+
assert_equal(pdfs, SQ.query("#{@url}/malformed2", /./))
|
50
|
+
assert_equal(pdfs, SQ.query("#{@url}/malformed3", /./))
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
data/tests/tests.rb
CHANGED
@@ -16,12 +16,17 @@ end
|
|
16
16
|
|
17
17
|
class SQTests < Test::Unit::TestCase
|
18
18
|
|
19
|
-
# ==
|
19
|
+
# == SQ#version == #
|
20
20
|
|
21
21
|
def test_sq_version
|
22
22
|
assert(SQ.version =~ /^\d+\.\d+\.\d+/)
|
23
23
|
end
|
24
24
|
|
25
|
+
# == SQ#user_agent == #
|
26
|
+
def test_sq_ua
|
27
|
+
assert(SQ.user_agent =~ /^SQ\/\d+\.\d+\.\d+/)
|
28
|
+
end
|
29
|
+
|
25
30
|
end
|
26
31
|
|
27
32
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Baptiste Fontaine
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '1.2'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: ruby-progressbar
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: simplecov
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -117,6 +131,8 @@ extra_rdoc_files: []
|
|
117
131
|
files:
|
118
132
|
- lib/sq.rb
|
119
133
|
- lib/version.rb
|
134
|
+
- tests/process_tests.rb
|
135
|
+
- tests/query_tests.rb
|
120
136
|
- tests/tests.rb
|
121
137
|
- bin/sq
|
122
138
|
homepage: https://github.com/bfontaine/sq
|
@@ -144,4 +160,6 @@ signing_key:
|
|
144
160
|
specification_version: 4
|
145
161
|
summary: Bulk PDFs downloader
|
146
162
|
test_files:
|
163
|
+
- tests/process_tests.rb
|
164
|
+
- tests/query_tests.rb
|
147
165
|
- tests/tests.rb
|