sq 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sq.rb +20 -4
- data/lib/version.rb +1 -1
- data/tests/process_tests.rb +69 -0
- data/tests/query_tests.rb +53 -0
- data/tests/tests.rb +6 -1
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 12b002f2a938d9096d85360736c3949d8ca60935
|
4
|
+
data.tar.gz: 023f25cd005f28611644a9b6cecc89f703a50b61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34ad6b35d57baa6b039ccca9a245f1675205b50a82317645414fafc91742c43c5a14ac230f36cdffe4103d8e06a6e452f04d77e19a54f3b1363a996e405f5d99
|
7
|
+
data.tar.gz: deda0c2bf3f2168db8ea07b52b348b310a4b6d810f4d1605ac41f3506f545648de6f21cc85882742ec2fcab273417c42486b082fb290ef6488e3c64bf7244566
|
data/lib/sq.rb
CHANGED
@@ -4,14 +4,20 @@ require 'uri'
|
|
4
4
|
require 'open-uri'
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'fileutils'
|
7
|
+
require 'ruby-progressbar'
|
7
8
|
require File.expand_path(File.dirname __FILE__) + '/version'
|
8
9
|
|
9
10
|
module SQ
|
10
11
|
class << self
|
12
|
+
# return the user-agent used by SQ
|
11
13
|
def user_agent
|
12
14
|
"SQ/#{version} +github.com/bfontaine/sq"
|
13
15
|
end
|
14
16
|
|
17
|
+
# query an URI and return a list of PDFs. Each PDF is an hash with two
|
18
|
+
# keys: :uri is its absolute URI, :name is its name (last part of its URI).
|
19
|
+
# @uri [String]
|
20
|
+
# @regex [Regexp]
|
15
21
|
def query(uri, regex=/./)
|
16
22
|
uri = 'http://' + uri unless uri =~ /^https?:\/\//
|
17
23
|
|
@@ -29,10 +35,17 @@ module SQ
|
|
29
35
|
end
|
30
36
|
end
|
31
37
|
|
38
|
+
# query an URI and download all PDFs which match the regex. It returns the
|
39
|
+
# number of downloaded PDFs.
|
40
|
+
# @uri [String]
|
41
|
+
# @regex [Regexp] Regex to use to match PDF URIs
|
42
|
+
# @opts [Hash] Supported options: :verbose, :directory (specify the
|
43
|
+
# directory to use for output instead of the current one)
|
32
44
|
def process(uri, regex=/./, opts={})
|
33
45
|
uris = self.query(uri, regex)
|
46
|
+
count = uris.count
|
34
47
|
|
35
|
-
puts "Found #{
|
48
|
+
puts "Found #{count} PDFs:" if opts[:verbose]
|
36
49
|
|
37
50
|
return 0 if uris.empty?
|
38
51
|
|
@@ -40,17 +53,20 @@ module SQ
|
|
40
53
|
|
41
54
|
unless Dir.exists?(out)
|
42
55
|
puts "-> mkdir #{out}" if opts[:verbose]
|
43
|
-
|
56
|
+
FileUtils.mkdir_p(out)
|
44
57
|
end
|
45
58
|
|
59
|
+
p = ProgressBar.create(:title => "PDFs", :total => count)
|
60
|
+
|
46
61
|
uris.each do |u|
|
47
|
-
puts "Downloading #{u[:name]}..." if opts[:verbose]
|
48
62
|
open("#{out}/#{u[:name]}", 'wb') do |f|
|
49
63
|
open(u[:uri], 'rb') do |resp|
|
50
64
|
f.write(resp.read)
|
65
|
+
p.log u[:name] if opts[:verbose]
|
66
|
+
p.increment
|
51
67
|
end
|
52
68
|
end
|
53
|
-
end
|
69
|
+
end.count
|
54
70
|
end
|
55
71
|
end
|
56
72
|
end
|
data/lib/version.rb
CHANGED
[diff body for this file (+1 -1) is missing from this extract]
data/tests/process_tests.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
require 'tmpdir'
|
4
|
+
require 'fileutils'
|
5
|
+
require File.dirname(__FILE__) + '/fake_responses'
|
6
|
+
|
7
|
+
class SQ_process_test < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def setup
|
10
|
+
@url = 'example.com'
|
11
|
+
@http = "http://#{@url}"
|
12
|
+
|
13
|
+
@prev_path = Dir.pwd
|
14
|
+
@test_path = Dir.mktmpdir('sq-tests')
|
15
|
+
Dir.chdir @test_path
|
16
|
+
end
|
17
|
+
|
18
|
+
def teardown
|
19
|
+
Dir.chdir @prev_path
|
20
|
+
FileUtils.rm_rf @test_path
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_no_links
|
24
|
+
assert_equal(0, SQ.process("#{@url}/no-links", /./))
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_one_link_no_dir
|
28
|
+
assert_equal(1, SQ.process("#{@url}/one", /./))
|
29
|
+
assert(File.exists?('bar.pdf'), 'bar.pdf exists')
|
30
|
+
assert_equal('%PDFbar', File.read('bar.pdf'))
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_one_link_existing_dir
|
34
|
+
dir = 'foo'
|
35
|
+
Dir.mkdir dir
|
36
|
+
assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
|
37
|
+
assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
|
38
|
+
assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_one_link_existing_subdir
|
42
|
+
dir = 'foo/bar/qux'
|
43
|
+
FileUtils.mkdir_p dir
|
44
|
+
assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
|
45
|
+
assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
|
46
|
+
assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_one_link_unexisting_subdir
|
50
|
+
dir = 'foo/bar/qux'
|
51
|
+
assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
|
52
|
+
assert(Dir.exists?(dir), "#{dir} exists")
|
53
|
+
assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
|
54
|
+
assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_two_links_unexisting_subdir
|
58
|
+
dir = 'foo/bar/qux'
|
59
|
+
assert_equal(2, SQ.process("#{@url}/two", /./, :directory => dir))
|
60
|
+
assert(Dir.exists?(dir), "#{dir} exists")
|
61
|
+
|
62
|
+
assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
|
63
|
+
assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
|
64
|
+
|
65
|
+
assert(File.exists?("#{dir}/foo.pdf"), "#{dir}/foo.pdf exists")
|
66
|
+
assert_equal('%PDFfoo', File.read("#{dir}/foo.pdf"))
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
data/tests/query_tests.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/fake_responses'
|
4
|
+
|
5
|
+
class SQ_query_test < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def setup
|
8
|
+
@url = 'example.com'
|
9
|
+
@http = "http://#{@url}"
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_no_links
|
13
|
+
assert_equal([], SQ.query("#{@url}/no-links", /./))
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_no_href
|
17
|
+
assert_equal([], SQ.query("#{@url}/no-href", /./))
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_no_pdfs
|
21
|
+
assert_equal([], SQ.query("#{@url}/no-pdf", /./))
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_no_match
|
25
|
+
assert_equal([], SQ.query("#{@url}/bar", /foo/))
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_full_match
|
29
|
+
pdfs = [
|
30
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'},
|
31
|
+
{:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf'}
|
32
|
+
]
|
33
|
+
assert_equal(pdfs, SQ.query("#{@url}/bar", /./))
|
34
|
+
assert_equal(pdfs, SQ.query("#{@http}/bar", /./))
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_absolute_path
|
38
|
+
pdfs = [
|
39
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
|
40
|
+
]
|
41
|
+
assert_equal(pdfs, SQ.query("#{@url}/ab/so/lu/te", /./))
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_malformed_html
|
45
|
+
pdfs = [
|
46
|
+
{:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
|
47
|
+
]
|
48
|
+
assert_equal(pdfs, SQ.query("#{@url}/malformed1", /./))
|
49
|
+
assert_equal(pdfs, SQ.query("#{@url}/malformed2", /./))
|
50
|
+
assert_equal(pdfs, SQ.query("#{@url}/malformed3", /./))
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
data/tests/tests.rb
CHANGED
@@ -16,12 +16,17 @@ end
|
|
16
16
|
|
17
17
|
class SQTests < Test::Unit::TestCase
|
18
18
|
|
19
|
-
# ==
|
19
|
+
# == SQ#version == #
|
20
20
|
|
21
21
|
def test_sq_version
|
22
22
|
assert(SQ.version =~ /^\d+\.\d+\.\d+/)
|
23
23
|
end
|
24
24
|
|
25
|
+
# == SQ#user_agent == #
|
26
|
+
def test_sq_ua
|
27
|
+
assert(SQ.user_agent =~ /^SQ\/\d+\.\d+\.\d+/)
|
28
|
+
end
|
29
|
+
|
25
30
|
end
|
26
31
|
|
27
32
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Baptiste Fontaine
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '1.2'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: ruby-progressbar
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: simplecov
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -117,6 +131,8 @@ extra_rdoc_files: []
|
|
117
131
|
files:
|
118
132
|
- lib/sq.rb
|
119
133
|
- lib/version.rb
|
134
|
+
- tests/process_tests.rb
|
135
|
+
- tests/query_tests.rb
|
120
136
|
- tests/tests.rb
|
121
137
|
- bin/sq
|
122
138
|
homepage: https://github.com/bfontaine/sq
|
@@ -144,4 +160,6 @@ signing_key:
|
|
144
160
|
specification_version: 4
|
145
161
|
summary: Bulk PDFs downloader
|
146
162
|
test_files:
|
163
|
+
- tests/process_tests.rb
|
164
|
+
- tests/query_tests.rb
|
147
165
|
- tests/tests.rb
|