sq 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 41ba49d5ce6c8cf47cbf2b2ac14b6cec6a937bd5
4
- data.tar.gz: bea18aa8b03dd5770bd957befc8bb4f86f4f97a5
3
+ metadata.gz: 12b002f2a938d9096d85360736c3949d8ca60935
4
+ data.tar.gz: 023f25cd005f28611644a9b6cecc89f703a50b61
5
5
  SHA512:
6
- metadata.gz: d005f3c16ead6a5cfd85a5f5e0f155ebe7a041eaf8a295ad809645ccb28140dee885e11e26c28d8c795766b14318586ac02ba4e4c3bfa81c3ac64070618635a3
7
- data.tar.gz: d648efad30b62583ae3da4d06dd658c95664321075cbe6917fc17031bbbf3554ecd58088b8469f7ba1f11d822f27a5ca4f7b6109400cddf2225796f43eb90427
6
+ metadata.gz: 34ad6b35d57baa6b039ccca9a245f1675205b50a82317645414fafc91742c43c5a14ac230f36cdffe4103d8e06a6e452f04d77e19a54f3b1363a996e405f5d99
7
+ data.tar.gz: deda0c2bf3f2168db8ea07b52b348b310a4b6d810f4d1605ac41f3506f545648de6f21cc85882742ec2fcab273417c42486b082fb290ef6488e3c64bf7244566
data/lib/sq.rb CHANGED
@@ -4,14 +4,20 @@ require 'uri'
4
4
  require 'open-uri'
5
5
  require 'nokogiri'
6
6
  require 'fileutils'
7
+ require 'ruby-progressbar'
7
8
  require File.expand_path(File.dirname __FILE__) + '/version'
8
9
 
9
10
  module SQ
10
11
  class << self
12
+ # return the user-agent used by SQ
11
13
  def user_agent
12
14
  "SQ/#{version} +github.com/bfontaine/sq"
13
15
  end
14
16
 
17
+ # query a URI and return a list of PDFs. Each PDF is a hash with two
18
+ # keys: :uri is its absolute URI, :name is its name (last part of its URI).
19
+ # @uri [String]
20
+ # @regex [Regexp]
15
21
  def query(uri, regex=/./)
16
22
  uri = 'http://' + uri unless uri =~ /^https?:\/\//
17
23
 
@@ -29,10 +35,17 @@ module SQ
29
35
  end
30
36
  end
31
37
 
38
+ # query a URI and download all PDFs which match the regex. It returns the
39
+ # number of downloaded PDFs.
40
+ # @uri [String]
41
+ # @regex [Regexp] Regex to use to match PDF URIs
42
+ # @opts [Hash] Supported options: :verbose, :directory (specify the
43
+ # directory to use for output instead of the current one)
32
44
  def process(uri, regex=/./, opts={})
33
45
  uris = self.query(uri, regex)
46
+ count = uris.count
34
47
 
35
- puts "Found #{uris.count} PDFs." if opts[:verbose]
48
+ puts "Found #{count} PDFs:" if opts[:verbose]
36
49
 
37
50
  return 0 if uris.empty?
38
51
 
@@ -40,17 +53,20 @@ module SQ
40
53
 
41
54
  unless Dir.exists?(out)
42
55
  puts "-> mkdir #{out}" if opts[:verbose]
43
- Dir.mkdir(out)
56
+ FileUtils.mkdir_p(out)
44
57
  end
45
58
 
59
+ p = ProgressBar.create(:title => "PDFs", :total => count)
60
+
46
61
  uris.each do |u|
47
- puts "Downloading #{u[:name]}..." if opts[:verbose]
48
62
  open("#{out}/#{u[:name]}", 'wb') do |f|
49
63
  open(u[:uri], 'rb') do |resp|
50
64
  f.write(resp.read)
65
+ p.log u[:name] if opts[:verbose]
66
+ p.increment
51
67
  end
52
68
  end
53
- end
69
+ end.count
54
70
  end
55
71
  end
56
72
  end
@@ -3,7 +3,7 @@
3
3
  module SQ
4
4
  class << self
5
5
  def version
6
- '0.0.2'
6
+ '0.1.0'
7
7
  end
8
8
  end
9
9
  end
@@ -0,0 +1,69 @@
1
+ # -*- coding: UTF-8 -*-
2
+
3
+ require 'tmpdir'
4
+ require 'fileutils'
5
+ require File.dirname(__FILE__) + '/fake_responses'
6
+
7
+ class SQ_process_test < Test::Unit::TestCase
8
+
9
+ def setup
10
+ @url = 'example.com'
11
+ @http = "http://#{@url}"
12
+
13
+ @prev_path = Dir.pwd
14
+ @test_path = Dir.mktmpdir('sq-tests')
15
+ Dir.chdir @test_path
16
+ end
17
+
18
+ def teardown
19
+ Dir.chdir @prev_path
20
+ FileUtils.rm_rf @test_path
21
+ end
22
+
23
+ def test_no_links
24
+ assert_equal(0, SQ.process("#{@url}/no-links", /./))
25
+ end
26
+
27
+ def test_one_link_no_dir
28
+ assert_equal(1, SQ.process("#{@url}/one", /./))
29
+ assert(File.exists?('bar.pdf'), 'bar.pdf exists')
30
+ assert_equal('%PDFbar', File.read('bar.pdf'))
31
+ end
32
+
33
+ def test_one_link_existing_dir
34
+ dir = 'foo'
35
+ Dir.mkdir dir
36
+ assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
37
+ assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
38
+ assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
39
+ end
40
+
41
+ def test_one_link_existing_subdir
42
+ dir = 'foo/bar/qux'
43
+ FileUtils.mkdir_p dir
44
+ assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
45
+ assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
46
+ assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
47
+ end
48
+
49
+ def test_one_link_unexisting_subdir
50
+ dir = 'foo/bar/qux'
51
+ assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
52
+ assert(Dir.exists?(dir), "#{dir} exists")
53
+ assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
54
+ assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
55
+ end
56
+
57
+ def test_two_links_unexisting_subdir
58
+ dir = 'foo/bar/qux'
59
+ assert_equal(2, SQ.process("#{@url}/two", /./, :directory => dir))
60
+ assert(Dir.exists?(dir), "#{dir} exists")
61
+
62
+ assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
63
+ assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
64
+
65
+ assert(File.exists?("#{dir}/foo.pdf"), "#{dir}/foo.pdf exists")
66
+ assert_equal('%PDFfoo', File.read("#{dir}/foo.pdf"))
67
+ end
68
+
69
+ end
@@ -0,0 +1,53 @@
1
+ # -*- coding: UTF-8 -*-
2
+
3
+ require File.dirname(__FILE__) + '/fake_responses'
4
+
5
+ class SQ_query_test < Test::Unit::TestCase
6
+
7
+ def setup
8
+ @url = 'example.com'
9
+ @http = "http://#{@url}"
10
+ end
11
+
12
+ def test_no_links
13
+ assert_equal([], SQ.query("#{@url}/no-links", /./))
14
+ end
15
+
16
+ def test_no_href
17
+ assert_equal([], SQ.query("#{@url}/no-href", /./))
18
+ end
19
+
20
+ def test_no_pdfs
21
+ assert_equal([], SQ.query("#{@url}/no-pdf", /./))
22
+ end
23
+
24
+ def test_no_match
25
+ assert_equal([], SQ.query("#{@url}/bar", /foo/))
26
+ end
27
+
28
+ def test_full_match
29
+ pdfs = [
30
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'},
31
+ {:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf'}
32
+ ]
33
+ assert_equal(pdfs, SQ.query("#{@url}/bar", /./))
34
+ assert_equal(pdfs, SQ.query("#{@http}/bar", /./))
35
+ end
36
+
37
+ def test_absolute_path
38
+ pdfs = [
39
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
40
+ ]
41
+ assert_equal(pdfs, SQ.query("#{@url}/ab/so/lu/te", /./))
42
+ end
43
+
44
+ def test_malformed_html
45
+ pdfs = [
46
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
47
+ ]
48
+ assert_equal(pdfs, SQ.query("#{@url}/malformed1", /./))
49
+ assert_equal(pdfs, SQ.query("#{@url}/malformed2", /./))
50
+ assert_equal(pdfs, SQ.query("#{@url}/malformed3", /./))
51
+ end
52
+
53
+ end
@@ -16,12 +16,17 @@ end
16
16
 
17
17
  class SQTests < Test::Unit::TestCase
18
18
 
19
- # == UD#version == #
19
+ # == SQ#version == #
20
20
 
21
21
  def test_sq_version
22
22
  assert(SQ.version =~ /^\d+\.\d+\.\d+/)
23
23
  end
24
24
 
25
+ # == SQ#user_agent == #
26
+ def test_sq_ua
27
+ assert(SQ.user_agent =~ /^SQ\/\d+\.\d+\.\d+/)
28
+ end
29
+
25
30
  end
26
31
 
27
32
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sq
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Baptiste Fontaine
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-13 00:00:00.000000000 Z
11
+ date: 2014-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ~>
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.2'
55
+ - !ruby/object:Gem::Dependency
56
+ name: ruby-progressbar
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: simplecov
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -117,6 +131,8 @@ extra_rdoc_files: []
117
131
  files:
118
132
  - lib/sq.rb
119
133
  - lib/version.rb
134
+ - tests/process_tests.rb
135
+ - tests/query_tests.rb
120
136
  - tests/tests.rb
121
137
  - bin/sq
122
138
  homepage: https://github.com/bfontaine/sq
@@ -144,4 +160,6 @@ signing_key:
144
160
  specification_version: 4
145
161
  summary: Bulk PDFs downloader
146
162
  test_files:
163
+ - tests/process_tests.rb
164
+ - tests/query_tests.rb
147
165
  - tests/tests.rb