sq 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 41ba49d5ce6c8cf47cbf2b2ac14b6cec6a937bd5
4
- data.tar.gz: bea18aa8b03dd5770bd957befc8bb4f86f4f97a5
3
+ metadata.gz: 12b002f2a938d9096d85360736c3949d8ca60935
4
+ data.tar.gz: 023f25cd005f28611644a9b6cecc89f703a50b61
5
5
  SHA512:
6
- metadata.gz: d005f3c16ead6a5cfd85a5f5e0f155ebe7a041eaf8a295ad809645ccb28140dee885e11e26c28d8c795766b14318586ac02ba4e4c3bfa81c3ac64070618635a3
7
- data.tar.gz: d648efad30b62583ae3da4d06dd658c95664321075cbe6917fc17031bbbf3554ecd58088b8469f7ba1f11d822f27a5ca4f7b6109400cddf2225796f43eb90427
6
+ metadata.gz: 34ad6b35d57baa6b039ccca9a245f1675205b50a82317645414fafc91742c43c5a14ac230f36cdffe4103d8e06a6e452f04d77e19a54f3b1363a996e405f5d99
7
+ data.tar.gz: deda0c2bf3f2168db8ea07b52b348b310a4b6d810f4d1605ac41f3506f545648de6f21cc85882742ec2fcab273417c42486b082fb290ef6488e3c64bf7244566
data/lib/sq.rb CHANGED
@@ -4,14 +4,20 @@ require 'uri'
4
4
  require 'open-uri'
5
5
  require 'nokogiri'
6
6
  require 'fileutils'
7
+ require 'ruby-progressbar'
7
8
  require File.expand_path(File.dirname __FILE__) + '/version'
8
9
 
9
10
  module SQ
10
11
  class << self
12
+ # return the user-agent used by SQ
11
13
  def user_agent
12
14
  "SQ/#{version} +github.com/bfontaine/sq"
13
15
  end
14
16
 
17
+ # query an URI and return a list of PDFs. Each PDF is an hash with two
18
+ # keys: :uri is its absolute URI, :name is its name (last part of its URI).
19
+ # @uri [String]
20
+ # @regex [Regexp]
15
21
  def query(uri, regex=/./)
16
22
  uri = 'http://' + uri unless uri =~ /^https?:\/\//
17
23
 
@@ -29,10 +35,17 @@ module SQ
29
35
  end
30
36
  end
31
37
 
38
+ # query an URI and download all PDFs which match the regex. It returns the
39
+ # number of downloaded PDFs.
40
+ # @uri [String]
41
+ # @regex [Regexp] Regex to use to match PDF URIs
42
+ # @opts [Hash] Supported options: :verbose, :directory (specify the
43
+ # directory to use for output instead of the current one)
32
44
  def process(uri, regex=/./, opts={})
33
45
  uris = self.query(uri, regex)
46
+ count = uris.count
34
47
 
35
- puts "Found #{uris.count} PDFs." if opts[:verbose]
48
+ puts "Found #{count} PDFs:" if opts[:verbose]
36
49
 
37
50
  return 0 if uris.empty?
38
51
 
@@ -40,17 +53,20 @@ module SQ
40
53
 
41
54
  unless Dir.exists?(out)
42
55
  puts "-> mkdir #{out}" if opts[:verbose]
43
- Dir.mkdir(out)
56
+ FileUtils.mkdir_p(out)
44
57
  end
45
58
 
59
+ p = ProgressBar.create(:title => "PDFs", :total => count)
60
+
46
61
  uris.each do |u|
47
- puts "Downloading #{u[:name]}..." if opts[:verbose]
48
62
  open("#{out}/#{u[:name]}", 'wb') do |f|
49
63
  open(u[:uri], 'rb') do |resp|
50
64
  f.write(resp.read)
65
+ p.log u[:name] if opts[:verbose]
66
+ p.increment
51
67
  end
52
68
  end
53
- end
69
+ end.count
54
70
  end
55
71
  end
56
72
  end
@@ -3,7 +3,7 @@
3
3
  module SQ
4
4
  class << self
5
5
  def version
6
- '0.0.2'
6
+ '0.1.0'
7
7
  end
8
8
  end
9
9
  end
@@ -0,0 +1,69 @@
1
+ # -*- coding: UTF-8 -*-
2
+
3
+ require 'tmpdir'
4
+ require 'fileutils'
5
+ require File.dirname(__FILE__) + '/fake_responses'
6
+
7
+ class SQ_process_test < Test::Unit::TestCase
8
+
9
+ def setup
10
+ @url = 'example.com'
11
+ @http = "http://#{@url}"
12
+
13
+ @prev_path = Dir.pwd
14
+ @test_path = Dir.mktmpdir('sq-tests')
15
+ Dir.chdir @test_path
16
+ end
17
+
18
+ def teardown
19
+ Dir.chdir @prev_path
20
+ FileUtils.rm_rf @test_path
21
+ end
22
+
23
+ def test_no_links
24
+ assert_equal(0, SQ.process("#{@url}/no-links", /./))
25
+ end
26
+
27
+ def test_one_link_no_dir
28
+ assert_equal(1, SQ.process("#{@url}/one", /./))
29
+ assert(File.exists?('bar.pdf'), 'bar.pdf exists')
30
+ assert_equal('%PDFbar', File.read('bar.pdf'))
31
+ end
32
+
33
+ def test_one_link_existing_dir
34
+ dir = 'foo'
35
+ Dir.mkdir dir
36
+ assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
37
+ assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
38
+ assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
39
+ end
40
+
41
+ def test_one_link_existing_subdir
42
+ dir = 'foo/bar/qux'
43
+ FileUtils.mkdir_p dir
44
+ assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
45
+ assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
46
+ assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
47
+ end
48
+
49
+ def test_one_link_unexisting_subdir
50
+ dir = 'foo/bar/qux'
51
+ assert_equal(1, SQ.process("#{@url}/one", /./, :directory => dir))
52
+ assert(Dir.exists?(dir), "#{dir} exists")
53
+ assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
54
+ assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
55
+ end
56
+
57
+ def test_two_links_unexisting_subdir
58
+ dir = 'foo/bar/qux'
59
+ assert_equal(2, SQ.process("#{@url}/two", /./, :directory => dir))
60
+ assert(Dir.exists?(dir), "#{dir} exists")
61
+
62
+ assert(File.exists?("#{dir}/bar.pdf"), "#{dir}/bar.pdf exists")
63
+ assert_equal('%PDFbar', File.read("#{dir}/bar.pdf"))
64
+
65
+ assert(File.exists?("#{dir}/foo.pdf"), "#{dir}/foo.pdf exists")
66
+ assert_equal('%PDFfoo', File.read("#{dir}/foo.pdf"))
67
+ end
68
+
69
+ end
@@ -0,0 +1,53 @@
1
+ # -*- coding: UTF-8 -*-
2
+
3
+ require File.dirname(__FILE__) + '/fake_responses'
4
+
5
+ class SQ_query_test < Test::Unit::TestCase
6
+
7
+ def setup
8
+ @url = 'example.com'
9
+ @http = "http://#{@url}"
10
+ end
11
+
12
+ def test_no_links
13
+ assert_equal([], SQ.query("#{@url}/no-links", /./))
14
+ end
15
+
16
+ def test_no_href
17
+ assert_equal([], SQ.query("#{@url}/no-href", /./))
18
+ end
19
+
20
+ def test_no_pdfs
21
+ assert_equal([], SQ.query("#{@url}/no-pdf", /./))
22
+ end
23
+
24
+ def test_no_match
25
+ assert_equal([], SQ.query("#{@url}/bar", /foo/))
26
+ end
27
+
28
+ def test_full_match
29
+ pdfs = [
30
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'},
31
+ {:uri => "#{@http}/bar2.pdf", :name => 'bar2.pdf'}
32
+ ]
33
+ assert_equal(pdfs, SQ.query("#{@url}/bar", /./))
34
+ assert_equal(pdfs, SQ.query("#{@http}/bar", /./))
35
+ end
36
+
37
+ def test_absolute_path
38
+ pdfs = [
39
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
40
+ ]
41
+ assert_equal(pdfs, SQ.query("#{@url}/ab/so/lu/te", /./))
42
+ end
43
+
44
+ def test_malformed_html
45
+ pdfs = [
46
+ {:uri => "#{@http}/bar1.pdf", :name => 'bar1.pdf'}
47
+ ]
48
+ assert_equal(pdfs, SQ.query("#{@url}/malformed1", /./))
49
+ assert_equal(pdfs, SQ.query("#{@url}/malformed2", /./))
50
+ assert_equal(pdfs, SQ.query("#{@url}/malformed3", /./))
51
+ end
52
+
53
+ end
@@ -16,12 +16,17 @@ end
16
16
 
17
17
  class SQTests < Test::Unit::TestCase
18
18
 
19
- # == UD#version == #
19
+ # == SQ#version == #
20
20
 
21
21
  def test_sq_version
22
22
  assert(SQ.version =~ /^\d+\.\d+\.\d+/)
23
23
  end
24
24
 
25
+ # == SQ#user_agent == #
26
+ def test_sq_ua
27
+ assert(SQ.user_agent =~ /^SQ\/\d+\.\d+\.\d+/)
28
+ end
29
+
25
30
  end
26
31
 
27
32
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sq
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Baptiste Fontaine
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-13 00:00:00.000000000 Z
11
+ date: 2014-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ~>
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.2'
55
+ - !ruby/object:Gem::Dependency
56
+ name: ruby-progressbar
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: simplecov
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -117,6 +131,8 @@ extra_rdoc_files: []
117
131
  files:
118
132
  - lib/sq.rb
119
133
  - lib/version.rb
134
+ - tests/process_tests.rb
135
+ - tests/query_tests.rb
120
136
  - tests/tests.rb
121
137
  - bin/sq
122
138
  homepage: https://github.com/bfontaine/sq
@@ -144,4 +160,6 @@ signing_key:
144
160
  specification_version: 4
145
161
  summary: Bulk PDFs downloader
146
162
  test_files:
163
+ - tests/process_tests.rb
164
+ - tests/query_tests.rb
147
165
  - tests/tests.rb