sq 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +1 -0
- data/bin/sq +1 -1
- data/lib/sq.rb +31 -18
- data/lib/version.rb +3 -1
- data/tests/query_tests.rb +4 -0
- data/tests/tests.rb +5 -1
- metadata +70 -33
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aeab6710743c4e7c1d05af7282c199a21e882be6
|
4
|
+
data.tar.gz: dcc24a5f930eed920f52df8069a82e9aa0800e4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 10a8027f10d94fb2d4f5d37e45a85b8d528047ff57c2706c6031850054ceefc44793f8666733a9779349268ca2ee62efb305acd47f8b1d43dbeda687ef6226cd
|
7
|
+
data.tar.gz: ee670820c2729551505f23058ed9ee19884ad8b52dcda566b1a3dc416ce1027a73a84555732597dc0f75fce70ca36bf05e3378acf5f75395274c053ae8216cbc
|
checksums.yaml.gz.sig
ADDED
Binary file
|
data.tar.gz.sig
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
��Mr�`?)mC����ƒa�V$Bg�����3���E<K��'����)���� #�k�Hә�9����Z8�"��T�+�5�L?~��s�2���3�$�s���47�������ݤ�!v<�q�?���(��`��9"���=����D.h�p��K�IH���4L���x~�:��\d��c�+z]��!��S!lP}�?#~��{���p�~������vz�>%k��t,*��|77�b�ʆ�ƛ�"��;�H�m���
|
data/bin/sq
CHANGED
@@ -33,7 +33,7 @@ is '%s.pdf'.
|
|
33
33
|
%n - PDF number, starting at 0
|
34
34
|
%N - PDF number, starting at 1
|
35
35
|
%z - same as %n, but zero-padded
|
36
|
-
%Z - same as %
|
36
|
+
%Z - same as %N, but zero-padded
|
37
37
|
%c - total number of PDFs
|
38
38
|
%s - name of the PDF, extracted from its URI, without `.pdf`
|
39
39
|
%S - name of the PDF, extracted from the link text
|
data/lib/sq.rb
CHANGED
@@ -7,26 +7,37 @@ require 'fileutils'
|
|
7
7
|
require 'ruby-progressbar'
|
8
8
|
require File.expand_path(File.dirname __FILE__) + '/version'
|
9
9
|
|
10
|
+
# This module provides some tools to bulk-download a set of PDF documents, all
|
11
|
+
# linked in one HTML page.
|
10
12
|
module SQ
|
11
13
|
class << self
|
12
|
-
# return the user-agent used by SQ
|
14
|
+
# @return [String] the user-agent used by SQ
|
13
15
|
def user_agent
|
14
16
|
"SQ/#{version} +github.com/bfontaine/sq"
|
15
17
|
end
|
16
18
|
|
17
|
-
#
|
19
|
+
# Query a URI and return a list of PDFs. Each PDF is a hash with three
|
18
20
|
# keys: +:uri+ is its absolute URI, +:name+ is its name (last part of its
|
19
21
|
# URI), and +:text+ is its link text.
|
20
|
-
# @uri [String]
|
21
|
-
# @regex [Regexp]
|
22
|
+
# @param uri [String]
|
23
|
+
# @param regex [Regexp]
|
24
|
+
# @return [Array<Hash>]
|
22
25
|
def query(uri, regex=/./)
|
23
26
|
uri = 'http://' + uri unless uri =~ /^https?:\/\//
|
24
27
|
|
25
28
|
doc = Nokogiri::HTML(open(uri, 'User-Agent' => user_agent))
|
26
29
|
links = doc.css('a[href]')
|
27
30
|
|
28
|
-
uris = links.map
|
29
|
-
|
31
|
+
uris = links.map do |a|
|
32
|
+
full = begin
|
33
|
+
URI.join(uri, a.attr('href'))
|
34
|
+
rescue
|
35
|
+
nil
|
36
|
+
end
|
37
|
+
|
38
|
+
[a.text, full]
|
39
|
+
end
|
40
|
+
uris.select! { |_,u| u && u.path =~ /\.pdf$/i && u.to_s =~ regex }
|
30
41
|
|
31
42
|
uris.map do |text,u|
|
32
43
|
{
|
@@ -38,9 +49,12 @@ module SQ
|
|
38
49
|
end
|
39
50
|
|
40
51
|
# Output a formatted filename.
|
41
|
-
# @doc [Hash]
|
42
|
-
# @fmt [String]
|
43
|
-
#
|
52
|
+
# @param doc [Hash] as returned from +SQ.query+.
|
53
|
+
# @param fmt [String] format. See the project's README for more info on
|
54
|
+
# available format options
|
55
|
+
# @param opts [Hash] additional info. Supported keys include: +:number+
|
56
|
+
# (the current number), +:count+ (total files count).
|
57
|
+
# @return [String]
|
44
58
|
def format(doc, fmt='%s.pdf', opts={})
|
45
59
|
opts[:number] ||= 0
|
46
60
|
opts[:count] ||= 0
|
@@ -63,15 +77,14 @@ module SQ
|
|
63
77
|
end
|
64
78
|
end
|
65
79
|
|
66
|
-
#
|
67
|
-
#
|
68
|
-
# @
|
69
|
-
# @
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
#
|
74
|
-
#
|
80
|
+
# Query a URI and download all PDFs that match the regex.
|
81
|
+
# @param uri [String]
|
82
|
+
# @param regex [Regexp] Regex to use to match PDF URIs
|
83
|
+
# @param opts [Hash] Supported options: +:verbose+, +:directory+
|
84
|
+
# (specify the directory to use for output instead of
|
85
|
+
# the current one), and +:format+ the output format.
|
86
|
+
# See the README for details.
|
87
|
+
# @return [Integer] number of downloaded PDFs.
|
75
88
|
def process(uri, regex=/./, opts={})
|
76
89
|
uris = self.query(uri, regex)
|
77
90
|
count = uris.count
|
data/lib/version.rb
CHANGED
data/tests/query_tests.rb
CHANGED
data/tests/tests.rb
CHANGED
@@ -1,12 +1,16 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
# -*- coding: UTF-8 -*-
|
3
3
|
|
4
|
+
require 'coveralls'
|
5
|
+
Coveralls.wear!
|
6
|
+
|
4
7
|
require 'test/unit'
|
5
8
|
require 'simplecov'
|
6
9
|
|
7
10
|
test_dir = File.expand_path( File.dirname(__FILE__) )
|
8
11
|
|
9
|
-
SimpleCov.
|
12
|
+
SimpleCov.formatter = Coveralls::SimpleCov::Formatter
|
13
|
+
SimpleCov.start { add_filter '/tests/' }
|
10
14
|
|
11
15
|
require 'sq'
|
12
16
|
|
metadata
CHANGED
@@ -1,113 +1,149 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Baptiste Fontaine
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
|
-
cert_chain:
|
11
|
-
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIDaDCCAlCgAwIBAgIBATANBgkqhkiG9w0BAQUFADA9MRAwDgYDVQQDDAdiYXRp
|
14
|
+
Zm9uMRUwEwYKCZImiZPyLGQBGRYFeWFob28xEjAQBgoJkiaJk/IsZAEZFgJmcjAe
|
15
|
+
Fw0xNDA4MjQxMTM5NTJaFw0xNTA4MjQxMTM5NTJaMD0xEDAOBgNVBAMMB2JhdGlm
|
16
|
+
b24xFTATBgoJkiaJk/IsZAEZFgV5YWhvbzESMBAGCgmSJomT8ixkARkWAmZyMIIB
|
17
|
+
IjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAn3uOgWl+FwXIjDdCay28i6cK
|
18
|
+
FxHWhHoS/mH9pkXzSGVctEKP2fulie6MkIvrLCP5M6TpByeaBjcJjZPadrou1FIc
|
19
|
+
Yc/O14jYjaKTqfMxpzgNfGzDdBgBo0QZ9rcHjORetdIZdUSDaZjPtI1aGS6eBMsh
|
20
|
+
W2X6GxL4UQ1kH0Lyg7iPYAH5RHnD3+G+S28iOPFfRLFzm4fwJp1k7URiiSyOHTDp
|
21
|
+
B0ZehKKrW/ibCaRMYp2VoCamcim4de1VA6CTOaYSShueqThE18n1HM6aprihziyM
|
22
|
+
04yIpo80/unO6JxlsUFdBjsb5d7oJSqPJ6/OfcFnyXa/VRm+Ed9d6PTwZvL7YwID
|
23
|
+
AQABo3MwcTAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUWyH3xMx1
|
24
|
+
8I5NLncgNEC0ZlDRKegwGwYDVR0RBBQwEoEQYmF0aWZvbkB5YWhvby5mcjAbBgNV
|
25
|
+
HRIEFDASgRBiYXRpZm9uQHlhaG9vLmZyMA0GCSqGSIb3DQEBBQUAA4IBAQAaTVya
|
26
|
+
CGgojxBwUoadCCIsFheGsXvSFhikkXYNXy0VxEYr8BaTfGwzYh9c9T5N+Y5Mu5MW
|
27
|
+
WegqwIwRhIu6Rg7huqJ7TK50pVDF0yrZcsxvWjOfd3clblBHjKGQx5Mbu7LVNGKE
|
28
|
+
+QNdTAwYVTAA8wXHpxk200cHb9xz4e9ANpb4lonGuPz8jKmb/A7Z1M5QD6zStG8l
|
29
|
+
sTlVAhA/LZiC9gL9LtW8Iq7o7xRFhxNPKWHu6JVThH9i16eli+JignOJbGna7C40
|
30
|
+
QnOQb8zHyNL+gq2m/mnZGrSehx+6AujokjOfHbmivYMfDATOQQx0eIBI18IhacZm
|
31
|
+
42WxhhIV2bwDtd77
|
32
|
+
-----END CERTIFICATE-----
|
33
|
+
date: 2014-11-10 00:00:00.000000000 Z
|
12
34
|
dependencies:
|
13
35
|
- !ruby/object:Gem::Dependency
|
14
36
|
name: nokogiri
|
15
37
|
requirement: !ruby/object:Gem::Requirement
|
16
38
|
requirements:
|
17
|
-
- - ~>
|
39
|
+
- - "~>"
|
18
40
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.6
|
41
|
+
version: '1.6'
|
20
42
|
type: :runtime
|
21
43
|
prerelease: false
|
22
44
|
version_requirements: !ruby/object:Gem::Requirement
|
23
45
|
requirements:
|
24
|
-
- - ~>
|
46
|
+
- - "~>"
|
25
47
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.6
|
48
|
+
version: '1.6'
|
27
49
|
- !ruby/object:Gem::Dependency
|
28
50
|
name: trollop
|
29
51
|
requirement: !ruby/object:Gem::Requirement
|
30
52
|
requirements:
|
31
|
-
- - ~>
|
53
|
+
- - "~>"
|
32
54
|
- !ruby/object:Gem::Version
|
33
55
|
version: '2.0'
|
34
56
|
type: :runtime
|
35
57
|
prerelease: false
|
36
58
|
version_requirements: !ruby/object:Gem::Requirement
|
37
59
|
requirements:
|
38
|
-
- - ~>
|
60
|
+
- - "~>"
|
39
61
|
- !ruby/object:Gem::Version
|
40
62
|
version: '2.0'
|
41
63
|
- !ruby/object:Gem::Dependency
|
42
64
|
name: ruby-progressbar
|
43
65
|
requirement: !ruby/object:Gem::Requirement
|
44
66
|
requirements:
|
45
|
-
- -
|
67
|
+
- - "~>"
|
46
68
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
69
|
+
version: '1.4'
|
48
70
|
type: :runtime
|
49
71
|
prerelease: false
|
50
72
|
version_requirements: !ruby/object:Gem::Requirement
|
51
73
|
requirements:
|
52
|
-
- -
|
74
|
+
- - "~>"
|
53
75
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
76
|
+
version: '1.4'
|
55
77
|
- !ruby/object:Gem::Dependency
|
56
78
|
name: simplecov
|
57
79
|
requirement: !ruby/object:Gem::Requirement
|
58
80
|
requirements:
|
59
|
-
- -
|
81
|
+
- - "~>"
|
60
82
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
83
|
+
version: '0.8'
|
62
84
|
type: :development
|
63
85
|
prerelease: false
|
64
86
|
version_requirements: !ruby/object:Gem::Requirement
|
65
87
|
requirements:
|
66
|
-
- -
|
88
|
+
- - "~>"
|
67
89
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
90
|
+
version: '0.8'
|
69
91
|
- !ruby/object:Gem::Dependency
|
70
92
|
name: rake
|
71
93
|
requirement: !ruby/object:Gem::Requirement
|
72
94
|
requirements:
|
73
|
-
- -
|
95
|
+
- - "~>"
|
74
96
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
97
|
+
version: '10.1'
|
76
98
|
type: :development
|
77
99
|
prerelease: false
|
78
100
|
version_requirements: !ruby/object:Gem::Requirement
|
79
101
|
requirements:
|
80
|
-
- -
|
102
|
+
- - "~>"
|
81
103
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
104
|
+
version: '10.1'
|
83
105
|
- !ruby/object:Gem::Dependency
|
84
106
|
name: test-unit
|
85
107
|
requirement: !ruby/object:Gem::Requirement
|
86
108
|
requirements:
|
87
|
-
- -
|
109
|
+
- - "~>"
|
88
110
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
111
|
+
version: '2.5'
|
90
112
|
type: :development
|
91
113
|
prerelease: false
|
92
114
|
version_requirements: !ruby/object:Gem::Requirement
|
93
115
|
requirements:
|
94
|
-
- -
|
116
|
+
- - "~>"
|
95
117
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
118
|
+
version: '2.5'
|
97
119
|
- !ruby/object:Gem::Dependency
|
98
120
|
name: fakeweb
|
99
121
|
requirement: !ruby/object:Gem::Requirement
|
100
122
|
requirements:
|
101
|
-
- -
|
123
|
+
- - "~>"
|
102
124
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
125
|
+
version: '1.3'
|
104
126
|
type: :development
|
105
127
|
prerelease: false
|
106
128
|
version_requirements: !ruby/object:Gem::Requirement
|
107
129
|
requirements:
|
108
|
-
- -
|
130
|
+
- - "~>"
|
109
131
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
132
|
+
version: '1.3'
|
133
|
+
- !ruby/object:Gem::Dependency
|
134
|
+
name: coveralls
|
135
|
+
requirement: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - "~>"
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0.7'
|
140
|
+
type: :development
|
141
|
+
prerelease: false
|
142
|
+
version_requirements: !ruby/object:Gem::Requirement
|
143
|
+
requirements:
|
144
|
+
- - "~>"
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
version: '0.7'
|
111
147
|
description: Download all PDFs linked in a Web page
|
112
148
|
email: batifon@yahoo.fr
|
113
149
|
executables:
|
@@ -115,13 +151,13 @@ executables:
|
|
115
151
|
extensions: []
|
116
152
|
extra_rdoc_files: []
|
117
153
|
files:
|
154
|
+
- bin/sq
|
118
155
|
- lib/sq.rb
|
119
156
|
- lib/version.rb
|
120
157
|
- tests/format_tests.rb
|
121
158
|
- tests/process_tests.rb
|
122
159
|
- tests/query_tests.rb
|
123
160
|
- tests/tests.rb
|
124
|
-
- bin/sq
|
125
161
|
homepage: https://github.com/bfontaine/sq
|
126
162
|
licenses:
|
127
163
|
- MIT
|
@@ -132,17 +168,17 @@ require_paths:
|
|
132
168
|
- lib
|
133
169
|
required_ruby_version: !ruby/object:Gem::Requirement
|
134
170
|
requirements:
|
135
|
-
- -
|
171
|
+
- - ">="
|
136
172
|
- !ruby/object:Gem::Version
|
137
173
|
version: '0'
|
138
174
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
139
175
|
requirements:
|
140
|
-
- -
|
176
|
+
- - ">="
|
141
177
|
- !ruby/object:Gem::Version
|
142
178
|
version: '0'
|
143
179
|
requirements: []
|
144
180
|
rubyforge_project:
|
145
|
-
rubygems_version: 2.
|
181
|
+
rubygems_version: 2.2.2
|
146
182
|
signing_key:
|
147
183
|
specification_version: 4
|
148
184
|
summary: Bulk PDFs downloader
|
@@ -151,3 +187,4 @@ test_files:
|
|
151
187
|
- tests/process_tests.rb
|
152
188
|
- tests/query_tests.rb
|
153
189
|
- tests/tests.rb
|
190
|
+
has_rdoc:
|
metadata.gz.sig
ADDED
Binary file
|