ferto 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/CI.yml +29 -0
- data/CHANGELOG.md +50 -0
- data/README.md +20 -3
- data/ferto.gemspec +2 -3
- data/lib/ferto/client.rb +61 -7
- data/lib/ferto/version.rb +1 -1
- data/lib/ferto.rb +21 -0
- metadata +16 -30
- data/.travis.yml +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7c778310b7f3bba9e3d6984c185b7c61ed475e398a38924c34ccecbf490ddb8d
|
4
|
+
data.tar.gz: 3f027ed81e81275682260282b1b72c09a0469ebb7d026864105a79e3bb0c6ad8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2745ec8da102954efe1eb8e6682290ca56ab9970816f1411e5e01947d9d61b4fcd69efbc4d329af567dc7f04ff1d9d590a845c245b30bac67bb78633ebabc3bb
|
7
|
+
data.tar.gz: e3d558ff92fd5bcd542f56a7a924b77119ddaae6bcb4887f966fe34c0c39b221a0d29bac848d16db1793005d90a92aff6b21afc50626e5693cbb7750dc63a0e3
|
@@ -0,0 +1,29 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
test:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
|
13
|
+
strategy:
|
14
|
+
fail-fast: false
|
15
|
+
matrix:
|
16
|
+
ruby-version: ['2.7', '3.2']
|
17
|
+
|
18
|
+
steps:
|
19
|
+
- uses: actions/checkout@v3
|
20
|
+
- name: Install dependencies
|
21
|
+
run: sudo apt install -y libcurl4-openssl-dev
|
22
|
+
- name: Set up Ruby ${{ matrix.ruby-version }}
|
23
|
+
uses: ruby/setup-ruby@v1
|
24
|
+
with:
|
25
|
+
ruby-version: ${{ matrix.ruby-version }}
|
26
|
+
- name: Install dependencies
|
27
|
+
run: bundle install
|
28
|
+
- name: Run tests
|
29
|
+
run: bundle exec rspec
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
Breaking changes are prefixed with a "[BREAKING]" label.
|
4
|
+
|
5
|
+
## master (unreleased)
|
6
|
+
|
7
|
+
## 0.1.0 (2023-06-16)
|
8
|
+
|
9
|
+
- Add compatibility for Ruby 3
|
10
|
+
- Unpin curb version from gemspec
|
11
|
+
- Unpin faker version
|
12
|
+
- specs: Pass params as kwargs instead of hash
|
13
|
+
|
14
|
+
## 0.0.9 (2022-11-14)
|
15
|
+
|
16
|
+
### Added
|
17
|
+
|
18
|
+
- Support for different callbacks when a job fails [[#13](https://github.com/skroutz/ferto/pull/13)]
|
19
|
+
|
20
|
+
## 0.0.8 (2022-08-16)
|
21
|
+
|
22
|
+
### Added
|
23
|
+
|
24
|
+
- Support for setting subpath in download requests [[#12](https://github.com/skroutz/ferto/pull/12)]
|
25
|
+
|
26
|
+
## 0.0.6 (2019-07-09)
|
27
|
+
|
28
|
+
### Added
|
29
|
+
|
30
|
+
- Support for setting request headers in download requests [[#10](https://github.com/skroutz/ferto/pull/10)]
|
31
|
+
|
32
|
+
## 0.0.7 (2022-07-21)
|
33
|
+
|
34
|
+
### Added
|
35
|
+
|
36
|
+
- Support for setting AWS S3 bucket as filestorage solution [[#11](https://github.com/skroutz/ferto/pull/11)]
|
37
|
+
|
38
|
+
## 0.0.5 (2019-05-16)
|
39
|
+
|
40
|
+
### Added
|
41
|
+
|
42
|
+
- [BREAKING] `Ferto::ResponseError` exception raising when 40X or 50X response is returned [[#9](https://github.com/skroutz/ferto/pull/9)]
|
43
|
+
|
44
|
+
## 0.0.4 (2019-04-18)
|
45
|
+
|
46
|
+
### Added
|
47
|
+
|
48
|
+
- Support setting a job download timeout [[#7](https://github.com/skroutz/ferto/pull/7)]
|
49
|
+
- Support setting an HTTP proxy for use in download requests [[#7](https://github.com/skroutz/ferto/pull/7)]
|
50
|
+
- Support setting the User-Agent header in download requests [[#7](https://github.com/skroutz/ferto/pull/7)]
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Ferto
|
2
2
|
|
3
|
-
|
3
|
+

|
4
4
|
[](https://badge.fury.io/rb/ferto)
|
5
5
|
[](http://www.rubydoc.info/github/skroutz/ferto)
|
6
6
|
|
@@ -50,7 +50,8 @@ dl_resp = client.download(aggr_id: 'bucket1',
|
|
50
50
|
mime_type: 'text/html',
|
51
51
|
callback_type: 'http',
|
52
52
|
callback_dst: 'http://myservice.com/downloader_callback',
|
53
|
-
extra: { some_extra_info: 'info' }
|
53
|
+
extra: { some_extra_info: 'info' },
|
54
|
+
request_headers: { "Accept" => "application/html,application/xhtml+html" })
|
54
55
|
```
|
55
56
|
|
56
57
|
In order for a service to consume downloader's result, it *must* accept the HTTP
|
@@ -65,7 +66,8 @@ dl_resp = client.download(aggr_id: 'bucket1',
|
|
65
66
|
mime_type: 'text/html',
|
66
67
|
callback_type: 'kafka',
|
67
68
|
callback_dst: 'my-kafka-topic',
|
68
|
-
extra: { some_extra_info: 'info' }
|
69
|
+
extra: { some_extra_info: 'info' },
|
70
|
+
request_headers: { "Accept" => "application/html,application/xhtml+html" })
|
69
71
|
```
|
70
72
|
|
71
73
|
To consume the downloader's result, you can use your favorite Kafka library and
|
@@ -77,6 +79,10 @@ If the connection with the `downloader` API was successful, the aforementioned
|
|
77
79
|
object. If the client failed to connect, a
|
78
80
|
[`Ferto::ConnectionError`](https://github.com/skroutz/ferto/blob/master/lib/ferto.rb#L18)
|
79
81
|
exception is raised.
|
82
|
+
Also, if the download call results in a response with code
|
83
|
+
either `40X` or `50X` then a [`Ferto::ResponseError`](https://github.com/skroutz/ferto/blob/master/lib/ferto.rb#L21)
|
84
|
+
is raised with the response object encapsulated in the raised exception in order
|
85
|
+
to be further handled by the end user.
|
80
86
|
|
81
87
|
To handle the actual callback message, e.g. from inside a Rails controller:
|
82
88
|
|
@@ -103,6 +109,17 @@ end
|
|
103
109
|
> parameters](https://github.com/skroutz/downloader#endpoints), [callback
|
104
110
|
> payload](https://github.com/skroutz/downloader/tree/kafka-backend#usage)).
|
105
111
|
|
112
|
+
|
113
|
+
#### A Note on User-Agent
|
114
|
+
|
115
|
+
We continue to expose the `user_agent` field as tools like `curl` and `wget` do.
|
116
|
+
Along with that we will follow their paradigm where if both a `user-agent` flag
|
117
|
+
and a `User-Agent` in the request headers are provided then the user-agent in
|
118
|
+
the request headers is preferred.
|
119
|
+
|
120
|
+
Also if the `user_agent` is provided but the request headers do not
|
121
|
+
contain a `User-Agent` key, then the `user_agent` is copied to the headers.
|
122
|
+
|
106
123
|
## Contributing
|
107
124
|
|
108
125
|
Bug reports and pull requests are welcome on GitHub at
|
data/ferto.gemspec
CHANGED
@@ -23,12 +23,11 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
24
24
|
spec.require_paths = ["lib"]
|
25
25
|
|
26
|
-
spec.add_dependency 'curb'
|
26
|
+
spec.add_dependency 'curb'
|
27
27
|
|
28
|
-
spec.add_development_dependency "bundler", "~> 1.13"
|
29
28
|
spec.add_development_dependency "rake", "~> 10.0"
|
30
29
|
spec.add_development_dependency "rspec", "~> 3.0"
|
31
30
|
spec.add_development_dependency "webmock", "~> 3.5"
|
32
31
|
spec.add_development_dependency "factory_bot", "~> 4.10"
|
33
|
-
spec.add_development_dependency "faker"
|
32
|
+
spec.add_development_dependency "faker"
|
34
33
|
end
|
data/lib/ferto/client.rb
CHANGED
@@ -50,6 +50,8 @@ module Ferto
|
|
50
50
|
# @param url [String] the resource to be downloaded
|
51
51
|
# @param callback_type [String]
|
52
52
|
# @param callback_dst [String] the callback destination
|
53
|
+
# @param callback_error_type [String]
|
54
|
+
# @param callback_error_dst [String] the callback destination in case the job fails
|
53
55
|
# @param mime_type [String] (default: "") accepted MIME types for the
|
54
56
|
# resource
|
55
57
|
# @param aggr_id [String] aggregation identifier
|
@@ -62,6 +64,11 @@ module Ferto
|
|
62
64
|
# @param user_agent [String] the User-Agent string to use for
|
63
65
|
# downloading the resource, by default it uses the User-Agent string
|
64
66
|
# set in the downloader's configuration
|
67
|
+
# @param request_headers [Hash] the request headers that will be used
|
68
|
+
# in downloader when performing the actual request in order to fetch
|
69
|
+
# the desired resource
|
70
|
+
# @param subpath [String] the subfolder(s) that the jobs will be stored
|
71
|
+
# under the top level directory of storage backend
|
65
72
|
#
|
66
73
|
# @example
|
67
74
|
# client.download(
|
@@ -73,25 +80,34 @@ module Ferto
|
|
73
80
|
# aggr_proxy: 'http://myproxy.com/',
|
74
81
|
# user_agent: 'my-useragent',
|
75
82
|
# mime_type: "image/jpeg",
|
83
|
+
# request_headers: { "Accept" => "image/*,*/*;q=0.8" },
|
76
84
|
# extra: { something: 'someone' }
|
77
85
|
# )
|
78
86
|
#
|
79
87
|
# @raise [Ferto::ConnectionError] if there was an error scheduling the
|
80
|
-
# job to downloader
|
88
|
+
# job to downloader because a Curl ConnectionFailedError occurred
|
89
|
+
# @raise [Ferto::ResponseError] if a response code of 40X or 50X is received
|
81
90
|
#
|
82
91
|
# @return [Ferto::Response]
|
83
92
|
#
|
84
93
|
# @see https://github.com/skroutz/downloader/#post-download
|
85
94
|
def download(aggr_id:, aggr_limit: @aggr_limit, url:,
|
86
95
|
aggr_proxy: nil, download_timeout: nil, user_agent: nil,
|
87
|
-
callback_url: "", callback_dst: "",
|
88
|
-
|
96
|
+
callback_url: "", callback_dst: "", callback_type: "",
|
97
|
+
callback_error_type: "", callback_error_dst: "",
|
98
|
+
mime_type: "", extra: {},
|
99
|
+
request_headers: {},
|
100
|
+
s3_bucket: nil, s3_region: nil, subpath: nil)
|
89
101
|
uri = URI::HTTP.build(
|
90
102
|
scheme: scheme, host: host, port: port, path: path
|
91
103
|
)
|
92
104
|
body = build_body(
|
93
|
-
aggr_id, aggr_limit, url,
|
94
|
-
|
105
|
+
aggr_id, aggr_limit, url,
|
106
|
+
callback_url, callback_type, callback_dst,
|
107
|
+
callback_error_type, callback_error_dst,
|
108
|
+
aggr_proxy, download_timeout, user_agent,
|
109
|
+
mime_type, extra, request_headers,
|
110
|
+
s3_bucket, s3_region, subpath
|
95
111
|
)
|
96
112
|
# Curl.post reuses the same handler
|
97
113
|
begin
|
@@ -100,6 +116,14 @@ module Ferto
|
|
100
116
|
handle.connect_timeout = connect_timeout
|
101
117
|
handle.timeout = timeout
|
102
118
|
end
|
119
|
+
|
120
|
+
case res.response_code
|
121
|
+
when 400..599
|
122
|
+
error_msg = ("An error occured during the download call. " \
|
123
|
+
"Received a #{res.response_code} response code and body " \
|
124
|
+
"#{res.body_str}")
|
125
|
+
raise Ferto::ResponseError.new(error_msg, res)
|
126
|
+
end
|
103
127
|
rescue Curl::Err::ConnectionFailedError => e
|
104
128
|
raise Ferto::ConnectionError.new(e)
|
105
129
|
end
|
@@ -117,14 +141,27 @@ module Ferto
|
|
117
141
|
end
|
118
142
|
|
119
143
|
def build_body(aggr_id, aggr_limit, url, callback_url, callback_type,
|
120
|
-
callback_dst,
|
121
|
-
|
144
|
+
callback_dst, callback_error_type, callback_error_dst,
|
145
|
+
aggr_proxy, download_timeout, user_agent,
|
146
|
+
mime_type, extra, request_headers,
|
147
|
+
s3_bucket, s3_region, subpath)
|
122
148
|
body = {
|
123
149
|
aggr_id: aggr_id,
|
124
150
|
aggr_limit: aggr_limit,
|
125
151
|
url: url
|
126
152
|
}
|
127
153
|
|
154
|
+
if s3_bucket && s3_region
|
155
|
+
body[:s3_bucket] = s3_bucket
|
156
|
+
body[:s3_region] = s3_region
|
157
|
+
end
|
158
|
+
|
159
|
+
if !s3_bucket && s3_region
|
160
|
+
raise ArgumentError, "s3_region provided without an s3_bucket"
|
161
|
+
elsif !s3_region && s3_bucket
|
162
|
+
raise ArgumentError, "s3_bucket provided without an s3_region"
|
163
|
+
end
|
164
|
+
|
128
165
|
if callback_url.empty?
|
129
166
|
body[:callback_type] = callback_type
|
130
167
|
body[:callback_dst] = callback_dst
|
@@ -132,10 +169,14 @@ module Ferto
|
|
132
169
|
body[:callback_url] = callback_url
|
133
170
|
end
|
134
171
|
|
172
|
+
body[:callback_error_type] = callback_error_type unless callback_error_type.to_s.empty?
|
173
|
+
body[:callback_error_dst] = callback_error_dst unless callback_error_dst.to_s.empty?
|
174
|
+
|
135
175
|
if !mime_type.empty?
|
136
176
|
body[:mime_type] = mime_type
|
137
177
|
end
|
138
178
|
|
179
|
+
body[:subpath] = subpath if subpath
|
139
180
|
body[:aggr_proxy] = aggr_proxy if aggr_proxy
|
140
181
|
body[:download_timeout] = download_timeout if download_timeout
|
141
182
|
body[:user_agent] = user_agent if user_agent
|
@@ -144,6 +185,19 @@ module Ferto
|
|
144
185
|
body[:extra] = extra.is_a?(Hash) ? extra.to_json : extra.to_s
|
145
186
|
end
|
146
187
|
|
188
|
+
# We will continue to expose the user_agent field just like tools
|
189
|
+
# like curl and wget do. Along with that we will follow their paradigm
|
190
|
+
# where if both a user-agent flag and a `User-Agent` in the request headers
|
191
|
+
# are provided then the user agent in the request headers is preferred.
|
192
|
+
#
|
193
|
+
# Also if the `user_agent` is provided but the request headers do not
|
194
|
+
# contain a `User-Agent` key, then the `user_agent` is copied to the headers.
|
195
|
+
if user_agent && !request_headers.key?("User-Agent")
|
196
|
+
request_headers["User-Agent"] = user_agent
|
197
|
+
end
|
198
|
+
|
199
|
+
body[:request_headers] = request_headers
|
200
|
+
|
147
201
|
body
|
148
202
|
end
|
149
203
|
end
|
data/lib/ferto/version.rb
CHANGED
data/lib/ferto.rb
CHANGED
@@ -16,4 +16,25 @@ module Ferto
|
|
16
16
|
}.freeze
|
17
17
|
|
18
18
|
class ConnectionError < StandardError; end
|
19
|
+
|
20
|
+
# A custom error class for 40X and 50X responses
|
21
|
+
class ResponseError < StandardError
|
22
|
+
|
23
|
+
# Initialize a Ferto::ResponseError
|
24
|
+
#
|
25
|
+
# @param [String] err A string describing the error that occurred
|
26
|
+
# @param [Curl::Easy | nil] response a Curl::Easy object
|
27
|
+
# that represents the response returned by the download method.
|
28
|
+
# Default: nil
|
29
|
+
def initialize(err, response=nil)
|
30
|
+
super(err)
|
31
|
+
@response = response
|
32
|
+
end
|
33
|
+
|
34
|
+
# response is set during the download when a
|
35
|
+
# 40X or 50X response is returned, so that it
|
36
|
+
# can be used for debugging; it is also
|
37
|
+
# included for reasons of completeness.
|
38
|
+
attr_reader :response
|
39
|
+
end
|
19
40
|
end
|
metadata
CHANGED
@@ -1,43 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ferto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aggelos Avgerinos
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-06-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: curb
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0
|
19
|
+
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: bundler
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '1.13'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '1.13'
|
26
|
+
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: rake
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,16 +84,16 @@ dependencies:
|
|
98
84
|
name: faker
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|
100
86
|
requirements:
|
101
|
-
- - "
|
87
|
+
- - ">="
|
102
88
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
89
|
+
version: '0'
|
104
90
|
type: :development
|
105
91
|
prerelease: false
|
106
92
|
version_requirements: !ruby/object:Gem::Requirement
|
107
93
|
requirements:
|
108
|
-
- - "
|
94
|
+
- - ">="
|
109
95
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
96
|
+
version: '0'
|
111
97
|
description: Ruby API client for Downloader service
|
112
98
|
email:
|
113
99
|
- avgerinos@skroutz.gr
|
@@ -115,8 +101,9 @@ executables: []
|
|
115
101
|
extensions: []
|
116
102
|
extra_rdoc_files: []
|
117
103
|
files:
|
104
|
+
- ".github/workflows/CI.yml"
|
118
105
|
- ".gitignore"
|
119
|
-
-
|
106
|
+
- CHANGELOG.md
|
120
107
|
- Gemfile
|
121
108
|
- LICENSE.txt
|
122
109
|
- README.md
|
@@ -134,7 +121,7 @@ homepage: https://github.com/skroutz/ferto
|
|
134
121
|
licenses:
|
135
122
|
- GPL-3.0
|
136
123
|
metadata: {}
|
137
|
-
post_install_message:
|
124
|
+
post_install_message:
|
138
125
|
rdoc_options: []
|
139
126
|
require_paths:
|
140
127
|
- lib
|
@@ -149,9 +136,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
149
136
|
- !ruby/object:Gem::Version
|
150
137
|
version: '0'
|
151
138
|
requirements: []
|
152
|
-
|
153
|
-
|
154
|
-
signing_key:
|
139
|
+
rubygems_version: 3.4.14
|
140
|
+
signing_key:
|
155
141
|
specification_version: 4
|
156
142
|
summary: Ruby API client for Downloader
|
157
143
|
test_files: []
|