embulk-input-http 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -5
- data/build.gradle +1 -1
- data/classpath/embulk-input-http-0.0.5.jar +0 -0
- data/src/main/java/org/embulk/input/HttpInputPlugin.java +46 -5
- data/src/main/java/org/embulk/input/RetryHandler.java +44 -0
- metadata +4 -3
- data/classpath/embulk-input-http-0.0.4.jar +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c03f483d0ec3c8838d1538940f9a206c0e3b58f8
|
|
4
|
+
data.tar.gz: 6e1e2d691f5d5acd099f12edb1cf6a3986a190ab
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c32fe0519c6d8444291cd7eb552256ae828106c160616490510d780fd1583cb98c482cc7351950dcc733d8e8c7f38aa378619c26a4b51317adffd086abb371c8
|
|
7
|
+
data.tar.gz: 518387033e044784f908c95e6af973882df9ab68da75066c5eb10b9b07151094c370b44ad9cbb878e3dd92efd907e18ba4ac9b635749bce7be6cf8964234db63
|
data/README.md
CHANGED
|
@@ -4,10 +4,6 @@ Input HTTP plugin for [Embulk](https://github.com/embulk/embulk).
|
|
|
4
4
|
Fetch data via HTTP.
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
### Big changes in v0.0.4
|
|
8
|
-
|
|
9
|
-
From `v0.0.4`, the *iterate* section is removed; embulk-input-http must be used with a **parser plugin**.
|
|
10
|
-
|
|
11
7
|
## Installation
|
|
12
8
|
|
|
13
9
|
Run this command with your embulk binary.
|
|
@@ -40,7 +36,9 @@ in:
|
|
|
40
36
|
- **charset**: Charset to specify request header (optional, utf-8 is used by default)
|
|
41
37
|
- **open_timeout**: timeout msec to open connection (optional, 2000 is used by default)
|
|
42
38
|
- **read_timeout**: timeout msec to read content via http (optional, 10000 is used by default)
|
|
43
|
-
|
|
39
|
+
- **max_retries**: maximum number of retries when a request fails (optional, 5 is used by default)
|
|
40
|
+
- **retry_interval**: interval in msec to wait before each retry (optional, 10000 is used by default)
|
|
41
|
+
- **sleep\_before\_request**: time in msec to wait before each request (optional, 0 is used by default)
|
|
44
42
|
|
|
45
43
|
### Brace expansion style in params
|
|
46
44
|
|
data/build.gradle
CHANGED
|
Binary file
|
|
@@ -5,8 +5,12 @@ import com.google.common.base.Throwables;
|
|
|
5
5
|
import org.apache.http.Header;
|
|
6
6
|
import org.apache.http.HttpException;
|
|
7
7
|
import org.apache.http.HttpResponse;
|
|
8
|
+
import org.apache.http.conn.HttpClientConnectionManager;
|
|
9
|
+
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
|
|
10
|
+
import org.apache.http.util.EntityUtils;
|
|
8
11
|
import org.apache.http.NameValuePair;
|
|
9
12
|
import org.apache.http.client.HttpClient;
|
|
13
|
+
import org.apache.http.client.HttpRequestRetryHandler;
|
|
10
14
|
import org.apache.http.client.config.RequestConfig;
|
|
11
15
|
import org.apache.http.client.entity.UrlEncodedFormEntity;
|
|
12
16
|
import org.apache.http.client.methods.HttpGet;
|
|
@@ -27,6 +31,7 @@ import org.slf4j.Logger;
|
|
|
27
31
|
|
|
28
32
|
import java.io.IOException;
|
|
29
33
|
import java.io.InputStream;
|
|
34
|
+
import java.io.ByteArrayInputStream;
|
|
30
35
|
import java.io.UnsupportedEncodingException;
|
|
31
36
|
import java.net.URISyntaxException;
|
|
32
37
|
import java.util.ArrayList;
|
|
@@ -60,6 +65,19 @@ public class HttpInputPlugin implements FileInputPlugin {
|
|
|
60
65
|
@ConfigDefault("10000")
|
|
61
66
|
public int getReadTimeout();
|
|
62
67
|
|
|
68
|
+
@Config("max_retries")
|
|
69
|
+
@ConfigDefault("5")
|
|
70
|
+
public int getMaxRetries();
|
|
71
|
+
|
|
72
|
+
@Config("retry_interval")
|
|
73
|
+
@ConfigDefault("10000")
|
|
74
|
+
public int getRetryInterval();
|
|
75
|
+
|
|
76
|
+
@Config("sleep_before_request")
|
|
77
|
+
@ConfigDefault("0")
|
|
78
|
+
public int getSleepBeforeRequest();
|
|
79
|
+
public void setSleepBeforeRequest(int sleepBeforeRequest);
|
|
80
|
+
|
|
63
81
|
@Config("params")
|
|
64
82
|
@ConfigDefault("null")
|
|
65
83
|
public Optional<ParamsConfig> getParams();
|
|
@@ -92,6 +110,10 @@ public class HttpInputPlugin implements FileInputPlugin {
|
|
|
92
110
|
task.setQueries(new ArrayList<ParamsConfig>());
|
|
93
111
|
}
|
|
94
112
|
|
|
113
|
+
if (numOfThreads == 1) {
|
|
114
|
+
task.setSleepBeforeRequest(0);
|
|
115
|
+
}
|
|
116
|
+
|
|
95
117
|
switch (task.getMethod().toUpperCase()) {
|
|
96
118
|
case "GET":
|
|
97
119
|
task.setHttpMethod(HttpMethod.GET);
|
|
@@ -130,16 +152,35 @@ public class HttpInputPlugin implements FileInputPlugin {
|
|
|
130
152
|
} catch (URISyntaxException | UnsupportedEncodingException e) {
|
|
131
153
|
throw Throwables.propagate(e);
|
|
132
154
|
}
|
|
133
|
-
logger.info(String.format("%s \"%s\"", task.getMethod().toUpperCase(),
|
|
134
|
-
request.getURI().toString()));
|
|
135
155
|
|
|
136
|
-
|
|
156
|
+
HttpClientBuilder builder = HttpClientBuilder.create()
|
|
137
157
|
.setDefaultRequestConfig(makeRequestConfig(task))
|
|
138
|
-
.setDefaultHeaders(makeHeaders(task))
|
|
139
|
-
|
|
158
|
+
.setDefaultHeaders(makeHeaders(task));
|
|
159
|
+
|
|
160
|
+
if (task.getMaxRetries() > 0) {
|
|
161
|
+
final int retry = task.getMaxRetries();
|
|
162
|
+
final int interval = task.getRetryInterval();
|
|
163
|
+
HttpRequestRetryHandler retryHandler = new RetryHandler(retry, interval);
|
|
164
|
+
builder.setRetryHandler(retryHandler);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
HttpClient client = builder.build();
|
|
168
|
+
|
|
169
|
+
if (task.getSleepBeforeRequest() > 0) {
|
|
170
|
+
try {
|
|
171
|
+
logger.info(String.format("Waiting %d msec ...", task.getSleepBeforeRequest()));
|
|
172
|
+
Thread.sleep(task.getSleepBeforeRequest());
|
|
173
|
+
} catch (InterruptedException e) {
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
logger.info(String.format("%s \"%s\"", task.getMethod().toUpperCase(),
|
|
178
|
+
request.getURI().toString()));
|
|
140
179
|
try {
|
|
141
180
|
HttpResponse response = client.execute(request);
|
|
142
181
|
statusIsOkOrThrow(response);
|
|
182
|
+
//final String body = EntityUtils.toString(response.getEntity());
|
|
183
|
+
//InputStream stream = new ByteArrayInputStream(body.getBytes());
|
|
143
184
|
InputStream stream = response.getEntity().getContent();
|
|
144
185
|
PluginFileInput input = new PluginFileInput(task, stream);
|
|
145
186
|
stream = null;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
package org.embulk.input;
|
|
2
|
+
|
|
3
|
+
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
|
|
4
|
+
import org.apache.http.protocol.HttpContext;
|
|
5
|
+
import org.embulk.spi.Exec;
|
|
6
|
+
import org.slf4j.Logger;
|
|
7
|
+
|
|
8
|
+
import javax.net.ssl.SSLException;
|
|
9
|
+
import java.io.IOException;
|
|
10
|
+
import java.net.ConnectException;
|
|
11
|
+
import java.net.UnknownHostException;
|
|
12
|
+
import java.util.Arrays;
|
|
13
|
+
|
|
14
|
+
public class RetryHandler extends DefaultHttpRequestRetryHandler
|
|
15
|
+
{
|
|
16
|
+
|
|
17
|
+
private final Logger logger = Exec.getLogger(getClass());
|
|
18
|
+
|
|
19
|
+
private int interval = 0;
|
|
20
|
+
|
|
21
|
+
public RetryHandler(int retry, int interval)
|
|
22
|
+
{
|
|
23
|
+
super(retry, true, Arrays.asList(
|
|
24
|
+
UnknownHostException.class,
|
|
25
|
+
ConnectException.class,
|
|
26
|
+
SSLException.class));
|
|
27
|
+
this.interval = interval;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
@Override
|
|
31
|
+
public boolean retryRequest(final IOException exception,
|
|
32
|
+
final int executionCount, final HttpContext context)
|
|
33
|
+
{
|
|
34
|
+
final boolean isRertriable = super.retryRequest(exception, executionCount, context);
|
|
35
|
+
if (isRertriable) {
|
|
36
|
+
try {
|
|
37
|
+
logger.info(String.format("Sleep %d msec before retry", interval));
|
|
38
|
+
Thread.sleep(interval);
|
|
39
|
+
} catch (InterruptedException e) {}
|
|
40
|
+
}
|
|
41
|
+
return isRertriable;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
}
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: embulk-input-http
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.4
|
|
4
|
+
version: 0.0.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Takuma kanari
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-
|
|
11
|
+
date: 2015-04-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -58,8 +58,9 @@ files:
|
|
|
58
58
|
- src/main/java/org/embulk/input/HttpInputPlugin.java
|
|
59
59
|
- src/main/java/org/embulk/input/ParamsConfig.java
|
|
60
60
|
- src/main/java/org/embulk/input/QueryConfig.java
|
|
61
|
+
- src/main/java/org/embulk/input/RetryHandler.java
|
|
61
62
|
- src/test/java/org/embulk/input/TestHttpInputPlugin.java
|
|
62
|
-
- classpath/embulk-input-http-0.0.4.jar
|
|
63
|
+
- classpath/embulk-input-http-0.0.5.jar
|
|
63
64
|
- classpath/httpclient-4.4.jar
|
|
64
65
|
- classpath/commons-logging-1.2.jar
|
|
65
66
|
- classpath/httpcore-4.4.jar
|
|
Binary file
|