embulk-input-randomj 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +16 -17
- data/build.gradle +1 -1
- data/example/README.md +9 -0
- data/example/config.yml +1 -1
- data/example/config.yml.liquid +18 -0
- data/src/main/java/org/embulk/input/randomj/JsonColumnVisitor.java +0 -1
- data/src/main/java/org/embulk/input/randomj/RandomjColumnVisitor.java +73 -10
- data/src/main/java/org/embulk/input/randomj/RandomjInputPlugin.java +3 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 312c2e6df7d5ebe9d43775931f076d6d63502615
|
4
|
+
data.tar.gz: 68749dee15b28f53a187bac33dd8c5fa63380641
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8671f2b06c0eb6e36fb01fe31e96b8b9a8f6078100e3c749d5c7e284e923ecce8d5431a173bf271b00992def93a469d9f283377f410507906d04fc7b1d1f65d1
|
7
|
+
data.tar.gz: 10ae95c775793701dd4e88eb6f73e4173f9bfb16f3afb19a8dc321ef6f2fd9fa0010c04fafd889e5ecc74c5d03ec68aaafcf57dafb147e2afbf831b7157d86cb
|
data/.travis.yml
CHANGED
@@ -11,4 +11,4 @@ script:
|
|
11
11
|
- ./gradlew --info checkstyle
|
12
12
|
- ./gradlew --info check
|
13
13
|
|
14
|
-
after_success: ./gradlew sonarqube -Dsonar.organization=yuokada-github -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=${SONAR_TOKEN}
|
14
|
+
# after_success: ./gradlew sonarqube -Dsonar.organization=yuokada-github -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=${SONAR_TOKEN}
|
data/README.md
CHANGED
@@ -45,23 +45,14 @@ in:
|
|
45
45
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
46
46
|
```
|
47
47
|
|
48
|
-
Add `length`, `max_value`, `min_value` option (from 0.3.0)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
threads: 1
|
54
|
-
primary_key: myid
|
55
|
-
schema:
|
56
|
-
- {name: myid, type: long}
|
57
|
-
- {name: named, type: string, length: 12}
|
58
|
-
- {name: price, type: long, max_value: 1080, min_value: 100}
|
59
|
-
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
60
|
-
```
|
48
|
+
- Add `length`, `max_value`, `min_value` option (from 0.3.0)
|
49
|
+
- Add `null_rate` option (from 0.4.0)
|
50
|
+
This configuration inserts `null` into the `price` field with a probability of `8` in 10000.
|
51
|
+
- Support json type (from 0.5.0)
|
52
|
+
- Support `start_date` & `end_date` key in **Timestamp** field.
|
61
53
|
|
62
|
-
|
63
|
-
|
64
|
-
This configuration inserts `null` into the `price` field with a probability of `8` in 10000.
|
54
|
+
- Ex1. `{name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331, end_date: 20180430}`
|
55
|
+
- Ex2. `{name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331}`
|
65
56
|
|
66
57
|
```yaml
|
67
58
|
in:
|
@@ -74,10 +65,10 @@ in:
|
|
74
65
|
- {name: named, type: string, length: 12}
|
75
66
|
- {name: price, type: long, max_value: 1080, min_value: 100, null_rate: 8}
|
76
67
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
68
|
+
- {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
|
77
69
|
```
|
78
70
|
|
79
71
|
|
80
|
-
|
81
72
|
## Usage
|
82
73
|
|
83
74
|
### Example1
|
@@ -202,6 +193,14 @@ $ embulk run -I lib config/example.yml
|
|
202
193
|
|
203
194
|
## ChangeLog
|
204
195
|
|
196
|
+
### 0.5.1
|
197
|
+
|
198
|
+
- Support start_date & end_date key with Timestamp field.
|
199
|
+
|
200
|
+
### 0.5.0
|
201
|
+
|
202
|
+
- Support `json` datatype
|
203
|
+
|
205
204
|
### v0.4
|
206
205
|
|
207
206
|
- Support null_rate parameter
|
data/build.gradle
CHANGED
data/example/README.md
ADDED
data/example/config.yml
CHANGED
@@ -11,7 +11,7 @@ in:
|
|
11
11
|
- {name: x_flag, type: boolean}
|
12
12
|
- {name: rate, type: double, max_value: 100, min_value: -100}
|
13
13
|
- {name: score, type: long, max_value: 255, min_value: 100}
|
14
|
-
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
14
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20171221, end_date: 20171231}
|
15
15
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
16
16
|
- {name: json_obj, type: json, schema: '[{"name": "obj1", "type": "object", "items": [{"name": "key1", "type": "string"}, {"name": "key2", "type": "number"} ] }]' }
|
17
17
|
- {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
|
@@ -0,0 +1,18 @@
|
|
1
|
+
in:
|
2
|
+
type: randomj
|
3
|
+
rows: {{ env.ROWS }}
|
4
|
+
threads: 1
|
5
|
+
# default_timezone: Asia/Tokyo
|
6
|
+
primary_key: myid
|
7
|
+
schema:
|
8
|
+
- {name: myid, type: long}
|
9
|
+
- {name: named, type: string}
|
10
|
+
- {name: named_s, type: string, length: 8}
|
11
|
+
- {name: x_flag, type: boolean}
|
12
|
+
- {name: rate, type: double, max_value: 100, min_value: -100}
|
13
|
+
- {name: score, type: long, max_value: 255, min_value: 100}
|
14
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331}
|
15
|
+
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
16
|
+
|
17
|
+
out:
|
18
|
+
type: stdout
|
@@ -3,6 +3,8 @@ package org.embulk.input.randomj;
|
|
3
3
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
4
4
|
import com.fasterxml.jackson.databind.JsonNode;
|
5
5
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
6
|
+
import com.google.common.cache.Cache;
|
7
|
+
import com.google.common.cache.CacheBuilder;
|
6
8
|
import org.apache.commons.text.CharacterPredicates;
|
7
9
|
import org.apache.commons.text.RandomStringGenerator;
|
8
10
|
import org.embulk.spi.Column;
|
@@ -11,8 +13,13 @@ import org.embulk.spi.PageBuilder;
|
|
11
13
|
import org.embulk.spi.json.JsonParser;
|
12
14
|
import org.embulk.spi.time.Timestamp;
|
13
15
|
|
16
|
+
import java.time.Duration;
|
17
|
+
import java.time.LocalDate;
|
14
18
|
import java.time.LocalDateTime;
|
15
19
|
import java.time.ZoneId;
|
20
|
+
import java.time.ZonedDateTime;
|
21
|
+
import java.time.format.DateTimeFormatter;
|
22
|
+
import java.time.format.ResolverStyle;
|
16
23
|
import java.util.HashMap;
|
17
24
|
import java.util.List;
|
18
25
|
import java.util.Map;
|
@@ -35,6 +42,19 @@ public class RandomjColumnVisitor
|
|
35
42
|
private final JsonParser jsonParser = new JsonParser();
|
36
43
|
private final ObjectMapper mapper = new ObjectMapper();
|
37
44
|
|
45
|
+
private static final DateTimeFormatter formatter = DateTimeFormatter
|
46
|
+
.ofPattern("yyyyMMdd")
|
47
|
+
.withResolverStyle(ResolverStyle.LENIENT);
|
48
|
+
private static final long cacheSize = 64;
|
49
|
+
private static final Cache<String, ZonedDateTime> zonedDateTimeCache = CacheBuilder
|
50
|
+
.newBuilder()
|
51
|
+
.maximumSize(cacheSize)
|
52
|
+
.build();
|
53
|
+
private static final Cache<String, Long> durationCache = CacheBuilder
|
54
|
+
.newBuilder()
|
55
|
+
.maximumSize(cacheSize)
|
56
|
+
.build();
|
57
|
+
|
38
58
|
private static final String NULL_RATE = "null_rate";
|
39
59
|
|
40
60
|
public RandomjColumnVisitor(PageBuilder pageBuilder, PluginTask task, Integer row,
|
@@ -149,15 +169,56 @@ public class RandomjColumnVisitor
|
|
149
169
|
pageBuilder.setNull(column);
|
150
170
|
}
|
151
171
|
else {
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
172
|
+
ZonedDateTime start = getZonedDatetime(column, "start_date");
|
173
|
+
ZonedDateTime end = getZonedDatetime(column, "end_date");
|
174
|
+
long duration = getDuration(column, start, end);
|
175
|
+
if (duration != 0) {
|
176
|
+
int plus = rnd.nextInt((int) duration);
|
177
|
+
Timestamp timestamp = Timestamp.ofEpochSecond(
|
178
|
+
start.plusSeconds(plus).toEpochSecond()
|
179
|
+
);
|
180
|
+
pageBuilder.setTimestamp(column, timestamp);
|
181
|
+
}
|
182
|
+
else {
|
183
|
+
final double randd = Math.random();
|
184
|
+
LocalDateTime randomDate = LocalDateTime.now()
|
185
|
+
.plusDays((long) (randd * 100))
|
186
|
+
.plusSeconds((long) (randd * 1000000));
|
187
|
+
Timestamp timestamp = Timestamp.ofEpochSecond(
|
188
|
+
randomDate.atZone(zoneId).toEpochSecond()
|
189
|
+
);
|
190
|
+
pageBuilder.setTimestamp(column, timestamp);
|
191
|
+
}
|
192
|
+
}
|
193
|
+
}
|
194
|
+
|
195
|
+
private ZonedDateTime getZonedDatetime(Column column, String dateString)
|
196
|
+
{
|
197
|
+
String cacheKey = String.format("%s::%s", column.getName(), dateString);
|
198
|
+
ZonedDateTime start = zonedDateTimeCache.getIfPresent(cacheKey);
|
199
|
+
if (start == null) {
|
200
|
+
Integer startDate = columnOptions.get(column).getOrDefault(dateString, null);
|
201
|
+
if (startDate == null) {
|
202
|
+
start = LocalDate.now().atStartOfDay(zoneId);
|
203
|
+
}
|
204
|
+
else {
|
205
|
+
start = LocalDate.parse(startDate.toString(), formatter)
|
206
|
+
.atStartOfDay(zoneId);
|
207
|
+
}
|
208
|
+
zonedDateTimeCache.put(cacheKey, start);
|
209
|
+
}
|
210
|
+
|
211
|
+
return start;
|
212
|
+
}
|
213
|
+
|
214
|
+
private long getDuration(Column column, ZonedDateTime start, ZonedDateTime end)
|
215
|
+
{
|
216
|
+
Long duration = durationCache.getIfPresent(column.getName());
|
217
|
+
if (duration == null) {
|
218
|
+
duration = Duration.between(start, end).getSeconds();
|
219
|
+
durationCache.put(column.getName(), duration);
|
160
220
|
}
|
221
|
+
return duration;
|
161
222
|
}
|
162
223
|
|
163
224
|
@Override
|
@@ -181,7 +242,8 @@ public class RandomjColumnVisitor
|
|
181
242
|
|
182
243
|
private void visit(JsonNode node, JsonColumnVisitor visitor)
|
183
244
|
{
|
184
|
-
SupportedJsonObject object = SupportedJsonObject
|
245
|
+
SupportedJsonObject object = SupportedJsonObject
|
246
|
+
.valueOf(node.get("type").asText().toUpperCase());
|
185
247
|
if (object.equals(SupportedJsonObject.BOOLEAN)) {
|
186
248
|
visitor.booleanNode(node);
|
187
249
|
}
|
@@ -201,7 +263,8 @@ public class RandomjColumnVisitor
|
|
201
263
|
visitor.objectNode(node);
|
202
264
|
}
|
203
265
|
else {
|
204
|
-
throw new UnsupportedOperationException(
|
266
|
+
throw new UnsupportedOperationException(
|
267
|
+
"randomj input plugin does not support json-data type");
|
205
268
|
}
|
206
269
|
}
|
207
270
|
}
|
@@ -94,6 +94,9 @@ public class RandomjInputPlugin
|
|
94
94
|
miniMap.put("null_rate", c.getOption().get(Integer.class, "null_rate", 0));
|
95
95
|
miniMap.put("max_value", c.getOption().get(Integer.class, "max_value", null));
|
96
96
|
miniMap.put("min_value", c.getOption().get(Integer.class, "min_value", null));
|
97
|
+
// For datetime
|
98
|
+
miniMap.put("start_date", c.getOption().get(Integer.class, "start_date", null));
|
99
|
+
miniMap.put("end_date", c.getOption().get(Integer.class, "end_date", null));
|
97
100
|
lengthMap.put(column, miniMap);
|
98
101
|
}
|
99
102
|
return lengthMap;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-randomj
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-05-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,7 +52,9 @@ files:
|
|
52
52
|
- build.gradle
|
53
53
|
- config/checkstyle/checkstyle.xml
|
54
54
|
- config/checkstyle/default.xml
|
55
|
+
- example/README.md
|
55
56
|
- example/config.yml
|
57
|
+
- example/config.yml.liquid
|
56
58
|
- gradle/wrapper/gradle-wrapper.jar
|
57
59
|
- gradle/wrapper/gradle-wrapper.properties
|
58
60
|
- gradlew
|
@@ -65,7 +67,7 @@ files:
|
|
65
67
|
- src/test/java/org/embulk/input/randomj/TestRandomjInputPlugin.java
|
66
68
|
- classpath/commons-lang3-3.5.jar
|
67
69
|
- classpath/commons-text-1.1.jar
|
68
|
-
- classpath/embulk-input-randomj-0.5.0.jar
|
70
|
+
- classpath/embulk-input-randomj-0.5.1.jar
|
69
71
|
homepage: https://github.com/yuokada/embulk-input-randomj
|
70
72
|
licenses:
|
71
73
|
- MIT
|