embulk-input-randomj 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +16 -17
- data/build.gradle +1 -1
- data/example/README.md +9 -0
- data/example/config.yml +1 -1
- data/example/config.yml.liquid +18 -0
- data/src/main/java/org/embulk/input/randomj/JsonColumnVisitor.java +0 -1
- data/src/main/java/org/embulk/input/randomj/RandomjColumnVisitor.java +73 -10
- data/src/main/java/org/embulk/input/randomj/RandomjInputPlugin.java +3 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 312c2e6df7d5ebe9d43775931f076d6d63502615
|
4
|
+
data.tar.gz: 68749dee15b28f53a187bac33dd8c5fa63380641
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8671f2b06c0eb6e36fb01fe31e96b8b9a8f6078100e3c749d5c7e284e923ecce8d5431a173bf271b00992def93a469d9f283377f410507906d04fc7b1d1f65d1
|
7
|
+
data.tar.gz: 10ae95c775793701dd4e88eb6f73e4173f9bfb16f3afb19a8dc321ef6f2fd9fa0010c04fafd889e5ecc74c5d03ec68aaafcf57dafb147e2afbf831b7157d86cb
|
data/.travis.yml
CHANGED
@@ -11,4 +11,4 @@ script:
|
|
11
11
|
- ./gradlew --info checkstyle
|
12
12
|
- ./gradlew --info check
|
13
13
|
|
14
|
-
after_success: ./gradlew sonarqube -Dsonar.organization=yuokada-github -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=${SONAR_TOKEN}
|
14
|
+
# after_success: ./gradlew sonarqube -Dsonar.organization=yuokada-github -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=${SONAR_TOKEN}
|
data/README.md
CHANGED
@@ -45,23 +45,14 @@ in:
|
|
45
45
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
46
46
|
```
|
47
47
|
|
48
|
-
Add `length`, `max_value`, `min_value` option (from 0.3.0)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
threads: 1
|
54
|
-
primary_key: myid
|
55
|
-
schema:
|
56
|
-
- {name: myid, type: long}
|
57
|
-
- {name: named, type: string, length: 12}
|
58
|
-
- {name: price, type: long, max_value: 1080, min_value: 100}
|
59
|
-
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
60
|
-
```
|
48
|
+
- Add `length`, `max_value`, `min_value` option (from 0.3.0)
|
49
|
+
- Add `null_rate` option (from 0.4.0)
|
50
|
+
This configuration inserts `null` into the `price` field with a probability of `8` in 10000.
|
51
|
+
- Support json type (from 0.5.0)
|
52
|
+
- Support `start_date` & `end_date` key in **Timestamp** field.
|
61
53
|
|
62
|
-
|
63
|
-
|
64
|
-
This configuration is that inserted `null` into `price` filed with a probability `8` of 10000.
|
54
|
+
- Ex1. `{name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331, end_date: 20180430}`
|
55
|
+
- Ex2. `{name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331}`
|
65
56
|
|
66
57
|
```yaml
|
67
58
|
in:
|
@@ -74,10 +65,10 @@ in:
|
|
74
65
|
- {name: named, type: string, length: 12}
|
75
66
|
- {name: price, type: long, max_value: 1080, min_value: 100, null_rate: 8}
|
76
67
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
68
|
+
- {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
|
77
69
|
```
|
78
70
|
|
79
71
|
|
80
|
-
|
81
72
|
## Usage
|
82
73
|
|
83
74
|
### Example1
|
@@ -202,6 +193,14 @@ $ embulk run -I lib config/example.yml
|
|
202
193
|
|
203
194
|
## ChangeLog
|
204
195
|
|
196
|
+
### 0.5.1
|
197
|
+
|
198
|
+
- Support start_date & end_date key with Timestamp field.
|
199
|
+
|
200
|
+
### 0.5.0
|
201
|
+
|
202
|
+
- Support `json` datatype
|
203
|
+
|
205
204
|
### v0.4
|
206
205
|
|
207
206
|
- Support null_rate parameter
|
data/build.gradle
CHANGED
data/example/README.md
ADDED
data/example/config.yml
CHANGED
@@ -11,7 +11,7 @@ in:
|
|
11
11
|
- {name: x_flag, type: boolean}
|
12
12
|
- {name: rate, type: double, max_value: 100, min_value: -100}
|
13
13
|
- {name: score, type: long, max_value: 255, min_value: 100}
|
14
|
-
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
14
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20171221, end_date: 20171231}
|
15
15
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
16
16
|
- {name: json_obj, type: json, schema: '[{"name": "obj1", "type": "object", "items": [{"name": "key1", "type": "string"}, {"name": "key2", "type": "number"} ] }]' }
|
17
17
|
- {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
|
@@ -0,0 +1,18 @@
|
|
1
|
+
in:
|
2
|
+
type: randomj
|
3
|
+
rows: {{ env.ROWS }}
|
4
|
+
threads: 1
|
5
|
+
# default_timezone: Asia/Tokyo
|
6
|
+
primary_key: myid
|
7
|
+
schema:
|
8
|
+
- {name: myid, type: long}
|
9
|
+
- {name: named, type: string}
|
10
|
+
- {name: named_s, type: string, length: 8}
|
11
|
+
- {name: x_flag, type: boolean}
|
12
|
+
- {name: rate, type: double, max_value: 100, min_value: -100}
|
13
|
+
- {name: score, type: long, max_value: 255, min_value: 100}
|
14
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331}
|
15
|
+
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
16
|
+
|
17
|
+
out:
|
18
|
+
type: stdout
|
@@ -3,6 +3,8 @@ package org.embulk.input.randomj;
|
|
3
3
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
4
4
|
import com.fasterxml.jackson.databind.JsonNode;
|
5
5
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
6
|
+
import com.google.common.cache.Cache;
|
7
|
+
import com.google.common.cache.CacheBuilder;
|
6
8
|
import org.apache.commons.text.CharacterPredicates;
|
7
9
|
import org.apache.commons.text.RandomStringGenerator;
|
8
10
|
import org.embulk.spi.Column;
|
@@ -11,8 +13,13 @@ import org.embulk.spi.PageBuilder;
|
|
11
13
|
import org.embulk.spi.json.JsonParser;
|
12
14
|
import org.embulk.spi.time.Timestamp;
|
13
15
|
|
16
|
+
import java.time.Duration;
|
17
|
+
import java.time.LocalDate;
|
14
18
|
import java.time.LocalDateTime;
|
15
19
|
import java.time.ZoneId;
|
20
|
+
import java.time.ZonedDateTime;
|
21
|
+
import java.time.format.DateTimeFormatter;
|
22
|
+
import java.time.format.ResolverStyle;
|
16
23
|
import java.util.HashMap;
|
17
24
|
import java.util.List;
|
18
25
|
import java.util.Map;
|
@@ -35,6 +42,19 @@ public class RandomjColumnVisitor
|
|
35
42
|
private final JsonParser jsonParser = new JsonParser();
|
36
43
|
private final ObjectMapper mapper = new ObjectMapper();
|
37
44
|
|
45
|
+
private static final DateTimeFormatter formatter = DateTimeFormatter
|
46
|
+
.ofPattern("yyyyMMdd")
|
47
|
+
.withResolverStyle(ResolverStyle.LENIENT);
|
48
|
+
private static final long cacheSize = 64;
|
49
|
+
private static final Cache<String, ZonedDateTime> zonedDateTimeCache = CacheBuilder
|
50
|
+
.newBuilder()
|
51
|
+
.maximumSize(cacheSize)
|
52
|
+
.build();
|
53
|
+
private static final Cache<String, Long> durationCache = CacheBuilder
|
54
|
+
.newBuilder()
|
55
|
+
.maximumSize(cacheSize)
|
56
|
+
.build();
|
57
|
+
|
38
58
|
private static final String NULL_RATE = "null_rate";
|
39
59
|
|
40
60
|
public RandomjColumnVisitor(PageBuilder pageBuilder, PluginTask task, Integer row,
|
@@ -149,15 +169,56 @@ public class RandomjColumnVisitor
|
|
149
169
|
pageBuilder.setNull(column);
|
150
170
|
}
|
151
171
|
else {
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
172
|
+
ZonedDateTime start = getZonedDatetime(column, "start_date");
|
173
|
+
ZonedDateTime end = getZonedDatetime(column, "end_date");
|
174
|
+
long duration = getDuration(column, start, end);
|
175
|
+
if (duration != 0) {
|
176
|
+
int plus = rnd.nextInt((int) duration);
|
177
|
+
Timestamp timestamp = Timestamp.ofEpochSecond(
|
178
|
+
start.plusSeconds(plus).toEpochSecond()
|
179
|
+
);
|
180
|
+
pageBuilder.setTimestamp(column, timestamp);
|
181
|
+
}
|
182
|
+
else {
|
183
|
+
final double randd = Math.random();
|
184
|
+
LocalDateTime randomDate = LocalDateTime.now()
|
185
|
+
.plusDays((long) (randd * 100))
|
186
|
+
.plusSeconds((long) (randd * 1000000));
|
187
|
+
Timestamp timestamp = Timestamp.ofEpochSecond(
|
188
|
+
randomDate.atZone(zoneId).toEpochSecond()
|
189
|
+
);
|
190
|
+
pageBuilder.setTimestamp(column, timestamp);
|
191
|
+
}
|
192
|
+
}
|
193
|
+
}
|
194
|
+
|
195
|
+
private ZonedDateTime getZonedDatetime(Column column, String dateString)
|
196
|
+
{
|
197
|
+
String cacheKey = String.format("%s::%s", column.getName(), dateString);
|
198
|
+
ZonedDateTime start = zonedDateTimeCache.getIfPresent(cacheKey);
|
199
|
+
if (start == null) {
|
200
|
+
Integer startDate = columnOptions.get(column).getOrDefault(dateString, null);
|
201
|
+
if (startDate == null) {
|
202
|
+
start = LocalDate.now().atStartOfDay(zoneId);
|
203
|
+
}
|
204
|
+
else {
|
205
|
+
start = LocalDate.parse(startDate.toString(), formatter)
|
206
|
+
.atStartOfDay(zoneId);
|
207
|
+
}
|
208
|
+
zonedDateTimeCache.put(cacheKey, start);
|
209
|
+
}
|
210
|
+
|
211
|
+
return start;
|
212
|
+
}
|
213
|
+
|
214
|
+
private long getDuration(Column column, ZonedDateTime start, ZonedDateTime end)
|
215
|
+
{
|
216
|
+
Long duration = durationCache.getIfPresent(column.getName());
|
217
|
+
if (duration == null) {
|
218
|
+
duration = Duration.between(start, end).getSeconds();
|
219
|
+
durationCache.put(column.getName(), duration);
|
160
220
|
}
|
221
|
+
return duration;
|
161
222
|
}
|
162
223
|
|
163
224
|
@Override
|
@@ -181,7 +242,8 @@ public class RandomjColumnVisitor
|
|
181
242
|
|
182
243
|
private void visit(JsonNode node, JsonColumnVisitor visitor)
|
183
244
|
{
|
184
|
-
SupportedJsonObject object = SupportedJsonObject.valueOf(node.get("type").asText().toUpperCase());
|
245
|
+
SupportedJsonObject object = SupportedJsonObject
|
246
|
+
.valueOf(node.get("type").asText().toUpperCase());
|
185
247
|
if (object.equals(SupportedJsonObject.BOOLEAN)) {
|
186
248
|
visitor.booleanNode(node);
|
187
249
|
}
|
@@ -201,7 +263,8 @@ public class RandomjColumnVisitor
|
|
201
263
|
visitor.objectNode(node);
|
202
264
|
}
|
203
265
|
else {
|
204
|
-
throw new UnsupportedOperationException("randomj input plugin does not support json-data type");
|
266
|
+
throw new UnsupportedOperationException(
|
267
|
+
"randomj input plugin does not support json-data type");
|
205
268
|
}
|
206
269
|
}
|
207
270
|
}
|
@@ -94,6 +94,9 @@ public class RandomjInputPlugin
|
|
94
94
|
miniMap.put("null_rate", c.getOption().get(Integer.class, "null_rate", 0));
|
95
95
|
miniMap.put("max_value", c.getOption().get(Integer.class, "max_value", null));
|
96
96
|
miniMap.put("min_value", c.getOption().get(Integer.class, "min_value", null));
|
97
|
+
// For datetime
|
98
|
+
miniMap.put("start_date", c.getOption().get(Integer.class, "start_date", null));
|
99
|
+
miniMap.put("end_date", c.getOption().get(Integer.class, "end_date", null));
|
97
100
|
lengthMap.put(column, miniMap);
|
98
101
|
}
|
99
102
|
return lengthMap;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-randomj
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-05-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,7 +52,9 @@ files:
|
|
52
52
|
- build.gradle
|
53
53
|
- config/checkstyle/checkstyle.xml
|
54
54
|
- config/checkstyle/default.xml
|
55
|
+
- example/README.md
|
55
56
|
- example/config.yml
|
57
|
+
- example/config.yml.liquid
|
56
58
|
- gradle/wrapper/gradle-wrapper.jar
|
57
59
|
- gradle/wrapper/gradle-wrapper.properties
|
58
60
|
- gradlew
|
@@ -65,7 +67,7 @@ files:
|
|
65
67
|
- src/test/java/org/embulk/input/randomj/TestRandomjInputPlugin.java
|
66
68
|
- classpath/commons-lang3-3.5.jar
|
67
69
|
- classpath/commons-text-1.1.jar
|
68
|
-
- classpath/embulk-input-randomj-0.5.0.jar
|
70
|
+
- classpath/embulk-input-randomj-0.5.1.jar
|
69
71
|
homepage: https://github.com/yuokada/embulk-input-randomj
|
70
72
|
licenses:
|
71
73
|
- MIT
|