embulk-input-randomj 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 82a2c62219bbf24fdfc2cf2bb24f8f0b2210b829
4
- data.tar.gz: bd0cad5900b36c009f6e92fa86357b5edd97c741
3
+ metadata.gz: 312c2e6df7d5ebe9d43775931f076d6d63502615
4
+ data.tar.gz: 68749dee15b28f53a187bac33dd8c5fa63380641
5
5
  SHA512:
6
- metadata.gz: ff0b599c31cf975d09f28ca2e3328abc5fcbee510832d551d0ab5b0e0dc7b63005f269478a9ff3bccaddaf68ba4435964a9f126ed736d62d86eed7843f66f2cd
7
- data.tar.gz: 3a6ec59cbc4d120e15b5012c26633bf4494e47870b58cc67be520851c5695807bb1553012b82748f97ed7ca6f16f0dfa556f9b690cbeef6d3d2484a0115cc998
6
+ metadata.gz: 8671f2b06c0eb6e36fb01fe31e96b8b9a8f6078100e3c749d5c7e284e923ecce8d5431a173bf271b00992def93a469d9f283377f410507906d04fc7b1d1f65d1
7
+ data.tar.gz: 10ae95c775793701dd4e88eb6f73e4173f9bfb16f3afb19a8dc321ef6f2fd9fa0010c04fafd889e5ecc74c5d03ec68aaafcf57dafb147e2afbf831b7157d86cb
@@ -11,4 +11,4 @@ script:
11
11
  - ./gradlew --info checkstyle
12
12
  - ./gradlew --info check
13
13
 
14
- after_success: ./gradlew sonarqube -Dsonar.organization=yuokada-github -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=${SONAR_TOKEN}
14
+ # after_success: ./gradlew sonarqube -Dsonar.organization=yuokada-github -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=${SONAR_TOKEN}
data/README.md CHANGED
@@ -45,23 +45,14 @@ in:
45
45
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
46
46
  ```
47
47
 
48
- Add `length`, `max_value`, `min_value` option (from 0.3.0)
49
- ```yaml
50
- in:
51
- type: randomj
52
- rows: 16
53
- threads: 1
54
- primary_key: myid
55
- schema:
56
- - {name: myid, type: long}
57
- - {name: named, type: string, length: 12}
58
- - {name: price, type: long, max_value: 1080, min_value: 100}
59
- - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
60
- ```
48
+ - Add `length`, `max_value`, `min_value` option (from 0.3.0)
49
+ - Add `null_rate` option (from 0.4.0)
50
+ This configuration inserts `null` into the `price` field with a probability of `8` in 10000.
51
+ - Support json type (from 0.5.0)
52
+ - Support `start_date` & `end_date` keys in **Timestamp** fields (from 0.5.1).
61
53
 
62
- Add `null_rate` option (from 0.4.0)
63
-
64
- This configuration inserts `null` into the `price` field with a probability of `8` in 10000.
54
+ - Ex1. `{name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331, end_date: 20180430}`
55
+ - Ex2. `{name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331}`
65
56
 
66
57
  ```yaml
67
58
  in:
@@ -74,10 +65,10 @@ in:
74
65
  - {name: named, type: string, length: 12}
75
66
  - {name: price, type: long, max_value: 1080, min_value: 100, null_rate: 8}
76
67
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
68
+ - {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
77
69
  ```
78
70
 
79
71
 
80
-
81
72
  ## Usage
82
73
 
83
74
  ### Example1
@@ -202,6 +193,14 @@ $ embulk run -I lib config/example.yml
202
193
 
203
194
  ## ChangeLog
204
195
 
196
+ ### 0.5.1
197
+
198
+ - Support `start_date` & `end_date` keys for Timestamp fields.
199
+
200
+ ### 0.5.0
201
+
202
+ - Support `json` datatype
203
+
205
204
  ### v0.4
206
205
 
207
206
  - Support null_rate parameter
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.5.0"
18
+ version = "0.5.1"
19
19
 
20
20
  sourceCompatibility = 1.8
21
21
  targetCompatibility = 1.8
@@ -0,0 +1,9 @@
1
+ # HELP
2
+
3
+ ## How to run with the "ROWS" environment variable
4
+
5
+ You can change the number of output-records by using the environment variable "ROWS".
6
+
7
+ ```
8
+ % ROWS=160 embulk run etc/config.yml.liquid
9
+ ```
@@ -11,7 +11,7 @@ in:
11
11
  - {name: x_flag, type: boolean}
12
12
  - {name: rate, type: double, max_value: 100, min_value: -100}
13
13
  - {name: score, type: long, max_value: 255, min_value: 100}
14
- - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
14
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20171221, end_date: 20171231}
15
15
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
16
16
  - {name: json_obj, type: json, schema: '[{"name": "obj1", "type": "object", "items": [{"name": "key1", "type": "string"}, {"name": "key2", "type": "number"} ] }]' }
17
17
  - {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
@@ -0,0 +1,18 @@
1
+ in:
2
+ type: randomj
3
+ rows: {{ env.ROWS }}
4
+ threads: 1
5
+ # default_timezone: Asia/Tokyo
6
+ primary_key: myid
7
+ schema:
8
+ - {name: myid, type: long}
9
+ - {name: named, type: string}
10
+ - {name: named_s, type: string, length: 8}
11
+ - {name: x_flag, type: boolean}
12
+ - {name: rate, type: double, max_value: 100, min_value: -100}
13
+ - {name: score, type: long, max_value: 255, min_value: 100}
14
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331}
15
+ - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
16
+
17
+ out:
18
+ type: stdout
@@ -12,7 +12,6 @@ import java.util.Random;
12
12
 
13
13
  public class JsonColumnVisitor
14
14
  {
15
-
16
15
  private final Map<String, Object> map;
17
16
  private final Random rnd = new Random();
18
17
  private final RandomStringGenerator generator = new RandomStringGenerator.Builder()
@@ -3,6 +3,8 @@ package org.embulk.input.randomj;
3
3
  import com.fasterxml.jackson.core.JsonProcessingException;
4
4
  import com.fasterxml.jackson.databind.JsonNode;
5
5
  import com.fasterxml.jackson.databind.ObjectMapper;
6
+ import com.google.common.cache.Cache;
7
+ import com.google.common.cache.CacheBuilder;
6
8
  import org.apache.commons.text.CharacterPredicates;
7
9
  import org.apache.commons.text.RandomStringGenerator;
8
10
  import org.embulk.spi.Column;
@@ -11,8 +13,13 @@ import org.embulk.spi.PageBuilder;
11
13
  import org.embulk.spi.json.JsonParser;
12
14
  import org.embulk.spi.time.Timestamp;
13
15
 
16
+ import java.time.Duration;
17
+ import java.time.LocalDate;
14
18
  import java.time.LocalDateTime;
15
19
  import java.time.ZoneId;
20
+ import java.time.ZonedDateTime;
21
+ import java.time.format.DateTimeFormatter;
22
+ import java.time.format.ResolverStyle;
16
23
  import java.util.HashMap;
17
24
  import java.util.List;
18
25
  import java.util.Map;
@@ -35,6 +42,19 @@ public class RandomjColumnVisitor
35
42
  private final JsonParser jsonParser = new JsonParser();
36
43
  private final ObjectMapper mapper = new ObjectMapper();
37
44
 
45
+ private static final DateTimeFormatter formatter = DateTimeFormatter
46
+ .ofPattern("yyyyMMdd")
47
+ .withResolverStyle(ResolverStyle.LENIENT);
48
+ private static final long cacheSize = 64;
49
+ private static final Cache<String, ZonedDateTime> zonedDateTimeCache = CacheBuilder
50
+ .newBuilder()
51
+ .maximumSize(cacheSize)
52
+ .build();
53
+ private static final Cache<String, Long> durationCache = CacheBuilder
54
+ .newBuilder()
55
+ .maximumSize(cacheSize)
56
+ .build();
57
+
38
58
  private static final String NULL_RATE = "null_rate";
39
59
 
40
60
  public RandomjColumnVisitor(PageBuilder pageBuilder, PluginTask task, Integer row,
@@ -149,15 +169,56 @@ public class RandomjColumnVisitor
149
169
  pageBuilder.setNull(column);
150
170
  }
151
171
  else {
152
- final double randd = Math.random();
153
- LocalDateTime randomDate = LocalDateTime.now()
154
- .plusDays((long) (randd * 100))
155
- .plusSeconds((long) (randd * 1000000));
156
- Timestamp timestamp = Timestamp.ofEpochSecond(
157
- randomDate.atZone(zoneId).toEpochSecond()
158
- );
159
- pageBuilder.setTimestamp(column, timestamp);
172
+ ZonedDateTime start = getZonedDatetime(column, "start_date");
173
+ ZonedDateTime end = getZonedDatetime(column, "end_date");
174
+ long duration = getDuration(column, start, end);
175
+ if (duration != 0) {
176
+ int plus = rnd.nextInt((int) duration);
177
+ Timestamp timestamp = Timestamp.ofEpochSecond(
178
+ start.plusSeconds(plus).toEpochSecond()
179
+ );
180
+ pageBuilder.setTimestamp(column, timestamp);
181
+ }
182
+ else {
183
+ final double randd = Math.random();
184
+ LocalDateTime randomDate = LocalDateTime.now()
185
+ .plusDays((long) (randd * 100))
186
+ .plusSeconds((long) (randd * 1000000));
187
+ Timestamp timestamp = Timestamp.ofEpochSecond(
188
+ randomDate.atZone(zoneId).toEpochSecond()
189
+ );
190
+ pageBuilder.setTimestamp(column, timestamp);
191
+ }
192
+ }
193
+ }
194
+
195
+ private ZonedDateTime getZonedDatetime(Column column, String dateString)
196
+ {
197
+ String cacheKey = String.format("%s::%s", column.getName(), dateString);
198
+ ZonedDateTime start = zonedDateTimeCache.getIfPresent(cacheKey);
199
+ if (start == null) {
200
+ Integer startDate = columnOptions.get(column).getOrDefault(dateString, null);
201
+ if (startDate == null) {
202
+ start = LocalDate.now().atStartOfDay(zoneId);
203
+ }
204
+ else {
205
+ start = LocalDate.parse(startDate.toString(), formatter)
206
+ .atStartOfDay(zoneId);
207
+ }
208
+ zonedDateTimeCache.put(cacheKey, start);
209
+ }
210
+
211
+ return start;
212
+ }
213
+
214
+ private long getDuration(Column column, ZonedDateTime start, ZonedDateTime end)
215
+ {
216
+ Long duration = durationCache.getIfPresent(column.getName());
217
+ if (duration == null) {
218
+ duration = Duration.between(start, end).getSeconds();
219
+ durationCache.put(column.getName(), duration);
160
220
  }
221
+ return duration;
161
222
  }
162
223
 
163
224
  @Override
@@ -181,7 +242,8 @@ public class RandomjColumnVisitor
181
242
 
182
243
  private void visit(JsonNode node, JsonColumnVisitor visitor)
183
244
  {
184
- SupportedJsonObject object = SupportedJsonObject.valueOf(node.get("type").asText().toUpperCase());
245
+ SupportedJsonObject object = SupportedJsonObject
246
+ .valueOf(node.get("type").asText().toUpperCase());
185
247
  if (object.equals(SupportedJsonObject.BOOLEAN)) {
186
248
  visitor.booleanNode(node);
187
249
  }
@@ -201,7 +263,8 @@ public class RandomjColumnVisitor
201
263
  visitor.objectNode(node);
202
264
  }
203
265
  else {
204
- throw new UnsupportedOperationException("randomj input plugin does not support json-data type");
266
+ throw new UnsupportedOperationException(
267
+ "randomj input plugin does not support json-data type");
205
268
  }
206
269
  }
207
270
  }
@@ -94,6 +94,9 @@ public class RandomjInputPlugin
94
94
  miniMap.put("null_rate", c.getOption().get(Integer.class, "null_rate", 0));
95
95
  miniMap.put("max_value", c.getOption().get(Integer.class, "max_value", null));
96
96
  miniMap.put("min_value", c.getOption().get(Integer.class, "min_value", null));
97
+ // For datetime
98
+ miniMap.put("start_date", c.getOption().get(Integer.class, "start_date", null));
99
+ miniMap.put("end_date", c.getOption().get(Integer.class, "end_date", null));
97
100
  lengthMap.put(column, miniMap);
98
101
  }
99
102
  return lengthMap;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-randomj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-25 00:00:00.000000000 Z
11
+ date: 2018-05-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -52,7 +52,9 @@ files:
52
52
  - build.gradle
53
53
  - config/checkstyle/checkstyle.xml
54
54
  - config/checkstyle/default.xml
55
+ - example/README.md
55
56
  - example/config.yml
57
+ - example/config.yml.liquid
56
58
  - gradle/wrapper/gradle-wrapper.jar
57
59
  - gradle/wrapper/gradle-wrapper.properties
58
60
  - gradlew
@@ -65,7 +67,7 @@ files:
65
67
  - src/test/java/org/embulk/input/randomj/TestRandomjInputPlugin.java
66
68
  - classpath/commons-lang3-3.5.jar
67
69
  - classpath/commons-text-1.1.jar
68
- - classpath/embulk-input-randomj-0.5.0.jar
70
+ - classpath/embulk-input-randomj-0.5.1.jar
69
71
  homepage: https://github.com/yuokada/embulk-input-randomj
70
72
  licenses:
71
73
  - MIT