embulk-input-randomj 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 82a2c62219bbf24fdfc2cf2bb24f8f0b2210b829
4
- data.tar.gz: bd0cad5900b36c009f6e92fa86357b5edd97c741
3
+ metadata.gz: 312c2e6df7d5ebe9d43775931f076d6d63502615
4
+ data.tar.gz: 68749dee15b28f53a187bac33dd8c5fa63380641
5
5
  SHA512:
6
- metadata.gz: ff0b599c31cf975d09f28ca2e3328abc5fcbee510832d551d0ab5b0e0dc7b63005f269478a9ff3bccaddaf68ba4435964a9f126ed736d62d86eed7843f66f2cd
7
- data.tar.gz: 3a6ec59cbc4d120e15b5012c26633bf4494e47870b58cc67be520851c5695807bb1553012b82748f97ed7ca6f16f0dfa556f9b690cbeef6d3d2484a0115cc998
6
+ metadata.gz: 8671f2b06c0eb6e36fb01fe31e96b8b9a8f6078100e3c749d5c7e284e923ecce8d5431a173bf271b00992def93a469d9f283377f410507906d04fc7b1d1f65d1
7
+ data.tar.gz: 10ae95c775793701dd4e88eb6f73e4173f9bfb16f3afb19a8dc321ef6f2fd9fa0010c04fafd889e5ecc74c5d03ec68aaafcf57dafb147e2afbf831b7157d86cb
@@ -11,4 +11,4 @@ script:
11
11
  - ./gradlew --info checkstyle
12
12
  - ./gradlew --info check
13
13
 
14
- after_success: ./gradlew sonarqube -Dsonar.organization=yuokada-github -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=${SONAR_TOKEN}
14
+ # after_success: ./gradlew sonarqube -Dsonar.organization=yuokada-github -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=${SONAR_TOKEN}
data/README.md CHANGED
@@ -45,23 +45,14 @@ in:
45
45
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
46
46
  ```
47
47
 
48
- Add `length`, `max_value`, `min_value` option (from 0.3.0)
49
- ```yaml
50
- in:
51
- type: randomj
52
- rows: 16
53
- threads: 1
54
- primary_key: myid
55
- schema:
56
- - {name: myid, type: long}
57
- - {name: named, type: string, length: 12}
58
- - {name: price, type: long, max_value: 1080, min_value: 100}
59
- - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
60
- ```
48
+ - Add `length`, `max_value`, `min_value` option (from 0.3.0)
49
+ - Add `null_rate` option (from 0.4.0)
50
+ With this configuration, `null` is inserted into the `price` field with a probability of `8` in 10000.
51
+ - Support json type (from 0.5.0)
52
+ - Support `start_date` & `end_date` keys in **Timestamp** fields.
61
53
 
62
- Add `null_rate` option (from 0.4.0)
63
-
64
- With this configuration, `null` is inserted into the `price` field with a probability of `8` in 10000.
54
+ - Ex1. `{name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331, end_date: 20180430}`
55
+ - Ex2. `{name: created_at, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331}`
65
56
 
66
57
  ```yaml
67
58
  in:
@@ -74,10 +65,10 @@ in:
74
65
  - {name: named, type: string, length: 12}
75
66
  - {name: price, type: long, max_value: 1080, min_value: 100, null_rate: 8}
76
67
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
68
+ - {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
77
69
  ```
78
70
 
79
71
 
80
-
81
72
  ## Usage
82
73
 
83
74
  ### Example1
@@ -202,6 +193,14 @@ $ embulk run -I lib config/example.yml
202
193
 
203
194
  ## ChangeLog
204
195
 
196
+ ### 0.5.1
197
+
198
+ - Support start_date & end_date key with Timestamp field.
199
+
200
+ ### 0.5.0
201
+
202
+ - Support `json` datatype
203
+
205
204
  ### v0.4
206
205
 
207
206
  - Support null_rate parameter
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.5.0"
18
+ version = "0.5.1"
19
19
 
20
20
  sourceCompatibility = 1.8
21
21
  targetCompatibility = 1.8
@@ -0,0 +1,9 @@
1
+ # HELP
2
+
3
+ ## How to run with the "ROWS" environment variable
4
+
5
+ You can change the number of output records by setting the environment variable "ROWS".
6
+
7
+ ```
8
+ % ROWS=160 embulk run etc/config.yml.liquid
9
+ ```
@@ -11,7 +11,7 @@ in:
11
11
  - {name: x_flag, type: boolean}
12
12
  - {name: rate, type: double, max_value: 100, min_value: -100}
13
13
  - {name: score, type: long, max_value: 255, min_value: 100}
14
- - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
14
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20171221, end_date: 20171231}
15
15
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
16
16
  - {name: json_obj, type: json, schema: '[{"name": "obj1", "type": "object", "items": [{"name": "key1", "type": "string"}, {"name": "key2", "type": "number"} ] }]' }
17
17
  - {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
@@ -0,0 +1,18 @@
1
+ in:
2
+ type: randomj
3
+ rows: {{ env.ROWS }}
4
+ threads: 1
5
+ # default_timezone: Asia/Tokyo
6
+ primary_key: myid
7
+ schema:
8
+ - {name: myid, type: long}
9
+ - {name: named, type: string}
10
+ - {name: named_s, type: string, length: 8}
11
+ - {name: x_flag, type: boolean}
12
+ - {name: rate, type: double, max_value: 100, min_value: -100}
13
+ - {name: score, type: long, max_value: 255, min_value: 100}
14
+ - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S', start_date: 20180331}
15
+ - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
16
+
17
+ out:
18
+ type: stdout
@@ -12,7 +12,6 @@ import java.util.Random;
12
12
 
13
13
  public class JsonColumnVisitor
14
14
  {
15
-
16
15
  private final Map<String, Object> map;
17
16
  private final Random rnd = new Random();
18
17
  private final RandomStringGenerator generator = new RandomStringGenerator.Builder()
@@ -3,6 +3,8 @@ package org.embulk.input.randomj;
3
3
  import com.fasterxml.jackson.core.JsonProcessingException;
4
4
  import com.fasterxml.jackson.databind.JsonNode;
5
5
  import com.fasterxml.jackson.databind.ObjectMapper;
6
+ import com.google.common.cache.Cache;
7
+ import com.google.common.cache.CacheBuilder;
6
8
  import org.apache.commons.text.CharacterPredicates;
7
9
  import org.apache.commons.text.RandomStringGenerator;
8
10
  import org.embulk.spi.Column;
@@ -11,8 +13,13 @@ import org.embulk.spi.PageBuilder;
11
13
  import org.embulk.spi.json.JsonParser;
12
14
  import org.embulk.spi.time.Timestamp;
13
15
 
16
+ import java.time.Duration;
17
+ import java.time.LocalDate;
14
18
  import java.time.LocalDateTime;
15
19
  import java.time.ZoneId;
20
+ import java.time.ZonedDateTime;
21
+ import java.time.format.DateTimeFormatter;
22
+ import java.time.format.ResolverStyle;
16
23
  import java.util.HashMap;
17
24
  import java.util.List;
18
25
  import java.util.Map;
@@ -35,6 +42,19 @@ public class RandomjColumnVisitor
35
42
  private final JsonParser jsonParser = new JsonParser();
36
43
  private final ObjectMapper mapper = new ObjectMapper();
37
44
 
45
+ private static final DateTimeFormatter formatter = DateTimeFormatter
46
+ .ofPattern("yyyyMMdd")
47
+ .withResolverStyle(ResolverStyle.LENIENT);
48
+ private static final long cacheSize = 64;
49
+ private static final Cache<String, ZonedDateTime> zonedDateTimeCache = CacheBuilder
50
+ .newBuilder()
51
+ .maximumSize(cacheSize)
52
+ .build();
53
+ private static final Cache<String, Long> durationCache = CacheBuilder
54
+ .newBuilder()
55
+ .maximumSize(cacheSize)
56
+ .build();
57
+
38
58
  private static final String NULL_RATE = "null_rate";
39
59
 
40
60
  public RandomjColumnVisitor(PageBuilder pageBuilder, PluginTask task, Integer row,
@@ -149,15 +169,56 @@ public class RandomjColumnVisitor
149
169
  pageBuilder.setNull(column);
150
170
  }
151
171
  else {
152
- final double randd = Math.random();
153
- LocalDateTime randomDate = LocalDateTime.now()
154
- .plusDays((long) (randd * 100))
155
- .plusSeconds((long) (randd * 1000000));
156
- Timestamp timestamp = Timestamp.ofEpochSecond(
157
- randomDate.atZone(zoneId).toEpochSecond()
158
- );
159
- pageBuilder.setTimestamp(column, timestamp);
172
+ ZonedDateTime start = getZonedDatetime(column, "start_date");
173
+ ZonedDateTime end = getZonedDatetime(column, "end_date");
174
+ long duration = getDuration(column, start, end);
175
+ if (duration != 0) {
176
+ int plus = rnd.nextInt((int) duration);
177
+ Timestamp timestamp = Timestamp.ofEpochSecond(
178
+ start.plusSeconds(plus).toEpochSecond()
179
+ );
180
+ pageBuilder.setTimestamp(column, timestamp);
181
+ }
182
+ else {
183
+ final double randd = Math.random();
184
+ LocalDateTime randomDate = LocalDateTime.now()
185
+ .plusDays((long) (randd * 100))
186
+ .plusSeconds((long) (randd * 1000000));
187
+ Timestamp timestamp = Timestamp.ofEpochSecond(
188
+ randomDate.atZone(zoneId).toEpochSecond()
189
+ );
190
+ pageBuilder.setTimestamp(column, timestamp);
191
+ }
192
+ }
193
+ }
194
+
195
+ private ZonedDateTime getZonedDatetime(Column column, String dateString)
196
+ {
197
+ String cacheKey = String.format("%s::%s", column.getName(), dateString);
198
+ ZonedDateTime start = zonedDateTimeCache.getIfPresent(cacheKey);
199
+ if (start == null) {
200
+ Integer startDate = columnOptions.get(column).getOrDefault(dateString, null);
201
+ if (startDate == null) {
202
+ start = LocalDate.now().atStartOfDay(zoneId);
203
+ }
204
+ else {
205
+ start = LocalDate.parse(startDate.toString(), formatter)
206
+ .atStartOfDay(zoneId);
207
+ }
208
+ zonedDateTimeCache.put(cacheKey, start);
209
+ }
210
+
211
+ return start;
212
+ }
213
+
214
+ private long getDuration(Column column, ZonedDateTime start, ZonedDateTime end)
215
+ {
216
+ Long duration = durationCache.getIfPresent(column.getName());
217
+ if (duration == null) {
218
+ duration = Duration.between(start, end).getSeconds();
219
+ durationCache.put(column.getName(), duration);
160
220
  }
221
+ return duration;
161
222
  }
162
223
 
163
224
  @Override
@@ -181,7 +242,8 @@ public class RandomjColumnVisitor
181
242
 
182
243
  private void visit(JsonNode node, JsonColumnVisitor visitor)
183
244
  {
184
- SupportedJsonObject object = SupportedJsonObject.valueOf(node.get("type").asText().toUpperCase());
245
+ SupportedJsonObject object = SupportedJsonObject
246
+ .valueOf(node.get("type").asText().toUpperCase());
185
247
  if (object.equals(SupportedJsonObject.BOOLEAN)) {
186
248
  visitor.booleanNode(node);
187
249
  }
@@ -201,7 +263,8 @@ public class RandomjColumnVisitor
201
263
  visitor.objectNode(node);
202
264
  }
203
265
  else {
204
- throw new UnsupportedOperationException("randomj input plugin does not support json-data type");
266
+ throw new UnsupportedOperationException(
267
+ "randomj input plugin does not support json-data type");
205
268
  }
206
269
  }
207
270
  }
@@ -94,6 +94,9 @@ public class RandomjInputPlugin
94
94
  miniMap.put("null_rate", c.getOption().get(Integer.class, "null_rate", 0));
95
95
  miniMap.put("max_value", c.getOption().get(Integer.class, "max_value", null));
96
96
  miniMap.put("min_value", c.getOption().get(Integer.class, "min_value", null));
97
+ // For datetime
98
+ miniMap.put("start_date", c.getOption().get(Integer.class, "start_date", null));
99
+ miniMap.put("end_date", c.getOption().get(Integer.class, "end_date", null));
97
100
  lengthMap.put(column, miniMap);
98
101
  }
99
102
  return lengthMap;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-randomj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-25 00:00:00.000000000 Z
11
+ date: 2018-05-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -52,7 +52,9 @@ files:
52
52
  - build.gradle
53
53
  - config/checkstyle/checkstyle.xml
54
54
  - config/checkstyle/default.xml
55
+ - example/README.md
55
56
  - example/config.yml
57
+ - example/config.yml.liquid
56
58
  - gradle/wrapper/gradle-wrapper.jar
57
59
  - gradle/wrapper/gradle-wrapper.properties
58
60
  - gradlew
@@ -65,7 +67,7 @@ files:
65
67
  - src/test/java/org/embulk/input/randomj/TestRandomjInputPlugin.java
66
68
  - classpath/commons-lang3-3.5.jar
67
69
  - classpath/commons-text-1.1.jar
68
- - classpath/embulk-input-randomj-0.5.0.jar
70
+ - classpath/embulk-input-randomj-0.5.1.jar
69
71
  homepage: https://github.com/yuokada/embulk-input-randomj
70
72
  licenses:
71
73
  - MIT