embulk-input-randomj 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1165a1e2c182b649c7a2fbf73de530de55a5497f
4
- data.tar.gz: ab71604a4f4af3d7765433b7e380e5f509f7b7e2
3
+ metadata.gz: 82a2c62219bbf24fdfc2cf2bb24f8f0b2210b829
4
+ data.tar.gz: bd0cad5900b36c009f6e92fa86357b5edd97c741
5
5
  SHA512:
6
- metadata.gz: 9fc2bea3f88fcc01b2a2e33061d0a28d2588345d8b293ca567cb7aaadcd1daf3975c70f24328e2facbb2e4dd511e06dec5f854520885e77984bfa7a03a508866
7
- data.tar.gz: a6d3961d0f3fcd6ebd3f02545f6272a05805680de3923cb2ff1a3dc8db588219e429549a7279b4cca14d719feeed126046f465d7ab7ede26a92f123fe58ea787
6
+ metadata.gz: ff0b599c31cf975d09f28ca2e3328abc5fcbee510832d551d0ab5b0e0dc7b63005f269478a9ff3bccaddaf68ba4435964a9f126ed736d62d86eed7843f66f2cd
7
+ data.tar.gz: 3a6ec59cbc4d120e15b5012c26633bf4494e47870b58cc67be520851c5695807bb1553012b82748f97ed7ca6f16f0dfa556f9b690cbeef6d3d2484a0115cc998
data/README.md CHANGED
@@ -14,7 +14,7 @@ Original: [kumagi/embulk\-input\-random](https://github.com/kumagi/embulk-input-
14
14
  * **Cleanup supported**: no
15
15
  * **Guess supported**: no
16
16
 
17
- ## Install
17
+ ## Install
18
18
 
19
19
  ``` shell
20
20
  % embulk gem install embulk-input-randomj
@@ -59,6 +59,25 @@ in:
59
59
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
60
60
  ```
61
61
 
62
+ Add `null_rate` option (from 0.4.0)
63
+
64
+ This configuration inserts `null` into the `price` field with a probability of `8` in 10000.
65
+
66
+ ```yaml
67
+ in:
68
+ type: randomj
69
+ rows: 16
70
+ threads: 1
71
+ primary_key: myid
72
+ schema:
73
+ - {name: myid, type: long}
74
+ - {name: named, type: string, length: 12}
75
+ - {name: price, type: long, max_value: 1080, min_value: 100, null_rate: 8}
76
+ - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
77
+ ```
78
+
79
+
80
+
62
81
  ## Usage
63
82
 
64
83
  ### Example1
@@ -80,7 +99,7 @@ in:
80
99
  - {name: score, type: long}
81
100
  - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
82
101
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
83
-
102
+
84
103
  out:
85
104
  type: stdout
86
105
 
@@ -116,7 +135,7 @@ in:
116
135
 
117
136
  - `named_s` return string with length 8
118
137
  - `score` return value between `100~255`
119
- - `rate` return value between `-100~100`
138
+ - `rate` return value between `-100~100`
120
139
 
121
140
  ```shell
122
141
 
@@ -176,7 +195,7 @@ $ ./gradlew gem # -t to watch change of files and rebuild continuously
176
195
  ```
177
196
 
178
197
  ```
179
- $ ./gradlew build && ./gradlew classpath
198
+ $ ./gradlew build && ./gradlew classpath
180
199
  $ embulk run -I lib config/example.yml
181
200
  ```
182
201
 
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.4.0"
18
+ version = "0.5.0"
19
19
 
20
20
  sourceCompatibility = 1.8
21
21
  targetCompatibility = 1.8
data/example/config.yml CHANGED
@@ -13,6 +13,8 @@ in:
13
13
  - {name: score, type: long, max_value: 255, min_value: 100}
14
14
  - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
15
15
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
16
+ - {name: json_obj, type: json, schema: '[{"name": "obj1", "type": "object", "items": [{"name": "key1", "type": "string"}, {"name": "key2", "type": "number"} ] }]' }
17
+ - {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
16
18
 
17
19
  out:
18
20
  type: stdout
@@ -0,0 +1,142 @@
1
+ package org.embulk.input.randomj;
2
+
3
+ import com.fasterxml.jackson.databind.JsonNode;
4
+ import org.apache.commons.text.CharacterPredicates;
5
+ import org.apache.commons.text.RandomStringGenerator;
6
+
7
+ import java.util.ArrayList;
8
+ import java.util.HashMap;
9
+ import java.util.List;
10
+ import java.util.Map;
11
+ import java.util.Random;
12
+
13
+ public class JsonColumnVisitor
14
+ {
15
+
16
+ private final Map<String, Object> map;
17
+ private final Random rnd = new Random();
18
+ private final RandomStringGenerator generator = new RandomStringGenerator.Builder()
19
+ .withinRange('0', 'z')
20
+ .filteredBy(CharacterPredicates.LETTERS, CharacterPredicates.DIGITS)
21
+ .build();
22
+
23
+ private static final String ITEMS = "items";
24
+
25
+ public JsonColumnVisitor(Map<String, Object> gmap)
26
+ {
27
+ map = gmap;
28
+ }
29
+
30
+ public void booleanNode(JsonNode node)
31
+ {
32
+ String key = node.get("name").asText();
33
+ if (Math.random() < 0.5) {
34
+ map.put(key, true);
35
+ }
36
+ else {
37
+ map.put(key, false);
38
+ }
39
+ }
40
+
41
+ public void doubleNode(JsonNode node)
42
+ {
43
+ String key = node.get("name").asText();
44
+ map.put(key, rnd.nextDouble() * 10000);
45
+ }
46
+
47
+ public void integerNode(JsonNode node)
48
+ {
49
+ String key = node.get("name").asText();
50
+ map.put(key, rnd.nextInt(10000));
51
+ }
52
+
53
+ public void stringNode(JsonNode node)
54
+ {
55
+ String key = node.get("name").asText();
56
+ map.put(key, generator.generate(8));
57
+ }
58
+
59
+ public void arrayNode(JsonNode node) // NOSONAR
60
+ {
61
+ String key = node.get("name").asText();
62
+ String dataType = node.get(ITEMS).get("type").asText();
63
+ int arraySize = node.get(ITEMS).get("size").asInt(1);
64
+ SupportedJsonObject jtype = SupportedJsonObject.valueOf(dataType.toUpperCase());
65
+ switch (jtype) {
66
+ case BOOLEAN: { // NOSONAR
67
+ List<Boolean> m = new ArrayList<>();
68
+ for (int i = 0; i < arraySize; i++) {
69
+ if (Math.random() < 0.5) {
70
+ m.add(true);
71
+ }
72
+ else {
73
+ m.add(false);
74
+ }
75
+ }
76
+ map.put(key, m);
77
+ break;
78
+ }
79
+ case INTEGER: { // NOSONAR
80
+ ArrayList<Integer> m = new ArrayList<>();
81
+ for (int i = 0; i < arraySize; i++) {
82
+ m.add(rnd.nextInt(100));
83
+ }
84
+ map.put(key, m);
85
+ break;
86
+ }
87
+ case NUMBER: { // NOSONAR
88
+ ArrayList<Number> m = new ArrayList<>();
89
+ for (int i = 0; i < arraySize; i++) {
90
+ m.add(rnd.nextDouble() * 100);
91
+ }
92
+ map.put(key, m);
93
+ break;
94
+ }
95
+ case STRING: { // NOSONAR
96
+ int length = 8;
97
+ ArrayList<String> m = new ArrayList<>();
98
+ for (int i = 0; i < arraySize; i++) {
99
+ m.add(generator.generate(length));
100
+ }
101
+ map.put(key, m);
102
+ break;
103
+ }
104
+ default:
105
+ throw new UnsupportedOperationException("randomj input plugin does not support json-array-data type");
106
+ }
107
+ }
108
+
109
+ public void objectNode(JsonNode node)
110
+ {
111
+ Map<String, Object> objectMap = new HashMap<>();
112
+
113
+ for (JsonNode jsonNode : node.findValues(ITEMS).listIterator().next()) {
114
+ String nestKey = jsonNode.get("name").asText();
115
+ SupportedJsonObject jtype = SupportedJsonObject.valueOf(jsonNode.get("type").asText().toUpperCase());
116
+
117
+ switch (jtype) {
118
+ case BOOLEAN:
119
+ if (Math.random() < 0.5) {
120
+ objectMap.put(nestKey, true);
121
+ }
122
+ else {
123
+ objectMap.put(nestKey, false);
124
+ }
125
+ break;
126
+ case NUMBER:
127
+ objectMap.put(nestKey, rnd.nextDouble() * 100);
128
+ break;
129
+ case INTEGER:
130
+ objectMap.put(nestKey, rnd.nextInt(10000));
131
+ break;
132
+ case STRING:
133
+ int length = 8;
134
+ objectMap.put(nestKey, generator.generate(length));
135
+ break;
136
+ default:
137
+ throw new UnsupportedOperationException("randomj input plugin does not support json-data type");
138
+ }
139
+ }
140
+ map.put(node.get("name").asText(), objectMap);
141
+ }
142
+ }
@@ -0,0 +1,26 @@
1
+ package org.embulk.input.randomj;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.Task;
6
+ import org.embulk.spi.SchemaConfig;
7
+
8
+ public interface PluginTask
9
+ extends Task
10
+ {
11
+ // configuration row (required integer)
12
+ @Config("rows")
13
+ int getRows();
14
+
15
+ // ref: https://github.com/embulk/embulk-input-jdbc/blob/master/embulk-input-mysql/src/main/java/org/embulk/input/MySQLInputPlugin.java#L33-L35
16
+ @Config("threads")
17
+ @ConfigDefault("1")
18
+ Integer getThreads();
19
+
20
+ @Config("primary_key")
21
+ @ConfigDefault("")
22
+ String getPrimaryKey();
23
+
24
+ @Config("schema")
25
+ SchemaConfig getSchema();
26
+ }
@@ -1,15 +1,20 @@
1
1
  package org.embulk.input.randomj;
2
2
 
3
+ import com.fasterxml.jackson.core.JsonProcessingException;
4
+ import com.fasterxml.jackson.databind.JsonNode;
5
+ import com.fasterxml.jackson.databind.ObjectMapper;
3
6
  import org.apache.commons.text.CharacterPredicates;
4
7
  import org.apache.commons.text.RandomStringGenerator;
5
- import org.embulk.input.randomj.RandomjInputPlugin.PluginTask;
6
8
  import org.embulk.spi.Column;
7
9
  import org.embulk.spi.ColumnVisitor;
8
10
  import org.embulk.spi.PageBuilder;
11
+ import org.embulk.spi.json.JsonParser;
9
12
  import org.embulk.spi.time.Timestamp;
10
13
 
11
14
  import java.time.LocalDateTime;
12
15
  import java.time.ZoneId;
16
+ import java.util.HashMap;
17
+ import java.util.List;
13
18
  import java.util.Map;
14
19
  import java.util.Random;
15
20
 
@@ -21,25 +26,33 @@ public class RandomjColumnVisitor
21
26
  private final Integer row;
22
27
  private final Random rnd;
23
28
  private final Map<Column, Map<String, Integer>> columnOptions;
29
+ private final Map<Column, List<JsonNode>> schemaOptions;
24
30
  private final RandomStringGenerator generator = new RandomStringGenerator.Builder()
25
31
  .withinRange('0', 'z')
26
32
  .filteredBy(CharacterPredicates.LETTERS, CharacterPredicates.DIGITS)
27
33
  .build();
28
34
  private final ZoneId zoneId = ZoneId.systemDefault();
35
+ private final JsonParser jsonParser = new JsonParser();
36
+ private final ObjectMapper mapper = new ObjectMapper();
29
37
 
30
- public RandomjColumnVisitor(PageBuilder pageBuilder, PluginTask task, Integer row, Map<Column, Map<String, Integer>> columnOptions)
38
+ private static final String NULL_RATE = "null_rate";
39
+
40
+ public RandomjColumnVisitor(PageBuilder pageBuilder, PluginTask task, Integer row,
41
+ Map<Column, Map<String, Integer>> columnOptions,
42
+ Map<Column, List<JsonNode>> schemaOptions)
31
43
  {
32
44
  this.task = task;
33
45
  this.pageBuilder = pageBuilder;
34
46
  this.row = row;
35
47
  this.columnOptions = columnOptions;
36
48
  this.rnd = new Random();
49
+ this.schemaOptions = schemaOptions;
37
50
  }
38
51
 
39
52
  @Override
40
53
  public void booleanColumn(Column column)
41
54
  {
42
- Integer nrate = columnOptions.get(column).get("null_rate");
55
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
43
56
  if (Math.random() < (double) nrate / 10000) {
44
57
  pageBuilder.setNull(column);
45
58
  }
@@ -61,7 +74,7 @@ public class RandomjColumnVisitor
61
74
  pageBuilder.setLong(column, row);
62
75
  }
63
76
  else {
64
- Integer nrate = columnOptions.get(column).get("null_rate");
77
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
65
78
  if (Math.random() < (double) nrate / 10000) {
66
79
  pageBuilder.setNull(column);
67
80
  }
@@ -87,7 +100,7 @@ public class RandomjColumnVisitor
87
100
  @Override
88
101
  public void doubleColumn(Column column)
89
102
  {
90
- Integer nrate = columnOptions.get(column).get("null_rate");
103
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
91
104
  if (Math.random() < (double) nrate / 10000) {
92
105
  pageBuilder.setNull(column);
93
106
  }
@@ -113,7 +126,7 @@ public class RandomjColumnVisitor
113
126
  @Override
114
127
  public void stringColumn(Column column)
115
128
  {
116
- Integer nrate = columnOptions.get(column).get("null_rate");
129
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
117
130
  if (Math.random() < (double) nrate / 10000) {
118
131
  pageBuilder.setNull(column);
119
132
  }
@@ -131,7 +144,7 @@ public class RandomjColumnVisitor
131
144
  @Override
132
145
  public void timestampColumn(Column column)
133
146
  {
134
- Integer nrate = columnOptions.get(column).get("null_rate");
147
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
135
148
  if (Math.random() < (double) nrate / 10000) {
136
149
  pageBuilder.setNull(column);
137
150
  }
@@ -150,6 +163,45 @@ public class RandomjColumnVisitor
150
163
  @Override
151
164
  public void jsonColumn(Column column)
152
165
  {
153
- throw new UnsupportedOperationException("orc output plugin does not support json type");
166
+ Map<String, Object> map = new HashMap<>();
167
+ JsonColumnVisitor visitor = new JsonColumnVisitor(map);
168
+
169
+ List<JsonNode> nodes = schemaOptions.get(column);
170
+ for (JsonNode node : nodes) {
171
+ visit(node, visitor);
172
+ }
173
+
174
+ try {
175
+ pageBuilder.setJson(column, jsonParser.parse(mapper.writeValueAsString(map)));
176
+ }
177
+ catch (JsonProcessingException e) {
178
+ e.printStackTrace(); // NOSONAR
179
+ }
180
+ }
181
+
182
+ private void visit(JsonNode node, JsonColumnVisitor visitor)
183
+ {
184
+ SupportedJsonObject object = SupportedJsonObject.valueOf(node.get("type").asText().toUpperCase());
185
+ if (object.equals(SupportedJsonObject.BOOLEAN)) {
186
+ visitor.booleanNode(node);
187
+ }
188
+ else if (object.equals(SupportedJsonObject.NUMBER)) {
189
+ visitor.doubleNode(node);
190
+ }
191
+ else if (object.equals(SupportedJsonObject.INTEGER)) {
192
+ visitor.integerNode(node);
193
+ }
194
+ else if (object.equals(SupportedJsonObject.STRING)) {
195
+ visitor.stringNode(node);
196
+ }
197
+ else if (object.equals(SupportedJsonObject.ARRAY)) {
198
+ visitor.arrayNode(node);
199
+ }
200
+ else if (object.equals(SupportedJsonObject.OBJECT)) {
201
+ visitor.objectNode(node);
202
+ }
203
+ else {
204
+ throw new UnsupportedOperationException("randomj input plugin does not support json-data type");
205
+ }
154
206
  }
155
207
  }
@@ -1,10 +1,10 @@
1
1
  package org.embulk.input.randomj;
2
2
 
3
- import org.embulk.config.Config;
4
- import org.embulk.config.ConfigDefault;
3
+ import com.fasterxml.jackson.core.type.TypeReference;
4
+ import com.fasterxml.jackson.databind.JsonNode;
5
+ import com.fasterxml.jackson.databind.ObjectMapper;
5
6
  import org.embulk.config.ConfigDiff;
6
7
  import org.embulk.config.ConfigSource;
7
- import org.embulk.config.Task;
8
8
  import org.embulk.config.TaskReport;
9
9
  import org.embulk.config.TaskSource;
10
10
  import org.embulk.spi.Column;
@@ -16,6 +16,7 @@ import org.embulk.spi.PageOutput;
16
16
  import org.embulk.spi.Schema;
17
17
  import org.embulk.spi.SchemaConfig;
18
18
 
19
+ import java.io.IOException;
19
20
  import java.util.HashMap;
20
21
  import java.util.List;
21
22
  import java.util.Map;
@@ -24,26 +25,6 @@ import java.util.stream.IntStream;
24
25
  public class RandomjInputPlugin
25
26
  implements InputPlugin
26
27
  {
27
- public interface PluginTask
28
- extends Task
29
- {
30
- // configuration row (required integer)
31
- @Config("rows")
32
- int getRows();
33
-
34
- // ref: https://github.com/embulk/embulk-input-jdbc/blob/master/embulk-input-mysql/src/main/java/org/embulk/input/MySQLInputPlugin.java#L33-L35
35
- @Config("threads")
36
- @ConfigDefault("1")
37
- Integer getThreads();
38
-
39
- @Config("primary_key")
40
- @ConfigDefault("")
41
- String getPrimaryKey();
42
-
43
- @Config("schema")
44
- SchemaConfig getSchema();
45
- }
46
-
47
28
  @Override
48
29
  public ConfigDiff transaction(ConfigSource config,
49
30
  InputPlugin.Control control)
@@ -70,7 +51,7 @@ public class RandomjInputPlugin
70
51
  Schema schema, int taskCount,
71
52
  List<TaskReport> successTaskReports)
72
53
  {
73
- // throw new UnsupportedOperationException("randomj input plugin does not support cleanup");
54
+ // throw new UnsupportedOperationException("randomj input plugin does not support cleanup"); // NOSONAR
74
55
  }
75
56
 
76
57
  @Override
@@ -81,13 +62,14 @@ public class RandomjInputPlugin
81
62
  PluginTask task = taskSource.loadTask(PluginTask.class);
82
63
  Integer rows = task.getRows();
83
64
  final HashMap<Column, Map<String, Integer>> columnOptions = getColumnOptions(task);
65
+ final HashMap<Column, List<JsonNode>> columnSchemas = getColumnSchemas(task);
84
66
  try (PageBuilder pagebuilder =
85
67
  new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
86
68
  IntStream.rangeClosed(
87
69
  taskIndex * rows + 1,
88
70
  taskIndex * rows + rows
89
71
  ).boxed().forEach(rowNumber -> {
90
- RandomjColumnVisitor visitor = new RandomjColumnVisitor(pagebuilder, task, rowNumber, columnOptions);
72
+ RandomjColumnVisitor visitor = new RandomjColumnVisitor(pagebuilder, task, rowNumber, columnOptions, columnSchemas);
91
73
  schema.visitColumns(visitor);
92
74
  pagebuilder.addRecord();
93
75
  });
@@ -117,6 +99,28 @@ public class RandomjInputPlugin
117
99
  return lengthMap;
118
100
  }
119
101
 
102
+ HashMap<Column, List<JsonNode>> getColumnSchemas(PluginTask task)
103
+ {
104
+ SchemaConfig schemaConfig = task.getSchema();
105
+ Schema schema = schemaConfig.toSchema();
106
+ HashMap<Column, List<JsonNode>> schemaMap = new HashMap<>();
107
+ for (Column column : schema.getColumns()) {
108
+ String schemaString = schemaConfig
109
+ .getColumn(column.getIndex())
110
+ .getOption().get(String.class, "schema", "");
111
+ if (!schemaString.isEmpty()) {
112
+ try {
113
+ List<JsonNode> jsonNodes = new ObjectMapper().readValue(schemaString, new TypeReference<List<JsonNode>>() {});
114
+ schemaMap.put(column, jsonNodes);
115
+ }
116
+ catch (IOException e) {
117
+ e.printStackTrace(); // NOSONAR
118
+ }
119
+ }
120
+ }
121
+ return schemaMap;
122
+ }
123
+
120
124
  @Override
121
125
  public ConfigDiff guess(ConfigSource config)
122
126
  {
@@ -0,0 +1,18 @@
1
+ package org.embulk.input.randomj;
2
+
3
+ public enum SupportedJsonObject
4
+ {
5
+ OBJECT("object"),
6
+ STRING("string"),
7
+ BOOLEAN("boolean"),
8
+ INTEGER("integer"),
9
+ NUMBER("number"),
10
+ ARRAY("array");
11
+
12
+ private final String typeName;
13
+
14
+ private SupportedJsonObject(final String typeName)
15
+ {
16
+ this.typeName = typeName;
17
+ }
18
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-randomj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-02 00:00:00.000000000 Z
11
+ date: 2018-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -57,12 +57,15 @@ files:
57
57
  - gradle/wrapper/gradle-wrapper.properties
58
58
  - gradlew
59
59
  - lib/embulk/input/randomj.rb
60
+ - src/main/java/org/embulk/input/randomj/JsonColumnVisitor.java
61
+ - src/main/java/org/embulk/input/randomj/PluginTask.java
60
62
  - src/main/java/org/embulk/input/randomj/RandomjColumnVisitor.java
61
63
  - src/main/java/org/embulk/input/randomj/RandomjInputPlugin.java
64
+ - src/main/java/org/embulk/input/randomj/SupportedJsonObject.java
62
65
  - src/test/java/org/embulk/input/randomj/TestRandomjInputPlugin.java
63
66
  - classpath/commons-lang3-3.5.jar
64
67
  - classpath/commons-text-1.1.jar
65
- - classpath/embulk-input-randomj-0.4.0.jar
68
+ - classpath/embulk-input-randomj-0.5.0.jar
66
69
  homepage: https://github.com/yuokada/embulk-input-randomj
67
70
  licenses:
68
71
  - MIT