embulk-input-randomj 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1165a1e2c182b649c7a2fbf73de530de55a5497f
4
- data.tar.gz: ab71604a4f4af3d7765433b7e380e5f509f7b7e2
3
+ metadata.gz: 82a2c62219bbf24fdfc2cf2bb24f8f0b2210b829
4
+ data.tar.gz: bd0cad5900b36c009f6e92fa86357b5edd97c741
5
5
  SHA512:
6
- metadata.gz: 9fc2bea3f88fcc01b2a2e33061d0a28d2588345d8b293ca567cb7aaadcd1daf3975c70f24328e2facbb2e4dd511e06dec5f854520885e77984bfa7a03a508866
7
- data.tar.gz: a6d3961d0f3fcd6ebd3f02545f6272a05805680de3923cb2ff1a3dc8db588219e429549a7279b4cca14d719feeed126046f465d7ab7ede26a92f123fe58ea787
6
+ metadata.gz: ff0b599c31cf975d09f28ca2e3328abc5fcbee510832d551d0ab5b0e0dc7b63005f269478a9ff3bccaddaf68ba4435964a9f126ed736d62d86eed7843f66f2cd
7
+ data.tar.gz: 3a6ec59cbc4d120e15b5012c26633bf4494e47870b58cc67be520851c5695807bb1553012b82748f97ed7ca6f16f0dfa556f9b690cbeef6d3d2484a0115cc998
data/README.md CHANGED
@@ -14,7 +14,7 @@ Original: [kumagi/embulk\-input\-random](https://github.com/kumagi/embulk-input-
14
14
  * **Cleanup supported**: no
15
15
  * **Guess supported**: no
16
16
 
17
- ## Install
17
+ ## Install
18
18
 
19
19
  ``` shell
20
20
  % embulk gem install embulk-input-randomj
@@ -59,6 +59,25 @@ in:
59
59
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
60
60
  ```
61
61
 
62
+ Add `null_rate` option (from 0.4.0)
63
+
64
+ This configuration inserts `null` into the `price` field with a probability of `8` in 10000.
65
+
66
+ ```yaml
67
+ in:
68
+ type: randomj
69
+ rows: 16
70
+ threads: 1
71
+ primary_key: myid
72
+ schema:
73
+ - {name: myid, type: long}
74
+ - {name: named, type: string, length: 12}
75
+ - {name: price, type: long, max_value: 1080, min_value: 100, null_rate: 8}
76
+ - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
77
+ ```
78
+
79
+
80
+
62
81
  ## Usage
63
82
 
64
83
  ### Example1
@@ -80,7 +99,7 @@ in:
80
99
  - {name: score, type: long}
81
100
  - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
82
101
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
83
-
102
+
84
103
  out:
85
104
  type: stdout
86
105
 
@@ -116,7 +135,7 @@ in:
116
135
 
117
136
  - `named_s` return string with length 8
118
137
  - `score` return value between `100~255`
119
- - `rate` return value between `-100~100`
138
+ - `rate` return value between `-100~100`
120
139
 
121
140
  ```shell
122
141
 
@@ -176,7 +195,7 @@ $ ./gradlew gem # -t to watch change of files and rebuild continuously
176
195
  ```
177
196
 
178
197
  ```
179
- $ ./gradlew build && ./gradlew classpath
198
+ $ ./gradlew build && ./gradlew classpath
180
199
  $ embulk run -I lib config/example.yml
181
200
  ```
182
201
 
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.4.0"
18
+ version = "0.5.0"
19
19
 
20
20
  sourceCompatibility = 1.8
21
21
  targetCompatibility = 1.8
data/example/config.yml CHANGED
@@ -13,6 +13,8 @@ in:
13
13
  - {name: score, type: long, max_value: 255, min_value: 100}
14
14
  - {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
15
15
  - {name: purchase, type: timestamp, format: '%Y/%m/%d'}
16
+ - {name: json_obj, type: json, schema: '[{"name": "obj1", "type": "object", "items": [{"name": "key1", "type": "string"}, {"name": "key2", "type": "number"} ] }]' }
17
+ - {name: json_key, type: json, schema: '[{"name": "baz", "type": "array", "items": {"type": "string", "size": 1}}]' }
16
18
 
17
19
  out:
18
20
  type: stdout
@@ -0,0 +1,142 @@
1
+ package org.embulk.input.randomj;
2
+
3
+ import com.fasterxml.jackson.databind.JsonNode;
4
+ import org.apache.commons.text.CharacterPredicates;
5
+ import org.apache.commons.text.RandomStringGenerator;
6
+
7
+ import java.util.ArrayList;
8
+ import java.util.HashMap;
9
+ import java.util.List;
10
+ import java.util.Map;
11
+ import java.util.Random;
12
+
13
+ public class JsonColumnVisitor
14
+ {
15
+
16
+ private final Map<String, Object> map;
17
+ private final Random rnd = new Random();
18
+ private final RandomStringGenerator generator = new RandomStringGenerator.Builder()
19
+ .withinRange('0', 'z')
20
+ .filteredBy(CharacterPredicates.LETTERS, CharacterPredicates.DIGITS)
21
+ .build();
22
+
23
+ private static final String ITEMS = "items";
24
+
25
+ public JsonColumnVisitor(Map<String, Object> gmap)
26
+ {
27
+ map = gmap;
28
+ }
29
+
30
+ public void booleanNode(JsonNode node)
31
+ {
32
+ String key = node.get("name").asText();
33
+ if (Math.random() < 0.5) {
34
+ map.put(key, true);
35
+ }
36
+ else {
37
+ map.put(key, false);
38
+ }
39
+ }
40
+
41
+ public void doubleNode(JsonNode node)
42
+ {
43
+ String key = node.get("name").asText();
44
+ map.put(key, rnd.nextDouble() * 10000);
45
+ }
46
+
47
+ public void integerNode(JsonNode node)
48
+ {
49
+ String key = node.get("name").asText();
50
+ map.put(key, rnd.nextInt(10000));
51
+ }
52
+
53
+ public void stringNode(JsonNode node)
54
+ {
55
+ String key = node.get("name").asText();
56
+ map.put(key, generator.generate(8));
57
+ }
58
+
59
+ public void arrayNode(JsonNode node) // NOSONAR
60
+ {
61
+ String key = node.get("name").asText();
62
+ String dataType = node.get(ITEMS).get("type").asText();
63
+ int arraySize = node.get(ITEMS).get("size").asInt(1);
64
+ SupportedJsonObject jtype = SupportedJsonObject.valueOf(dataType.toUpperCase());
65
+ switch (jtype) {
66
+ case BOOLEAN: { // NOSONAR
67
+ List<Boolean> m = new ArrayList<>();
68
+ for (int i = 0; i < arraySize; i++) {
69
+ if (Math.random() < 0.5) {
70
+ m.add(true);
71
+ }
72
+ else {
73
+ m.add(false);
74
+ }
75
+ }
76
+ map.put(key, m);
77
+ break;
78
+ }
79
+ case INTEGER: { // NOSONAR
80
+ ArrayList<Integer> m = new ArrayList<>();
81
+ for (int i = 0; i < arraySize; i++) {
82
+ m.add(rnd.nextInt(100));
83
+ }
84
+ map.put(key, m);
85
+ break;
86
+ }
87
+ case NUMBER: { // NOSONAR
88
+ ArrayList<Number> m = new ArrayList<>();
89
+ for (int i = 0; i < arraySize; i++) {
90
+ m.add(rnd.nextDouble() * 100);
91
+ }
92
+ map.put(key, m);
93
+ break;
94
+ }
95
+ case STRING: { // NOSONAR
96
+ int length = 8;
97
+ ArrayList<String> m = new ArrayList<>();
98
+ for (int i = 0; i < arraySize; i++) {
99
+ m.add(generator.generate(length));
100
+ }
101
+ map.put(key, m);
102
+ break;
103
+ }
104
+ default:
105
+ throw new UnsupportedOperationException("randomj input plugin does not support json-array-data type");
106
+ }
107
+ }
108
+
109
+ public void objectNode(JsonNode node)
110
+ {
111
+ Map<String, Object> objectMap = new HashMap<>();
112
+
113
+ for (JsonNode jsonNode : node.findValues(ITEMS).listIterator().next()) {
114
+ String nestKey = jsonNode.get("name").asText();
115
+ SupportedJsonObject jtype = SupportedJsonObject.valueOf(jsonNode.get("type").asText().toUpperCase());
116
+
117
+ switch (jtype) {
118
+ case BOOLEAN:
119
+ if (Math.random() < 0.5) {
120
+ objectMap.put(nestKey, true);
121
+ }
122
+ else {
123
+ objectMap.put(nestKey, false);
124
+ }
125
+ break;
126
+ case NUMBER:
127
+ objectMap.put(nestKey, rnd.nextDouble() * 100);
128
+ break;
129
+ case INTEGER:
130
+ objectMap.put(nestKey, rnd.nextInt(10000));
131
+ break;
132
+ case STRING:
133
+ int length = 8;
134
+ objectMap.put(nestKey, generator.generate(length));
135
+ break;
136
+ default:
137
+ throw new UnsupportedOperationException("randomj input plugin does not support json-data type");
138
+ }
139
+ }
140
+ map.put(node.get("name").asText(), objectMap);
141
+ }
142
+ }
@@ -0,0 +1,26 @@
1
+ package org.embulk.input.randomj;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.Task;
6
+ import org.embulk.spi.SchemaConfig;
7
+
8
+ public interface PluginTask
9
+ extends Task
10
+ {
11
+ // configuration row (required integer)
12
+ @Config("rows")
13
+ int getRows();
14
+
15
+ // ref: https://github.com/embulk/embulk-input-jdbc/blob/master/embulk-input-mysql/src/main/java/org/embulk/input/MySQLInputPlugin.java#L33-L35
16
+ @Config("threads")
17
+ @ConfigDefault("1")
18
+ Integer getThreads();
19
+
20
+ @Config("primary_key")
21
+ @ConfigDefault("")
22
+ String getPrimaryKey();
23
+
24
+ @Config("schema")
25
+ SchemaConfig getSchema();
26
+ }
@@ -1,15 +1,20 @@
1
1
  package org.embulk.input.randomj;
2
2
 
3
+ import com.fasterxml.jackson.core.JsonProcessingException;
4
+ import com.fasterxml.jackson.databind.JsonNode;
5
+ import com.fasterxml.jackson.databind.ObjectMapper;
3
6
  import org.apache.commons.text.CharacterPredicates;
4
7
  import org.apache.commons.text.RandomStringGenerator;
5
- import org.embulk.input.randomj.RandomjInputPlugin.PluginTask;
6
8
  import org.embulk.spi.Column;
7
9
  import org.embulk.spi.ColumnVisitor;
8
10
  import org.embulk.spi.PageBuilder;
11
+ import org.embulk.spi.json.JsonParser;
9
12
  import org.embulk.spi.time.Timestamp;
10
13
 
11
14
  import java.time.LocalDateTime;
12
15
  import java.time.ZoneId;
16
+ import java.util.HashMap;
17
+ import java.util.List;
13
18
  import java.util.Map;
14
19
  import java.util.Random;
15
20
 
@@ -21,25 +26,33 @@ public class RandomjColumnVisitor
21
26
  private final Integer row;
22
27
  private final Random rnd;
23
28
  private final Map<Column, Map<String, Integer>> columnOptions;
29
+ private final Map<Column, List<JsonNode>> schemaOptions;
24
30
  private final RandomStringGenerator generator = new RandomStringGenerator.Builder()
25
31
  .withinRange('0', 'z')
26
32
  .filteredBy(CharacterPredicates.LETTERS, CharacterPredicates.DIGITS)
27
33
  .build();
28
34
  private final ZoneId zoneId = ZoneId.systemDefault();
35
+ private final JsonParser jsonParser = new JsonParser();
36
+ private final ObjectMapper mapper = new ObjectMapper();
29
37
 
30
- public RandomjColumnVisitor(PageBuilder pageBuilder, PluginTask task, Integer row, Map<Column, Map<String, Integer>> columnOptions)
38
+ private static final String NULL_RATE = "null_rate";
39
+
40
+ public RandomjColumnVisitor(PageBuilder pageBuilder, PluginTask task, Integer row,
41
+ Map<Column, Map<String, Integer>> columnOptions,
42
+ Map<Column, List<JsonNode>> schemaOptions)
31
43
  {
32
44
  this.task = task;
33
45
  this.pageBuilder = pageBuilder;
34
46
  this.row = row;
35
47
  this.columnOptions = columnOptions;
36
48
  this.rnd = new Random();
49
+ this.schemaOptions = schemaOptions;
37
50
  }
38
51
 
39
52
  @Override
40
53
  public void booleanColumn(Column column)
41
54
  {
42
- Integer nrate = columnOptions.get(column).get("null_rate");
55
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
43
56
  if (Math.random() < (double) nrate / 10000) {
44
57
  pageBuilder.setNull(column);
45
58
  }
@@ -61,7 +74,7 @@ public class RandomjColumnVisitor
61
74
  pageBuilder.setLong(column, row);
62
75
  }
63
76
  else {
64
- Integer nrate = columnOptions.get(column).get("null_rate");
77
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
65
78
  if (Math.random() < (double) nrate / 10000) {
66
79
  pageBuilder.setNull(column);
67
80
  }
@@ -87,7 +100,7 @@ public class RandomjColumnVisitor
87
100
  @Override
88
101
  public void doubleColumn(Column column)
89
102
  {
90
- Integer nrate = columnOptions.get(column).get("null_rate");
103
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
91
104
  if (Math.random() < (double) nrate / 10000) {
92
105
  pageBuilder.setNull(column);
93
106
  }
@@ -113,7 +126,7 @@ public class RandomjColumnVisitor
113
126
  @Override
114
127
  public void stringColumn(Column column)
115
128
  {
116
- Integer nrate = columnOptions.get(column).get("null_rate");
129
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
117
130
  if (Math.random() < (double) nrate / 10000) {
118
131
  pageBuilder.setNull(column);
119
132
  }
@@ -131,7 +144,7 @@ public class RandomjColumnVisitor
131
144
  @Override
132
145
  public void timestampColumn(Column column)
133
146
  {
134
- Integer nrate = columnOptions.get(column).get("null_rate");
147
+ Integer nrate = columnOptions.get(column).get(NULL_RATE);
135
148
  if (Math.random() < (double) nrate / 10000) {
136
149
  pageBuilder.setNull(column);
137
150
  }
@@ -150,6 +163,45 @@ public class RandomjColumnVisitor
150
163
  @Override
151
164
  public void jsonColumn(Column column)
152
165
  {
153
- throw new UnsupportedOperationException("orc output plugin does not support json type");
166
+ Map<String, Object> map = new HashMap<>();
167
+ JsonColumnVisitor visitor = new JsonColumnVisitor(map);
168
+
169
+ List<JsonNode> nodes = schemaOptions.get(column);
170
+ for (JsonNode node : nodes) {
171
+ visit(node, visitor);
172
+ }
173
+
174
+ try {
175
+ pageBuilder.setJson(column, jsonParser.parse(mapper.writeValueAsString(map)));
176
+ }
177
+ catch (JsonProcessingException e) {
178
+ e.printStackTrace(); // NOSONAR
179
+ }
180
+ }
181
+
182
+ private void visit(JsonNode node, JsonColumnVisitor visitor)
183
+ {
184
+ SupportedJsonObject object = SupportedJsonObject.valueOf(node.get("type").asText().toUpperCase());
185
+ if (object.equals(SupportedJsonObject.BOOLEAN)) {
186
+ visitor.booleanNode(node);
187
+ }
188
+ else if (object.equals(SupportedJsonObject.NUMBER)) {
189
+ visitor.doubleNode(node);
190
+ }
191
+ else if (object.equals(SupportedJsonObject.INTEGER)) {
192
+ visitor.integerNode(node);
193
+ }
194
+ else if (object.equals(SupportedJsonObject.STRING)) {
195
+ visitor.stringNode(node);
196
+ }
197
+ else if (object.equals(SupportedJsonObject.ARRAY)) {
198
+ visitor.arrayNode(node);
199
+ }
200
+ else if (object.equals(SupportedJsonObject.OBJECT)) {
201
+ visitor.objectNode(node);
202
+ }
203
+ else {
204
+ throw new UnsupportedOperationException("randomj input plugin does not support json-data type");
205
+ }
154
206
  }
155
207
  }
@@ -1,10 +1,10 @@
1
1
  package org.embulk.input.randomj;
2
2
 
3
- import org.embulk.config.Config;
4
- import org.embulk.config.ConfigDefault;
3
+ import com.fasterxml.jackson.core.type.TypeReference;
4
+ import com.fasterxml.jackson.databind.JsonNode;
5
+ import com.fasterxml.jackson.databind.ObjectMapper;
5
6
  import org.embulk.config.ConfigDiff;
6
7
  import org.embulk.config.ConfigSource;
7
- import org.embulk.config.Task;
8
8
  import org.embulk.config.TaskReport;
9
9
  import org.embulk.config.TaskSource;
10
10
  import org.embulk.spi.Column;
@@ -16,6 +16,7 @@ import org.embulk.spi.PageOutput;
16
16
  import org.embulk.spi.Schema;
17
17
  import org.embulk.spi.SchemaConfig;
18
18
 
19
+ import java.io.IOException;
19
20
  import java.util.HashMap;
20
21
  import java.util.List;
21
22
  import java.util.Map;
@@ -24,26 +25,6 @@ import java.util.stream.IntStream;
24
25
  public class RandomjInputPlugin
25
26
  implements InputPlugin
26
27
  {
27
- public interface PluginTask
28
- extends Task
29
- {
30
- // configuration row (required integer)
31
- @Config("rows")
32
- int getRows();
33
-
34
- // ref: https://github.com/embulk/embulk-input-jdbc/blob/master/embulk-input-mysql/src/main/java/org/embulk/input/MySQLInputPlugin.java#L33-L35
35
- @Config("threads")
36
- @ConfigDefault("1")
37
- Integer getThreads();
38
-
39
- @Config("primary_key")
40
- @ConfigDefault("")
41
- String getPrimaryKey();
42
-
43
- @Config("schema")
44
- SchemaConfig getSchema();
45
- }
46
-
47
28
  @Override
48
29
  public ConfigDiff transaction(ConfigSource config,
49
30
  InputPlugin.Control control)
@@ -70,7 +51,7 @@ public class RandomjInputPlugin
70
51
  Schema schema, int taskCount,
71
52
  List<TaskReport> successTaskReports)
72
53
  {
73
- // throw new UnsupportedOperationException("randomj input plugin does not support cleanup");
54
+ // throw new UnsupportedOperationException("randomj input plugin does not support cleanup"); // NOSONAR
74
55
  }
75
56
 
76
57
  @Override
@@ -81,13 +62,14 @@ public class RandomjInputPlugin
81
62
  PluginTask task = taskSource.loadTask(PluginTask.class);
82
63
  Integer rows = task.getRows();
83
64
  final HashMap<Column, Map<String, Integer>> columnOptions = getColumnOptions(task);
65
+ final HashMap<Column, List<JsonNode>> columnSchemas = getColumnSchemas(task);
84
66
  try (PageBuilder pagebuilder =
85
67
  new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
86
68
  IntStream.rangeClosed(
87
69
  taskIndex * rows + 1,
88
70
  taskIndex * rows + rows
89
71
  ).boxed().forEach(rowNumber -> {
90
- RandomjColumnVisitor visitor = new RandomjColumnVisitor(pagebuilder, task, rowNumber, columnOptions);
72
+ RandomjColumnVisitor visitor = new RandomjColumnVisitor(pagebuilder, task, rowNumber, columnOptions, columnSchemas);
91
73
  schema.visitColumns(visitor);
92
74
  pagebuilder.addRecord();
93
75
  });
@@ -117,6 +99,28 @@ public class RandomjInputPlugin
117
99
  return lengthMap;
118
100
  }
119
101
 
102
+ HashMap<Column, List<JsonNode>> getColumnSchemas(PluginTask task)
103
+ {
104
+ SchemaConfig schemaConfig = task.getSchema();
105
+ Schema schema = schemaConfig.toSchema();
106
+ HashMap<Column, List<JsonNode>> schemaMap = new HashMap<>();
107
+ for (Column column : schema.getColumns()) {
108
+ String schemaString = schemaConfig
109
+ .getColumn(column.getIndex())
110
+ .getOption().get(String.class, "schema", "");
111
+ if (!schemaString.isEmpty()) {
112
+ try {
113
+ List<JsonNode> jsonNodes = new ObjectMapper().readValue(schemaString, new TypeReference<List<JsonNode>>() {});
114
+ schemaMap.put(column, jsonNodes);
115
+ }
116
+ catch (IOException e) {
117
+ e.printStackTrace(); // NOSONAR
118
+ }
119
+ }
120
+ }
121
+ return schemaMap;
122
+ }
123
+
120
124
  @Override
121
125
  public ConfigDiff guess(ConfigSource config)
122
126
  {
@@ -0,0 +1,18 @@
1
+ package org.embulk.input.randomj;
2
+
3
+ public enum SupportedJsonObject
4
+ {
5
+ OBJECT("object"),
6
+ STRING("string"),
7
+ BOOLEAN("boolean"),
8
+ INTEGER("integer"),
9
+ NUMBER("number"),
10
+ ARRAY("array");
11
+
12
+ private final String typeName;
13
+
14
+ private SupportedJsonObject(final String typeName)
15
+ {
16
+ this.typeName = typeName;
17
+ }
18
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-randomj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-02 00:00:00.000000000 Z
11
+ date: 2018-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -57,12 +57,15 @@ files:
57
57
  - gradle/wrapper/gradle-wrapper.properties
58
58
  - gradlew
59
59
  - lib/embulk/input/randomj.rb
60
+ - src/main/java/org/embulk/input/randomj/JsonColumnVisitor.java
61
+ - src/main/java/org/embulk/input/randomj/PluginTask.java
60
62
  - src/main/java/org/embulk/input/randomj/RandomjColumnVisitor.java
61
63
  - src/main/java/org/embulk/input/randomj/RandomjInputPlugin.java
64
+ - src/main/java/org/embulk/input/randomj/SupportedJsonObject.java
62
65
  - src/test/java/org/embulk/input/randomj/TestRandomjInputPlugin.java
63
66
  - classpath/commons-lang3-3.5.jar
64
67
  - classpath/commons-text-1.1.jar
65
- - classpath/embulk-input-randomj-0.4.0.jar
68
+ - classpath/embulk-input-randomj-0.5.0.jar
66
69
  homepage: https://github.com/yuokada/embulk-input-randomj
67
70
  licenses:
68
71
  - MIT