embulk-input-randomj 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 664734a687060fbda08c7e14593684f63a501767
|
4
|
+
data.tar.gz: 3e3aa292e1eb2916cf8b516c3bc486f194e67417
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c07094226ddafc4ce96a2f8738a8a73c86a9e143d3de802b048ee80990459f6b9fff0db75adc0b1d15a5127e91a61241f1556db7540bf1d1f6f0e5ccdc055180
|
7
|
+
data.tar.gz: 6a97183220e98b63c92c2cccb7b54d8ae6456b796670abeda1d10b1bc3a6effbdde9f6182980dbb1ff13289802741f89ee9a60f8b7b01723d18d7cfc278f9bee
|
data/README.md
CHANGED
@@ -45,8 +45,24 @@ in:
|
|
45
45
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
46
46
|
```
|
47
47
|
|
48
|
+
Added the `length`, `max_value`, and `min_value` options (since 0.3.0)
|
49
|
+
```yaml
|
50
|
+
in:
|
51
|
+
type: randomj
|
52
|
+
rows: 16
|
53
|
+
threads: 1
|
54
|
+
primary_key: myid
|
55
|
+
schema:
|
56
|
+
- {name: myid, type: long}
|
57
|
+
- {name: named, type: string, length: 12}
|
58
|
+
- {name: price, type: long, max_value: 1080, min_value: 100}
|
59
|
+
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
60
|
+
```
|
61
|
+
|
48
62
|
## Usage
|
49
63
|
|
64
|
+
### Example1
|
65
|
+
|
50
66
|
```shell
|
51
67
|
|
52
68
|
% cat example/config.yml
|
@@ -96,6 +112,61 @@ in:
|
|
96
112
|
|
97
113
|
```
|
98
114
|
|
115
|
+
### Example2
|
116
|
+
|
117
|
+
- `named_s` returns a string of length 8
|
118
|
+
- `score` returns a value in the range `100~255`
|
119
|
+
- `rate` returns a value in the range `-100~100`
|
120
|
+
|
121
|
+
```shell
|
122
|
+
|
123
|
+
% cat example/config.yml
|
124
|
+
in:
|
125
|
+
type: randomj
|
126
|
+
rows: 16
|
127
|
+
threads: 1
|
128
|
+
# default_timezone: Asia/Tokyo
|
129
|
+
primary_key: myid
|
130
|
+
schema:
|
131
|
+
- {name: myid, type: long}
|
132
|
+
- {name: named, type: string}
|
133
|
+
- {name: named_s, type: string, length: 8}
|
134
|
+
- {name: x_flag, type: boolean}
|
135
|
+
- {name: rate, type: double, max_value: 100, min_value: -100}
|
136
|
+
- {name: score, type: long, max_value: 255, min_value: 100}
|
137
|
+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
138
|
+
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
139
|
+
|
140
|
+
out:
|
141
|
+
type: stdout
|
142
|
+
|
143
|
+
|
144
|
+
% embulk run -I lib example/config.yml
|
145
|
+
2017-09-10 04:45:04.894 +0900: Embulk v0.8.32
|
146
|
+
2017-09-10 04:45:10.212 +0900 [INFO] (0001:transaction): Loaded plugin embulk/input/randomj from a load path
|
147
|
+
2017-09-10 04:45:10.246 +0900 [INFO] (0001:transaction): Using local thread executor with max_threads=8 / output tasks 4 = input tasks 1 * 4
|
148
|
+
2017-09-10 04:45:10.263 +0900 [INFO] (0001:transaction): {done: 0 / 1, running: 0}
|
149
|
+
1,BOcbVJX5bWL5wRBJc532trxvwhQpmg3d,yHwXATfG,true,-79.62544211154894,129,2017-12-05 22:31:35,2017/12/26
|
150
|
+
2,N2gljQxd4yDBzJjK9iSRUdROtaZGUEl7,zSrEMjzC,false,-11.47506884041689,194,2017-09-17 15:56:18,2017/12/06
|
151
|
+
3,PJvKkf0wwpGqGMlc7OjUhjZNi0pTEZIU,q6TgdoaZ,false,85.17356188437738,137,2017-10-07 17:28:43,2017/10/22
|
152
|
+
4,DA6wWE4p3zIPDK0Mp81bWczewNSMY2sq,KeobJmS1,false,79.95787440150436,221,2017-09-28 19:35:17,2017/11/20
|
153
|
+
5,8DNF4TzhVDLCFey2x1eCHryf4GdvHlyW,D2jddtEN,true,19.801687906161735,182,2017-11-24 18:43:38,2017/12/29
|
154
|
+
6,veyIxBc9u0FMwsGksMfLhvBMuIF2D7XO,6Mtz4MN9,true,26.922649237294582,176,2017-09-23 07:43:40,2017/11/18
|
155
|
+
7,HHCTLuaxAJIRHHG7cB2u9Ake9p9OSIcy,UHHKp5xX,true,9.960707451320626,108,2017-09-14 08:11:49,2017/11/05
|
156
|
+
8,HcQhHMQ4sYiXTBpvNiTqDGskuTeVEC6r,d0VSR8K8,false,-62.405292711551624,118,2017-11-11 08:06:20,2017/10/20
|
157
|
+
9,si5BWUPEEvVHvveeqSxG6ypc7pSsKtC7,bW5p9boG,false,-76.91915279000274,192,2017-09-28 19:46:53,2017/11/04
|
158
|
+
10,xnfU0aJgigJG9rPan2rwoffhN9pzLQCy,R8MV0Jpa,true,-79.40738909989871,104,2017-11-19 02:50:07,2017/09/11
|
159
|
+
11,KiRzQqfE6wRw3WjMPAmedqtHyG3MttGU,SowzDTSb,true,77.22509797548325,163,2017-12-23 18:16:30,2017/12/27
|
160
|
+
12,pQLz3fMIkN6UANwSbzJ5vhBWzF2FI7uo,uPGyHyuW,true,71.19680005107371,180,2017-11-23 16:31:30,2017/11/14
|
161
|
+
13,aFOc2qCAu5oYbxTCGkMNcZob6Tl3wl3Y,apFu34Ps,false,82.8406608691031,226,2017-10-03 06:09:25,2017/10/06
|
162
|
+
14,Kz3JGL23k7f8SR17xQBw063ApuGdeWIP,r0c0KnUC,true,-26.484829732050134,113,2017-10-01 02:40:37,2017/11/26
|
163
|
+
15,p5vGY02BzrHqk345JyAhFU7xVsA2jEZD,nhzsefns,false,-79.0184308849151,119,2017-12-15 22:59:28,2017/11/25
|
164
|
+
16,1jyxot60lCrRFMUfjyHcZ07dq05eu76a,WewnLZfw,false,-55.315211168770816,141,2017-12-11 10:36:46,2017/12/05
|
165
|
+
2017-09-10 04:45:10.344 +0900 [INFO] (0001:transaction): {done: 1 / 1, running: 0}
|
166
|
+
2017-09-10 04:45:10.351 +0900 [INFO] (main): Committed.
|
167
|
+
2017-09-10 04:45:10.351 +0900 [INFO] (main): Next config diff: {"in":{},"out":{}}
|
168
|
+
|
169
|
+
```
|
99
170
|
|
100
171
|
## Build
|
101
172
|
|
data/build.gradle
CHANGED
data/example/config.yml
CHANGED
@@ -7,9 +7,10 @@ in:
|
|
7
7
|
schema:
|
8
8
|
- {name: myid, type: long}
|
9
9
|
- {name: named, type: string}
|
10
|
+
- {name: named_s, type: string, length: 8}
|
10
11
|
- {name: x_flag, type: boolean}
|
11
|
-
- {name:
|
12
|
-
- {name: score, type: long}
|
12
|
+
- {name: rate, type: double, max_value: 100, min_value: -100}
|
13
|
+
- {name: score, type: long, max_value: 255, min_value: 100}
|
13
14
|
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
|
14
15
|
- {name: purchase, type: timestamp, format: '%Y/%m/%d'}
|
15
16
|
|
data/src/main/java/org/embulk/input/randomj/{RandomColumnVisitor.java → RandomjColumnVisitor.java}
RENAMED
@@ -10,26 +10,30 @@ import org.embulk.spi.time.Timestamp;
|
|
10
10
|
|
11
11
|
import java.time.LocalDateTime;
|
12
12
|
import java.time.ZoneId;
|
13
|
+
import java.util.HashMap;
|
14
|
+
import java.util.Map;
|
13
15
|
import java.util.Random;
|
14
16
|
|
15
|
-
public class
|
17
|
+
public class RandomjColumnVisitor
|
16
18
|
implements ColumnVisitor
|
17
19
|
{
|
18
20
|
private final PageBuilder pageBuilder;
|
19
21
|
private final PluginTask task;
|
20
22
|
private final Integer row;
|
21
23
|
private final Random rnd;
|
24
|
+
private final HashMap<Column, Map<String, Integer>> columnOptions;
|
22
25
|
private final RandomStringGenerator generator = new RandomStringGenerator.Builder()
|
23
26
|
.withinRange('0', 'z')
|
24
27
|
.filteredBy(CharacterPredicates.LETTERS, CharacterPredicates.DIGITS)
|
25
28
|
.build();
|
26
29
|
private final ZoneId zoneId = ZoneId.systemDefault();
|
27
30
|
|
28
|
-
public
|
31
|
+
public RandomjColumnVisitor(PageBuilder pageBuilder, PluginTask task, Integer row, HashMap<Column, Map<String, Integer>> columnOptions)
|
29
32
|
{
|
30
33
|
this.task = task;
|
31
34
|
this.pageBuilder = pageBuilder;
|
32
35
|
this.row = row;
|
36
|
+
this.columnOptions = columnOptions;
|
33
37
|
this.rnd = new Random();
|
34
38
|
}
|
35
39
|
|
@@ -52,20 +56,53 @@ public class RandomColumnVisitor
|
|
52
56
|
pageBuilder.setLong(column, row);
|
53
57
|
}
|
54
58
|
else {
|
55
|
-
|
59
|
+
Integer max = columnOptions.get(column).get("max_value");
|
60
|
+
Integer min = columnOptions.get(column).get("min_value");
|
61
|
+
if (max != null) {
|
62
|
+
if (min != null) {
|
63
|
+
Integer s = min + rnd.nextInt((max - min));
|
64
|
+
pageBuilder.setLong(column, s);
|
65
|
+
}
|
66
|
+
else {
|
67
|
+
pageBuilder.setLong(column, rnd.nextInt(max));
|
68
|
+
}
|
69
|
+
}
|
70
|
+
else {
|
71
|
+
pageBuilder.setLong(column, rnd.nextInt(10000));
|
72
|
+
}
|
56
73
|
}
|
57
74
|
}
|
58
75
|
|
59
76
|
@Override
|
60
77
|
public void doubleColumn(Column column)
|
61
78
|
{
|
62
|
-
|
79
|
+
Integer max = columnOptions.get(column).get("max_value");
|
80
|
+
Integer min = columnOptions.get(column).get("min_value");
|
81
|
+
if (max != null) {
|
82
|
+
if (min != null) {
|
83
|
+
Double d = min + rnd.nextInt((max - min) - 1) + rnd.nextDouble();
|
84
|
+
pageBuilder.setDouble(column, d);
|
85
|
+
}
|
86
|
+
else {
|
87
|
+
Double d = rnd.nextInt(max - 1) + rnd.nextDouble();
|
88
|
+
pageBuilder.setDouble(column, d);
|
89
|
+
}
|
90
|
+
}
|
91
|
+
else {
|
92
|
+
pageBuilder.setDouble(column, rnd.nextDouble() * 10000);
|
93
|
+
}
|
63
94
|
}
|
64
95
|
|
65
96
|
@Override
|
66
97
|
public void stringColumn(Column column)
|
67
98
|
{
|
68
|
-
|
99
|
+
final Integer length = columnOptions.get(column).getOrDefault("length", 0);
|
100
|
+
if (length == 0) {
|
101
|
+
pageBuilder.setString(column, generator.generate(32));
|
102
|
+
}
|
103
|
+
else {
|
104
|
+
pageBuilder.setString(column, generator.generate(length));
|
105
|
+
}
|
69
106
|
}
|
70
107
|
|
71
108
|
@Override
|
@@ -9,6 +9,8 @@ import org.embulk.config.ConfigSource;
|
|
9
9
|
import org.embulk.config.Task;
|
10
10
|
import org.embulk.config.TaskReport;
|
11
11
|
import org.embulk.config.TaskSource;
|
12
|
+
import org.embulk.spi.Column;
|
13
|
+
import org.embulk.spi.ColumnConfig;
|
12
14
|
import org.embulk.spi.Exec;
|
13
15
|
import org.embulk.spi.InputPlugin;
|
14
16
|
import org.embulk.spi.PageBuilder;
|
@@ -16,7 +18,9 @@ import org.embulk.spi.PageOutput;
|
|
16
18
|
import org.embulk.spi.Schema;
|
17
19
|
import org.embulk.spi.SchemaConfig;
|
18
20
|
|
21
|
+
import java.util.HashMap;
|
19
22
|
import java.util.List;
|
23
|
+
import java.util.Map;
|
20
24
|
import java.util.stream.IntStream;
|
21
25
|
|
22
26
|
public class RandomjInputPlugin
|
@@ -82,13 +86,14 @@ public class RandomjInputPlugin
|
|
82
86
|
{
|
83
87
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
84
88
|
Integer rows = task.getRows();
|
89
|
+
final HashMap<Column, Map<String, Integer>> columnOptions = getColumnOptions(task);
|
85
90
|
try (PageBuilder pagebuilder =
|
86
91
|
new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
87
92
|
IntStream.rangeClosed(
|
88
93
|
taskIndex * rows + 1,
|
89
94
|
taskIndex * rows + rows
|
90
95
|
).boxed().forEach(rowNumber -> {
|
91
|
-
|
96
|
+
RandomjColumnVisitor visitor = new RandomjColumnVisitor(pagebuilder, task, rowNumber, columnOptions);
|
92
97
|
schema.visitColumns(visitor);
|
93
98
|
pagebuilder.addRecord();
|
94
99
|
});
|
@@ -101,6 +106,22 @@ public class RandomjInputPlugin
|
|
101
106
|
return taskReport;
|
102
107
|
}
|
103
108
|
|
109
|
+
HashMap<Column, Map<String, Integer>> getColumnOptions(PluginTask task)
|
110
|
+
{
|
111
|
+
SchemaConfig schemaConfig = task.getSchema();
|
112
|
+
Schema schema = schemaConfig.toSchema();
|
113
|
+
HashMap<Column, Map<String, Integer>> lengthMap = new HashMap<>();
|
114
|
+
for (Column column : schema.getColumns()) {
|
115
|
+
HashMap<String, Integer> miniMap = new HashMap<>();
|
116
|
+
ColumnConfig c = schemaConfig.getColumn(column.getIndex());
|
117
|
+
miniMap.put("length", c.getOption().get(Integer.class, "length", 0));
|
118
|
+
miniMap.put("max_value", c.getOption().get(Integer.class, "max_value", null));
|
119
|
+
miniMap.put("min_value", c.getOption().get(Integer.class, "min_value", null));
|
120
|
+
lengthMap.put(column, miniMap);
|
121
|
+
}
|
122
|
+
return lengthMap;
|
123
|
+
}
|
124
|
+
|
104
125
|
@Override
|
105
126
|
public ConfigDiff guess(ConfigSource config)
|
106
127
|
{
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-randomj
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -57,12 +57,12 @@ files:
|
|
57
57
|
- gradle/wrapper/gradle-wrapper.properties
|
58
58
|
- gradlew
|
59
59
|
- lib/embulk/input/randomj.rb
|
60
|
-
- src/main/java/org/embulk/input/randomj/
|
60
|
+
- src/main/java/org/embulk/input/randomj/RandomjColumnVisitor.java
|
61
61
|
- src/main/java/org/embulk/input/randomj/RandomjInputPlugin.java
|
62
62
|
- src/test/java/org/embulk/input/randomj/TestRandomjInputPlugin.java
|
63
63
|
- classpath/commons-lang3-3.5.jar
|
64
64
|
- classpath/commons-text-1.1.jar
|
65
|
-
- classpath/embulk-input-randomj-0.
|
65
|
+
- classpath/embulk-input-randomj-0.3.0.jar
|
66
66
|
homepage: https://github.com/yuokada/embulk-input-randomj
|
67
67
|
licenses:
|
68
68
|
- MIT
|