embulk-executor-mapreduce 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/build.gradle +2 -0
- data/classpath/activation-1.1.jar +0 -0
- data/classpath/apacheds-i18n-2.0.0-M15.jar +0 -0
- data/classpath/apacheds-kerberos-codec-2.0.0-M15.jar +0 -0
- data/classpath/api-asn1-api-1.0.0-M20.jar +0 -0
- data/classpath/api-util-1.0.0-M20.jar +0 -0
- data/classpath/avro-1.7.4.jar +0 -0
- data/classpath/commons-beanutils-1.7.0.jar +0 -0
- data/classpath/commons-cli-1.2.jar +0 -0
- data/classpath/commons-codec-1.6.jar +0 -0
- data/classpath/commons-collections-3.2.1.jar +0 -0
- data/classpath/commons-compress-1.4.1.jar +0 -0
- data/classpath/commons-configuration-1.6.jar +0 -0
- data/classpath/commons-digester-1.8.jar +0 -0
- data/classpath/commons-httpclient-3.1.jar +0 -0
- data/classpath/commons-io-2.4.jar +0 -0
- data/classpath/commons-lang-2.6.jar +0 -0
- data/classpath/commons-logging-1.1.3.jar +0 -0
- data/classpath/commons-math3-3.1.1.jar +0 -0
- data/classpath/commons-net-3.1.jar +0 -0
- data/classpath/curator-client-2.6.0.jar +0 -0
- data/classpath/curator-framework-2.6.0.jar +0 -0
- data/classpath/curator-recipes-2.6.0.jar +0 -0
- data/classpath/embulk-executor-mapreduce-0.1.0.jar +0 -0
- data/classpath/gson-2.2.4.jar +0 -0
- data/classpath/hadoop-annotations-2.6.0.jar +0 -0
- data/classpath/hadoop-auth-2.6.0.jar +0 -0
- data/classpath/hadoop-client-2.6.0.jar +0 -0
- data/classpath/hadoop-common-2.6.0.jar +0 -0
- data/classpath/hadoop-hdfs-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-app-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-common-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-core-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-jobclient-2.6.0.jar +0 -0
- data/classpath/hadoop-mapreduce-client-shuffle-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-api-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-client-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-common-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-server-common-2.6.0.jar +0 -0
- data/classpath/hadoop-yarn-server-nodemanager-2.6.0.jar +0 -0
- data/classpath/htrace-core-3.0.4.jar +0 -0
- data/classpath/httpclient-4.2.5.jar +0 -0
- data/classpath/httpcore-4.2.4.jar +0 -0
- data/classpath/jackson-core-asl-1.9.13.jar +0 -0
- data/classpath/jackson-jaxrs-1.9.13.jar +0 -0
- data/classpath/jackson-mapper-asl-1.9.13.jar +0 -0
- data/classpath/jackson-xc-1.9.13.jar +0 -0
- data/classpath/jaxb-api-2.2.2.jar +0 -0
- data/classpath/jaxb-impl-2.2.3-1.jar +0 -0
- data/classpath/jersey-client-1.9.jar +0 -0
- data/classpath/jersey-core-1.9.jar +0 -0
- data/classpath/jersey-guice-1.9.jar +0 -0
- data/classpath/jersey-json-1.9.jar +0 -0
- data/classpath/jersey-server-1.9.jar +0 -0
- data/classpath/jettison-1.1.jar +0 -0
- data/classpath/jetty-util-6.1.26.jar +0 -0
- data/classpath/jline-0.9.94.jar +0 -0
- data/classpath/jsr305-1.3.9.jar +0 -0
- data/classpath/leveldbjni-all-1.8.jar +0 -0
- data/classpath/netty-3.7.0.Final.jar +0 -0
- data/classpath/paranamer-2.3.jar +0 -0
- data/classpath/protobuf-java-2.5.0.jar +0 -0
- data/classpath/servlet-api-2.5.jar +0 -0
- data/classpath/snappy-java-1.0.4.1.jar +0 -0
- data/classpath/stax-api-1.0-2.jar +0 -0
- data/classpath/xmlenc-0.52.jar +0 -0
- data/classpath/xz-1.0.jar +0 -0
- data/classpath/zookeeper-3.4.6.jar +0 -0
- data/lib/embulk/executor/mapreduce.rb +3 -0
- data/src/main/java/org/embulk/executor/mapreduce/AttemptState.java +154 -0
- data/src/main/java/org/embulk/executor/mapreduce/BufferWritable.java +74 -0
- data/src/main/java/org/embulk/executor/mapreduce/BufferedPagePartitioner.java +158 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkInputFormat.java +37 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkInputSplit.java +61 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkMapReduce.java +359 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkPartitioningMapReduce.java +303 -0
- data/src/main/java/org/embulk/executor/mapreduce/EmbulkRecordReader.java +63 -0
- data/src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java +391 -0
- data/src/main/java/org/embulk/executor/mapreduce/MapReduceExecutorTask.java +60 -0
- data/src/main/java/org/embulk/executor/mapreduce/PageWritable.java +66 -0
- data/src/main/java/org/embulk/executor/mapreduce/PartitionKey.java +11 -0
- data/src/main/java/org/embulk/executor/mapreduce/Partitioner.java +11 -0
- data/src/main/java/org/embulk/executor/mapreduce/Partitioning.java +12 -0
- data/src/main/java/org/embulk/executor/mapreduce/PluginArchive.java +189 -0
- data/src/main/java/org/embulk/executor/mapreduce/RemoteTaskFailedException.java +10 -0
- data/src/main/java/org/embulk/executor/mapreduce/SetContextClassLoader.java +19 -0
- data/src/main/java/org/embulk/executor/mapreduce/TimestampPartitioning.java +291 -0
- metadata +131 -0
@@ -0,0 +1,189 @@
|
|
1
|
+
package org.embulk.executor.mapreduce;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import java.io.File;
|
6
|
+
import java.io.InputStream;
|
7
|
+
import java.io.OutputStream;
|
8
|
+
import java.io.FileOutputStream;
|
9
|
+
import java.io.IOException;
|
10
|
+
import java.nio.file.Path;
|
11
|
+
import java.nio.file.Files;
|
12
|
+
import java.nio.file.DirectoryStream;
|
13
|
+
import java.nio.file.NoSuchFileException;
|
14
|
+
import java.nio.file.NotDirectoryException;
|
15
|
+
import java.util.zip.ZipEntry;
|
16
|
+
import java.util.zip.ZipOutputStream;
|
17
|
+
import java.util.zip.ZipInputStream;
|
18
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
19
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
20
|
+
import com.google.common.collect.ImmutableList;
|
21
|
+
import com.google.common.io.ByteStreams;
|
22
|
+
import org.jruby.embed.ScriptingContainer;
|
23
|
+
import org.jruby.embed.InvokeFailedException;
|
24
|
+
|
25
|
+
public class PluginArchive
|
26
|
+
{
|
27
|
+
public static class GemSpec
|
28
|
+
{
|
29
|
+
private final String name;
|
30
|
+
private final List<String> requirePaths;
|
31
|
+
|
32
|
+
@JsonCreator
|
33
|
+
public GemSpec(
|
34
|
+
@JsonProperty("name") String name,
|
35
|
+
@JsonProperty("requirePaths") List<String> requirePaths)
|
36
|
+
{
|
37
|
+
this.name = name;
|
38
|
+
this.requirePaths = requirePaths;
|
39
|
+
}
|
40
|
+
|
41
|
+
@JsonProperty("name")
|
42
|
+
public String getName()
|
43
|
+
{
|
44
|
+
return name;
|
45
|
+
}
|
46
|
+
|
47
|
+
@JsonProperty("requirePaths")
|
48
|
+
public List<String> getRequirePaths()
|
49
|
+
{
|
50
|
+
return requirePaths;
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
private static class LocalGem
|
55
|
+
extends GemSpec
|
56
|
+
{
|
57
|
+
private final File localPath;
|
58
|
+
|
59
|
+
public LocalGem(File localPath, String name, List<String> requirePaths)
|
60
|
+
{
|
61
|
+
super(name, requirePaths);
|
62
|
+
this.localPath = localPath;
|
63
|
+
}
|
64
|
+
|
65
|
+
public File getLocalPath()
|
66
|
+
{
|
67
|
+
return localPath;
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
public static class Builder
|
72
|
+
{
|
73
|
+
private final ImmutableList.Builder<LocalGem> localGems = ImmutableList.builder();
|
74
|
+
|
75
|
+
@SuppressWarnings("unchecked")
|
76
|
+
public Builder addLoadedRubyGems(ScriptingContainer jruby)
|
77
|
+
{
|
78
|
+
List<List<String>> tuples = (List<List<String>>) jruby.runScriptlet("Gem.loaded_specs.map {|k,v| [k, v.full_gem_path, v.require_paths].flatten }");
|
79
|
+
for (List<String> tuple : tuples) {
|
80
|
+
String name = tuple.remove(0);
|
81
|
+
String fullGemPath = tuple.remove(0);
|
82
|
+
List<String> requirePaths = ImmutableList.copyOf(tuple);
|
83
|
+
addSpec(new File(fullGemPath), name, requirePaths);
|
84
|
+
}
|
85
|
+
return this;
|
86
|
+
}
|
87
|
+
|
88
|
+
public Builder addSpec(File localPath, String name, List<String> requirePaths)
|
89
|
+
{
|
90
|
+
localGems.add(new LocalGem(localPath, name, requirePaths));
|
91
|
+
return this;
|
92
|
+
}
|
93
|
+
|
94
|
+
public PluginArchive build()
|
95
|
+
{
|
96
|
+
return new PluginArchive(localGems.build());
|
97
|
+
}
|
98
|
+
}
|
99
|
+
|
100
|
+
private final List<LocalGem> localGems;
|
101
|
+
|
102
|
+
private PluginArchive(List<LocalGem> localGems)
|
103
|
+
{
|
104
|
+
this.localGems = localGems;
|
105
|
+
}
|
106
|
+
|
107
|
+
@SuppressWarnings("unchecked")
|
108
|
+
public void restoreLoadPathsTo(ScriptingContainer jruby)
|
109
|
+
{
|
110
|
+
List<String> loadPaths = (List<String>) jruby.runScriptlet("$LOAD_PATH");
|
111
|
+
for (LocalGem localGem : localGems) {
|
112
|
+
Path localGemPath = localGem.getLocalPath().toPath();
|
113
|
+
for (String requirePath : localGem.getRequirePaths()) {
|
114
|
+
loadPaths.add(localGemPath.resolve(requirePath).toString());
|
115
|
+
}
|
116
|
+
}
|
117
|
+
jruby.setLoadPaths(loadPaths);
|
118
|
+
}
|
119
|
+
|
120
|
+
public List<GemSpec> dump(OutputStream out)
|
121
|
+
throws IOException
|
122
|
+
{
|
123
|
+
ImmutableList.Builder<GemSpec> builder = ImmutableList.builder();
|
124
|
+
try (ZipOutputStream zip = new ZipOutputStream(out)) {
|
125
|
+
for (LocalGem localGem : localGems) {
|
126
|
+
zipDirectory(zip, localGem.getLocalPath().toPath(), localGem.getName() + "/");
|
127
|
+
builder.add(new GemSpec(localGem.getName(), localGem.getRequirePaths()));
|
128
|
+
}
|
129
|
+
}
|
130
|
+
return builder.build();
|
131
|
+
}
|
132
|
+
|
133
|
+
private static void zipDirectory(ZipOutputStream zip, Path directory, String name)
|
134
|
+
throws IOException
|
135
|
+
{
|
136
|
+
try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(directory)) {
|
137
|
+
for (Path path : dirStream) {
|
138
|
+
if (Files.isDirectory(path)) {
|
139
|
+
zipDirectory(zip, path, name + path.getFileName() + "/");
|
140
|
+
} else {
|
141
|
+
zip.putNextEntry(new ZipEntry(name + path.getFileName()));
|
142
|
+
try (InputStream in = Files.newInputStream(path)) {
|
143
|
+
ByteStreams.copy(in, zip);
|
144
|
+
}
|
145
|
+
zip.closeEntry();
|
146
|
+
}
|
147
|
+
}
|
148
|
+
} catch (NoSuchFileException | NotDirectoryException ex) {
|
149
|
+
// ignore
|
150
|
+
}
|
151
|
+
}
|
152
|
+
|
153
|
+
public static PluginArchive load(File localDirectory, List<GemSpec> gemSpecs,
|
154
|
+
InputStream in) throws IOException
|
155
|
+
{
|
156
|
+
try (ZipInputStream zip = new ZipInputStream(in)) {
|
157
|
+
unzipDirectory(zip, localDirectory.toPath());
|
158
|
+
}
|
159
|
+
|
160
|
+
ImmutableList.Builder<LocalGem> builder = ImmutableList.builder();
|
161
|
+
for (GemSpec gemSpec : gemSpecs) {
|
162
|
+
builder.add(new LocalGem(
|
163
|
+
new File(localDirectory, gemSpec.getName()),
|
164
|
+
gemSpec.getName(),
|
165
|
+
gemSpec.getRequirePaths()));
|
166
|
+
}
|
167
|
+
return new PluginArchive(builder.build());
|
168
|
+
}
|
169
|
+
|
170
|
+
private static void unzipDirectory(ZipInputStream zip, Path directory)
|
171
|
+
throws IOException
|
172
|
+
{
|
173
|
+
while (true) {
|
174
|
+
ZipEntry entry = zip.getNextEntry();
|
175
|
+
if (entry == null) {
|
176
|
+
break;
|
177
|
+
}
|
178
|
+
Path path = directory.resolve(entry.getName());
|
179
|
+
if (entry.getName().endsWith("/")) {
|
180
|
+
Files.createDirectories(path);
|
181
|
+
} else {
|
182
|
+
Files.createDirectories(path.getParent());
|
183
|
+
try (OutputStream out = Files.newOutputStream(path)) {
|
184
|
+
ByteStreams.copy(zip, out);
|
185
|
+
}
|
186
|
+
}
|
187
|
+
}
|
188
|
+
}
|
189
|
+
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
package org.embulk.executor.mapreduce;
|
2
|
+
|
3
|
+
public class SetContextClassLoader
|
4
|
+
implements AutoCloseable
|
5
|
+
{
|
6
|
+
private final ClassLoader original;
|
7
|
+
|
8
|
+
public SetContextClassLoader(ClassLoader classLoader)
|
9
|
+
{
|
10
|
+
this.original = Thread.currentThread().getContextClassLoader();
|
11
|
+
Thread.currentThread().setContextClassLoader(classLoader);
|
12
|
+
}
|
13
|
+
|
14
|
+
@Override
|
15
|
+
public void close()
|
16
|
+
{
|
17
|
+
Thread.currentThread().setContextClassLoader(original);
|
18
|
+
}
|
19
|
+
}
|
@@ -0,0 +1,291 @@
|
|
1
|
+
package org.embulk.executor.mapreduce;
|
2
|
+
|
3
|
+
import org.joda.time.DateTimeZone;
|
4
|
+
import com.google.common.base.Optional;
|
5
|
+
import org.embulk.config.Config;
|
6
|
+
import org.embulk.config.ConfigDefault;
|
7
|
+
import org.embulk.config.ConfigSource;
|
8
|
+
import org.embulk.config.ConfigException;
|
9
|
+
import org.embulk.config.Task;
|
10
|
+
import org.embulk.config.TaskSource;
|
11
|
+
import org.embulk.spi.time.Timestamp;
|
12
|
+
import org.embulk.spi.type.TimestampType;
|
13
|
+
import org.embulk.spi.type.LongType;
|
14
|
+
import org.embulk.spi.Column;
|
15
|
+
import org.embulk.spi.PageReader;
|
16
|
+
import org.embulk.spi.Schema;
|
17
|
+
import org.embulk.spi.Buffer;
|
18
|
+
|
19
|
+
public class TimestampPartitioning
|
20
|
+
implements Partitioning
|
21
|
+
{
|
22
|
+
public interface PartitioningTask
|
23
|
+
extends Task
|
24
|
+
{
|
25
|
+
@Config("column")
|
26
|
+
public String getColumn();
|
27
|
+
|
28
|
+
@Config("unit")
|
29
|
+
public String getUnit();
|
30
|
+
|
31
|
+
@Config("timezone")
|
32
|
+
@ConfigDefault("\"UTC\"")
|
33
|
+
public DateTimeZone getTimeZone();
|
34
|
+
|
35
|
+
@Config("unix_timestamp_unit")
|
36
|
+
@ConfigDefault("\"sec\"")
|
37
|
+
public String getUnixTimestamp();
|
38
|
+
|
39
|
+
public Column getTargetColumn();
|
40
|
+
public void setTargetColumn(Column column);
|
41
|
+
}
|
42
|
+
|
43
|
+
private static enum Unit
|
44
|
+
{
|
45
|
+
HOUR(60*60),
|
46
|
+
DAY(24*60*60);
|
47
|
+
//WEEK
|
48
|
+
//MONTH,
|
49
|
+
//YEAR;
|
50
|
+
|
51
|
+
private final int unit;
|
52
|
+
|
53
|
+
private Unit(int unit)
|
54
|
+
{
|
55
|
+
this.unit = unit;
|
56
|
+
}
|
57
|
+
|
58
|
+
public long utcPartition(long seconds)
|
59
|
+
{
|
60
|
+
return seconds / unit;
|
61
|
+
}
|
62
|
+
|
63
|
+
public static Unit of(String s)
|
64
|
+
{
|
65
|
+
switch (s) {
|
66
|
+
case "hour": return HOUR;
|
67
|
+
case "day": return DAY;
|
68
|
+
//case "week": return WEEK;
|
69
|
+
//case "month": return MONTH;
|
70
|
+
//case "year": return YEAR;
|
71
|
+
default:
|
72
|
+
throw new ConfigException(
|
73
|
+
String.format("Unknown unit '%s'. Supported units are hour and day"));
|
74
|
+
}
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
private static enum UnixTimestampUnit
|
79
|
+
{
|
80
|
+
SEC(1),
|
81
|
+
MILLI(1000),
|
82
|
+
MICRO(1000000),
|
83
|
+
NANO(1000000000);
|
84
|
+
|
85
|
+
private final int unit;
|
86
|
+
|
87
|
+
private UnixTimestampUnit(int unit)
|
88
|
+
{
|
89
|
+
this.unit = unit;
|
90
|
+
}
|
91
|
+
|
92
|
+
public long toSeconds(long v)
|
93
|
+
{
|
94
|
+
return v / unit;
|
95
|
+
}
|
96
|
+
|
97
|
+
public static UnixTimestampUnit of(String s)
|
98
|
+
{
|
99
|
+
switch (s) {
|
100
|
+
case "sec": return SEC;
|
101
|
+
case "milli": return MILLI;
|
102
|
+
case "micro": return MICRO;
|
103
|
+
case "nano": return NANO;
|
104
|
+
default:
|
105
|
+
throw new ConfigException(
|
106
|
+
String.format("Unknown unix_timestamp_unit '%s'. Supported units are sec, milli, micro, and nano"));
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
@Override
|
112
|
+
public TaskSource configure(ConfigSource config, Schema schema, int outputTaskCount)
|
113
|
+
{
|
114
|
+
PartitioningTask task = config.loadConfig(PartitioningTask.class);
|
115
|
+
Column column = findColumnByName(schema, task.getColumn());
|
116
|
+
|
117
|
+
if (!task.getTimeZone().equals(DateTimeZone.UTC)) {
|
118
|
+
// TODO
|
119
|
+
throw new ConfigException("Timestamp partitioner supports only UTC time zone for now");
|
120
|
+
}
|
121
|
+
|
122
|
+
// validate unit
|
123
|
+
Unit.of(task.getUnit());
|
124
|
+
|
125
|
+
// validate type
|
126
|
+
if (column.getType() instanceof TimestampType) {
|
127
|
+
// ok
|
128
|
+
} else if (column.getType() instanceof LongType) {
|
129
|
+
// validate unix_timestamp_unit
|
130
|
+
UnixTimestampUnit.of(task.getUnixTimestamp());
|
131
|
+
} else {
|
132
|
+
throw new ConfigException(
|
133
|
+
String.format("Partitioning column '%s' must be timestamp or long but got '%s'", column.getName(), column.getType()));
|
134
|
+
}
|
135
|
+
|
136
|
+
task.setTargetColumn(column);
|
137
|
+
|
138
|
+
return task.dump();
|
139
|
+
}
|
140
|
+
|
141
|
+
private static Column findColumnByName(Schema schema, String columnName)
|
142
|
+
{
|
143
|
+
for (Column column : schema.getColumns()) {
|
144
|
+
if (column.getName().equals(columnName)) {
|
145
|
+
return column;
|
146
|
+
}
|
147
|
+
}
|
148
|
+
throw new ConfigException(
|
149
|
+
String.format("Column '%s' is not found in schema", columnName));
|
150
|
+
}
|
151
|
+
|
152
|
+
@Override
|
153
|
+
public Partitioner newPartitioner(TaskSource taskSource)
|
154
|
+
{
|
155
|
+
PartitioningTask task = taskSource.loadTask(PartitioningTask.class);
|
156
|
+
|
157
|
+
Column column = task.getTargetColumn();
|
158
|
+
if (column.getType() instanceof TimestampType) {
|
159
|
+
return new TimestampPartitioner(column, Unit.of(task.getUnit()));
|
160
|
+
} else if (column.getType() instanceof LongType) {
|
161
|
+
return new LongUnixTimestampPartitioner(column, Unit.of(task.getUnit()), UnixTimestampUnit.of(task.getUnixTimestamp()));
|
162
|
+
} else {
|
163
|
+
throw new AssertionError();
|
164
|
+
}
|
165
|
+
}
|
166
|
+
|
167
|
+
private static class LongPartitionKey
|
168
|
+
implements PartitionKey
|
169
|
+
{
|
170
|
+
public static Buffer newKeyBuffer()
|
171
|
+
{
|
172
|
+
Buffer buffer = Buffer.allocate(8);
|
173
|
+
buffer.limit(8);
|
174
|
+
return buffer;
|
175
|
+
}
|
176
|
+
|
177
|
+
private long value;
|
178
|
+
|
179
|
+
public LongPartitionKey()
|
180
|
+
{ }
|
181
|
+
|
182
|
+
private LongPartitionKey(long value)
|
183
|
+
{
|
184
|
+
this.value = value;
|
185
|
+
}
|
186
|
+
|
187
|
+
public void set(long value)
|
188
|
+
{
|
189
|
+
this.value = value;
|
190
|
+
}
|
191
|
+
|
192
|
+
@Override
|
193
|
+
public void dump(Buffer buffer)
|
194
|
+
{
|
195
|
+
// TODO optimize
|
196
|
+
buffer.array()[0] = (byte) (((int) (value >>> 0)) & 0xff);
|
197
|
+
buffer.array()[1] = (byte) (((int) (value >>> 4)) & 0xff);
|
198
|
+
buffer.array()[2] = (byte) (((int) (value >>> 8)) & 0xff);
|
199
|
+
buffer.array()[3] = (byte) (((int) (value >>> 12)) & 0xff);
|
200
|
+
buffer.array()[4] = (byte) (((int) (value >>> 16)) & 0xff);
|
201
|
+
buffer.array()[5] = (byte) (((int) (value >>> 20)) & 0xff);
|
202
|
+
buffer.array()[6] = (byte) (((int) (value >>> 24)) & 0xff);
|
203
|
+
buffer.array()[7] = (byte) (((int) (value >>> 28)) & 0xff);
|
204
|
+
}
|
205
|
+
|
206
|
+
@Override
|
207
|
+
public LongPartitionKey clone()
|
208
|
+
{
|
209
|
+
return new LongPartitionKey(value);
|
210
|
+
}
|
211
|
+
|
212
|
+
@Override
|
213
|
+
public boolean equals(Object other)
|
214
|
+
{
|
215
|
+
if (!(other instanceof LongPartitionKey)) {
|
216
|
+
return false;
|
217
|
+
}
|
218
|
+
LongPartitionKey o = (LongPartitionKey) other;
|
219
|
+
return value == o.value;
|
220
|
+
}
|
221
|
+
|
222
|
+
@Override
|
223
|
+
public int hashCode()
|
224
|
+
{
|
225
|
+
return (int) (value ^ (value >>> 32));
|
226
|
+
}
|
227
|
+
}
|
228
|
+
|
229
|
+
private static abstract class AbstractTimestampPartitioner
|
230
|
+
implements Partitioner
|
231
|
+
{
|
232
|
+
protected final Column column;
|
233
|
+
protected final Unit unit;
|
234
|
+
private final LongPartitionKey key;
|
235
|
+
|
236
|
+
public AbstractTimestampPartitioner(Column column, Unit unit)
|
237
|
+
{
|
238
|
+
this.column = column;
|
239
|
+
this.unit = unit;
|
240
|
+
this.key = new LongPartitionKey();
|
241
|
+
}
|
242
|
+
|
243
|
+
@Override
|
244
|
+
public Buffer newKeyBuffer()
|
245
|
+
{
|
246
|
+
return LongPartitionKey.newKeyBuffer();
|
247
|
+
}
|
248
|
+
|
249
|
+
protected LongPartitionKey updateKey(long v)
|
250
|
+
{
|
251
|
+
key.set(v);
|
252
|
+
return key;
|
253
|
+
}
|
254
|
+
}
|
255
|
+
|
256
|
+
private static class TimestampPartitioner
|
257
|
+
extends AbstractTimestampPartitioner
|
258
|
+
{
|
259
|
+
public TimestampPartitioner(Column column, Unit unit)
|
260
|
+
{
|
261
|
+
super(column, unit);
|
262
|
+
}
|
263
|
+
|
264
|
+
@Override
|
265
|
+
public PartitionKey updateKey(PageReader record)
|
266
|
+
{
|
267
|
+
Timestamp v = record.getTimestamp(column);
|
268
|
+
return super.updateKey(unit.utcPartition(v.getEpochSecond()));
|
269
|
+
}
|
270
|
+
}
|
271
|
+
|
272
|
+
private static class LongUnixTimestampPartitioner
|
273
|
+
extends AbstractTimestampPartitioner
|
274
|
+
{
|
275
|
+
private final UnixTimestampUnit unixTimestampUnit;
|
276
|
+
|
277
|
+
public LongUnixTimestampPartitioner(Column column, Unit unit,
|
278
|
+
UnixTimestampUnit unixTimestampUnit)
|
279
|
+
{
|
280
|
+
super(column, unit);
|
281
|
+
this.unixTimestampUnit = unixTimestampUnit;
|
282
|
+
}
|
283
|
+
|
284
|
+
@Override
|
285
|
+
public PartitionKey updateKey(PageReader record)
|
286
|
+
{
|
287
|
+
long v = record.getLong(column);
|
288
|
+
return super.updateKey(unit.utcPartition(unixTimestampUnit.toSeconds(v)));
|
289
|
+
}
|
290
|
+
}
|
291
|
+
}
|
metadata
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-executor-mapreduce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sadayuki Furuhashi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-04-08 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Executes tasks on Hadoop.
|
14
|
+
email:
|
15
|
+
- frsyuki@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- build.gradle
|
21
|
+
- lib/embulk/executor/mapreduce.rb
|
22
|
+
- src/main/java/org/embulk/executor/mapreduce/AttemptState.java
|
23
|
+
- src/main/java/org/embulk/executor/mapreduce/BufferWritable.java
|
24
|
+
- src/main/java/org/embulk/executor/mapreduce/BufferedPagePartitioner.java
|
25
|
+
- src/main/java/org/embulk/executor/mapreduce/EmbulkInputFormat.java
|
26
|
+
- src/main/java/org/embulk/executor/mapreduce/EmbulkInputSplit.java
|
27
|
+
- src/main/java/org/embulk/executor/mapreduce/EmbulkMapReduce.java
|
28
|
+
- src/main/java/org/embulk/executor/mapreduce/EmbulkPartitioningMapReduce.java
|
29
|
+
- src/main/java/org/embulk/executor/mapreduce/EmbulkRecordReader.java
|
30
|
+
- src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java
|
31
|
+
- src/main/java/org/embulk/executor/mapreduce/MapReduceExecutorTask.java
|
32
|
+
- src/main/java/org/embulk/executor/mapreduce/PageWritable.java
|
33
|
+
- src/main/java/org/embulk/executor/mapreduce/PartitionKey.java
|
34
|
+
- src/main/java/org/embulk/executor/mapreduce/Partitioner.java
|
35
|
+
- src/main/java/org/embulk/executor/mapreduce/Partitioning.java
|
36
|
+
- src/main/java/org/embulk/executor/mapreduce/PluginArchive.java
|
37
|
+
- src/main/java/org/embulk/executor/mapreduce/RemoteTaskFailedException.java
|
38
|
+
- src/main/java/org/embulk/executor/mapreduce/SetContextClassLoader.java
|
39
|
+
- src/main/java/org/embulk/executor/mapreduce/TimestampPartitioning.java
|
40
|
+
- classpath/activation-1.1.jar
|
41
|
+
- classpath/apacheds-i18n-2.0.0-M15.jar
|
42
|
+
- classpath/apacheds-kerberos-codec-2.0.0-M15.jar
|
43
|
+
- classpath/api-asn1-api-1.0.0-M20.jar
|
44
|
+
- classpath/api-util-1.0.0-M20.jar
|
45
|
+
- classpath/avro-1.7.4.jar
|
46
|
+
- classpath/commons-beanutils-1.7.0.jar
|
47
|
+
- classpath/commons-cli-1.2.jar
|
48
|
+
- classpath/commons-codec-1.6.jar
|
49
|
+
- classpath/commons-collections-3.2.1.jar
|
50
|
+
- classpath/commons-compress-1.4.1.jar
|
51
|
+
- classpath/commons-configuration-1.6.jar
|
52
|
+
- classpath/commons-digester-1.8.jar
|
53
|
+
- classpath/commons-httpclient-3.1.jar
|
54
|
+
- classpath/commons-io-2.4.jar
|
55
|
+
- classpath/commons-lang-2.6.jar
|
56
|
+
- classpath/commons-logging-1.1.3.jar
|
57
|
+
- classpath/commons-math3-3.1.1.jar
|
58
|
+
- classpath/commons-net-3.1.jar
|
59
|
+
- classpath/curator-client-2.6.0.jar
|
60
|
+
- classpath/curator-framework-2.6.0.jar
|
61
|
+
- classpath/curator-recipes-2.6.0.jar
|
62
|
+
- classpath/embulk-executor-mapreduce-0.1.0.jar
|
63
|
+
- classpath/gson-2.2.4.jar
|
64
|
+
- classpath/hadoop-annotations-2.6.0.jar
|
65
|
+
- classpath/hadoop-auth-2.6.0.jar
|
66
|
+
- classpath/hadoop-client-2.6.0.jar
|
67
|
+
- classpath/hadoop-common-2.6.0.jar
|
68
|
+
- classpath/hadoop-hdfs-2.6.0.jar
|
69
|
+
- classpath/hadoop-mapreduce-client-app-2.6.0.jar
|
70
|
+
- classpath/hadoop-mapreduce-client-common-2.6.0.jar
|
71
|
+
- classpath/hadoop-mapreduce-client-core-2.6.0.jar
|
72
|
+
- classpath/hadoop-mapreduce-client-jobclient-2.6.0.jar
|
73
|
+
- classpath/hadoop-mapreduce-client-shuffle-2.6.0.jar
|
74
|
+
- classpath/hadoop-yarn-api-2.6.0.jar
|
75
|
+
- classpath/hadoop-yarn-client-2.6.0.jar
|
76
|
+
- classpath/hadoop-yarn-common-2.6.0.jar
|
77
|
+
- classpath/hadoop-yarn-server-common-2.6.0.jar
|
78
|
+
- classpath/hadoop-yarn-server-nodemanager-2.6.0.jar
|
79
|
+
- classpath/htrace-core-3.0.4.jar
|
80
|
+
- classpath/httpclient-4.2.5.jar
|
81
|
+
- classpath/httpcore-4.2.4.jar
|
82
|
+
- classpath/jackson-core-asl-1.9.13.jar
|
83
|
+
- classpath/jackson-jaxrs-1.9.13.jar
|
84
|
+
- classpath/jackson-mapper-asl-1.9.13.jar
|
85
|
+
- classpath/jackson-xc-1.9.13.jar
|
86
|
+
- classpath/jaxb-api-2.2.2.jar
|
87
|
+
- classpath/jaxb-impl-2.2.3-1.jar
|
88
|
+
- classpath/jersey-client-1.9.jar
|
89
|
+
- classpath/jersey-core-1.9.jar
|
90
|
+
- classpath/jersey-guice-1.9.jar
|
91
|
+
- classpath/jersey-json-1.9.jar
|
92
|
+
- classpath/jersey-server-1.9.jar
|
93
|
+
- classpath/jettison-1.1.jar
|
94
|
+
- classpath/jetty-util-6.1.26.jar
|
95
|
+
- classpath/jline-0.9.94.jar
|
96
|
+
- classpath/jsr305-1.3.9.jar
|
97
|
+
- classpath/leveldbjni-all-1.8.jar
|
98
|
+
- classpath/netty-3.7.0.Final.jar
|
99
|
+
- classpath/paranamer-2.3.jar
|
100
|
+
- classpath/protobuf-java-2.5.0.jar
|
101
|
+
- classpath/servlet-api-2.5.jar
|
102
|
+
- classpath/snappy-java-1.0.4.1.jar
|
103
|
+
- classpath/stax-api-1.0-2.jar
|
104
|
+
- classpath/xmlenc-0.52.jar
|
105
|
+
- classpath/xz-1.0.jar
|
106
|
+
- classpath/zookeeper-3.4.6.jar
|
107
|
+
homepage: https://github.com/embulk/embulk-executor-mapreduce
|
108
|
+
licenses:
|
109
|
+
- Apache 2.0
|
110
|
+
metadata: {}
|
111
|
+
post_install_message:
|
112
|
+
rdoc_options: []
|
113
|
+
require_paths:
|
114
|
+
- lib
|
115
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - '>='
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
requirements: []
|
126
|
+
rubyforge_project:
|
127
|
+
rubygems_version: 2.1.9
|
128
|
+
signing_key:
|
129
|
+
specification_version: 4
|
130
|
+
summary: MapReduce executor plugin for Embulk
|
131
|
+
test_files: []
|