embulk-executor-mapreduce 0.2.4 → 0.2.5
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/classpath/{embulk-executor-mapreduce-0.2.4.jar → embulk-executor-mapreduce-0.2.5.jar} +0 -0
 - data/src/main/java/org/embulk/executor/mapreduce/BufferedPagePartitioner.java +9 -0
 - data/src/main/java/org/embulk/executor/mapreduce/EmbulkPartitioningMapReduce.java +4 -0
 - data/src/main/java/org/embulk/executor/mapreduce/PageWritable.java +42 -1
 - data/src/test/java/org/embulk/executor/mapreduce/TestMapReduceExecutor.java +30 -14
 - data/src/test/resources/config/embulk_mapred_config.yml +6 -3
 - data/src/test/resources/config/embulk_mapred_partitioning_config.yml +6 -3
 - data/src/test/resources/fixtures/csv/sample1.csv +3 -3
 - data/src/test/resources/fixtures/csv/sample2.csv +3 -4
 - metadata +3 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 4461eebeecc53f99b9b9683d7553a585a87e1a1f
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: a019cd9224918ae2721482a9cf92c9c8148a05a6
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 91e107ce10160fc097930b139f07b59dcb80b1201dde0723cc302fd1e142a283ad8a817d7518ac7684f0b066ed55537c20ae0af5446230eb0f63026d9bf7e21d
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: bc045316fedf83de62e34bbf9304152680d90e46ec6fa885fc054aca50d9a967b2d8f8eb3fa86e7c685badba7e3f15834f985fee923c5bf54c9f5197f43a68fb
         
     | 
| 
         Binary file 
     | 
| 
         @@ -71,6 +71,15 @@ public class BufferedPagePartitioner 
     | 
|
| 
       71 
71 
     | 
    
         
             
                        }
         
     | 
| 
       72 
72 
     | 
    
         
             
                    }
         
     | 
| 
       73 
73 
     | 
    
         | 
| 
      
 74 
     | 
    
         
            +
                    public void jsonColumn(Column column)
         
     | 
| 
      
 75 
     | 
    
         
            +
                    {
         
     | 
| 
      
 76 
     | 
    
         
            +
                        if (source.isNull(column)) {
         
     | 
| 
      
 77 
     | 
    
         
            +
                            destination.setNull(column);
         
     | 
| 
      
 78 
     | 
    
         
            +
                        } else {
         
     | 
| 
      
 79 
     | 
    
         
            +
                            destination.setJson(column, source.getJson(column));
         
     | 
| 
      
 80 
     | 
    
         
            +
                        }
         
     | 
| 
      
 81 
     | 
    
         
            +
                    }
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
       74 
83 
     | 
    
         
             
                    public void timestampColumn(Column column)
         
     | 
| 
       75 
84 
     | 
    
         
             
                    {
         
     | 
| 
       76 
85 
     | 
    
         
             
                        if (source.isNull(column)) {
         
     | 
| 
         @@ -254,6 +254,7 @@ public class EmbulkPartitioningMapReduce 
     | 
|
| 
       254 
254 
     | 
    
         
             
                        this.output = output;
         
     | 
| 
       255 
255 
     | 
    
         
             
                    }
         
     | 
| 
       256 
256 
     | 
    
         | 
| 
      
 257 
     | 
    
         
            +
                    @Override
         
     | 
| 
       257 
258 
     | 
    
         
             
                    public ConfigDiff transaction(ConfigSource config,
         
     | 
| 
       258 
259 
     | 
    
         
             
                            Schema schema, int taskCount,
         
     | 
| 
       259 
260 
     | 
    
         
             
                            OutputPlugin.Control control)
         
     | 
| 
         @@ -262,6 +263,7 @@ public class EmbulkPartitioningMapReduce 
     | 
|
| 
       262 
263 
     | 
    
         
             
                        throw new RuntimeException("");
         
     | 
| 
       263 
264 
     | 
    
         
             
                    }
         
     | 
| 
       264 
265 
     | 
    
         | 
| 
      
 266 
     | 
    
         
            +
                    @Override
         
     | 
| 
       265 
267 
     | 
    
         
             
                    public ConfigDiff resume(TaskSource taskSource,
         
     | 
| 
       266 
268 
     | 
    
         
             
                            Schema schema, int taskCount,
         
     | 
| 
       267 
269 
     | 
    
         
             
                            OutputPlugin.Control control)
         
     | 
| 
         @@ -270,6 +272,7 @@ public class EmbulkPartitioningMapReduce 
     | 
|
| 
       270 
272 
     | 
    
         
             
                        throw new RuntimeException("");
         
     | 
| 
       271 
273 
     | 
    
         
             
                    }
         
     | 
| 
       272 
274 
     | 
    
         | 
| 
      
 275 
     | 
    
         
            +
                    @Override
         
     | 
| 
       273 
276 
     | 
    
         
             
                    public void cleanup(TaskSource taskSource,
         
     | 
| 
       274 
277 
     | 
    
         
             
                            Schema schema, int taskCount,
         
     | 
| 
       275 
278 
     | 
    
         
             
                            List<TaskReport> successTaskReports)
         
     | 
| 
         @@ -278,6 +281,7 @@ public class EmbulkPartitioningMapReduce 
     | 
|
| 
       278 
281 
     | 
    
         
             
                        throw new RuntimeException("");
         
     | 
| 
       279 
282 
     | 
    
         
             
                    }
         
     | 
| 
       280 
283 
     | 
    
         | 
| 
      
 284 
     | 
    
         
            +
                    @Override
         
     | 
| 
       281 
285 
     | 
    
         
             
                    public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int taskIndex)
         
     | 
| 
       282 
286 
     | 
    
         
             
                    {
         
     | 
| 
       283 
287 
     | 
    
         
             
                        return new TransactionalPageOutput() {
         
     | 
| 
         @@ -7,6 +7,13 @@ import java.util.List; 
     | 
|
| 
       7 
7 
     | 
    
         
             
            import java.util.ArrayList;
         
     | 
| 
       8 
8 
     | 
    
         
             
            import org.apache.hadoop.io.Writable;
         
     | 
| 
       9 
9 
     | 
    
         
             
            import org.apache.hadoop.io.WritableUtils;
         
     | 
| 
      
 10 
     | 
    
         
            +
            import org.apache.hadoop.io.DataOutputOutputStream;
         
     | 
| 
      
 11 
     | 
    
         
            +
            import org.msgpack.value.Value;
         
     | 
| 
      
 12 
     | 
    
         
            +
            import org.msgpack.value.ImmutableValue;
         
     | 
| 
      
 13 
     | 
    
         
            +
            import org.msgpack.core.MessagePack;
         
     | 
| 
      
 14 
     | 
    
         
            +
            import org.msgpack.core.MessageBufferPacker;
         
     | 
| 
      
 15 
     | 
    
         
            +
            import org.msgpack.core.MessageUnpacker;
         
     | 
| 
      
 16 
     | 
    
         
            +
            import org.msgpack.core.buffer.MessageBuffer;
         
     | 
| 
       10 
17 
     | 
    
         
             
            import org.embulk.spi.Buffer;
         
     | 
| 
       11 
18 
     | 
    
         
             
            import org.embulk.spi.Page;
         
     | 
| 
       12 
19 
     | 
    
         
             
            import static java.nio.charset.StandardCharsets.UTF_8;
         
     | 
| 
         @@ -40,6 +47,22 @@ public class PageWritable 
     | 
|
| 
       40 
47 
     | 
    
         
             
                    for (String s : stringReferences) {
         
     | 
| 
       41 
48 
     | 
    
         
             
                        out.writeUTF(s);
         
     | 
| 
       42 
49 
     | 
    
         
             
                    }
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
                    List<ImmutableValue> valueReferences = page.getValueReferences();
         
     | 
| 
      
 52 
     | 
    
         
            +
                    WritableUtils.writeVInt(out, valueReferences.size());
         
     | 
| 
      
 53 
     | 
    
         
            +
                    for (Value value : valueReferences) {
         
     | 
| 
      
 54 
     | 
    
         
            +
                        MessageBufferPacker packer = MessagePack.newDefaultBufferPacker();  // TODO reuse allocated buffer
         
     | 
| 
      
 55 
     | 
    
         
            +
                        value.writeTo(packer);
         
     | 
| 
      
 56 
     | 
    
         
            +
                        List<MessageBuffer> buffers = packer.toBufferList();
         
     | 
| 
      
 57 
     | 
    
         
            +
                        int size = 0;
         
     | 
| 
      
 58 
     | 
    
         
            +
                        for (MessageBuffer b : buffers) {
         
     | 
| 
      
 59 
     | 
    
         
            +
                            size += b.size();
         
     | 
| 
      
 60 
     | 
    
         
            +
                        }
         
     | 
| 
      
 61 
     | 
    
         
            +
                        WritableUtils.writeVInt(out, size);
         
     | 
| 
      
 62 
     | 
    
         
            +
                        for (MessageBuffer b : buffers) {
         
     | 
| 
      
 63 
     | 
    
         
            +
                            out.write(b.array(), b.arrayOffset(), b.size());
         
     | 
| 
      
 64 
     | 
    
         
            +
                        }
         
     | 
| 
      
 65 
     | 
    
         
            +
                    }
         
     | 
| 
       43 
66 
     | 
    
         
             
                }
         
     | 
| 
       44 
67 
     | 
    
         | 
| 
       45 
68 
     | 
    
         
             
                @Override
         
     | 
| 
         @@ -51,13 +74,31 @@ public class PageWritable 
     | 
|
| 
       51 
74 
     | 
    
         
             
                    Buffer buffer = Buffer.wrap(bytes);
         
     | 
| 
       52 
75 
     | 
    
         | 
| 
       53 
76 
     | 
    
         
             
                    int stringCount = WritableUtils.readVInt(in);
         
     | 
| 
       54 
     | 
    
         
            -
                    List<String> strings = new ArrayList 
     | 
| 
      
 77 
     | 
    
         
            +
                    List<String> strings = new ArrayList<>(stringCount);
         
     | 
| 
       55 
78 
     | 
    
         
             
                    for (int i=0; i < stringCount; i++) {
         
     | 
| 
       56 
79 
     | 
    
         
             
                        strings.add(in.readUTF());
         
     | 
| 
       57 
80 
     | 
    
         
             
                    }
         
     | 
| 
       58 
81 
     | 
    
         | 
| 
      
 82 
     | 
    
         
            +
                    int valueCount = WritableUtils.readVInt(in);
         
     | 
| 
      
 83 
     | 
    
         
            +
                    List<ImmutableValue> values = new ArrayList<>(valueCount);
         
     | 
| 
      
 84 
     | 
    
         
            +
                    byte[] b = new byte[32 * 1024];
         
     | 
| 
      
 85 
     | 
    
         
            +
                    for (int i=0; i < valueCount; i++) {
         
     | 
| 
      
 86 
     | 
    
         
            +
                        int size = WritableUtils.readVInt(in);
         
     | 
| 
      
 87 
     | 
    
         
            +
                        if (b.length < size) {
         
     | 
| 
      
 88 
     | 
    
         
            +
                            int ns = b.length;
         
     | 
| 
      
 89 
     | 
    
         
            +
                            while (ns < size) {
         
     | 
| 
      
 90 
     | 
    
         
            +
                                ns *= 2;
         
     | 
| 
      
 91 
     | 
    
         
            +
                            }
         
     | 
| 
      
 92 
     | 
    
         
            +
                            b = new byte[ns];
         
     | 
| 
      
 93 
     | 
    
         
            +
                        }
         
     | 
| 
      
 94 
     | 
    
         
            +
                        in.readFully(b, 0, size);
         
     | 
| 
      
 95 
     | 
    
         
            +
                        MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(b, 0, size);
         
     | 
| 
      
 96 
     | 
    
         
            +
                        values.add(unpacker.unpackValue());
         
     | 
| 
      
 97 
     | 
    
         
            +
                    }
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
       59 
99 
     | 
    
         
             
                    Page newPage = Page.wrap(buffer);
         
     | 
| 
       60 
100 
     | 
    
         
             
                    newPage.setStringReferences(strings);
         
     | 
| 
      
 101 
     | 
    
         
            +
                    newPage.setValueReferences(values);
         
     | 
| 
       61 
102 
     | 
    
         
             
                    if (page != null) {
         
     | 
| 
       62 
103 
     | 
    
         
             
                        page.release();
         
     | 
| 
       63 
104 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -25,16 +25,19 @@ import org.slf4j.impl.Log4jLoggerFactory; 
     | 
|
| 
       25 
25 
     | 
    
         | 
| 
       26 
26 
     | 
    
         
             
            import java.io.BufferedInputStream;
         
     | 
| 
       27 
27 
     | 
    
         
             
            import java.io.BufferedReader;
         
     | 
| 
      
 28 
     | 
    
         
            +
            import java.io.File;
         
     | 
| 
       28 
29 
     | 
    
         
             
            import java.io.FileNotFoundException;
         
     | 
| 
       29 
30 
     | 
    
         
             
            import java.io.IOException;
         
     | 
| 
       30 
31 
     | 
    
         
             
            import java.io.InputStream;
         
     | 
| 
       31 
32 
     | 
    
         
             
            import java.io.InputStreamReader;
         
     | 
| 
      
 33 
     | 
    
         
            +
            import java.nio.file.Files;
         
     | 
| 
       32 
34 
     | 
    
         
             
            import java.util.ArrayList;
         
     | 
| 
       33 
35 
     | 
    
         
             
            import java.util.Collections;
         
     | 
| 
       34 
36 
     | 
    
         
             
            import java.util.Comparator;
         
     | 
| 
       35 
37 
     | 
    
         
             
            import java.util.List;
         
     | 
| 
       36 
38 
     | 
    
         
             
            import java.util.Random;
         
     | 
| 
       37 
39 
     | 
    
         | 
| 
      
 40 
     | 
    
         
            +
            import static java.nio.charset.StandardCharsets.UTF_8;
         
     | 
| 
       38 
41 
     | 
    
         
             
            import static org.embulk.plugin.InjectedPluginSource.registerPluginTo;
         
     | 
| 
       39 
42 
     | 
    
         
             
            import static org.junit.Assert.assertEquals;
         
     | 
| 
       40 
43 
     | 
    
         
             
            import static org.junit.Assert.assertTrue;
         
     | 
| 
         @@ -62,36 +65,42 @@ public class TestMapReduceExecutor 
     | 
|
| 
       62 
65 
     | 
    
         
             
                    bootstrap.setSystemConfig(systemConfig);
         
     | 
| 
       63 
66 
     | 
    
         
             
                    bootstrap.overrideModules(getModuleOverrides(systemConfig));
         
     | 
| 
       64 
67 
     | 
    
         
             
                    embulk = bootstrap.initialize();
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
                    new File("tmp").mkdirs();
         
     | 
| 
       65 
70 
     | 
    
         
             
                }
         
     | 
| 
       66 
71 
     | 
    
         | 
| 
       67 
72 
     | 
    
         
             
                @Test
         
     | 
| 
       68 
73 
     | 
    
         
             
                public void testEmbulkMapper()
         
     | 
| 
       69 
74 
     | 
    
         
             
                        throws Exception
         
     | 
| 
       70 
75 
     | 
    
         
             
                {
         
     | 
| 
      
 76 
     | 
    
         
            +
                    new File("tmp/embulk_mapred_output.000.00.csv").delete();
         
     | 
| 
      
 77 
     | 
    
         
            +
                    new File("tmp/embulk_mapred_output.001.00.csv").delete();
         
     | 
| 
       71 
78 
     | 
    
         
             
                    ConfigSource config = loadConfigSource(embulk.newConfigLoader(), "config/embulk_mapred_config.yml");
         
     | 
| 
       72 
79 
     | 
    
         
             
                    embulk.run(config);
         
     | 
| 
       73 
80 
     | 
    
         
             
                    assertFileContent(
         
     | 
| 
       74 
81 
     | 
    
         
             
                            Lists.newArrayList(
         
     | 
| 
       75 
     | 
    
         
            -
                                    "fixtures/csv/sample1.csv",
         
     | 
| 
       76 
     | 
    
         
            -
                                    "fixtures/csv/sample1.csv"),
         
     | 
| 
      
 82 
     | 
    
         
            +
                                    "src/test/resources/fixtures/csv/sample1.csv",
         
     | 
| 
      
 83 
     | 
    
         
            +
                                    "src/test/resources/fixtures/csv/sample1.csv"),
         
     | 
| 
       77 
84 
     | 
    
         
             
                            Lists.newArrayList(
         
     | 
| 
       78 
     | 
    
         
            -
                                    " 
     | 
| 
       79 
     | 
    
         
            -
                                    " 
     | 
| 
      
 85 
     | 
    
         
            +
                                    "tmp/embulk_mapred_output.000.00.csv",
         
     | 
| 
      
 86 
     | 
    
         
            +
                                    "tmp/embulk_mapred_output.001.00.csv"));
         
     | 
| 
       80 
87 
     | 
    
         
             
                }
         
     | 
| 
       81 
88 
     | 
    
         | 
| 
       82 
89 
     | 
    
         
             
                @Test
         
     | 
| 
       83 
90 
     | 
    
         
             
                public void testEmbulkPartitioningMapperReducer()
         
     | 
| 
       84 
91 
     | 
    
         
             
                        throws Exception
         
     | 
| 
       85 
92 
     | 
    
         
             
                {
         
     | 
| 
      
 93 
     | 
    
         
            +
                    new File("tmp/embulk_mapred_partitioning_output.000.00.csv").delete();
         
     | 
| 
      
 94 
     | 
    
         
            +
                    new File("tmp/embulk_mapred_partitioning_output.001.00.csv").delete();
         
     | 
| 
       86 
95 
     | 
    
         
             
                    ConfigSource config = loadConfigSource(embulk.newConfigLoader(), "config/embulk_mapred_partitioning_config.yml");
         
     | 
| 
       87 
96 
     | 
    
         
             
                    embulk.run(config);
         
     | 
| 
       88 
97 
     | 
    
         
             
                    assertFileContent(
         
     | 
| 
       89 
98 
     | 
    
         
             
                            Lists.newArrayList(
         
     | 
| 
       90 
     | 
    
         
            -
                                    "fixtures/csv/sample1.csv",
         
     | 
| 
       91 
     | 
    
         
            -
                                    "fixtures/csv/sample1.csv"),
         
     | 
| 
      
 99 
     | 
    
         
            +
                                    "src/test/resources/fixtures/csv/sample1.csv",
         
     | 
| 
      
 100 
     | 
    
         
            +
                                    "src/test/resources/fixtures/csv/sample1.csv"),
         
     | 
| 
       92 
101 
     | 
    
         
             
                            Lists.newArrayList(
         
     | 
| 
       93 
     | 
    
         
            -
                                    " 
     | 
| 
       94 
     | 
    
         
            -
                                    " 
     | 
| 
      
 102 
     | 
    
         
            +
                                    "tmp/embulk_mapred_partitioning_output.000.00.csv",
         
     | 
| 
      
 103 
     | 
    
         
            +
                                    "tmp/embulk_mapred_partitioning_output.001.00.csv"));
         
     | 
| 
       95 
104 
     | 
    
         
             
                }
         
     | 
| 
       96 
105 
     | 
    
         | 
| 
       97 
106 
     | 
    
         
             
                @Test
         
     | 
| 
         @@ -104,7 +113,8 @@ public class TestMapReduceExecutor 
     | 
|
| 
       104 
113 
     | 
    
         
             
                        fail();
         
     | 
| 
       105 
114 
     | 
    
         
             
                    }
         
     | 
| 
       106 
115 
     | 
    
         
             
                    catch (Throwable t) {
         
     | 
| 
       107 
     | 
    
         
            -
                        assertTrue(t instanceof  
     | 
| 
      
 116 
     | 
    
         
            +
                        assertTrue(t instanceof PartialExecutionException);
         
     | 
| 
      
 117 
     | 
    
         
            +
                        assertTrue(t.getCause() instanceof ConfigException);
         
     | 
| 
       108 
118 
     | 
    
         
             
                    }
         
     | 
| 
       109 
119 
     | 
    
         
             
                }
         
     | 
| 
       110 
120 
     | 
    
         | 
| 
         @@ -118,7 +128,8 @@ public class TestMapReduceExecutor 
     | 
|
| 
       118 
128 
     | 
    
         
             
                        fail();
         
     | 
| 
       119 
129 
     | 
    
         
             
                    }
         
     | 
| 
       120 
130 
     | 
    
         
             
                    catch (Throwable t) {
         
     | 
| 
       121 
     | 
    
         
            -
                        assertTrue(t instanceof  
     | 
| 
      
 131 
     | 
    
         
            +
                        assertTrue(t instanceof PartialExecutionException);
         
     | 
| 
      
 132 
     | 
    
         
            +
                        assertTrue(t.getCause() instanceof ConfigException);
         
     | 
| 
       122 
133 
     | 
    
         
             
                    }
         
     | 
| 
       123 
134 
     | 
    
         
             
                }
         
     | 
| 
       124 
135 
     | 
    
         | 
| 
         @@ -132,7 +143,8 @@ public class TestMapReduceExecutor 
     | 
|
| 
       132 
143 
     | 
    
         
             
                        fail();
         
     | 
| 
       133 
144 
     | 
    
         
             
                    }
         
     | 
| 
       134 
145 
     | 
    
         
             
                    catch (Throwable t) {
         
     | 
| 
       135 
     | 
    
         
            -
                        assertTrue(t instanceof  
     | 
| 
      
 146 
     | 
    
         
            +
                        assertTrue(t instanceof PartialExecutionException);
         
     | 
| 
      
 147 
     | 
    
         
            +
                        assertTrue(t.getCause() instanceof ConfigException);
         
     | 
| 
       136 
148 
     | 
    
         
             
                    }
         
     | 
| 
       137 
149 
     | 
    
         
             
                }
         
     | 
| 
       138 
150 
     | 
    
         | 
| 
         @@ -146,7 +158,9 @@ public class TestMapReduceExecutor 
     | 
|
| 
       146 
158 
     | 
    
         
             
                        fail();
         
     | 
| 
       147 
159 
     | 
    
         
             
                    }
         
     | 
| 
       148 
160 
     | 
    
         
             
                    catch (Throwable t) {
         
     | 
| 
       149 
     | 
    
         
            -
                        assertTrue(t 
     | 
| 
      
 161 
     | 
    
         
            +
                        assertTrue(t instanceof PartialExecutionException);
         
     | 
| 
      
 162 
     | 
    
         
            +
                        assertTrue(t.getCause() instanceof RuntimeException);
         
     | 
| 
      
 163 
     | 
    
         
            +
                        assertTrue(t.getCause().getCause() instanceof FileNotFoundException);
         
     | 
| 
       150 
164 
     | 
    
         
             
                    }
         
     | 
| 
       151 
165 
     | 
    
         
             
                }
         
     | 
| 
       152 
166 
     | 
    
         | 
| 
         @@ -273,6 +287,7 @@ public class TestMapReduceExecutor 
     | 
|
| 
       273 
287 
     | 
    
         
             
                }
         
     | 
| 
       274 
288 
     | 
    
         | 
| 
       275 
289 
     | 
    
         
             
                private static void assertFileContent(List<String> inputFiles, List<String> outputFiles)
         
     | 
| 
      
 290 
     | 
    
         
            +
                    throws IOException
         
     | 
| 
       276 
291 
     | 
    
         
             
                {
         
     | 
| 
       277 
292 
     | 
    
         
             
                    List<List<String>> inputRecords = getRecords(inputFiles);
         
     | 
| 
       278 
293 
     | 
    
         
             
                    Collections.sort(inputRecords, new RecordComparator());
         
     | 
| 
         @@ -294,6 +309,7 @@ public class TestMapReduceExecutor 
     | 
|
| 
       294 
309 
     | 
    
         
             
                }
         
     | 
| 
       295 
310 
     | 
    
         | 
| 
       296 
311 
     | 
    
         
             
                private static List<List<String>> getRecords(List<String> files)
         
     | 
| 
      
 312 
     | 
    
         
            +
                    throws IOException
         
     | 
| 
       297 
313 
     | 
    
         
             
                {
         
     | 
| 
       298 
314 
     | 
    
         
             
                    List<List<String>> records = new ArrayList<>();
         
     | 
| 
       299 
315 
     | 
    
         | 
| 
         @@ -327,8 +343,8 @@ public class TestMapReduceExecutor 
     | 
|
| 
       327 
343 
     | 
    
         
             
                }
         
     | 
| 
       328 
344 
     | 
    
         | 
| 
       329 
345 
     | 
    
         
             
                private static BufferedReader newReader(String filePath)
         
     | 
| 
      
 346 
     | 
    
         
            +
                    throws IOException
         
     | 
| 
       330 
347 
     | 
    
         
             
                {
         
     | 
| 
       331 
     | 
    
         
            -
                     
     | 
| 
       332 
     | 
    
         
            -
                    return new BufferedReader(new InputStreamReader(in));
         
     | 
| 
      
 348 
     | 
    
         
            +
                    return Files.newBufferedReader(new File(filePath).toPath(), UTF_8);
         
     | 
| 
       333 
349 
     | 
    
         
             
                }
         
     | 
| 
       334 
350 
     | 
    
         
             
            }
         
     | 
| 
         @@ -19,8 +19,8 @@ in: 
     | 
|
| 
       19 
19 
     | 
    
         
             
                newline: CRLF
         
     | 
| 
       20 
20 
     | 
    
         
             
                type: csv
         
     | 
| 
       21 
21 
     | 
    
         
             
                delimiter: ','
         
     | 
| 
       22 
     | 
    
         
            -
                quote: ''
         
     | 
| 
       23 
     | 
    
         
            -
                escape: ''
         
     | 
| 
      
 22 
     | 
    
         
            +
                quote: '"'
         
     | 
| 
      
 23 
     | 
    
         
            +
                escape: '"'
         
     | 
| 
       24 
24 
     | 
    
         
             
                skip_header_lines: 1
         
     | 
| 
       25 
25 
     | 
    
         
             
                columns:
         
     | 
| 
       26 
26 
     | 
    
         
             
                - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
         
     | 
| 
         @@ -34,13 +34,16 @@ in: 
     | 
|
| 
       34 
34 
     | 
    
         
             
                - {name: size, type: long}
         
     | 
| 
       35 
35 
     | 
    
         
             
                - {name: d, type: double}
         
     | 
| 
       36 
36 
     | 
    
         
             
                - {name: flag, type: boolean}
         
     | 
| 
      
 37 
     | 
    
         
            +
                - {name: v_json, type: json}
         
     | 
| 
       37 
38 
     | 
    
         
             
            out:
         
     | 
| 
       38 
39 
     | 
    
         
             
              type: file
         
     | 
| 
       39 
     | 
    
         
            -
              path_prefix: ' 
     | 
| 
      
 40 
     | 
    
         
            +
              path_prefix: 'tmp/embulk_mapred_output.'
         
     | 
| 
       40 
41 
     | 
    
         
             
              file_ext: 'csv'
         
     | 
| 
       41 
42 
     | 
    
         
             
              formatter:
         
     | 
| 
       42 
43 
     | 
    
         
             
                charset: UTF-8
         
     | 
| 
       43 
44 
     | 
    
         
             
                newline: CRLF
         
     | 
| 
      
 45 
     | 
    
         
            +
                quote: '"'
         
     | 
| 
      
 46 
     | 
    
         
            +
                escape: '"'
         
     | 
| 
       44 
47 
     | 
    
         
             
                type: csv
         
     | 
| 
       45 
48 
     | 
    
         
             
                column_options:
         
     | 
| 
       46 
49 
     | 
    
         
             
                  timestamp: {format: '%Y-%m-%d %H:%M:%S'}
         
     | 
| 
         @@ -25,8 +25,8 @@ in: 
     | 
|
| 
       25 
25 
     | 
    
         
             
                newline: CRLF
         
     | 
| 
       26 
26 
     | 
    
         
             
                type: csv
         
     | 
| 
       27 
27 
     | 
    
         
             
                delimiter: ','
         
     | 
| 
       28 
     | 
    
         
            -
                quote: ''
         
     | 
| 
       29 
     | 
    
         
            -
                escape: ''
         
     | 
| 
      
 28 
     | 
    
         
            +
                quote: '"'
         
     | 
| 
      
 29 
     | 
    
         
            +
                escape: '"'
         
     | 
| 
       30 
30 
     | 
    
         
             
                skip_header_lines: 1
         
     | 
| 
       31 
31 
     | 
    
         
             
                columns:
         
     | 
| 
       32 
32 
     | 
    
         
             
                - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
         
     | 
| 
         @@ -40,13 +40,16 @@ in: 
     | 
|
| 
       40 
40 
     | 
    
         
             
                - {name: size, type: long}
         
     | 
| 
       41 
41 
     | 
    
         
             
                - {name: d, type: double}
         
     | 
| 
       42 
42 
     | 
    
         
             
                - {name: flag, type: boolean}
         
     | 
| 
      
 43 
     | 
    
         
            +
                - {name: v_json, type: json}
         
     | 
| 
       43 
44 
     | 
    
         
             
            out:
         
     | 
| 
       44 
45 
     | 
    
         
             
              type: file
         
     | 
| 
       45 
     | 
    
         
            -
              path_prefix: ' 
     | 
| 
      
 46 
     | 
    
         
            +
              path_prefix: 'tmp/embulk_mapred_partitioning_output.'
         
     | 
| 
       46 
47 
     | 
    
         
             
              file_ext: 'csv'
         
     | 
| 
       47 
48 
     | 
    
         
             
              formatter:
         
     | 
| 
       48 
49 
     | 
    
         
             
                charset: UTF-8
         
     | 
| 
       49 
50 
     | 
    
         
             
                newline: CRLF
         
     | 
| 
      
 51 
     | 
    
         
            +
                quote: '"'
         
     | 
| 
      
 52 
     | 
    
         
            +
                escape: '"'
         
     | 
| 
       50 
53 
     | 
    
         
             
                type: csv
         
     | 
| 
       51 
54 
     | 
    
         
             
                column_options:
         
     | 
| 
       52 
55 
     | 
    
         
             
                  timestamp: {format: '%Y-%m-%d %H:%M:%S'}
         
     | 
| 
         @@ -1,3 +1,3 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            timestamp,host,path,method,referer,code,agent,user,size,d,flag
         
     | 
| 
       2 
     | 
    
         
            -
            2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,200,Mozilla/5.0,-,136,1.1,true
         
     | 
| 
       3 
     | 
    
         
            -
            2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70,1.2,false
         
     | 
| 
      
 1 
     | 
    
         
            +
            timestamp,host,path,method,referer,code,agent,user,size,d,flag,v_json
         
     | 
| 
      
 2 
     | 
    
         
            +
            2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,200,Mozilla/5.0,-,136,1.1,true,"{""k0"":""v0"",""k1"":""v1""}"
         
     | 
| 
      
 3 
     | 
    
         
            +
            2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70,1.2,false,"[1,2,""3""]"
         
     | 
| 
         @@ -1,4 +1,3 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            timestamp,host,path,method,referer,code,agent,user,size,d,flag
         
     | 
| 
       2 
     | 
    
         
            -
            2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,200,Mozilla/5.0,-,136,1.1,true
         
     | 
| 
       3 
     | 
    
         
            -
            2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70,1.2,false
         
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            timestamp,host,path,method,referer,code,agent,user,size,d,flag,v_json
         
     | 
| 
      
 2 
     | 
    
         
            +
            2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,200,Mozilla/5.0,-,136,1.1,true,"{""k0"":""v0"",""k1"":""v1""}"
         
     | 
| 
      
 3 
     | 
    
         
            +
            2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70,1.2,false,"[1,2,""3""]"
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: embulk-executor-mapreduce
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.2.4
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.2.5
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Sadayuki Furuhashi
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire:
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date:  
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2016-02-09 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       13 
13 
     | 
    
         
             
            description: Executes tasks on Hadoop.
         
     | 
| 
       14 
14 
     | 
    
         
             
            email:
         
     | 
| 
         @@ -84,7 +84,7 @@ files: 
     | 
|
| 
       84 
84 
     | 
    
         
             
            - classpath/curator-client-2.6.0.jar
         
     | 
| 
       85 
85 
     | 
    
         
             
            - classpath/curator-framework-2.6.0.jar
         
     | 
| 
       86 
86 
     | 
    
         
             
            - classpath/curator-recipes-2.6.0.jar
         
     | 
| 
       87 
     | 
    
         
            -
            - classpath/embulk-executor-mapreduce-0.2.4.jar
     | 
| 
      
 87 
     | 
    
         
            +
            - classpath/embulk-executor-mapreduce-0.2.5.jar
         
     | 
| 
       88 
88 
     | 
    
         
             
            - classpath/gson-2.2.4.jar
         
     | 
| 
       89 
89 
     | 
    
         
             
            - classpath/hadoop-annotations-2.6.0.jar
         
     | 
| 
       90 
90 
     | 
    
         
             
            - classpath/hadoop-auth-2.6.0.jar
         
     |