embulk-output-embulk_output_domo 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0f0e72f5092ee17a2a9f653daf209c54afd686e
|
4
|
+
data.tar.gz: 9d4343c4a5f4972d6018a2f5e5e1c49b9dddfd3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a69775fdbc1c12340e899ccbf18d6f9c8fa53340fbf7af47ffc0b279183c3ff25d4cfc762529081ccae6cc52e73ef72955e35d052b26f4349a260e3793a883e8
|
7
|
+
data.tar.gz: 298b879ef0206ca9522330b13ae1e4b1678c0aad1d2141185598986d76e1f3be42a0883bc2d8ff16d4b386385803bfeb39233b409e52d40b902bcaa57523818c
|
data/build.gradle
CHANGED
@@ -13,7 +13,7 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.2.5"
|
16
|
+
version = "0.3.0"
|
17
17
|
|
18
18
|
sourceCompatibility = 1.8
|
19
19
|
targetCompatibility = 1.8
|
@@ -23,6 +23,8 @@ dependencies {
|
|
23
23
|
provided "org.embulk:embulk-core:0.9.7"
|
24
24
|
compile 'com.domo:domo-java-sdk-all:0.4.0'
|
25
25
|
compile 'junit:junit:4.12'
|
26
|
+
compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.0'
|
27
|
+
compile group: 'commons-io', name: 'commons-io', version: '2.6'
|
26
28
|
testCompile "junit:junit:4.+"
|
27
29
|
}
|
28
30
|
|
@@ -1,3 +1,7 @@
|
|
1
|
+
/**
|
2
|
+
* Embulk output plugin that can move really big input to domo stream
|
3
|
+
* We are going to use algorithm for uploading in parallel found in https://developer.domo.com/docs/stream/upload-in-parallel
|
4
|
+
*/
|
1
5
|
package org.embulk.output.embulk_output_domo;
|
2
6
|
|
3
7
|
import java.lang.StringBuilder;
|
@@ -49,6 +53,24 @@ import java.util.HashMap;
|
|
49
53
|
import java.util.ArrayList;
|
50
54
|
import java.util.Date;
|
51
55
|
import java.text.SimpleDateFormat;
|
56
|
+
import java.io.File;
|
57
|
+
import java.io.FileWriter;
|
58
|
+
import java.io.FileReader;
|
59
|
+
import java.io.BufferedWriter;
|
60
|
+
import java.io.BufferedReader;
|
61
|
+
import java.io.FileOutputStream;
|
62
|
+
|
63
|
+
import java.nio.file.Files;
|
64
|
+
import java.nio.file.Paths;
|
65
|
+
import java.util.concurrent.Callable;
|
66
|
+
import java.util.concurrent.ExecutionException;
|
67
|
+
import java.util.concurrent.ExecutorService;
|
68
|
+
import java.util.concurrent.Executors;
|
69
|
+
import java.util.concurrent.Future;
|
70
|
+
import java.util.zip.GZIPOutputStream;
|
71
|
+
import java.util.Collections;
|
72
|
+
import org.apache.commons.lang3.RandomStringUtils;
|
73
|
+
import org.apache.commons.io.FileUtils;
|
52
74
|
|
53
75
|
|
54
76
|
public class EmbulkOutputDomoOutputPlugin
|
@@ -56,7 +78,7 @@ public class EmbulkOutputDomoOutputPlugin
|
|
56
78
|
{
|
57
79
|
private static DomoClient client = null;
|
58
80
|
private static Execution execution = null;
|
59
|
-
private static StreamClient sdsClient = null;
|
81
|
+
private static StreamClient streamClient = null;
|
60
82
|
private static Stream sds = null;
|
61
83
|
private static TimestampFormatter[] timestampFormatters = null;
|
62
84
|
private int partNum = 1;
|
@@ -67,6 +89,8 @@ public class EmbulkOutputDomoOutputPlugin
|
|
67
89
|
private int currentPartCounter = 1;
|
68
90
|
private static int totalBatches = 1;
|
69
91
|
private static int pageReaderCount = 0;
|
92
|
+
private static String TEMP_DIR = "/tmp/csv/" +RandomStringUtils.randomAlphabetic(10)+"/";
|
93
|
+
public static int totalRecordsCounter = 0;
|
70
94
|
|
71
95
|
public enum QuotePolicy
|
72
96
|
{
|
@@ -86,12 +110,10 @@ public class EmbulkOutputDomoOutputPlugin
|
|
86
110
|
return string;
|
87
111
|
}
|
88
112
|
}
|
89
|
-
|
90
113
|
public interface TimestampColumnOption
|
91
114
|
extends Task, TimestampFormatter.TimestampColumnOption
|
92
115
|
{
|
93
116
|
}
|
94
|
-
|
95
117
|
public interface PluginTask
|
96
118
|
extends Task, TimestampFormatter.Task
|
97
119
|
{
|
@@ -140,7 +162,6 @@ public class EmbulkOutputDomoOutputPlugin
|
|
140
162
|
@ConfigDefault("\"LF\"")
|
141
163
|
Newline getNewlineInField();
|
142
164
|
}
|
143
|
-
|
144
165
|
public com.domo.sdk.datasets.model.Schema getDomoSchema(Schema schema){
|
145
166
|
/**
|
146
167
|
* We need to return domo Schema
|
@@ -211,16 +232,20 @@ public class EmbulkOutputDomoOutputPlugin
|
|
211
232
|
.build();
|
212
233
|
|
213
234
|
client = DomoClient.create(domoConfig);
|
214
|
-
|
235
|
+
streamClient = client.streamClient();
|
215
236
|
|
216
|
-
List<Stream> searchedSds =
|
237
|
+
List<Stream> searchedSds = streamClient.search("dataSource.name:" + task.getStreamName());
|
217
238
|
sds = searchedSds.get(0);
|
218
239
|
logger.info("Stream "+ sds);
|
219
|
-
execution =
|
240
|
+
execution = streamClient.createExecution(sds.getId());
|
220
241
|
logger.info("Created Execution: " + execution);
|
221
242
|
timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
|
222
243
|
totalBatches = task.getBatchSize();
|
244
|
+
File directory = new File(TEMP_DIR);
|
245
|
+
if(!directory.exists()) {
|
223
246
|
|
247
|
+
directory.mkdirs();
|
248
|
+
}
|
224
249
|
}
|
225
250
|
}
|
226
251
|
catch(Exception ex){
|
@@ -232,32 +257,59 @@ public class EmbulkOutputDomoOutputPlugin
|
|
232
257
|
control.run(task.dump());
|
233
258
|
return Exec.newConfigDiff();
|
234
259
|
}
|
235
|
-
|
236
260
|
@Override
|
237
261
|
public ConfigDiff resume(TaskSource taskSource,
|
238
|
-
|
239
|
-
|
262
|
+
Schema schema, int taskCount,
|
263
|
+
OutputPlugin.Control control)
|
240
264
|
{
|
241
265
|
throw new UnsupportedOperationException("embulk_output_domo output plugin does not support resuming");
|
242
266
|
}
|
243
|
-
|
244
267
|
@Override
|
245
268
|
public void cleanup(TaskSource taskSource,
|
246
|
-
|
247
|
-
|
269
|
+
Schema schema, int taskCount,
|
270
|
+
List<TaskReport> successTaskReports)
|
248
271
|
{
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
272
|
+
try{
|
273
|
+
ArrayList<File> csvFiles = loadCSVFiles(TEMP_DIR);
|
274
|
+
File tempFolder = new File(TEMP_DIR);
|
275
|
+
List<File> compressedCsvFiles = toGzipFilesUTF8(csvFiles, tempFolder.getPath() + "/");
|
276
|
+
ExecutorService executorService = Executors.newCachedThreadPool();
|
277
|
+
List<Callable<Object>> uploadTasks = Collections.synchronizedList(new ArrayList<>());
|
278
|
+
|
279
|
+
// For each data part (csv gzip file), create a runnable upload task
|
280
|
+
long partNum = 1;
|
281
|
+
for (File compressedCsvFile : compressedCsvFiles){
|
282
|
+
long myPartNum = partNum;
|
283
|
+
// "uploadDataPart()" accepts csv strings, csv files, and compressed csv files
|
284
|
+
Runnable partUpload = () -> streamClient.uploadDataPart(sds.getId(), execution.getId(), myPartNum, compressedCsvFile);
|
285
|
+
uploadTasks.add(Executors.callable(partUpload));
|
286
|
+
partNum++;
|
287
|
+
}
|
288
|
+
// Asynchronously execute all uploading tasks
|
289
|
+
try {
|
290
|
+
executorService.invokeAll(uploadTasks);
|
291
|
+
}
|
292
|
+
catch (Exception e){
|
293
|
+
logger.error("Error uploading all data parts", e);
|
294
|
+
}
|
295
|
+
|
296
|
+
}catch(Exception e) {
|
297
|
+
logger.error("Exception on uploading!! "+e);
|
298
|
+
System.out.println(e.getMessage());
|
299
|
+
return;
|
300
|
+
}
|
256
301
|
//Commit Execution
|
257
|
-
Execution committedExecution =
|
302
|
+
Execution committedExecution = streamClient.commitExecution(sds.getId(), execution.getId());
|
258
303
|
logger.info("Committed Execution: " + committedExecution);
|
259
|
-
|
304
|
+
try {
|
305
|
+
FileUtils.deleteDirectory(new File(TEMP_DIR));
|
306
|
+
logger.info("Delete temp directory");
|
307
|
+
}
|
308
|
+
catch (IOException ex){
|
309
|
+
logger.error("Delete temp directory Failed "+ ex);
|
310
|
+
}
|
260
311
|
|
312
|
+
}
|
261
313
|
@Override
|
262
314
|
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
|
263
315
|
{
|
@@ -265,7 +317,6 @@ public class EmbulkOutputDomoOutputPlugin
|
|
265
317
|
final PageReader reader = new PageReader(schema);
|
266
318
|
return new DomoPageOutput(reader, client, task, schema);
|
267
319
|
}
|
268
|
-
|
269
320
|
public class DomoPageOutput
|
270
321
|
implements TransactionalPageOutput
|
271
322
|
{
|
@@ -274,36 +325,55 @@ public class EmbulkOutputDomoOutputPlugin
|
|
274
325
|
private final PageReader pageReader;
|
275
326
|
private DomoClient client;
|
276
327
|
private PluginTask task;
|
328
|
+
private int partPageNum;
|
277
329
|
|
278
330
|
private Schema schema;
|
279
|
-
ArrayList<StringBuilder> recordsPage =
|
331
|
+
ArrayList<StringBuilder> recordsPage = null;
|
332
|
+
private char delimiter = ',';
|
333
|
+
private String delimiterString = ",";
|
334
|
+
private String nullString = "";
|
335
|
+
private QuotePolicy quotePolicy = null;
|
336
|
+
private char quote = '"';
|
337
|
+
private char escape = quote;
|
338
|
+
private String newlineInField;
|
280
339
|
|
281
340
|
public DomoPageOutput(final PageReader pageReader,
|
282
|
-
|
341
|
+
DomoClient client, PluginTask task, Schema schema)
|
283
342
|
{
|
284
|
-
|
343
|
+
logger.info("NEW PAGE CONSTRUCTOR!!");
|
285
344
|
this.pageReader = pageReader;
|
286
345
|
this.client = client;
|
287
346
|
this.task = task;
|
288
347
|
this.schema = schema;
|
289
|
-
partNum++;
|
348
|
+
this.partPageNum = partNum++;
|
349
|
+
|
350
|
+
try {
|
351
|
+
File directory = new File(TEMP_DIR);
|
352
|
+
if (!directory.exists()) {
|
353
|
+
directory.mkdir();
|
354
|
+
}
|
355
|
+
}
|
356
|
+
catch(Exception ex){
|
357
|
+
System.out.println(ex.getMessage());
|
358
|
+
}
|
359
|
+
this.partPageNum = partNum++;
|
360
|
+
this.quotePolicy = this.task.getQuotePolicy();
|
361
|
+
this.quote = this.task.getQuoteChar() != '\0' ? this.task.getQuoteChar() : '"';
|
362
|
+
this.escape = this.task.getEscapeChar().or(this.quotePolicy == QuotePolicy.NONE ? '\\' : this.quote);
|
363
|
+
this.newlineInField = this.task.getNewlineInField().getString();
|
364
|
+
this.delimiter = ',';
|
365
|
+
this.delimiterString = ",";
|
366
|
+
this.nullString = "";
|
290
367
|
}
|
291
368
|
|
292
369
|
@Override
|
293
370
|
public void add(Page page)
|
294
371
|
{
|
372
|
+
final StringBuilder pageBuilder = new StringBuilder();
|
373
|
+
//logger.info("New page");
|
374
|
+
this.recordsPage = new ArrayList<StringBuilder>();
|
295
375
|
try {
|
296
376
|
pageReader.setPage(page);
|
297
|
-
|
298
|
-
final char delimiter = ',';
|
299
|
-
final String delimiterString = ",";
|
300
|
-
final String nullString = "";
|
301
|
-
final QuotePolicy quotePolicy = this.task.getQuotePolicy();
|
302
|
-
final char quote = this.task.getQuoteChar() != '\0' ? this.task.getQuoteChar() : '"';
|
303
|
-
final char escape = this.task.getEscapeChar().or(quotePolicy == QuotePolicy.NONE ? '\\' : quote);
|
304
|
-
final String newlineInField = this.task.getNewlineInField().getString();
|
305
|
-
|
306
|
-
|
307
377
|
while (pageReader.nextRecord()) {
|
308
378
|
StringBuilder lineBuilder = new StringBuilder();
|
309
379
|
pageReader.getSchema().visitColumns(new ColumnVisitor() {
|
@@ -380,16 +450,17 @@ public class EmbulkOutputDomoOutputPlugin
|
|
380
450
|
addNullString();
|
381
451
|
}
|
382
452
|
}
|
383
|
-
|
384
453
|
});
|
385
|
-
|
386
454
|
recordsPage.add(lineBuilder);
|
387
|
-
totalRecords++;
|
388
455
|
}
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
456
|
+
try {
|
457
|
+
//save as csv
|
458
|
+
WriteToFile(stringify(recordsPage), RandomStringUtils.randomNumeric(10)+RandomStringUtils.randomAlphabetic(20).toString()+".csv");
|
459
|
+
}
|
460
|
+
catch (IOException e){
|
461
|
+
logger.error("Exception on closing page!");
|
462
|
+
logger.error(e.getMessage());
|
463
|
+
}
|
393
464
|
}
|
394
465
|
catch (Exception ex) {
|
395
466
|
throw new RuntimeException(ex);
|
@@ -404,10 +475,6 @@ public class EmbulkOutputDomoOutputPlugin
|
|
404
475
|
@Override
|
405
476
|
public void close()
|
406
477
|
{
|
407
|
-
logger.info("Uplaod records count= "+recordsPage.size() +" partNum = "+partNum);
|
408
|
-
sdsClient.uploadDataPart(sds.getId(), execution.getId(), partNum, stringify(recordsPage));
|
409
|
-
// allRecords.addAll(recordsPage);
|
410
|
-
recordsPage = null;
|
411
478
|
}
|
412
479
|
|
413
480
|
@Override
|
@@ -422,7 +489,6 @@ public class EmbulkOutputDomoOutputPlugin
|
|
422
489
|
}
|
423
490
|
|
424
491
|
}
|
425
|
-
|
426
492
|
private String setEscapeAndQuoteValue(String v, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
|
427
493
|
{
|
428
494
|
StringBuilder escapedValue = new StringBuilder();
|
@@ -469,13 +535,11 @@ public class EmbulkOutputDomoOutputPlugin
|
|
469
535
|
return escapedValue.toString();
|
470
536
|
}
|
471
537
|
}
|
472
|
-
|
473
538
|
private String setQuoteValue(String v, char quote)
|
474
539
|
{
|
475
540
|
|
476
541
|
return String.valueOf(quote) + v + quote;
|
477
542
|
}
|
478
|
-
|
479
543
|
private String stringifyList(List<StringBuilder> records){
|
480
544
|
StringBuilder sb = new StringBuilder();
|
481
545
|
for (StringBuilder s : records)
|
@@ -490,7 +554,6 @@ public class EmbulkOutputDomoOutputPlugin
|
|
490
554
|
}
|
491
555
|
return sb.toString();
|
492
556
|
}
|
493
|
-
|
494
557
|
private String stringify(ArrayList<StringBuilder> records) {
|
495
558
|
StringBuilder sb = new StringBuilder();
|
496
559
|
for (StringBuilder s : records)
|
@@ -521,4 +584,94 @@ public class EmbulkOutputDomoOutputPlugin
|
|
521
584
|
|
522
585
|
return chunks;
|
523
586
|
}
|
524
|
-
|
587
|
+
public static String readFileAsString(String fileName)throws Exception
|
588
|
+
{
|
589
|
+
String data = "";
|
590
|
+
data = new String(Files.readAllBytes(Paths.get(fileName)));
|
591
|
+
return data;
|
592
|
+
}
|
593
|
+
public static List<File> toGzipFilesUTF8( List<File> sourceFiles, String path){
|
594
|
+
List<File> files = new ArrayList<>();
|
595
|
+
int batchMaxCount = 1000;
|
596
|
+
int currentCount = 0;
|
597
|
+
int remaining = sourceFiles.size();
|
598
|
+
ArrayList<File> batchFiles = new ArrayList<File>();
|
599
|
+
//System.out.println("All source files are "+remaining);
|
600
|
+
for (File sourceFile : sourceFiles) {
|
601
|
+
currentCount++;
|
602
|
+
batchFiles.add(sourceFile);
|
603
|
+
if(currentCount>=batchMaxCount || currentCount>=remaining){
|
604
|
+
remaining=remaining-batchMaxCount;
|
605
|
+
String zipFileName = sourceFile.getName().replace(".csv", ".zip");
|
606
|
+
files.add(toGzipFileUTF8(batchFiles, path + zipFileName));
|
607
|
+
// System.out.println("Add file "+sourceFile.getName()+"to zip file name = "+zipFileName+". Current count = "+currentCount +" Total records counter = "+totalRecordsCounter);
|
608
|
+
batchFiles.clear();
|
609
|
+
currentCount = 0;
|
610
|
+
}
|
611
|
+
//System.out.println("Add file "+sourceFile.getName()+ ". Current count = "+currentCount);
|
612
|
+
|
613
|
+
}
|
614
|
+
return files;
|
615
|
+
}
|
616
|
+
public static File toGzipFileUTF8(ArrayList<File> csvFiles, String zipFilePath){
|
617
|
+
File outputFile = new File(zipFilePath);
|
618
|
+
try {
|
619
|
+
GZIPOutputStream gzos = new GZIPOutputStream(new FileOutputStream(outputFile));
|
620
|
+
for (File csvFile : csvFiles){
|
621
|
+
BufferedReader reader = new BufferedReader(new FileReader(csvFile));
|
622
|
+
|
623
|
+
String currentLine;
|
624
|
+
while ((currentLine = reader.readLine()) != null){
|
625
|
+
currentLine += System.lineSeparator();
|
626
|
+
totalRecordsCounter++;
|
627
|
+
// Specifying UTF-8 encoding is critical; getBytes() uses ISO-8859-1 by default
|
628
|
+
gzos.write(currentLine.getBytes("UTF-8"));
|
629
|
+
}
|
630
|
+
}
|
631
|
+
gzos.flush();
|
632
|
+
gzos.finish();
|
633
|
+
gzos.close();
|
634
|
+
|
635
|
+
}
|
636
|
+
catch(IOException e) {
|
637
|
+
logger.error("Error compressing a string to gzip", e);
|
638
|
+
}
|
639
|
+
|
640
|
+
return outputFile;
|
641
|
+
}
|
642
|
+
public static void WriteToFile(String fileContent, String fileName) throws IOException {
|
643
|
+
//logger.info("writing csv file to "+fileName);
|
644
|
+
|
645
|
+
String tempFile = TEMP_DIR + fileName;
|
646
|
+
File file = new File(tempFile);
|
647
|
+
// if file does exists, then delete and create a new file
|
648
|
+
if (file.exists()) {
|
649
|
+
try {
|
650
|
+
File newFileName = new File(TEMP_DIR + "backup_" + fileName);
|
651
|
+
file.renameTo(newFileName);
|
652
|
+
file.createNewFile();
|
653
|
+
} catch (IOException e) {
|
654
|
+
e.printStackTrace();
|
655
|
+
}
|
656
|
+
}
|
657
|
+
FileWriter fw = new FileWriter(file.getAbsoluteFile());
|
658
|
+
BufferedWriter bw = new BufferedWriter(fw);
|
659
|
+
bw.write(fileContent);
|
660
|
+
bw.close();
|
661
|
+
}
|
662
|
+
|
663
|
+
public static ArrayList<File> loadCSVFiles (String searchFolder) {
|
664
|
+
File folder = new File(searchFolder);
|
665
|
+
File[] listOfFiles = folder.listFiles();
|
666
|
+
ArrayList<File> csvFiles = new ArrayList<File>();
|
667
|
+
|
668
|
+
for (File file : listOfFiles) {
|
669
|
+
if (file.isFile() && file.getName().indexOf(".csv")>0) {
|
670
|
+
csvFiles.add(file);
|
671
|
+
}
|
672
|
+
}
|
673
|
+
|
674
|
+
//System.out.println("All csv files are "+ csvFiles);
|
675
|
+
return csvFiles;
|
676
|
+
}
|
677
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-embulk_output_domo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.5
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Angelos Alexopoulos
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -64,9 +64,10 @@ files:
|
|
64
64
|
- classpath/junit-4.12.jar
|
65
65
|
- classpath/okhttp-3.7.0.jar
|
66
66
|
- classpath/logging-interceptor-3.7.0.jar
|
67
|
-
- classpath/
|
67
|
+
- classpath/commons-io-2.6.jar
|
68
68
|
- classpath/hamcrest-core-1.3.jar
|
69
69
|
- classpath/okio-1.12.0.jar
|
70
|
+
- classpath/embulk-output-embulk_output_domo-0.3.0.jar
|
70
71
|
homepage: https://github.com/alexopoulos7/embulk-output-embulk_output_domo
|
71
72
|
licenses:
|
72
73
|
- MIT
|