embulk-output-embulk_output_domo 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 06ef088bc54d8412fbfc5fe0d250e64679c2191d
- data.tar.gz: 54c0305194812b9c1bf056e9d61cc760f516a486
+ metadata.gz: a0f0e72f5092ee17a2a9f653daf209c54afd686e
+ data.tar.gz: 9d4343c4a5f4972d6018a2f5e5e1c49b9dddfd3a
  SHA512:
- metadata.gz: 826c5d74751cdc1951a588cdaa0a1b98a3eb296a8720bd263c36f58ab463deed9e9108acb9382bdb4275fce4a7c12579a17e21c8f2309a9986f68b179cbbd451
- data.tar.gz: 5804e980747764f6fe29b1b9b7676d1e41df7b05367afa6fa945e80901b7e604359588c353915fd0e57abd77f528f8ad7e2d41b504e98379d746b71176b721f3
+ metadata.gz: a69775fdbc1c12340e899ccbf18d6f9c8fa53340fbf7af47ffc0b279183c3ff25d4cfc762529081ccae6cc52e73ef72955e35d052b26f4349a260e3793a883e8
+ data.tar.gz: 298b879ef0206ca9522330b13ae1e4b1678c0aad1d2141185598986d76e1f3be42a0883bc2d8ff16d4b386385803bfeb39233b409e52d40b902bcaa57523818c
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
  provided
  }

- version = "0.2.5"
+ version = "0.3.0"

  sourceCompatibility = 1.8
  targetCompatibility = 1.8
@@ -23,6 +23,8 @@ dependencies {
  provided "org.embulk:embulk-core:0.9.7"
  compile 'com.domo:domo-java-sdk-all:0.4.0'
  compile 'junit:junit:4.12'
+ compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.0'
+ compile group: 'commons-io', name: 'commons-io', version: '2.6'
  testCompile "junit:junit:4.+"
  }

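The two new compile dependencies support the temp-file workflow introduced in this version: commons-lang3 supplies RandomStringUtils for naming a per-run scratch directory, and commons-io supplies FileUtils for deleting that directory after the upload is committed. A minimal sketch of that usage, based only on the two calls that appear in the plugin source below (the class name is illustrative):

import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;

public class TempDirSketch {
    public static void main(String[] args) throws IOException {
        // commons-lang3: random, collision-resistant scratch directory per run
        File tempDir = new File("/tmp/csv/" + RandomStringUtils.randomAlphabetic(10) + "/");
        tempDir.mkdirs();
        // ... per-page csv files would be written, gzipped, and uploaded here ...
        // commons-io: recursive delete once the stream execution is committed
        FileUtils.deleteDirectory(tempDir);
    }
}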
data/src/main/java/org/embulk/output/embulk_output_domo/EmbulkOutputDomoOutputPlugin.java CHANGED
@@ -1,3 +1,7 @@
+ /**
+ * Embulk output plugin that can move very large inputs to a Domo stream.
+ * Data parts are uploaded in parallel, following the algorithm described at
+ * https://developer.domo.com/docs/stream/upload-in-parallel
+ */
  package org.embulk.output.embulk_output_domo;

  import java.lang.StringBuilder;
@@ -49,6 +53,24 @@ import java.util.HashMap;
  import java.util.ArrayList;
  import java.util.Date;
  import java.text.SimpleDateFormat;
+ import java.io.File;
+ import java.io.FileWriter;
+ import java.io.FileReader;
+ import java.io.BufferedWriter;
+ import java.io.BufferedReader;
+ import java.io.FileOutputStream;
+
+ import java.nio.file.Files;
+ import java.nio.file.Paths;
+ import java.util.concurrent.Callable;
+ import java.util.concurrent.ExecutionException;
+ import java.util.concurrent.ExecutorService;
+ import java.util.concurrent.Executors;
+ import java.util.concurrent.Future;
+ import java.util.zip.GZIPOutputStream;
+ import java.util.Collections;
+ import org.apache.commons.lang3.RandomStringUtils;
+ import org.apache.commons.io.FileUtils;


  public class EmbulkOutputDomoOutputPlugin
@@ -56,7 +78,7 @@ public class EmbulkOutputDomoOutputPlugin
  {
  private static DomoClient client = null;
  private static Execution execution = null;
- private static StreamClient sdsClient = null;
+ private static StreamClient streamClient = null;
  private static Stream sds = null;
  private static TimestampFormatter[] timestampFormatters = null;
  private int partNum = 1;
@@ -67,6 +89,8 @@ public class EmbulkOutputDomoOutputPlugin
  private int currentPartCounter = 1;
  private static int totalBatches = 1;
  private static int pageReaderCount = 0;
+ private static String TEMP_DIR = "/tmp/csv/" + RandomStringUtils.randomAlphabetic(10) + "/";
+ public static int totalRecordsCounter = 0;

  public enum QuotePolicy
  {
@@ -86,12 +110,10 @@ public class EmbulkOutputDomoOutputPlugin
  return string;
  }
  }
-
  public interface TimestampColumnOption
  extends Task, TimestampFormatter.TimestampColumnOption
  {
  }
-
  public interface PluginTask
  extends Task, TimestampFormatter.Task
  {
@@ -140,7 +162,6 @@ public class EmbulkOutputDomoOutputPlugin
  @ConfigDefault("\"LF\"")
  Newline getNewlineInField();
  }
-
  public com.domo.sdk.datasets.model.Schema getDomoSchema(Schema schema){
  /**
  * We need to return domo Schema
@@ -211,16 +232,20 @@ public class EmbulkOutputDomoOutputPlugin
  .build();

  client = DomoClient.create(domoConfig);
- sdsClient = client.streamClient();
+ streamClient = client.streamClient();

- List<Stream> searchedSds = sdsClient.search("dataSource.name:" + task.getStreamName());
+ List<Stream> searchedSds = streamClient.search("dataSource.name:" + task.getStreamName());
  sds = searchedSds.get(0);
  logger.info("Stream "+ sds);
- execution = sdsClient.createExecution(sds.getId());
+ execution = streamClient.createExecution(sds.getId());
  logger.info("Created Execution: " + execution);
  timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
  totalBatches = task.getBatchSize();
+ File directory = new File(TEMP_DIR);
+ if (!directory.exists()) {

+ directory.mkdirs();
+ }
  }
  }
  catch(Exception ex){
@@ -232,32 +257,59 @@ public class EmbulkOutputDomoOutputPlugin
  control.run(task.dump());
  return Exec.newConfigDiff();
  }
-
  @Override
  public ConfigDiff resume(TaskSource taskSource,
- Schema schema, int taskCount,
- OutputPlugin.Control control)
+ Schema schema, int taskCount,
+ OutputPlugin.Control control)
  {
  throw new UnsupportedOperationException("embulk_output_domo output plugin does not support resuming");
  }
-
  @Override
  public void cleanup(TaskSource taskSource,
- Schema schema, int taskCount,
- List<TaskReport> successTaskReports)
+ Schema schema, int taskCount,
+ List<TaskReport> successTaskReports)
  {
- // List<List<StringBuilder>> batchLists = batches(allRecords, totalBatches);
- // int i=1;
- // for(List<StringBuilder> l : batchLists){
- // sdsClient.uploadDataPart(sds.getId(), execution.getId(), i, stringifyList(l));
- // i++;
- // }
- // logger.info("Finished Uploading");
+ try {
+ ArrayList<File> csvFiles = loadCSVFiles(TEMP_DIR);
+ File tempFolder = new File(TEMP_DIR);
+ List<File> compressedCsvFiles = toGzipFilesUTF8(csvFiles, tempFolder.getPath() + "/");
+ ExecutorService executorService = Executors.newCachedThreadPool();
+ List<Callable<Object>> uploadTasks = Collections.synchronizedList(new ArrayList<>());
+
+ // For each data part (csv gzip file), create a runnable upload task
+ long partNum = 1;
+ for (File compressedCsvFile : compressedCsvFiles){
+ long myPartNum = partNum;
+ // "uploadDataPart()" accepts csv strings, csv files, and compressed csv files
+ Runnable partUpload = () -> streamClient.uploadDataPart(sds.getId(), execution.getId(), myPartNum, compressedCsvFile);
+ uploadTasks.add(Executors.callable(partUpload));
+ partNum++;
+ }
+ // Execute all upload tasks concurrently and wait for them to finish
+ try {
+ executorService.invokeAll(uploadTasks);
+ }
+ catch (Exception e){
+ logger.error("Error uploading all data parts", e);
+ }
+
+ } catch(Exception e) {
+ logger.error("Exception while uploading", e);
+ return;
+ }
  //Commit Execution
- Execution committedExecution = sdsClient.commitExecution(sds.getId(), execution.getId());
+ Execution committedExecution = streamClient.commitExecution(sds.getId(), execution.getId());
  logger.info("Committed Execution: " + committedExecution);
- }
+ try {
+ FileUtils.deleteDirectory(new File(TEMP_DIR));
+ logger.info("Deleted temp directory");
+ }
+ catch (IOException ex){
+ logger.error("Deleting temp directory failed", ex);
+ }

+ }
  @Override
  public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
  {
@@ -265,7 +317,6 @@ public class EmbulkOutputDomoOutputPlugin
  final PageReader reader = new PageReader(schema);
  return new DomoPageOutput(reader, client, task, schema);
  }
-
  public class DomoPageOutput
  implements TransactionalPageOutput
  {
@@ -274,36 +325,55 @@ public class EmbulkOutputDomoOutputPlugin
  private final PageReader pageReader;
  private DomoClient client;
  private PluginTask task;
+ private int partPageNum;

  private Schema schema;
- ArrayList<StringBuilder> recordsPage = new ArrayList<StringBuilder>();
+ ArrayList<StringBuilder> recordsPage = null;
+ private char delimiter = ',';
+ private String delimiterString = ",";
+ private String nullString = "";
+ private QuotePolicy quotePolicy = null;
+ private char quote = '"';
+ private char escape = quote;
+ private String newlineInField;

  public DomoPageOutput(final PageReader pageReader,
- DomoClient client, PluginTask task, Schema schema)
+ DomoClient client, PluginTask task, Schema schema)
  {
- //logger.info("NEW PAGE CONSTRUCTOR!!");
+ logger.info("New page output constructor");
  this.pageReader = pageReader;
  this.client = client;
  this.task = task;
  this.schema = schema;
- partNum++;
+ this.partPageNum = partNum++;
+
+ try {
+ File directory = new File(TEMP_DIR);
+ if (!directory.exists()) {
+ directory.mkdir();
+ }
+ }
+ catch(Exception ex){
+ logger.error(ex.getMessage());
+ }
+ this.quotePolicy = this.task.getQuotePolicy();
+ this.quote = this.task.getQuoteChar() != '\0' ? this.task.getQuoteChar() : '"';
+ this.escape = this.task.getEscapeChar().or(this.quotePolicy == QuotePolicy.NONE ? '\\' : this.quote);
+ this.newlineInField = this.task.getNewlineInField().getString();
+ this.delimiter = ',';
+ this.delimiterString = ",";
+ this.nullString = "";
  }

  @Override
  public void add(Page page)
  {
+ this.recordsPage = new ArrayList<StringBuilder>();
  try {
  pageReader.setPage(page);
-
- final char delimiter = ',';
- final String delimiterString = ",";
- final String nullString = "";
- final QuotePolicy quotePolicy = this.task.getQuotePolicy();
- final char quote = this.task.getQuoteChar() != '\0' ? this.task.getQuoteChar() : '"';
- final char escape = this.task.getEscapeChar().or(quotePolicy == QuotePolicy.NONE ? '\\' : quote);
- final String newlineInField = this.task.getNewlineInField().getString();
-
-
  while (pageReader.nextRecord()) {
  StringBuilder lineBuilder = new StringBuilder();
  pageReader.getSchema().visitColumns(new ColumnVisitor() {
@@ -380,16 +450,17 @@ public class EmbulkOutputDomoOutputPlugin
  addNullString();
  }
  }
-
  });
-
  recordsPage.add(lineBuilder);
- totalRecords++;
  }
-
-
-
-
+ try {
+ // save this page as a csv file in the temp directory
+ WriteToFile(stringify(recordsPage), RandomStringUtils.randomNumeric(10) + RandomStringUtils.randomAlphabetic(20) + ".csv");
+ }
+ catch (IOException e){
+ logger.error("Exception writing page to csv file!");
+ logger.error(e.getMessage());
+ }
  }
  catch (Exception ex) {
  throw new RuntimeException(ex);
@@ -404,10 +475,6 @@ public class EmbulkOutputDomoOutputPlugin
  @Override
  public void close()
  {
- logger.info("Uplaod records count= "+recordsPage.size() +" partNum = "+partNum);
- sdsClient.uploadDataPart(sds.getId(), execution.getId(), partNum, stringify(recordsPage));
- // allRecords.addAll(recordsPage);
- recordsPage = null;
  }

  @Override
@@ -422,7 +489,6 @@ public class EmbulkOutputDomoOutputPlugin
  }

  }
-
  private String setEscapeAndQuoteValue(String v, char delimiter, QuotePolicy policy, char quote, char escape, String newline, String nullString)
  {
  StringBuilder escapedValue = new StringBuilder();
@@ -469,13 +535,11 @@ public class EmbulkOutputDomoOutputPlugin
  return escapedValue.toString();
  }
  }
-
  private String setQuoteValue(String v, char quote)
  {

  return String.valueOf(quote) + v + quote;
  }
-
  private String stringifyList(List<StringBuilder> records){
  StringBuilder sb = new StringBuilder();
  for (StringBuilder s : records)
@@ -490,7 +554,6 @@ public class EmbulkOutputDomoOutputPlugin
  }
  return sb.toString();
  }
-
  private String stringify(ArrayList<StringBuilder> records) {
  StringBuilder sb = new StringBuilder();
  for (StringBuilder s : records)
@@ -521,4 +584,94 @@ public class EmbulkOutputDomoOutputPlugin

  return chunks;
  }
- }
+ public static String readFileAsString(String fileName) throws Exception
+ {
+ return new String(Files.readAllBytes(Paths.get(fileName)));
+ }
+ public static List<File> toGzipFilesUTF8(List<File> sourceFiles, String path){
+ List<File> files = new ArrayList<>();
+ int batchMaxCount = 1000;
+ int currentCount = 0;
+ int remaining = sourceFiles.size();
+ ArrayList<File> batchFiles = new ArrayList<File>();
+ for (File sourceFile : sourceFiles) {
+ currentCount++;
+ batchFiles.add(sourceFile);
+ // flush a batch once it reaches batchMaxCount files, or when the remaining tail is smaller than a full batch
+ if (currentCount >= batchMaxCount || currentCount >= remaining){
+ remaining = remaining - batchMaxCount;
+ String zipFileName = sourceFile.getName().replace(".csv", ".zip");
+ files.add(toGzipFileUTF8(batchFiles, path + zipFileName));
+ batchFiles.clear();
+ currentCount = 0;
+ }
+ }
+ return files;
+ }
+ public static File toGzipFileUTF8(ArrayList<File> csvFiles, String zipFilePath){
+ File outputFile = new File(zipFilePath);
+ try {
+ GZIPOutputStream gzos = new GZIPOutputStream(new FileOutputStream(outputFile));
+ for (File csvFile : csvFiles){
+ BufferedReader reader = new BufferedReader(new FileReader(csvFile));
+
+ String currentLine;
+ while ((currentLine = reader.readLine()) != null){
+ currentLine += System.lineSeparator();
+ totalRecordsCounter++;
+ // Specify UTF-8 explicitly; a bare getBytes() would use the platform default charset
+ gzos.write(currentLine.getBytes("UTF-8"));
+ }
+ reader.close();
+ }
+ gzos.flush();
+ gzos.finish();
+ gzos.close();
+
+ }
+ catch(IOException e) {
+ logger.error("Error compressing csv files to gzip", e);
+ }
+
+ return outputFile;
+ }
+ public static void WriteToFile(String fileContent, String fileName) throws IOException {
+ String tempFile = TEMP_DIR + fileName;
+ File file = new File(tempFile);
+ // if the file already exists, rename it to a backup and start a fresh file
+ if (file.exists()) {
+ try {
+ File newFileName = new File(TEMP_DIR + "backup_" + fileName);
+ file.renameTo(newFileName);
+ file.createNewFile();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ FileWriter fw = new FileWriter(file.getAbsoluteFile());
+ BufferedWriter bw = new BufferedWriter(fw);
+ bw.write(fileContent);
+ bw.close();
+ }
+
+ public static ArrayList<File> loadCSVFiles(String searchFolder) {
+ File folder = new File(searchFolder);
+ File[] listOfFiles = folder.listFiles();
+ ArrayList<File> csvFiles = new ArrayList<File>();
+
+ for (File file : listOfFiles) {
+ if (file.isFile() && file.getName().endsWith(".csv")) {
+ csvFiles.add(file);
+ }
+ }
+
+ return csvFiles;
+ }
+ }
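Taken together, cleanup() implements Domo's documented parallel-upload pattern: queue one Callable per gzipped data part, run them all on an ExecutorService, then commit the execution. A self-contained sketch of that pattern follows; uploadPart is a hypothetical stand-in for streamClient.uploadDataPart(streamId, executionId, partNum, file), which requires a live Domo client:

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ParallelUploadSketch {
    // Hypothetical stand-in for streamClient.uploadDataPart(streamId, executionId, partNum, file)
    static void uploadPart(long partNum, File part) {
        System.out.println("uploading part " + partNum + ": " + part.getName());
    }

    public static void main(String[] args) throws InterruptedException {
        List<File> parts = Arrays.asList(new File("part1.csv.gz"), new File("part2.csv.gz"));
        ExecutorService pool = Executors.newCachedThreadPool();
        List<Callable<Object>> tasks = new ArrayList<>();
        long partNum = 1;
        for (File part : parts) {
            final long myPartNum = partNum++;   // the lambda needs an effectively final copy
            tasks.add(Executors.callable(() -> uploadPart(myPartNum, part)));
        }
        pool.invokeAll(tasks);   // blocks until every part upload has finished
        pool.shutdown();
        // streamClient.commitExecution(...) would follow here to finalize the parts
    }
}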
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: embulk-output-embulk_output_domo
  version: !ruby/object:Gem::Version
- version: 0.2.5
+ version: 0.3.0
  platform: ruby
  authors:
  - Angelos Alexopoulos
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-10-19 00:00:00.000000000 Z
+ date: 2018-10-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
@@ -64,9 +64,10 @@ files:
  - classpath/junit-4.12.jar
  - classpath/okhttp-3.7.0.jar
  - classpath/logging-interceptor-3.7.0.jar
- - classpath/embulk-output-embulk_output_domo-0.2.5.jar
+ - classpath/commons-io-2.6.jar
  - classpath/hamcrest-core-1.3.jar
  - classpath/okio-1.12.0.jar
+ - classpath/embulk-output-embulk_output_domo-0.3.0.jar
  homepage: https://github.com/alexopoulos7/embulk-output-embulk_output_domo
  licenses:
  - MIT