embulk-input-marketo 0.5.7.alpha.6 → 0.6.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52):
  1. checksums.yaml +4 -4
  2. data/build.gradle +6 -6
  3. data/src/main/java/org/embulk/input/marketo/MarketoService.java +1 -1
  4. data/src/main/java/org/embulk/input/marketo/MarketoServiceImpl.java +4 -5
  5. data/src/main/java/org/embulk/input/marketo/MarketoUtils.java +45 -14
  6. data/src/main/java/org/embulk/input/marketo/delegate/ActivityBulkExtractInputPlugin.java +12 -11
  7. data/src/main/java/org/embulk/input/marketo/delegate/CampaignInputPlugin.java +4 -23
  8. data/src/main/java/org/embulk/input/marketo/delegate/LeadBulkExtractInputPlugin.java +18 -11
  9. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithListInputPlugin.java +3 -18
  10. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithProgramInputPlugin.java +5 -20
  11. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseBulkExtractInputPlugin.java +213 -87
  12. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseInputPluginDelegate.java +22 -0
  13. data/src/main/java/org/embulk/input/marketo/model/MarketoBulkExtractRequest.java +12 -1
  14. data/src/main/java/org/embulk/input/marketo/model/MarketoField.java +28 -2
  15. data/src/main/java/org/embulk/input/marketo/model/MarketoResponse.java +2 -1
  16. data/src/main/java/org/embulk/input/marketo/model/filter/DateRangeFilter.java +9 -0
  17. data/src/main/java/org/embulk/input/marketo/rest/MarketoBaseRestClient.java +4 -0
  18. data/src/main/java/org/embulk/input/marketo/rest/{MarketoFileResponseEntityReader.java → MarketoInputStreamResponseEntityReader.java} +2 -2
  19. data/src/main/java/org/embulk/input/marketo/rest/MarketoResponseJetty92EntityReader.java +4 -1
  20. data/src/main/java/org/embulk/input/marketo/rest/MarketoRestClient.java +76 -42
  21. data/src/main/java/org/embulk/input/marketo/rest/RecordPagingIterable.java +35 -11
  22. data/src/test/java/org/embulk/input/marketo/MarketoServiceImplTest.java +159 -0
  23. data/src/test/java/org/embulk/input/marketo/MarketoUtilsTest.java +87 -0
  24. data/src/test/java/org/embulk/input/marketo/delegate/ActivityBulkExtractInputPluginTest.java +84 -0
  25. data/src/test/java/org/embulk/input/marketo/delegate/CampaignInputPluginTest.java +73 -0
  26. data/src/test/java/org/embulk/input/marketo/delegate/LeadBulkExtractInputPluginTest.java +94 -0
  27. data/src/test/java/org/embulk/input/marketo/delegate/LeadWithListInputPluginTest.java +99 -0
  28. data/src/test/java/org/embulk/input/marketo/delegate/LeadWithProgramInputPluginTest.java +101 -0
  29. data/src/test/java/org/embulk/input/marketo/delegate/MarketoBaseBulkExtractInputPluginTest.java +114 -0
  30. data/src/test/java/org/embulk/input/marketo/rest/MarketoBaseRestClientTest.java +3 -15
  31. data/src/test/java/org/embulk/input/marketo/rest/MarketoRestClientTest.java +450 -0
  32. data/src/test/resources/config/activity_bulk_extract_config.yaml +7 -0
  33. data/src/test/resources/config/lead_bulk_extract_config.yaml +8 -0
  34. data/src/test/resources/config/rest_config.yaml +3 -0
  35. data/src/test/resources/fixtures/activity_extract1.csv +35 -0
  36. data/src/test/resources/fixtures/activity_extract2.csv +22 -0
  37. data/src/test/resources/fixtures/all_program_full.json +53 -0
  38. data/src/test/resources/fixtures/campaign_response.json +38 -0
  39. data/src/test/resources/fixtures/campaign_response_full.json +102 -0
  40. data/src/test/resources/fixtures/lead_by_list.json +33 -0
  41. data/src/test/resources/fixtures/lead_by_program_response.json +47 -0
  42. data/src/test/resources/fixtures/lead_describe.json +221 -0
  43. data/src/test/resources/fixtures/lead_describe_expected.json +66 -0
  44. data/src/test/resources/fixtures/lead_describe_marketo_fields_full.json +518 -0
  45. data/src/test/resources/fixtures/lead_extract1.csv +11 -0
  46. data/src/test/resources/fixtures/lead_response_full.json +2402 -0
  47. data/src/test/resources/fixtures/lead_with_program_full.json +17 -0
  48. data/src/test/resources/fixtures/leads_extract2.csv +10 -0
  49. data/src/test/resources/fixtures/lists_response.json +31 -0
  50. data/src/test/resources/fixtures/program_response.json +71 -0
  51. metadata +39 -8
  52. data/src/main/java/org/embulk/input/marketo/model/filter/ListFilter.java +0 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c729920ccd16968b191ec5c12cf8f982f925f80
4
- data.tar.gz: 1cf0acc73b4c5e90b8a11fd78770ee56791dc565
3
+ metadata.gz: 4581ee28de4fb94a896d4e18509f39086801c9eb
4
+ data.tar.gz: 352811dc47c6e42ff55bd1b0531fc83bc0c19949
5
5
  SHA512:
6
- metadata.gz: 3f14afd21ae9c968c6c0e6f1ebaf324421ae4ff10c1ceb74020ea458e970f039c2b81e9cf026c40e73089986155ea116ee1d3d5cdb47b1455b529f7de5c51506
7
- data.tar.gz: f54d8dff5fc370b8b6f4b2f793c3f90f9e9f57a510383edc83b4c996dc255951d8d6ee882b65d9146e609180a861cea793a1c2c8167d2905f44c8c479c0458c1
6
+ metadata.gz: 95d49092d1edee0454c4a4ed04c29c06c0ec44db9de8bc2aedccb06eb706e814a89317c9c55662d3e8cf6c967ac1e29209e3a27487a6429eb2743c416a7c9620
7
+ data.tar.gz: aea7947562f1d5b6753029a4de2c47f8edfb91c2f020863c8e916964591270219c09aeb3b1e85ab46f9b98b5c5a8c2976c14332d061bf9b1b14988736bf15598
data/build.gradle CHANGED
@@ -16,7 +16,7 @@ repositories {
16
16
  configurations {
17
17
  provided
18
18
  }
19
- version = "0.5.7.alpha.6"
19
+ version = "0.6.0.alpha.1"
20
20
  sourceCompatibility = 1.7
21
21
  targetCompatibility = 1.7
22
22
 
@@ -81,13 +81,13 @@ task gemspec {
81
81
  Gem::Specification.new do |spec|
82
82
  spec.name = "${project.name}"
83
83
  spec.version = "${project.version}"
84
- spec.authors = ["Tai Khuu"]
84
+ spec.authors = ["uu59", "yoshihara", "taikhuu"]
85
85
  spec.summary = %[Marketo input plugin for Embulk]
86
86
  spec.description = %[Loads records from Marketo.]
87
- spec.email = ["tai@treasuredata.com"]
88
- spec.licenses = ["MIT"]
89
- # TODO set this: spec.homepage = "https://github.com/khuutantai/embulk-input-marketo"
90
-
87
+ spec.email = ["k@uu59.org", "h.yoshihara@everyleaf.com", "tai@treasuredata.com"]
88
+ spec.licenses = ["Apache2"]
89
+ spec.homepage = "https://github.com/treasure-data/embulk-input-marketo"
90
+
91
91
  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
92
92
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
93
93
  spec.require_paths = ["lib"]
@@ -18,7 +18,7 @@ public interface MarketoService
18
18
 
19
19
  List<MarketoField> describeLeadByLists();
20
20
 
21
- File extractLead(Date startTime, Date endTime, List<String> extractedFields, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond);
21
+ File extractLead(Date startTime, Date endTime, List<String> extractedFields, String filterField, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond);
22
22
 
23
23
  File extractAllActivity(Date startTime, Date endTime, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond);
24
24
 
@@ -39,9 +39,9 @@ public class MarketoServiceImpl implements MarketoService
39
39
  }
40
40
 
41
41
  @Override
42
- public File extractLead(Date startTime, Date endTime, List<String> extractedFields, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond)
42
+ public File extractLead(Date startTime, Date endTime, List<String> extractedFields, String filterField, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond)
43
43
  {
44
- String exportID = marketoRestClient.createLeadBulkExtract(startTime, endTime, extractedFields);
44
+ String exportID = marketoRestClient.createLeadBulkExtract(startTime, endTime, extractedFields, filterField);
45
45
  marketoRestClient.startLeadBulkExtract(exportID);
46
46
  try {
47
47
  marketoRestClient.waitLeadExportJobComplete(exportID, pollingTimeIntervalSecond, bulkJobTimeoutSecond);
@@ -70,7 +70,7 @@ public class MarketoServiceImpl implements MarketoService
70
70
  @Override
71
71
  public File extractAllActivity(Date startTime, Date endTime, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond)
72
72
  {
73
- String exportID = marketoRestClient.createActitvityExtract(startTime, endTime, null);
73
+ String exportID = marketoRestClient.createActivityExtract(startTime, endTime);
74
74
  marketoRestClient.startActitvityBulkExtract(exportID);
75
75
  try {
76
76
  marketoRestClient.waitActitvityExportJobComplete(exportID, pollingTimeIntervalSecond, bulkJobTimeoutSecond);
@@ -92,9 +92,8 @@ public class MarketoServiceImpl implements MarketoService
92
92
  final String id = node.get("id").asText();
93
93
  iterables.add(Iterables.transform(marketoRestClient.getLeadsByList(id, fieldNames), new Function<ObjectNode, ObjectNode>()
94
94
  {
95
- @Nullable
96
95
  @Override
97
- public ObjectNode apply(@Nullable ObjectNode input)
96
+ public ObjectNode apply(ObjectNode input)
98
97
  {
99
98
  input.put(MarketoUtils.LIST_ID_COLUMN_NAME, id);
100
99
  return input;
@@ -11,18 +11,12 @@ import org.embulk.base.restclient.jackson.JacksonTopLevelValueLocator;
11
11
  import org.embulk.base.restclient.record.ServiceRecord;
12
12
  import org.embulk.base.restclient.record.ValueLocator;
13
13
  import org.embulk.input.marketo.model.MarketoField;
14
- import org.embulk.spi.Exec;
15
14
  import org.joda.time.DateTime;
16
- import org.slf4j.Logger;
17
15
 
18
16
  import javax.annotation.Nullable;
19
17
 
20
18
  import java.util.ArrayList;
21
- import java.util.Calendar;
22
- import java.util.Date;
23
- import java.util.HashMap;
24
19
  import java.util.List;
25
- import java.util.Map;
26
20
  import java.util.Set;
27
21
 
28
22
  /**
@@ -71,7 +65,7 @@ public class MarketoUtils
71
65
 
72
66
  public static List<String> getFieldNameFromMarketoFields(List<MarketoField> columns, String... excludedFields)
73
67
  {
74
- Set<String> excludeFields= Sets.newHashSet(excludedFields);
68
+ Set<String> excludeFields = Sets.newHashSet(excludedFields);
75
69
  List<String> extractedFields = new ArrayList<>();
76
70
  for (MarketoField column : columns) {
77
71
  if (excludeFields.contains(column.getName())) {
@@ -82,17 +76,54 @@ public class MarketoUtils
82
76
  return extractedFields;
83
77
  }
84
78
 
85
- public static <K, V> Map<K, V> zip(List<K> keys, List<V> values)
79
+ public static String buildColumnName(String prefix, String columnName)
80
+ {
81
+ return prefix + "_" + columnName;
82
+ }
83
+
84
+ public static final List<DateRange> sliceRange(DateTime fromDate, DateTime toDate, int rangeSize)
86
85
  {
87
- Map<K, V> kvMap = new HashMap<>();
88
- for (int i = 0; i < keys.size(); i++) {
89
- kvMap.put(keys.get(i), values.get(i));
86
+ List<DateRange> ranges = new ArrayList<>();
87
+ while (fromDate.isBefore(toDate)) {
88
+ DateTime nextToDate = fromDate.plusDays(rangeSize);
89
+ if (nextToDate.isAfter(toDate)) {
90
+ ranges.add(new DateRange(fromDate, toDate));
91
+ break;
92
+ }
93
+ ranges.add(new DateRange(fromDate, nextToDate));
94
+ fromDate = nextToDate.plusSeconds(1);
90
95
  }
91
- return kvMap;
96
+ return ranges;
92
97
  }
93
98
 
94
- public static String buildColumnName(String prefix, String columnName)
99
+ public static String getIdentityEndPoint(String accountId)
95
100
  {
96
- return prefix + "_" + columnName;
101
+ return "https://" + accountId + ".mktorest.com/identity";
102
+ }
103
+
104
+ public static String getEndPoint(String accountID)
105
+ {
106
+ return "https://" + accountID + ".mktorest.com";
107
+ }
108
+
109
+ public static final class DateRange
110
+ {
111
+ public final DateTime fromDate;
112
+ public final DateTime toDate;
113
+
114
+ public DateRange(DateTime fromDate, DateTime toDate)
115
+ {
116
+ this.fromDate = fromDate;
117
+ this.toDate = toDate;
118
+ }
119
+
120
+ @Override
121
+ public String toString()
122
+ {
123
+ return "DateRange{" +
124
+ "fromDate=" + fromDate +
125
+ ", toDate=" + toDate +
126
+ '}';
127
+ }
97
128
  }
98
129
  }
@@ -1,22 +1,20 @@
1
1
  package org.embulk.input.marketo.delegate;
2
2
 
3
+ import com.google.common.base.Optional;
3
4
  import org.embulk.base.restclient.ServiceResponseMapper;
4
5
  import org.embulk.base.restclient.jackson.JacksonServiceResponseMapper;
5
6
  import org.embulk.base.restclient.record.ValueLocator;
6
7
  import org.embulk.input.marketo.MarketoService;
7
- import org.embulk.input.marketo.MarketoServiceImpl;
8
8
  import org.embulk.input.marketo.MarketoUtils;
9
- import org.embulk.input.marketo.rest.MarketoRestClient;
10
9
  import org.embulk.spi.DataException;
11
10
  import org.embulk.spi.Exec;
12
- import org.embulk.spi.Schema;
13
11
  import org.embulk.spi.type.Types;
12
+ import org.joda.time.DateTime;
14
13
  import org.slf4j.Logger;
15
14
 
16
15
  import java.io.FileInputStream;
17
16
  import java.io.FileNotFoundException;
18
17
  import java.io.InputStream;
19
- import java.util.Date;
20
18
 
21
19
  /**
22
20
  * Created by tai.khuu on 9/18/17.
@@ -24,21 +22,24 @@ import java.util.Date;
24
22
  public class ActivityBulkExtractInputPlugin extends MarketoBaseBulkExtractInputPlugin<ActivityBulkExtractInputPlugin.PluginTask>
25
23
  {
26
24
  private static final Logger LOGGER = Exec.getLogger(ActivityBulkExtractInputPlugin.class);
25
+ public static final String INCREMENTAL_COLUMN = "activityDate";
26
+ public static final String UID_COLUMN = "marketoGUID";
27
27
 
28
28
  public interface PluginTask extends MarketoBaseBulkExtractInputPlugin.PluginTask {}
29
29
 
30
- public ActivityBulkExtractInputPlugin()
30
+ @Override
31
+ public void validateInputTask(PluginTask task)
31
32
  {
32
- super("activityDate", "marketoGUID");
33
+ task.setIncrementalColumn(Optional.of(INCREMENTAL_COLUMN));
34
+ task.setUidColumn(Optional.of(UID_COLUMN));
35
+ super.validateInputTask(task);
33
36
  }
34
37
 
35
38
  @Override
36
- protected InputStream getExtractedStream(PluginTask task, Schema schema)
39
+ protected InputStream getExtractedStream(MarketoService service, PluginTask task, DateTime fromDate, DateTime toDate)
37
40
  {
38
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
39
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
40
- Date fromDate = task.getFromDate();
41
- return new FileInputStream(marketoService.extractAllActivity(fromDate, task.getToDate().orNull(), task.getPollingIntervalSecond(), task.getBulkJobTimeoutSecond()));
41
+ try {
42
+ return new FileInputStream(service.extractAllActivity(fromDate.toDate(), toDate.toDate(), task.getPollingIntervalSecond(), task.getBulkJobTimeoutSecond()));
42
43
  }
43
44
  catch (FileNotFoundException e) {
44
45
  LOGGER.error("Exception when trying to extract activity", e);
@@ -3,19 +3,14 @@ package org.embulk.input.marketo.delegate;
3
3
  import com.google.common.collect.FluentIterable;
4
4
  import org.embulk.base.restclient.ServiceResponseMapper;
5
5
  import org.embulk.base.restclient.jackson.JacksonServiceResponseMapper;
6
- import org.embulk.base.restclient.record.RecordImporter;
7
6
  import org.embulk.base.restclient.record.ServiceRecord;
8
7
  import org.embulk.base.restclient.record.ValueLocator;
9
- import org.embulk.config.TaskReport;
10
8
  import org.embulk.input.marketo.MarketoService;
11
- import org.embulk.input.marketo.MarketoServiceImpl;
12
9
  import org.embulk.input.marketo.MarketoUtils;
13
- import org.embulk.input.marketo.model.MarketoField;
14
- import org.embulk.input.marketo.rest.MarketoRestClient;
15
- import org.embulk.spi.Exec;
16
- import org.embulk.spi.PageBuilder;
17
10
  import org.embulk.spi.type.Types;
18
11
 
12
+ import java.util.Iterator;
13
+
19
14
  /**
20
15
  * Input plugin use to import Campaign
21
16
  * Created by tai.khuu on 9/18/17.
@@ -31,23 +26,9 @@ public class CampaignInputPlugin extends MarketoBaseInputPluginDelegate<Campaign
31
26
  }
32
27
 
33
28
  @Override
34
- public TaskReport ingestServiceData(PluginTask task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
29
+ protected Iterator<ServiceRecord> getServiceRecords(MarketoService marketoService, PluginTask task)
35
30
  {
36
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
37
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
38
- {
39
- FluentIterable<ServiceRecord> serviceRecords = FluentIterable.from(marketoService.getCampaign()).transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION);
40
- int imported = 0;
41
- for (ServiceRecord serviceRecord : serviceRecords) {
42
- if (imported >= PREVIEW_RECORD_LIMIT && Exec.isPreview()) {
43
- break;
44
- }
45
- recordImporter.importRecord(serviceRecord, pageBuilder);
46
- imported++;
47
- }
48
- return Exec.newTaskReport();
49
- }
50
- }
31
+ return FluentIterable.from(marketoService.getCampaign()).transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION).iterator();
51
32
  }
52
33
 
53
34
  @Override
@@ -1,7 +1,10 @@
1
1
  package org.embulk.input.marketo.delegate;
2
2
 
3
+ import com.google.common.base.Optional;
3
4
  import org.embulk.base.restclient.ServiceResponseMapper;
4
5
  import org.embulk.base.restclient.record.ValueLocator;
6
+ import org.embulk.config.Config;
7
+ import org.embulk.config.ConfigDefault;
5
8
  import org.embulk.input.marketo.MarketoService;
6
9
  import org.embulk.input.marketo.MarketoServiceImpl;
7
10
  import org.embulk.input.marketo.MarketoUtils;
@@ -9,14 +12,12 @@ import org.embulk.input.marketo.model.MarketoField;
9
12
  import org.embulk.input.marketo.rest.MarketoRestClient;
10
13
  import org.embulk.spi.DataException;
11
14
  import org.embulk.spi.Exec;
12
- import org.embulk.spi.Schema;
15
+ import org.joda.time.DateTime;
13
16
  import org.slf4j.Logger;
14
17
 
15
- import java.io.File;
16
18
  import java.io.FileInputStream;
17
19
  import java.io.FileNotFoundException;
18
20
  import java.io.InputStream;
19
- import java.util.Date;
20
21
  import java.util.List;
21
22
 
22
23
  /**
@@ -26,24 +27,30 @@ public class LeadBulkExtractInputPlugin extends MarketoBaseBulkExtractInputPlugi
26
27
  {
27
28
  private static final Logger LOGGER = Exec.getLogger(LeadBulkExtractInputPlugin.class);
28
29
 
30
+ private static final String UPDATED_AT = "updatedAt";
31
+
29
32
  public interface PluginTask extends MarketoBaseBulkExtractInputPlugin.PluginTask
30
33
  {
34
+ @Config("use_updated_at")
35
+ @ConfigDefault("false")
36
+ boolean getUseUpdatedAt();
31
37
  }
32
38
 
33
- public LeadBulkExtractInputPlugin()
39
+ @Override
40
+ public void validateInputTask(PluginTask task)
34
41
  {
35
- super("updatedAt", null);
42
+ if (task.getUseUpdatedAt()) {
43
+ task.setIncrementalColumn(Optional.of(UPDATED_AT));
44
+ }
45
+ super.validateInputTask(task);
36
46
  }
37
47
 
38
48
  @Override
39
- protected InputStream getExtractedStream(PluginTask task, Schema schema)
49
+ protected InputStream getExtractedStream(MarketoService service, PluginTask task, DateTime fromDate, DateTime toDate)
40
50
  {
41
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
42
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
51
+ try {
43
52
  List<String> fieldNames = task.getExtractedFields();
44
- Date fromDate = task.getFromDate();
45
- File file = marketoService.extractLead(fromDate, task.getToDate().orNull(), fieldNames, task.getPollingIntervalSecond(), task.getBulkJobTimeoutSecond());
46
- return new FileInputStream(file);
53
+ return new FileInputStream(service.extractLead(fromDate.toDate(), toDate.toDate(), fieldNames, task.getIncrementalColumn().orNull(), task.getPollingIntervalSecond(), task.getBulkJobTimeoutSecond()));
47
54
  }
48
55
  catch (FileNotFoundException e) {
49
56
  LOGGER.error("File not found", e);
@@ -2,18 +2,15 @@ package org.embulk.input.marketo.delegate;
2
2
 
3
3
  import com.google.common.collect.FluentIterable;
4
4
  import org.embulk.base.restclient.ServiceResponseMapper;
5
- import org.embulk.base.restclient.record.RecordImporter;
6
5
  import org.embulk.base.restclient.record.ServiceRecord;
7
6
  import org.embulk.base.restclient.record.ValueLocator;
8
- import org.embulk.config.TaskReport;
9
7
  import org.embulk.input.marketo.MarketoService;
10
8
  import org.embulk.input.marketo.MarketoServiceImpl;
11
9
  import org.embulk.input.marketo.MarketoUtils;
12
10
  import org.embulk.input.marketo.model.MarketoField;
13
11
  import org.embulk.input.marketo.rest.MarketoRestClient;
14
- import org.embulk.spi.Exec;
15
- import org.embulk.spi.PageBuilder;
16
12
 
13
+ import java.util.Iterator;
17
14
  import java.util.List;
18
15
 
19
16
  /**
@@ -30,21 +27,9 @@ public class LeadWithListInputPlugin extends MarketoBaseInputPluginDelegate<Lead
30
27
  }
31
28
 
32
29
  @Override
33
- public TaskReport ingestServiceData(PluginTask task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
30
+ protected Iterator<ServiceRecord> getServiceRecords(MarketoService marketoService, PluginTask task)
34
31
  {
35
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
36
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
37
- FluentIterable<ServiceRecord> serviceRecords = FluentIterable.from(marketoService.getAllListLead(task.getExtractedFields())).transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION);
38
- int imported = 0;
39
- for (ServiceRecord serviceRecord : serviceRecords) {
40
- if (imported >= PREVIEW_RECORD_LIMIT && Exec.isPreview()) {
41
- break;
42
- }
43
- recordImporter.importRecord(serviceRecord, pageBuilder);
44
- imported++;
45
- }
46
- return Exec.newTaskReport();
47
- }
32
+ return FluentIterable.from(marketoService.getAllListLead(task.getExtractedFields())).transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION).iterator();
48
33
  }
49
34
 
50
35
  @Override
@@ -2,18 +2,15 @@ package org.embulk.input.marketo.delegate;
2
2
 
3
3
  import com.google.common.collect.FluentIterable;
4
4
  import org.embulk.base.restclient.ServiceResponseMapper;
5
- import org.embulk.base.restclient.record.RecordImporter;
6
5
  import org.embulk.base.restclient.record.ServiceRecord;
7
6
  import org.embulk.base.restclient.record.ValueLocator;
8
- import org.embulk.config.TaskReport;
9
7
  import org.embulk.input.marketo.MarketoService;
10
8
  import org.embulk.input.marketo.MarketoServiceImpl;
11
9
  import org.embulk.input.marketo.MarketoUtils;
12
10
  import org.embulk.input.marketo.model.MarketoField;
13
11
  import org.embulk.input.marketo.rest.MarketoRestClient;
14
- import org.embulk.spi.Exec;
15
- import org.embulk.spi.PageBuilder;
16
12
 
13
+ import java.util.Iterator;
17
14
  import java.util.List;
18
15
 
19
16
  /**
@@ -26,23 +23,11 @@ public class LeadWithProgramInputPlugin extends MarketoBaseInputPluginDelegate<L
26
23
  }
27
24
 
28
25
  @Override
29
- public TaskReport ingestServiceData(PluginTask task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
26
+ protected Iterator<ServiceRecord> getServiceRecords(MarketoService marketoService, PluginTask task)
30
27
  {
31
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
32
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
33
- List<String> fieldNames = task.getExtractedFields();
34
- FluentIterable<ServiceRecord> serviceRecords = FluentIterable.from(marketoService.getAllProgramLead(fieldNames)).
35
- transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION);
36
- int imported = 0;
37
- for (ServiceRecord serviceRecord : serviceRecords) {
38
- if (imported >= PREVIEW_RECORD_LIMIT && Exec.isPreview()) {
39
- break;
40
- }
41
- recordImporter.importRecord(serviceRecord, pageBuilder);
42
- imported++;
43
- }
44
- }
45
- return Exec.newTaskReport();
28
+ List<String> fieldNames = task.getExtractedFields();
29
+ return FluentIterable.from(marketoService.getAllProgramLead(fieldNames)).
30
+ transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION).iterator();
46
31
  }
47
32
 
48
33
  @Override
@@ -1,11 +1,13 @@
1
1
  package org.embulk.input.marketo.delegate;
2
2
 
3
- import com.fasterxml.jackson.databind.JsonNode;
4
3
  import com.fasterxml.jackson.databind.node.ObjectNode;
4
+ import com.google.common.base.Function;
5
5
  import com.google.common.base.Optional;
6
+ import com.google.common.collect.Iterators;
6
7
  import org.embulk.base.restclient.jackson.JacksonServiceRecord;
7
8
  import org.embulk.base.restclient.jackson.JacksonServiceValue;
8
9
  import org.embulk.base.restclient.record.RecordImporter;
10
+ import org.embulk.base.restclient.record.ServiceRecord;
9
11
  import org.embulk.base.restclient.record.ValueLocator;
10
12
  import org.embulk.config.Config;
11
13
  import org.embulk.config.ConfigDefault;
@@ -14,7 +16,10 @@ import org.embulk.config.ConfigException;
14
16
  import org.embulk.config.ConfigInject;
15
17
  import org.embulk.config.TaskReport;
16
18
  import org.embulk.input.marketo.CsvTokenizer;
19
+ import org.embulk.input.marketo.MarketoService;
20
+ import org.embulk.input.marketo.MarketoServiceImpl;
17
21
  import org.embulk.input.marketo.MarketoUtils;
22
+ import org.embulk.input.marketo.rest.MarketoRestClient;
18
23
  import org.embulk.spi.BufferAllocator;
19
24
  import org.embulk.spi.Column;
20
25
  import org.embulk.spi.ColumnVisitor;
@@ -37,8 +42,11 @@ import java.text.DateFormat;
37
42
  import java.text.SimpleDateFormat;
38
43
  import java.util.ArrayList;
39
44
  import java.util.Date;
45
+ import java.util.HashMap;
46
+ import java.util.Iterator;
40
47
  import java.util.List;
41
48
  import java.util.Map;
49
+ import java.util.NoSuchElementException;
42
50
  import java.util.Set;
43
51
 
44
52
  /**
@@ -52,10 +60,10 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
52
60
 
53
61
  private static final DateTimeFormatter ISO_DATETIME_FORMAT = ISODateTimeFormat.dateTimeParser();
54
62
 
55
- private static final String IMPORTED_RECORD_COUNT = "imported";
56
-
57
63
  private static final String FROM_DATE = "from_date";
58
64
 
65
+ private static final int MARKETO_MAX_RANGE_EXTRACT = 30;
66
+
59
67
  public interface PluginTask extends MarketoBaseInputPluginDelegate.PluginTask, CsvTokenizer.PluginTask
60
68
  {
61
69
  @Config("from_date")
@@ -92,17 +100,18 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
92
100
  @ConfigDefault("null")
93
101
  Optional<Date> getToDate();
94
102
 
95
- void setToDate(Date toDate);
96
- }
103
+ void setToDate(Optional<Date> toDate);
97
104
 
98
- private String incrementalColumn;
105
+ @Config("incremental_column")
106
+ @ConfigDefault("\"createdAt\"")
107
+ Optional<String> getIncrementalColumn();
99
108
 
100
- private String uidColumn;
109
+ void setIncrementalColumn(Optional<String> incrementalColumn);
101
110
 
102
- public MarketoBaseBulkExtractInputPlugin(String incrementalColumn, String uidColumn)
103
- {
104
- this.incrementalColumn = incrementalColumn;
105
- this.uidColumn = uidColumn;
111
+ @Config("uid_column")
112
+ @ConfigDefault("null")
113
+ Optional<String> getUidColumn();
114
+ void setUidColumn(Optional<String> uidColumn);
106
115
  }
107
116
 
108
117
  @Override
@@ -112,12 +121,9 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
112
121
  if (task.getFromDate() == null) {
113
122
  throw new ConfigException("From date is required for Bulk Extract");
114
123
  }
115
- if (task.getFetchDays() > 30) {
116
- throw new ConfigException("Marketo bulk extract fetch days can't be more than 30");
117
- }
118
124
  //Calculate to date
119
125
  DateTime toDate = getToDate(task);
120
- task.setToDate(toDate.toDate());
126
+ task.setToDate(Optional.of(toDate.toDate()));
121
127
  }
122
128
 
123
129
  public DateTime getToDate(T task)
@@ -138,8 +144,8 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
138
144
  ConfigDiff configDiff = super.buildConfigDiff(task, schema, taskCount, taskReports);
139
145
  Long currentLatestFetchTime = 0L;
140
146
  Set latestUIds = null;
147
+ String incrementalColumn = task.getIncrementalColumn().orNull();
141
148
  if (incrementalColumn != null && task.getIncremental()) {
142
- int imported = 0;
143
149
  DateFormat df = new SimpleDateFormat(MarketoUtils.MARKETO_DATE_SIMPLE_DATE_FORMAT);
144
150
  for (TaskReport taskReport : taskReports) {
145
151
  Long latestFetchTime = taskReport.get(Long.class, LATEST_FETCH_TIME);
@@ -150,7 +156,9 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
150
156
  currentLatestFetchTime = latestFetchTime;
151
157
  latestUIds = taskReport.get(Set.class, LATEST_UID_LIST);
152
158
  }
153
- imported = imported + taskReport.get(Integer.class, IMPORTED_RECORD_COUNT);
159
+ else if (currentLatestFetchTime == latestFetchTime) {
160
+ latestUIds.addAll(taskReport.get(Set.class, LATEST_UID_LIST));
161
+ }
154
162
  }
155
163
  // in case of we didn't import anything but search range is entirely in the past. Then we should move the the range anyway.
156
164
  configDiff.set(FROM_DATE, df.format(task.getToDate().orNull()));
@@ -161,15 +169,66 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
161
169
  }
162
170
 
163
171
  @Override
164
- public TaskReport ingestServiceData(T task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
172
+ public TaskReport ingestServiceData(final T task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
165
173
  {
166
- InputStream extractedStream;
174
+ TaskReport taskReport = Exec.newTaskReport();
175
+ String incrementalColumn = task.getIncrementalColumn().orNull();
176
+ String uidColumn = task.getUidColumn().orNull();
167
177
  if (Exec.isPreview()) {
168
178
  return importMockPreviewData(pageBuilder);
169
179
  }
170
180
  else {
171
- extractedStream = getExtractedStream(task, pageBuilder.getSchema());
172
- return importRecordFromFile(task, extractedStream, recordImporter, pageBuilder);
181
+ try (LineDecoderIterator decoderIterator = getLineDecoderIterator(task)) {
182
+ Iterator<Map<String, String>> csvRecords = Iterators.concat(Iterators.transform(decoderIterator, new Function<LineDecoder, Iterator<Map<String, String>>>()
183
+ {
184
+ @Override
185
+ public Iterator<Map<String, String>> apply(LineDecoder input)
186
+ {
187
+ return new CsvRecordIterator(input, task);
188
+ }
189
+ }));
190
+ long currentTimestamp = 0L;
191
+ Set<String> latestUids = task.getPreviousUids();
192
+ //Keep the preview code here when we can enable real preview
193
+ if (Exec.isPreview()) {
194
+ csvRecords = Iterators.limit(csvRecords, PREVIEW_RECORD_LIMIT);
195
+ }
196
+ while (csvRecords.hasNext()) {
197
+ Map<String, String> csvRecord = csvRecords.next();
198
+ if (task.getIncremental()) {
199
+ if (!csvRecord.containsKey(incrementalColumn)) {
200
+ throw new DataException("Extracted record doesn't have incremental column " + incrementalColumn);
201
+ }
202
+ if (uidColumn != null) {
203
+ String uid = csvRecord.get(uidColumn);
204
+ if (latestUids.contains(uid)) {
205
+ //Duplicate value
206
+ continue;
207
+ }
208
+ }
209
+ String incrementalTimeStamp = csvRecord.get(incrementalColumn);
210
+ long timestamp = ISO_DATETIME_FORMAT.parseDateTime(incrementalTimeStamp).getMillis();
211
+ if (currentTimestamp < timestamp) {
212
+ currentTimestamp = timestamp;
213
+ //switch timestamp
214
+ latestUids.clear();
215
+ }
216
+ else if (currentTimestamp == timestamp) {
217
+ //timestamp is equal
218
+ if (uidColumn != null) {
219
+ String uid = csvRecord.get(uidColumn);
220
+ latestUids.add(uid);
221
+ }
222
+ }
223
+ }
224
+
225
+ ObjectNode objectNode = MarketoUtils.OBJECT_MAPPER.valueToTree(csvRecord);
226
+ recordImporter.importRecord(new AllStringJacksonServiceRecord(objectNode), pageBuilder);
227
+ }
228
+ taskReport.set(LATEST_FETCH_TIME, currentTimestamp);
229
+ taskReport.set(LATEST_UID_LIST, latestUids);
230
+ return taskReport;
231
+ }
173
232
  }
174
233
  }
175
234
 
@@ -227,76 +286,20 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
227
286
  return Exec.newTaskReport();
228
287
  }
229
288
 
230
- protected TaskReport importRecordFromFile(T task, InputStream inputStream, RecordImporter recordImporter, PageBuilder pageBuilder)
289
+ private LineDecoderIterator getLineDecoderIterator(T task)
231
290
  {
232
- Set<String> latestUids = task.getPreviousUids();
233
- TaskReport taskReport = Exec.newTaskReport();
234
- int imported = 0;
235
- long currentTimestamp = 0L;
236
- if (task.getLatestFetchTime().isPresent()) {
237
- currentTimestamp = task.getLatestFetchTime().get();
238
- }
239
- try (LineDecoder lineDecoder = new LineDecoder(new InputStreamFileInput(task.getBufferAllocator(), inputStream), task)) {
240
- CsvTokenizer csvTokenizer = new CsvTokenizer(lineDecoder, task);
241
- if (!csvTokenizer.nextFile()) {
242
- throw new DataException("Can't read extract input stream");
243
- }
244
- csvTokenizer.nextRecord();
245
- List<String> headers = new ArrayList<>();
246
- while (csvTokenizer.hasNextColumn()) {
247
- headers.add(csvTokenizer.nextColumn());
248
- }
249
- while (csvTokenizer.nextRecord() && (imported < PREVIEW_RECORD_LIMIT || !Exec.isPreview())) {
250
- List<String> values = new ArrayList<>();
251
- try {
252
- while (csvTokenizer.hasNextColumn()) {
253
- values.add(csvTokenizer.nextColumnOrNull());
254
- }
255
- }
256
- catch (CsvTokenizer.InvalidValueException ex) {
257
- throw new DataException("Encounter exception when parse csv file. Please check to see if you are using the correct" +
258
- "quote or escape character.", ex);
259
- }
260
- final Map<String, String> kvMap = MarketoUtils.zip(headers, values);
261
- ObjectNode objectNode = MarketoUtils.OBJECT_MAPPER.valueToTree(kvMap);
291
+ List<MarketoUtils.DateRange> dateRanges = MarketoUtils.sliceRange(new DateTime(task.getFromDate()), new DateTime(task.getToDate().orNull()), MARKETO_MAX_RANGE_EXTRACT);
292
+ final Iterator<MarketoUtils.DateRange> iterator = dateRanges.iterator();
293
+ return new LineDecoderIterator(iterator, task);
294
+ }
262
295
 
263
- if (task.getIncremental()) {
264
- if (!kvMap.containsKey(incrementalColumn)) {
265
- throw new DataException("Extracted record doesn't have incremental column " + incrementalColumn);
266
- }
267
- if (uidColumn != null) {
268
- String uid = kvMap.get(uidColumn);
269
- if (latestUids.contains(uid)) {
270
- //Duplicate value
271
- continue;
272
- }
273
- }
274
- String incrementalTimeStamp = kvMap.get(incrementalColumn);
275
- long timestamp = ISO_DATETIME_FORMAT.parseDateTime(incrementalTimeStamp).getMillis();
276
- if (currentTimestamp < timestamp) {
277
- currentTimestamp = timestamp;
278
- //switch timestamp
279
- latestUids.clear();
280
- }
281
- else if (currentTimestamp == timestamp) {
282
- //timestamp is equal
283
- if (uidColumn != null) {
284
- JsonNode uidField = objectNode.get(uidColumn);
285
- latestUids.add(uidField.asText());
286
- }
287
- }
288
- }
289
- recordImporter.importRecord(new AllStringJacksonServiceRecord(objectNode), pageBuilder);
290
- imported++;
291
- }
292
- }
293
- taskReport.set(LATEST_FETCH_TIME, currentTimestamp);
294
- taskReport.set(LATEST_UID_LIST, latestUids);
295
- taskReport.set(IMPORTED_RECORD_COUNT, imported);
296
- return taskReport;
296
+ @Override
297
+ protected final Iterator<ServiceRecord> getServiceRecords(MarketoService marketoService, T task)
298
+ {
299
+ throw new UnsupportedOperationException();
297
300
  }
298
301
 
299
- protected abstract InputStream getExtractedStream(T task, Schema schema);
302
+ protected abstract InputStream getExtractedStream(MarketoService service, T task, DateTime fromDate, DateTime toDate);
300
303
 
301
304
  private static class AllStringJacksonServiceRecord extends JacksonServiceRecord
302
305
  {
@@ -366,4 +369,127 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
366
369
  return timestampParser.parse(textValue);
367
370
  }
368
371
  }
372
+
373
+ private final class LineDecoderIterator implements Iterator<LineDecoder>, AutoCloseable
374
+ {
375
+ private LineDecoder currentLineDecoder;
376
+
377
+ private Iterator<MarketoUtils.DateRange> dateRangeIterator;
378
+
379
+ private MarketoService marketoService;
380
+
381
+ private MarketoRestClient marketoRestClient;
382
+ private T task;
383
+ public LineDecoderIterator(Iterator<MarketoUtils.DateRange> dateRangeIterator, T task)
384
+ {
385
+ marketoRestClient = createMarketoRestClient(task);
386
+ marketoService = new MarketoServiceImpl(marketoRestClient);
387
+ this.dateRangeIterator = dateRangeIterator;
388
+ this.task = task;
389
+ }
390
+
391
+ @Override
392
+ public void close()
393
+ {
394
+ if (currentLineDecoder != null) {
395
+ currentLineDecoder.close();
396
+ }
397
+ if (marketoRestClient != null) {
398
+ marketoRestClient.close();
399
+ }
400
+ }
401
+
402
+ @Override
403
+ public boolean hasNext()
404
+ {
405
+ return dateRangeIterator.hasNext();
406
+ }
407
+
408
+ @Override
409
+ public LineDecoder next()
410
+ {
411
+ if (hasNext()) {
412
+ MarketoUtils.DateRange next = dateRangeIterator.next();
413
+ InputStream extractedStream = getExtractedStream(marketoService, task, next.fromDate, next.toDate);
414
+ currentLineDecoder = new LineDecoder(new InputStreamFileInput(task.getBufferAllocator(), extractedStream), task);
415
+ return currentLineDecoder;
416
+ }
417
+ throw new NoSuchElementException();
418
+ }
419
+
420
+ @Override
421
+ public void remove()
422
+ {
423
+ throw new UnsupportedOperationException("Removed are not supported");
424
+ }
425
+ }
426
+
427
+ private class CsvRecordIterator implements Iterator<Map<String, String>>
428
+ {
429
+ private CsvTokenizer tokenizer;
430
+
431
+ private List<String> headers;
432
+
433
+ private Map<String, String> currentCsvRecord;
434
+ public CsvRecordIterator(LineDecoder lineDecoder, T task)
435
+ {
436
+ tokenizer = new CsvTokenizer(lineDecoder, task);
437
+ if (!tokenizer.nextFile()) {
438
+ throw new DataException("Can't read extract input stream");
439
+ }
440
+ headers = new ArrayList<>();
441
+ tokenizer.nextRecord();
442
+ while (tokenizer.hasNextColumn()) {
443
+ headers.add(tokenizer.nextColumn());
444
+ }
445
+ }
446
+
447
+ @Override
448
+ public boolean hasNext()
449
+ {
450
+ if (currentCsvRecord == null) {
451
+ currentCsvRecord = getNextCSVRecord();
452
+ }
453
+ return currentCsvRecord != null;
454
+ }
455
+
456
+ @Override
457
+ public Map<String, String> next()
458
+ {
459
+ try {
460
+ if (hasNext()) {
461
+ return currentCsvRecord;
462
+ }
463
+ }
464
+ finally {
465
+ currentCsvRecord = null;
466
+ }
467
+ throw new NoSuchElementException();
468
+ }
469
+
470
+ @Override
471
+ public void remove()
472
+ {
473
+ throw new UnsupportedOperationException();
474
+ }
475
+ private Map<String, String> getNextCSVRecord()
476
+ {
477
+ if (!tokenizer.nextRecord()) {
478
+ return null;
479
+ }
480
+ Map<String, String> kvMap = new HashMap<>();
481
+ try {
482
+ int i = 0;
483
+ while (tokenizer.hasNextColumn()) {
484
+ kvMap.put(headers.get(i), tokenizer.nextColumnOrNull());
485
+ i++;
486
+ }
487
+ }
488
+ catch (CsvTokenizer.InvalidValueException ex) {
489
+ throw new DataException("Encounter exception when parse csv file. Please check to see if you are using the correct" +
490
+ "quote or escape character.", ex);
491
+ }
492
+ return kvMap;
493
+ }
494
+ }
369
495
  }