embulk-input-marketo 0.5.7.alpha.6 → 0.6.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/build.gradle +6 -6
  3. data/src/main/java/org/embulk/input/marketo/MarketoService.java +1 -1
  4. data/src/main/java/org/embulk/input/marketo/MarketoServiceImpl.java +4 -5
  5. data/src/main/java/org/embulk/input/marketo/MarketoUtils.java +45 -14
  6. data/src/main/java/org/embulk/input/marketo/delegate/ActivityBulkExtractInputPlugin.java +12 -11
  7. data/src/main/java/org/embulk/input/marketo/delegate/CampaignInputPlugin.java +4 -23
  8. data/src/main/java/org/embulk/input/marketo/delegate/LeadBulkExtractInputPlugin.java +18 -11
  9. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithListInputPlugin.java +3 -18
  10. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithProgramInputPlugin.java +5 -20
  11. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseBulkExtractInputPlugin.java +213 -87
  12. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseInputPluginDelegate.java +22 -0
  13. data/src/main/java/org/embulk/input/marketo/model/MarketoBulkExtractRequest.java +12 -1
  14. data/src/main/java/org/embulk/input/marketo/model/MarketoField.java +28 -2
  15. data/src/main/java/org/embulk/input/marketo/model/MarketoResponse.java +2 -1
  16. data/src/main/java/org/embulk/input/marketo/model/filter/DateRangeFilter.java +9 -0
  17. data/src/main/java/org/embulk/input/marketo/rest/MarketoBaseRestClient.java +4 -0
  18. data/src/main/java/org/embulk/input/marketo/rest/{MarketoFileResponseEntityReader.java → MarketoInputStreamResponseEntityReader.java} +2 -2
  19. data/src/main/java/org/embulk/input/marketo/rest/MarketoResponseJetty92EntityReader.java +4 -1
  20. data/src/main/java/org/embulk/input/marketo/rest/MarketoRestClient.java +76 -42
  21. data/src/main/java/org/embulk/input/marketo/rest/RecordPagingIterable.java +35 -11
  22. data/src/test/java/org/embulk/input/marketo/MarketoServiceImplTest.java +159 -0
  23. data/src/test/java/org/embulk/input/marketo/MarketoUtilsTest.java +87 -0
  24. data/src/test/java/org/embulk/input/marketo/delegate/ActivityBulkExtractInputPluginTest.java +84 -0
  25. data/src/test/java/org/embulk/input/marketo/delegate/CampaignInputPluginTest.java +73 -0
  26. data/src/test/java/org/embulk/input/marketo/delegate/LeadBulkExtractInputPluginTest.java +94 -0
  27. data/src/test/java/org/embulk/input/marketo/delegate/LeadWithListInputPluginTest.java +99 -0
  28. data/src/test/java/org/embulk/input/marketo/delegate/LeadWithProgramInputPluginTest.java +101 -0
  29. data/src/test/java/org/embulk/input/marketo/delegate/MarketoBaseBulkExtractInputPluginTest.java +114 -0
  30. data/src/test/java/org/embulk/input/marketo/rest/MarketoBaseRestClientTest.java +3 -15
  31. data/src/test/java/org/embulk/input/marketo/rest/MarketoRestClientTest.java +450 -0
  32. data/src/test/resources/config/activity_bulk_extract_config.yaml +7 -0
  33. data/src/test/resources/config/lead_bulk_extract_config.yaml +8 -0
  34. data/src/test/resources/config/rest_config.yaml +3 -0
  35. data/src/test/resources/fixtures/activity_extract1.csv +35 -0
  36. data/src/test/resources/fixtures/activity_extract2.csv +22 -0
  37. data/src/test/resources/fixtures/all_program_full.json +53 -0
  38. data/src/test/resources/fixtures/campaign_response.json +38 -0
  39. data/src/test/resources/fixtures/campaign_response_full.json +102 -0
  40. data/src/test/resources/fixtures/lead_by_list.json +33 -0
  41. data/src/test/resources/fixtures/lead_by_program_response.json +47 -0
  42. data/src/test/resources/fixtures/lead_describe.json +221 -0
  43. data/src/test/resources/fixtures/lead_describe_expected.json +66 -0
  44. data/src/test/resources/fixtures/lead_describe_marketo_fields_full.json +518 -0
  45. data/src/test/resources/fixtures/lead_extract1.csv +11 -0
  46. data/src/test/resources/fixtures/lead_response_full.json +2402 -0
  47. data/src/test/resources/fixtures/lead_with_program_full.json +17 -0
  48. data/src/test/resources/fixtures/leads_extract2.csv +10 -0
  49. data/src/test/resources/fixtures/lists_response.json +31 -0
  50. data/src/test/resources/fixtures/program_response.json +71 -0
  51. metadata +39 -8
  52. data/src/main/java/org/embulk/input/marketo/model/filter/ListFilter.java +0 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3c729920ccd16968b191ec5c12cf8f982f925f80
4
- data.tar.gz: 1cf0acc73b4c5e90b8a11fd78770ee56791dc565
3
+ metadata.gz: 4581ee28de4fb94a896d4e18509f39086801c9eb
4
+ data.tar.gz: 352811dc47c6e42ff55bd1b0531fc83bc0c19949
5
5
  SHA512:
6
- metadata.gz: 3f14afd21ae9c968c6c0e6f1ebaf324421ae4ff10c1ceb74020ea458e970f039c2b81e9cf026c40e73089986155ea116ee1d3d5cdb47b1455b529f7de5c51506
7
- data.tar.gz: f54d8dff5fc370b8b6f4b2f793c3f90f9e9f57a510383edc83b4c996dc255951d8d6ee882b65d9146e609180a861cea793a1c2c8167d2905f44c8c479c0458c1
6
+ metadata.gz: 95d49092d1edee0454c4a4ed04c29c06c0ec44db9de8bc2aedccb06eb706e814a89317c9c55662d3e8cf6c967ac1e29209e3a27487a6429eb2743c416a7c9620
7
+ data.tar.gz: aea7947562f1d5b6753029a4de2c47f8edfb91c2f020863c8e916964591270219c09aeb3b1e85ab46f9b98b5c5a8c2976c14332d061bf9b1b14988736bf15598
data/build.gradle CHANGED
@@ -16,7 +16,7 @@ repositories {
16
16
  configurations {
17
17
  provided
18
18
  }
19
- version = "0.5.7.alpha.6"
19
+ version = "0.6.0.alpha.1"
20
20
  sourceCompatibility = 1.7
21
21
  targetCompatibility = 1.7
22
22
 
@@ -81,13 +81,13 @@ task gemspec {
81
81
  Gem::Specification.new do |spec|
82
82
  spec.name = "${project.name}"
83
83
  spec.version = "${project.version}"
84
- spec.authors = ["Tai Khuu"]
84
+ spec.authors = ["uu59", "yoshihara", "taikhuu"]
85
85
  spec.summary = %[Marketo input plugin for Embulk]
86
86
  spec.description = %[Loads records from Marketo.]
87
- spec.email = ["tai@treasuredata.com"]
88
- spec.licenses = ["MIT"]
89
- # TODO set this: spec.homepage = "https://github.com/khuutantai/embulk-input-marketo"
90
-
87
+ spec.email = ["k@uu59.org", "h.yoshihara@everyleaf.com", "tai@treasuredata.com"]
88
+ spec.licenses = ["Apache2"]
89
+ spec.homepage = "https://github.com/treasure-data/embulk-input-marketo"
90
+
91
91
  spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
92
92
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
93
93
  spec.require_paths = ["lib"]
@@ -18,7 +18,7 @@ public interface MarketoService
18
18
 
19
19
  List<MarketoField> describeLeadByLists();
20
20
 
21
- File extractLead(Date startTime, Date endTime, List<String> extractedFields, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond);
21
+ File extractLead(Date startTime, Date endTime, List<String> extractedFields, String filterField, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond);
22
22
 
23
23
  File extractAllActivity(Date startTime, Date endTime, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond);
24
24
 
@@ -39,9 +39,9 @@ public class MarketoServiceImpl implements MarketoService
39
39
  }
40
40
 
41
41
  @Override
42
- public File extractLead(Date startTime, Date endTime, List<String> extractedFields, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond)
42
+ public File extractLead(Date startTime, Date endTime, List<String> extractedFields, String filterField, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond)
43
43
  {
44
- String exportID = marketoRestClient.createLeadBulkExtract(startTime, endTime, extractedFields);
44
+ String exportID = marketoRestClient.createLeadBulkExtract(startTime, endTime, extractedFields, filterField);
45
45
  marketoRestClient.startLeadBulkExtract(exportID);
46
46
  try {
47
47
  marketoRestClient.waitLeadExportJobComplete(exportID, pollingTimeIntervalSecond, bulkJobTimeoutSecond);
@@ -70,7 +70,7 @@ public class MarketoServiceImpl implements MarketoService
70
70
  @Override
71
71
  public File extractAllActivity(Date startTime, Date endTime, int pollingTimeIntervalSecond, int bulkJobTimeoutSecond)
72
72
  {
73
- String exportID = marketoRestClient.createActitvityExtract(startTime, endTime, null);
73
+ String exportID = marketoRestClient.createActivityExtract(startTime, endTime);
74
74
  marketoRestClient.startActitvityBulkExtract(exportID);
75
75
  try {
76
76
  marketoRestClient.waitActitvityExportJobComplete(exportID, pollingTimeIntervalSecond, bulkJobTimeoutSecond);
@@ -92,9 +92,8 @@ public class MarketoServiceImpl implements MarketoService
92
92
  final String id = node.get("id").asText();
93
93
  iterables.add(Iterables.transform(marketoRestClient.getLeadsByList(id, fieldNames), new Function<ObjectNode, ObjectNode>()
94
94
  {
95
- @Nullable
96
95
  @Override
97
- public ObjectNode apply(@Nullable ObjectNode input)
96
+ public ObjectNode apply(ObjectNode input)
98
97
  {
99
98
  input.put(MarketoUtils.LIST_ID_COLUMN_NAME, id);
100
99
  return input;
@@ -11,18 +11,12 @@ import org.embulk.base.restclient.jackson.JacksonTopLevelValueLocator;
11
11
  import org.embulk.base.restclient.record.ServiceRecord;
12
12
  import org.embulk.base.restclient.record.ValueLocator;
13
13
  import org.embulk.input.marketo.model.MarketoField;
14
- import org.embulk.spi.Exec;
15
14
  import org.joda.time.DateTime;
16
- import org.slf4j.Logger;
17
15
 
18
16
  import javax.annotation.Nullable;
19
17
 
20
18
  import java.util.ArrayList;
21
- import java.util.Calendar;
22
- import java.util.Date;
23
- import java.util.HashMap;
24
19
  import java.util.List;
25
- import java.util.Map;
26
20
  import java.util.Set;
27
21
 
28
22
  /**
@@ -71,7 +65,7 @@ public class MarketoUtils
71
65
 
72
66
  public static List<String> getFieldNameFromMarketoFields(List<MarketoField> columns, String... excludedFields)
73
67
  {
74
- Set<String> excludeFields= Sets.newHashSet(excludedFields);
68
+ Set<String> excludeFields = Sets.newHashSet(excludedFields);
75
69
  List<String> extractedFields = new ArrayList<>();
76
70
  for (MarketoField column : columns) {
77
71
  if (excludeFields.contains(column.getName())) {
@@ -82,17 +76,54 @@ public class MarketoUtils
82
76
  return extractedFields;
83
77
  }
84
78
 
85
- public static <K, V> Map<K, V> zip(List<K> keys, List<V> values)
79
+ public static String buildColumnName(String prefix, String columnName)
80
+ {
81
+ return prefix + "_" + columnName;
82
+ }
83
+
84
+ public static final List<DateRange> sliceRange(DateTime fromDate, DateTime toDate, int rangeSize)
86
85
  {
87
- Map<K, V> kvMap = new HashMap<>();
88
- for (int i = 0; i < keys.size(); i++) {
89
- kvMap.put(keys.get(i), values.get(i));
86
+ List<DateRange> ranges = new ArrayList<>();
87
+ while (fromDate.isBefore(toDate)) {
88
+ DateTime nextToDate = fromDate.plusDays(rangeSize);
89
+ if (nextToDate.isAfter(toDate)) {
90
+ ranges.add(new DateRange(fromDate, toDate));
91
+ break;
92
+ }
93
+ ranges.add(new DateRange(fromDate, nextToDate));
94
+ fromDate = nextToDate.plusSeconds(1);
90
95
  }
91
- return kvMap;
96
+ return ranges;
92
97
  }
93
98
 
94
- public static String buildColumnName(String prefix, String columnName)
99
+ public static String getIdentityEndPoint(String accountId)
95
100
  {
96
- return prefix + "_" + columnName;
101
+ return "https://" + accountId + ".mktorest.com/identity";
102
+ }
103
+
104
+ public static String getEndPoint(String accountID)
105
+ {
106
+ return "https://" + accountID + ".mktorest.com";
107
+ }
108
+
109
+ public static final class DateRange
110
+ {
111
+ public final DateTime fromDate;
112
+ public final DateTime toDate;
113
+
114
+ public DateRange(DateTime fromDate, DateTime toDate)
115
+ {
116
+ this.fromDate = fromDate;
117
+ this.toDate = toDate;
118
+ }
119
+
120
+ @Override
121
+ public String toString()
122
+ {
123
+ return "DateRange{" +
124
+ "fromDate=" + fromDate +
125
+ ", toDate=" + toDate +
126
+ '}';
127
+ }
97
128
  }
98
129
  }
@@ -1,22 +1,20 @@
1
1
  package org.embulk.input.marketo.delegate;
2
2
 
3
+ import com.google.common.base.Optional;
3
4
  import org.embulk.base.restclient.ServiceResponseMapper;
4
5
  import org.embulk.base.restclient.jackson.JacksonServiceResponseMapper;
5
6
  import org.embulk.base.restclient.record.ValueLocator;
6
7
  import org.embulk.input.marketo.MarketoService;
7
- import org.embulk.input.marketo.MarketoServiceImpl;
8
8
  import org.embulk.input.marketo.MarketoUtils;
9
- import org.embulk.input.marketo.rest.MarketoRestClient;
10
9
  import org.embulk.spi.DataException;
11
10
  import org.embulk.spi.Exec;
12
- import org.embulk.spi.Schema;
13
11
  import org.embulk.spi.type.Types;
12
+ import org.joda.time.DateTime;
14
13
  import org.slf4j.Logger;
15
14
 
16
15
  import java.io.FileInputStream;
17
16
  import java.io.FileNotFoundException;
18
17
  import java.io.InputStream;
19
- import java.util.Date;
20
18
 
21
19
  /**
22
20
  * Created by tai.khuu on 9/18/17.
@@ -24,21 +22,24 @@ import java.util.Date;
24
22
  public class ActivityBulkExtractInputPlugin extends MarketoBaseBulkExtractInputPlugin<ActivityBulkExtractInputPlugin.PluginTask>
25
23
  {
26
24
  private static final Logger LOGGER = Exec.getLogger(ActivityBulkExtractInputPlugin.class);
25
+ public static final String INCREMENTAL_COLUMN = "activityDate";
26
+ public static final String UID_COLUMN = "marketoGUID";
27
27
 
28
28
  public interface PluginTask extends MarketoBaseBulkExtractInputPlugin.PluginTask {}
29
29
 
30
- public ActivityBulkExtractInputPlugin()
30
+ @Override
31
+ public void validateInputTask(PluginTask task)
31
32
  {
32
- super("activityDate", "marketoGUID");
33
+ task.setIncrementalColumn(Optional.of(INCREMENTAL_COLUMN));
34
+ task.setUidColumn(Optional.of(UID_COLUMN));
35
+ super.validateInputTask(task);
33
36
  }
34
37
 
35
38
  @Override
36
- protected InputStream getExtractedStream(PluginTask task, Schema schema)
39
+ protected InputStream getExtractedStream(MarketoService service, PluginTask task, DateTime fromDate, DateTime toDate)
37
40
  {
38
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
39
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
40
- Date fromDate = task.getFromDate();
41
- return new FileInputStream(marketoService.extractAllActivity(fromDate, task.getToDate().orNull(), task.getPollingIntervalSecond(), task.getBulkJobTimeoutSecond()));
41
+ try {
42
+ return new FileInputStream(service.extractAllActivity(fromDate.toDate(), toDate.toDate(), task.getPollingIntervalSecond(), task.getBulkJobTimeoutSecond()));
42
43
  }
43
44
  catch (FileNotFoundException e) {
44
45
  LOGGER.error("Exception when trying to extract activity", e);
@@ -3,19 +3,14 @@ package org.embulk.input.marketo.delegate;
3
3
  import com.google.common.collect.FluentIterable;
4
4
  import org.embulk.base.restclient.ServiceResponseMapper;
5
5
  import org.embulk.base.restclient.jackson.JacksonServiceResponseMapper;
6
- import org.embulk.base.restclient.record.RecordImporter;
7
6
  import org.embulk.base.restclient.record.ServiceRecord;
8
7
  import org.embulk.base.restclient.record.ValueLocator;
9
- import org.embulk.config.TaskReport;
10
8
  import org.embulk.input.marketo.MarketoService;
11
- import org.embulk.input.marketo.MarketoServiceImpl;
12
9
  import org.embulk.input.marketo.MarketoUtils;
13
- import org.embulk.input.marketo.model.MarketoField;
14
- import org.embulk.input.marketo.rest.MarketoRestClient;
15
- import org.embulk.spi.Exec;
16
- import org.embulk.spi.PageBuilder;
17
10
  import org.embulk.spi.type.Types;
18
11
 
12
+ import java.util.Iterator;
13
+
19
14
  /**
20
15
  * Input plugin used to import Campaign
21
16
  * Created by tai.khuu on 9/18/17.
@@ -31,23 +26,9 @@ public class CampaignInputPlugin extends MarketoBaseInputPluginDelegate<Campaign
31
26
  }
32
27
 
33
28
  @Override
34
- public TaskReport ingestServiceData(PluginTask task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
29
+ protected Iterator<ServiceRecord> getServiceRecords(MarketoService marketoService, PluginTask task)
35
30
  {
36
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
37
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
38
- {
39
- FluentIterable<ServiceRecord> serviceRecords = FluentIterable.from(marketoService.getCampaign()).transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION);
40
- int imported = 0;
41
- for (ServiceRecord serviceRecord : serviceRecords) {
42
- if (imported >= PREVIEW_RECORD_LIMIT && Exec.isPreview()) {
43
- break;
44
- }
45
- recordImporter.importRecord(serviceRecord, pageBuilder);
46
- imported++;
47
- }
48
- return Exec.newTaskReport();
49
- }
50
- }
31
+ return FluentIterable.from(marketoService.getCampaign()).transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION).iterator();
51
32
  }
52
33
 
53
34
  @Override
@@ -1,7 +1,10 @@
1
1
  package org.embulk.input.marketo.delegate;
2
2
 
3
+ import com.google.common.base.Optional;
3
4
  import org.embulk.base.restclient.ServiceResponseMapper;
4
5
  import org.embulk.base.restclient.record.ValueLocator;
6
+ import org.embulk.config.Config;
7
+ import org.embulk.config.ConfigDefault;
5
8
  import org.embulk.input.marketo.MarketoService;
6
9
  import org.embulk.input.marketo.MarketoServiceImpl;
7
10
  import org.embulk.input.marketo.MarketoUtils;
@@ -9,14 +12,12 @@ import org.embulk.input.marketo.model.MarketoField;
9
12
  import org.embulk.input.marketo.rest.MarketoRestClient;
10
13
  import org.embulk.spi.DataException;
11
14
  import org.embulk.spi.Exec;
12
- import org.embulk.spi.Schema;
15
+ import org.joda.time.DateTime;
13
16
  import org.slf4j.Logger;
14
17
 
15
- import java.io.File;
16
18
  import java.io.FileInputStream;
17
19
  import java.io.FileNotFoundException;
18
20
  import java.io.InputStream;
19
- import java.util.Date;
20
21
  import java.util.List;
21
22
 
22
23
  /**
@@ -26,24 +27,30 @@ public class LeadBulkExtractInputPlugin extends MarketoBaseBulkExtractInputPlugi
26
27
  {
27
28
  private static final Logger LOGGER = Exec.getLogger(LeadBulkExtractInputPlugin.class);
28
29
 
30
+ private static final String UPDATED_AT = "updatedAt";
31
+
29
32
  public interface PluginTask extends MarketoBaseBulkExtractInputPlugin.PluginTask
30
33
  {
34
+ @Config("use_updated_at")
35
+ @ConfigDefault("false")
36
+ boolean getUseUpdatedAt();
31
37
  }
32
38
 
33
- public LeadBulkExtractInputPlugin()
39
+ @Override
40
+ public void validateInputTask(PluginTask task)
34
41
  {
35
- super("updatedAt", null);
42
+ if (task.getUseUpdatedAt()) {
43
+ task.setIncrementalColumn(Optional.of(UPDATED_AT));
44
+ }
45
+ super.validateInputTask(task);
36
46
  }
37
47
 
38
48
  @Override
39
- protected InputStream getExtractedStream(PluginTask task, Schema schema)
49
+ protected InputStream getExtractedStream(MarketoService service, PluginTask task, DateTime fromDate, DateTime toDate)
40
50
  {
41
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
42
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
51
+ try {
43
52
  List<String> fieldNames = task.getExtractedFields();
44
- Date fromDate = task.getFromDate();
45
- File file = marketoService.extractLead(fromDate, task.getToDate().orNull(), fieldNames, task.getPollingIntervalSecond(), task.getBulkJobTimeoutSecond());
46
- return new FileInputStream(file);
53
+ return new FileInputStream(service.extractLead(fromDate.toDate(), toDate.toDate(), fieldNames, task.getIncrementalColumn().orNull(), task.getPollingIntervalSecond(), task.getBulkJobTimeoutSecond()));
47
54
  }
48
55
  catch (FileNotFoundException e) {
49
56
  LOGGER.error("File not found", e);
@@ -2,18 +2,15 @@ package org.embulk.input.marketo.delegate;
2
2
 
3
3
  import com.google.common.collect.FluentIterable;
4
4
  import org.embulk.base.restclient.ServiceResponseMapper;
5
- import org.embulk.base.restclient.record.RecordImporter;
6
5
  import org.embulk.base.restclient.record.ServiceRecord;
7
6
  import org.embulk.base.restclient.record.ValueLocator;
8
- import org.embulk.config.TaskReport;
9
7
  import org.embulk.input.marketo.MarketoService;
10
8
  import org.embulk.input.marketo.MarketoServiceImpl;
11
9
  import org.embulk.input.marketo.MarketoUtils;
12
10
  import org.embulk.input.marketo.model.MarketoField;
13
11
  import org.embulk.input.marketo.rest.MarketoRestClient;
14
- import org.embulk.spi.Exec;
15
- import org.embulk.spi.PageBuilder;
16
12
 
13
+ import java.util.Iterator;
17
14
  import java.util.List;
18
15
 
19
16
  /**
@@ -30,21 +27,9 @@ public class LeadWithListInputPlugin extends MarketoBaseInputPluginDelegate<Lead
30
27
  }
31
28
 
32
29
  @Override
33
- public TaskReport ingestServiceData(PluginTask task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
30
+ protected Iterator<ServiceRecord> getServiceRecords(MarketoService marketoService, PluginTask task)
34
31
  {
35
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
36
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
37
- FluentIterable<ServiceRecord> serviceRecords = FluentIterable.from(marketoService.getAllListLead(task.getExtractedFields())).transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION);
38
- int imported = 0;
39
- for (ServiceRecord serviceRecord : serviceRecords) {
40
- if (imported >= PREVIEW_RECORD_LIMIT && Exec.isPreview()) {
41
- break;
42
- }
43
- recordImporter.importRecord(serviceRecord, pageBuilder);
44
- imported++;
45
- }
46
- return Exec.newTaskReport();
47
- }
32
+ return FluentIterable.from(marketoService.getAllListLead(task.getExtractedFields())).transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION).iterator();
48
33
  }
49
34
 
50
35
  @Override
@@ -2,18 +2,15 @@ package org.embulk.input.marketo.delegate;
2
2
 
3
3
  import com.google.common.collect.FluentIterable;
4
4
  import org.embulk.base.restclient.ServiceResponseMapper;
5
- import org.embulk.base.restclient.record.RecordImporter;
6
5
  import org.embulk.base.restclient.record.ServiceRecord;
7
6
  import org.embulk.base.restclient.record.ValueLocator;
8
- import org.embulk.config.TaskReport;
9
7
  import org.embulk.input.marketo.MarketoService;
10
8
  import org.embulk.input.marketo.MarketoServiceImpl;
11
9
  import org.embulk.input.marketo.MarketoUtils;
12
10
  import org.embulk.input.marketo.model.MarketoField;
13
11
  import org.embulk.input.marketo.rest.MarketoRestClient;
14
- import org.embulk.spi.Exec;
15
- import org.embulk.spi.PageBuilder;
16
12
 
13
+ import java.util.Iterator;
17
14
  import java.util.List;
18
15
 
19
16
  /**
@@ -26,23 +23,11 @@ public class LeadWithProgramInputPlugin extends MarketoBaseInputPluginDelegate<L
26
23
  }
27
24
 
28
25
  @Override
29
- public TaskReport ingestServiceData(PluginTask task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
26
+ protected Iterator<ServiceRecord> getServiceRecords(MarketoService marketoService, PluginTask task)
30
27
  {
31
- try (MarketoRestClient marketoRestClient = createMarketoRestClient(task)) {
32
- MarketoService marketoService = new MarketoServiceImpl(marketoRestClient);
33
- List<String> fieldNames = task.getExtractedFields();
34
- FluentIterable<ServiceRecord> serviceRecords = FluentIterable.from(marketoService.getAllProgramLead(fieldNames)).
35
- transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION);
36
- int imported = 0;
37
- for (ServiceRecord serviceRecord : serviceRecords) {
38
- if (imported >= PREVIEW_RECORD_LIMIT && Exec.isPreview()) {
39
- break;
40
- }
41
- recordImporter.importRecord(serviceRecord, pageBuilder);
42
- imported++;
43
- }
44
- }
45
- return Exec.newTaskReport();
28
+ List<String> fieldNames = task.getExtractedFields();
29
+ return FluentIterable.from(marketoService.getAllProgramLead(fieldNames)).
30
+ transform(MarketoUtils.TRANSFORM_OBJECT_TO_JACKSON_SERVICE_RECORD_FUNCTION).iterator();
46
31
  }
47
32
 
48
33
  @Override
@@ -1,11 +1,13 @@
1
1
  package org.embulk.input.marketo.delegate;
2
2
 
3
- import com.fasterxml.jackson.databind.JsonNode;
4
3
  import com.fasterxml.jackson.databind.node.ObjectNode;
4
+ import com.google.common.base.Function;
5
5
  import com.google.common.base.Optional;
6
+ import com.google.common.collect.Iterators;
6
7
  import org.embulk.base.restclient.jackson.JacksonServiceRecord;
7
8
  import org.embulk.base.restclient.jackson.JacksonServiceValue;
8
9
  import org.embulk.base.restclient.record.RecordImporter;
10
+ import org.embulk.base.restclient.record.ServiceRecord;
9
11
  import org.embulk.base.restclient.record.ValueLocator;
10
12
  import org.embulk.config.Config;
11
13
  import org.embulk.config.ConfigDefault;
@@ -14,7 +16,10 @@ import org.embulk.config.ConfigException;
14
16
  import org.embulk.config.ConfigInject;
15
17
  import org.embulk.config.TaskReport;
16
18
  import org.embulk.input.marketo.CsvTokenizer;
19
+ import org.embulk.input.marketo.MarketoService;
20
+ import org.embulk.input.marketo.MarketoServiceImpl;
17
21
  import org.embulk.input.marketo.MarketoUtils;
22
+ import org.embulk.input.marketo.rest.MarketoRestClient;
18
23
  import org.embulk.spi.BufferAllocator;
19
24
  import org.embulk.spi.Column;
20
25
  import org.embulk.spi.ColumnVisitor;
@@ -37,8 +42,11 @@ import java.text.DateFormat;
37
42
  import java.text.SimpleDateFormat;
38
43
  import java.util.ArrayList;
39
44
  import java.util.Date;
45
+ import java.util.HashMap;
46
+ import java.util.Iterator;
40
47
  import java.util.List;
41
48
  import java.util.Map;
49
+ import java.util.NoSuchElementException;
42
50
  import java.util.Set;
43
51
 
44
52
  /**
@@ -52,10 +60,10 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
52
60
 
53
61
  private static final DateTimeFormatter ISO_DATETIME_FORMAT = ISODateTimeFormat.dateTimeParser();
54
62
 
55
- private static final String IMPORTED_RECORD_COUNT = "imported";
56
-
57
63
  private static final String FROM_DATE = "from_date";
58
64
 
65
+ private static final int MARKETO_MAX_RANGE_EXTRACT = 30;
66
+
59
67
  public interface PluginTask extends MarketoBaseInputPluginDelegate.PluginTask, CsvTokenizer.PluginTask
60
68
  {
61
69
  @Config("from_date")
@@ -92,17 +100,18 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
92
100
  @ConfigDefault("null")
93
101
  Optional<Date> getToDate();
94
102
 
95
- void setToDate(Date toDate);
96
- }
103
+ void setToDate(Optional<Date> toDate);
97
104
 
98
- private String incrementalColumn;
105
+ @Config("incremental_column")
106
+ @ConfigDefault("\"createdAt\"")
107
+ Optional<String> getIncrementalColumn();
99
108
 
100
- private String uidColumn;
109
+ void setIncrementalColumn(Optional<String> incrementalColumn);
101
110
 
102
- public MarketoBaseBulkExtractInputPlugin(String incrementalColumn, String uidColumn)
103
- {
104
- this.incrementalColumn = incrementalColumn;
105
- this.uidColumn = uidColumn;
111
+ @Config("uid_column")
112
+ @ConfigDefault("null")
113
+ Optional<String> getUidColumn();
114
+ void setUidColumn(Optional<String> uidColumn);
106
115
  }
107
116
 
108
117
  @Override
@@ -112,12 +121,9 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
112
121
  if (task.getFromDate() == null) {
113
122
  throw new ConfigException("From date is required for Bulk Extract");
114
123
  }
115
- if (task.getFetchDays() > 30) {
116
- throw new ConfigException("Marketo bulk extract fetch days can't be more than 30");
117
- }
118
124
  //Calculate to date
119
125
  DateTime toDate = getToDate(task);
120
- task.setToDate(toDate.toDate());
126
+ task.setToDate(Optional.of(toDate.toDate()));
121
127
  }
122
128
 
123
129
  public DateTime getToDate(T task)
@@ -138,8 +144,8 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
138
144
  ConfigDiff configDiff = super.buildConfigDiff(task, schema, taskCount, taskReports);
139
145
  Long currentLatestFetchTime = 0L;
140
146
  Set latestUIds = null;
147
+ String incrementalColumn = task.getIncrementalColumn().orNull();
141
148
  if (incrementalColumn != null && task.getIncremental()) {
142
- int imported = 0;
143
149
  DateFormat df = new SimpleDateFormat(MarketoUtils.MARKETO_DATE_SIMPLE_DATE_FORMAT);
144
150
  for (TaskReport taskReport : taskReports) {
145
151
  Long latestFetchTime = taskReport.get(Long.class, LATEST_FETCH_TIME);
@@ -150,7 +156,9 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
150
156
  currentLatestFetchTime = latestFetchTime;
151
157
  latestUIds = taskReport.get(Set.class, LATEST_UID_LIST);
152
158
  }
153
- imported = imported + taskReport.get(Integer.class, IMPORTED_RECORD_COUNT);
159
+ else if (currentLatestFetchTime == latestFetchTime) {
160
+ latestUIds.addAll(taskReport.get(Set.class, LATEST_UID_LIST));
161
+ }
154
162
  }
155
163
  // In case we didn't import anything but the search range is entirely in the past, we should move the range anyway.
156
164
  configDiff.set(FROM_DATE, df.format(task.getToDate().orNull()));
@@ -161,15 +169,66 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
161
169
  }
162
170
 
163
171
  @Override
164
- public TaskReport ingestServiceData(T task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
172
+ public TaskReport ingestServiceData(final T task, RecordImporter recordImporter, int taskIndex, PageBuilder pageBuilder)
165
173
  {
166
- InputStream extractedStream;
174
+ TaskReport taskReport = Exec.newTaskReport();
175
+ String incrementalColumn = task.getIncrementalColumn().orNull();
176
+ String uidColumn = task.getUidColumn().orNull();
167
177
  if (Exec.isPreview()) {
168
178
  return importMockPreviewData(pageBuilder);
169
179
  }
170
180
  else {
171
- extractedStream = getExtractedStream(task, pageBuilder.getSchema());
172
- return importRecordFromFile(task, extractedStream, recordImporter, pageBuilder);
181
+ try (LineDecoderIterator decoderIterator = getLineDecoderIterator(task)) {
182
+ Iterator<Map<String, String>> csvRecords = Iterators.concat(Iterators.transform(decoderIterator, new Function<LineDecoder, Iterator<Map<String, String>>>()
183
+ {
184
+ @Override
185
+ public Iterator<Map<String, String>> apply(LineDecoder input)
186
+ {
187
+ return new CsvRecordIterator(input, task);
188
+ }
189
+ }));
190
+ long currentTimestamp = 0L;
191
+ Set<String> latestUids = task.getPreviousUids();
192
+ // Keep the preview code here for when we can enable real preview
193
+ if (Exec.isPreview()) {
194
+ csvRecords = Iterators.limit(csvRecords, PREVIEW_RECORD_LIMIT);
195
+ }
196
+ while (csvRecords.hasNext()) {
197
+ Map<String, String> csvRecord = csvRecords.next();
198
+ if (task.getIncremental()) {
199
+ if (!csvRecord.containsKey(incrementalColumn)) {
200
+ throw new DataException("Extracted record doesn't have incremental column " + incrementalColumn);
201
+ }
202
+ if (uidColumn != null) {
203
+ String uid = csvRecord.get(uidColumn);
204
+ if (latestUids.contains(uid)) {
205
+ //Duplicate value
206
+ continue;
207
+ }
208
+ }
209
+ String incrementalTimeStamp = csvRecord.get(incrementalColumn);
210
+ long timestamp = ISO_DATETIME_FORMAT.parseDateTime(incrementalTimeStamp).getMillis();
211
+ if (currentTimestamp < timestamp) {
212
+ currentTimestamp = timestamp;
213
+ //switch timestamp
214
+ latestUids.clear();
215
+ }
216
+ else if (currentTimestamp == timestamp) {
217
+ //timestamp is equal
218
+ if (uidColumn != null) {
219
+ String uid = csvRecord.get(uidColumn);
220
+ latestUids.add(uid);
221
+ }
222
+ }
223
+ }
224
+
225
+ ObjectNode objectNode = MarketoUtils.OBJECT_MAPPER.valueToTree(csvRecord);
226
+ recordImporter.importRecord(new AllStringJacksonServiceRecord(objectNode), pageBuilder);
227
+ }
228
+ taskReport.set(LATEST_FETCH_TIME, currentTimestamp);
229
+ taskReport.set(LATEST_UID_LIST, latestUids);
230
+ return taskReport;
231
+ }
173
232
  }
174
233
  }
175
234
 
@@ -227,76 +286,20 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
227
286
  return Exec.newTaskReport();
228
287
  }
229
288
 
230
- protected TaskReport importRecordFromFile(T task, InputStream inputStream, RecordImporter recordImporter, PageBuilder pageBuilder)
289
+ private LineDecoderIterator getLineDecoderIterator(T task)
231
290
  {
232
- Set<String> latestUids = task.getPreviousUids();
233
- TaskReport taskReport = Exec.newTaskReport();
234
- int imported = 0;
235
- long currentTimestamp = 0L;
236
- if (task.getLatestFetchTime().isPresent()) {
237
- currentTimestamp = task.getLatestFetchTime().get();
238
- }
239
- try (LineDecoder lineDecoder = new LineDecoder(new InputStreamFileInput(task.getBufferAllocator(), inputStream), task)) {
240
- CsvTokenizer csvTokenizer = new CsvTokenizer(lineDecoder, task);
241
- if (!csvTokenizer.nextFile()) {
242
- throw new DataException("Can't read extract input stream");
243
- }
244
- csvTokenizer.nextRecord();
245
- List<String> headers = new ArrayList<>();
246
- while (csvTokenizer.hasNextColumn()) {
247
- headers.add(csvTokenizer.nextColumn());
248
- }
249
- while (csvTokenizer.nextRecord() && (imported < PREVIEW_RECORD_LIMIT || !Exec.isPreview())) {
250
- List<String> values = new ArrayList<>();
251
- try {
252
- while (csvTokenizer.hasNextColumn()) {
253
- values.add(csvTokenizer.nextColumnOrNull());
254
- }
255
- }
256
- catch (CsvTokenizer.InvalidValueException ex) {
257
- throw new DataException("Encounter exception when parse csv file. Please check to see if you are using the correct" +
258
- "quote or escape character.", ex);
259
- }
260
- final Map<String, String> kvMap = MarketoUtils.zip(headers, values);
261
- ObjectNode objectNode = MarketoUtils.OBJECT_MAPPER.valueToTree(kvMap);
291
+ List<MarketoUtils.DateRange> dateRanges = MarketoUtils.sliceRange(new DateTime(task.getFromDate()), new DateTime(task.getToDate().orNull()), MARKETO_MAX_RANGE_EXTRACT);
292
+ final Iterator<MarketoUtils.DateRange> iterator = dateRanges.iterator();
293
+ return new LineDecoderIterator(iterator, task);
294
+ }
262
295
 
263
- if (task.getIncremental()) {
264
- if (!kvMap.containsKey(incrementalColumn)) {
265
- throw new DataException("Extracted record doesn't have incremental column " + incrementalColumn);
266
- }
267
- if (uidColumn != null) {
268
- String uid = kvMap.get(uidColumn);
269
- if (latestUids.contains(uid)) {
270
- //Duplicate value
271
- continue;
272
- }
273
- }
274
- String incrementalTimeStamp = kvMap.get(incrementalColumn);
275
- long timestamp = ISO_DATETIME_FORMAT.parseDateTime(incrementalTimeStamp).getMillis();
276
- if (currentTimestamp < timestamp) {
277
- currentTimestamp = timestamp;
278
- //switch timestamp
279
- latestUids.clear();
280
- }
281
- else if (currentTimestamp == timestamp) {
282
- //timestamp is equal
283
- if (uidColumn != null) {
284
- JsonNode uidField = objectNode.get(uidColumn);
285
- latestUids.add(uidField.asText());
286
- }
287
- }
288
- }
289
- recordImporter.importRecord(new AllStringJacksonServiceRecord(objectNode), pageBuilder);
290
- imported++;
291
- }
292
- }
293
- taskReport.set(LATEST_FETCH_TIME, currentTimestamp);
294
- taskReport.set(LATEST_UID_LIST, latestUids);
295
- taskReport.set(IMPORTED_RECORD_COUNT, imported);
296
- return taskReport;
296
+ @Override
297
+ protected final Iterator<ServiceRecord> getServiceRecords(MarketoService marketoService, T task)
298
+ {
299
+ throw new UnsupportedOperationException();
297
300
  }
298
301
 
299
- protected abstract InputStream getExtractedStream(T task, Schema schema);
302
+ protected abstract InputStream getExtractedStream(MarketoService service, T task, DateTime fromDate, DateTime toDate);
300
303
 
301
304
  private static class AllStringJacksonServiceRecord extends JacksonServiceRecord
302
305
  {
@@ -366,4 +369,127 @@ public abstract class MarketoBaseBulkExtractInputPlugin<T extends MarketoBaseBul
366
369
  return timestampParser.parse(textValue);
367
370
  }
368
371
  }
372
+
373
+ private final class LineDecoderIterator implements Iterator<LineDecoder>, AutoCloseable
374
+ {
375
+ private LineDecoder currentLineDecoder;
376
+
377
+ private Iterator<MarketoUtils.DateRange> dateRangeIterator;
378
+
379
+ private MarketoService marketoService;
380
+
381
+ private MarketoRestClient marketoRestClient;
382
+ private T task;
383
+ public LineDecoderIterator(Iterator<MarketoUtils.DateRange> dateRangeIterator, T task)
384
+ {
385
+ marketoRestClient = createMarketoRestClient(task);
386
+ marketoService = new MarketoServiceImpl(marketoRestClient);
387
+ this.dateRangeIterator = dateRangeIterator;
388
+ this.task = task;
389
+ }
390
+
391
+ @Override
392
+ public void close()
393
+ {
394
+ if (currentLineDecoder != null) {
395
+ currentLineDecoder.close();
396
+ }
397
+ if (marketoRestClient != null) {
398
+ marketoRestClient.close();
399
+ }
400
+ }
401
+
402
+ @Override
403
+ public boolean hasNext()
404
+ {
405
+ return dateRangeIterator.hasNext();
406
+ }
407
+
408
+ @Override
409
+ public LineDecoder next()
410
+ {
411
+ if (hasNext()) {
412
+ MarketoUtils.DateRange next = dateRangeIterator.next();
413
+ InputStream extractedStream = getExtractedStream(marketoService, task, next.fromDate, next.toDate);
414
+ currentLineDecoder = new LineDecoder(new InputStreamFileInput(task.getBufferAllocator(), extractedStream), task);
415
+ return currentLineDecoder;
416
+ }
417
+ throw new NoSuchElementException();
418
+ }
419
+
420
+ @Override
421
+ public void remove()
422
+ {
423
+ throw new UnsupportedOperationException("Removed are not supported");
424
+ }
425
+ }
426
+
427
+ private class CsvRecordIterator implements Iterator<Map<String, String>>
428
+ {
429
+ private CsvTokenizer tokenizer;
430
+
431
+ private List<String> headers;
432
+
433
+ private Map<String, String> currentCsvRecord;
434
+ public CsvRecordIterator(LineDecoder lineDecoder, T task)
435
+ {
436
+ tokenizer = new CsvTokenizer(lineDecoder, task);
437
+ if (!tokenizer.nextFile()) {
438
+ throw new DataException("Can't read extract input stream");
439
+ }
440
+ headers = new ArrayList<>();
441
+ tokenizer.nextRecord();
442
+ while (tokenizer.hasNextColumn()) {
443
+ headers.add(tokenizer.nextColumn());
444
+ }
445
+ }
446
+
447
+ @Override
448
+ public boolean hasNext()
449
+ {
450
+ if (currentCsvRecord == null) {
451
+ currentCsvRecord = getNextCSVRecord();
452
+ }
453
+ return currentCsvRecord != null;
454
+ }
455
+
456
+ @Override
457
+ public Map<String, String> next()
458
+ {
459
+ try {
460
+ if (hasNext()) {
461
+ return currentCsvRecord;
462
+ }
463
+ }
464
+ finally {
465
+ currentCsvRecord = null;
466
+ }
467
+ throw new NoSuchElementException();
468
+ }
469
+
470
+ @Override
471
+ public void remove()
472
+ {
473
+ throw new UnsupportedOperationException();
474
+ }
475
+ private Map<String, String> getNextCSVRecord()
476
+ {
477
+ if (!tokenizer.nextRecord()) {
478
+ return null;
479
+ }
480
+ Map<String, String> kvMap = new HashMap<>();
481
+ try {
482
+ int i = 0;
483
+ while (tokenizer.hasNextColumn()) {
484
+ kvMap.put(headers.get(i), tokenizer.nextColumnOrNull());
485
+ i++;
486
+ }
487
+ }
488
+ catch (CsvTokenizer.InvalidValueException ex) {
489
+ throw new DataException("Encounter exception when parse csv file. Please check to see if you are using the correct" +
490
+ "quote or escape character.", ex);
491
+ }
492
+ return kvMap;
493
+ }
494
+ }
369
495
  }