embulk-input-marketo 0.5.6 → 0.5.7.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/.github/PULL_REQUEST_TEMPLATE.md +37 -0
  3. data/.gitignore +11 -2
  4. data/.travis.yml +5 -45
  5. data/LICENSE.txt +21 -0
  6. data/README.md +14 -65
  7. data/build.gradle +102 -0
  8. data/config/checkstyle/checkstyle.xml +128 -0
  9. data/config/checkstyle/default.xml +108 -0
  10. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  11. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  12. data/gradlew +169 -0
  13. data/gradlew.bat +84 -0
  14. data/lib/embulk/input/marketo.rb +3 -0
  15. data/settings.gradle +1 -0
  16. data/src/main/java/org/embulk/input/marketo/CsvTokenizer.java +677 -0
  17. data/src/main/java/org/embulk/input/marketo/MarketoInputPlugin.java +15 -0
  18. data/src/main/java/org/embulk/input/marketo/MarketoInputPluginDelegate.java +77 -0
  19. data/src/main/java/org/embulk/input/marketo/MarketoService.java +30 -0
  20. data/src/main/java/org/embulk/input/marketo/MarketoServiceImpl.java +176 -0
  21. data/src/main/java/org/embulk/input/marketo/MarketoUtils.java +172 -0
  22. data/src/main/java/org/embulk/input/marketo/delegate/ActivityBulkExtractInputPlugin.java +63 -0
  23. data/src/main/java/org/embulk/input/marketo/delegate/CampaignInputPlugin.java +67 -0
  24. data/src/main/java/org/embulk/input/marketo/delegate/LeadBulkExtractInputPlugin.java +61 -0
  25. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithListInputPlugin.java +58 -0
  26. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithProgramInputPlugin.java +56 -0
  27. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseBulkExtractInputPlugin.java +260 -0
  28. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseInputPluginDelegate.java +69 -0
  29. data/src/main/java/org/embulk/input/marketo/exception/MarketoAPIException.java +30 -0
  30. data/src/main/java/org/embulk/input/marketo/model/MarketoAccessTokenResponse.java +92 -0
  31. data/src/main/java/org/embulk/input/marketo/model/MarketoBulkExtractRequest.java +59 -0
  32. data/src/main/java/org/embulk/input/marketo/model/MarketoError.java +40 -0
  33. data/src/main/java/org/embulk/input/marketo/model/MarketoField.java +91 -0
  34. data/src/main/java/org/embulk/input/marketo/model/MarketoResponse.java +81 -0
  35. data/src/main/java/org/embulk/input/marketo/model/filter/DateRangeFilter.java +31 -0
  36. data/src/main/java/org/embulk/input/marketo/model/filter/ListFilter.java +10 -0
  37. data/src/main/java/org/embulk/input/marketo/model/filter/MarketoFilter.java +8 -0
  38. data/src/main/java/org/embulk/input/marketo/rest/MarketoBaseRestClient.java +226 -0
  39. data/src/main/java/org/embulk/input/marketo/rest/MarketoFileResponseEntityReader.java +69 -0
  40. data/src/main/java/org/embulk/input/marketo/rest/MarketoRESTEndpoint.java +44 -0
  41. data/src/main/java/org/embulk/input/marketo/rest/MarketoResponseJetty92EntityReader.java +88 -0
  42. data/src/main/java/org/embulk/input/marketo/rest/MarketoRestClient.java +332 -0
  43. data/src/main/java/org/embulk/input/marketo/rest/RecordPagingIterable.java +130 -0
  44. data/src/test/java/org/embulk/input/marketo/TestMarketoInputPlugin.java +5 -0
  45. data/src/test/java/org/embulk/input/marketo/rest/MarketoBaseRestClientTest.java +220 -0
  46. metadata +65 -222
  47. data/.ruby-version +0 -1
  48. data/.travis.yml.erb +0 -42
  49. data/Gemfile +0 -3
  50. data/LICENSE +0 -13
  51. data/Rakefile +0 -20
  52. data/embulk-input-marketo.gemspec +0 -28
  53. data/gemfiles/embulk-latest +0 -4
  54. data/gemfiles/template.erb +0 -4
  55. data/lib/embulk/input/marketo/activity_log.rb +0 -103
  56. data/lib/embulk/input/marketo/base.rb +0 -139
  57. data/lib/embulk/input/marketo/lead.rb +0 -143
  58. data/lib/embulk/input/marketo_api.rb +0 -22
  59. data/lib/embulk/input/marketo_api/soap/activity_log.rb +0 -103
  60. data/lib/embulk/input/marketo_api/soap/base.rb +0 -135
  61. data/lib/embulk/input/marketo_api/soap/lead.rb +0 -91
  62. data/test/activity_log_fixtures.rb +0 -216
  63. data/test/embulk/input/marketo/test_activity_log.rb +0 -444
  64. data/test/embulk/input/marketo/test_base.rb +0 -76
  65. data/test/embulk/input/marketo/test_lead.rb +0 -605
  66. data/test/embulk/input/marketo_api/soap/test_activity_log.rb +0 -154
  67. data/test/embulk/input/marketo_api/soap/test_base.rb +0 -96
  68. data/test/embulk/input/marketo_api/soap/test_lead.rb +0 -139
  69. data/test/embulk/input/test_marketo_api.rb +0 -28
  70. data/test/lead_fixtures.rb +0 -111
  71. data/test/mute_logger.rb +0 -7
  72. data/test/override_assert_raise.rb +0 -18
  73. data/test/prepare_embulk.rb +0 -15
  74. data/test/run-test.rb +0 -26
  75. data/test/savon_helper.rb +0 -17
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- jruby-9.0.5.0
data/.travis.yml.erb DELETED
@@ -1,42 +0,0 @@
1
- language: ruby
2
-
3
- addons:
4
- code_climate:
5
- repo_token:
6
- secure: "cYPXD2Dv1VOU0rdUSsevrGuHcjBajCTq8s961R8d2pPkX+V1AbmIMKK0if00qMgqR7D65p6jOnfiuZfXolF1z4awqLXNu7LhP6zOr1hMRCOHOfjj/SJLbm5MFreN81G+6k4XpfBlA9YShA9E/nNVxBmakxC8aCnlJHGfDUbkb9kmNX/LXSNf0efZMRVr3vN8tP6tHP0TUCx90A2FNols9qHgl+boxF090OpgFTspyeqC6K82Q4D97RwajnM+oKIBMfxuc4Pc1n+PZ+mQQfYNK7ze4WszXRtQydwDD7rtF5d6IwczElZ243GjXxdJWL6inUvcMJQogC5X8ayh7m0ZNT2awfbJXT1tsLte42j+/eUAPaCD07bE8XzgdmfTdmpxP6nHasAMkleDMlcBRrxht+cGD61qEXcsSKd0c7mnvg35L+hl39NGFde0yz1Xxx/D69p9KHE0pa7cZNC7D8n1w9AUxFdl8OX6rfDlgiapIonAF/QG/5Z3ltHDzZu6XK1MMFugrdV7SxuSepL3xgBv5jN0jMA/GVmvFbXf6FGDXxt/bhSUCZoTJ/c8BaRnIM+QTkBts6TqgQ6BeUQNAe5p0TtssNwMW8RC4DpkaLDeQRSScJJ6V6UDpjsPJ3hcozpzrCVwy/5F72MYzcHeKg85EF30aA94Q+EDbsKUR0BEeGo="
7
-
8
- before_install:
9
- - |
10
- # Currently, Travis can't treat jruby 9.0.1.0
11
- ruby -v
12
- rvm get head
13
- rvm use jruby-9.0.5.0 --install
14
- ruby -v
15
- gem i bundler
16
- bundle install
17
-
18
- jdk: oraclejdk8
19
-
20
- rvm:
21
- - jruby-9.0.5.0
22
-
23
- script: bundle exec rake cov
24
-
25
- gemfile:
26
- <% versions.each do |file| -%>
27
- - gemfiles/<%= file %>
28
- <% end -%>
29
-
30
- matrix:
31
- exclude:
32
- - jdk: oraclejdk8 # Ignore all matrix at first, use `include` to allow build
33
- include:
34
- <% matrix.each do |m| -%>
35
- <%= m %>
36
- <% end %>
37
-
38
- allow_failures:
39
- # Ignore failure for *-latest
40
- <% versions.find_all{|file| file.to_s.match(/-latest/)}.each do |file| -%>
41
- - gemfile: <%= file %>
42
- <% end %>
data/Gemfile DELETED
@@ -1,3 +0,0 @@
1
- source 'https://rubygems.org/'
2
-
3
- gemspec
data/LICENSE DELETED
@@ -1,13 +0,0 @@
1
- Copyright 2015 Everyleaf Corporation
2
-
3
- Licensed under the Apache License, Version 2.0 (the "License");
4
- you may not use this file except in compliance with the License.
5
- You may obtain a copy of the License at
6
-
7
- http://www.apache.org/licenses/LICENSE-2.0
8
-
9
- Unless required by applicable law or agreed to in writing, software
10
- distributed under the License is distributed on an "AS IS" BASIS,
11
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- See the License for the specific language governing permissions and
13
- limitations under the License.
data/Rakefile DELETED
@@ -1,20 +0,0 @@
1
- require "bundler/gem_tasks"
2
- require "everyleaf/embulk_helper/tasks"
3
-
4
- task default: :test
5
-
6
- desc "Run tests"
7
- task :test do
8
- ruby("test/run-test.rb", "--use-color=yes", "--collector=dir")
9
- end
10
-
11
- desc "Run tests with coverage"
12
- task :cov do
13
- ENV["COVERAGE"] = "1"
14
- ruby("--debug", "test/run-test.rb", "--use-color=yes", "--collector=dir")
15
- end
16
-
17
- Everyleaf::EmbulkHelper::Tasks.install(
18
- gemspec: "./embulk-input-marketo.gemspec",
19
- github_name: "treasure-data/embulk-input-marketo",
20
- )
@@ -1,28 +0,0 @@
1
- Gem::Specification.new do |spec|
2
- spec.name = "embulk-input-marketo"
3
- spec.version = "0.5.6"
4
- spec.authors = ["uu59", "yoshihara"]
5
- spec.summary = "Marketo input plugin for Embulk"
6
- spec.description = "Loads records from Marketo."
7
- spec.email = ["k@uu59.org", "h.yoshihara@everyleaf.com"]
8
- spec.licenses = ["Apache2"]
9
- spec.homepage = "https://github.com/treasure-data/embulk-input-marketo"
10
-
11
- spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
12
- spec.test_files = spec.files.grep(%r{^(test|spec)/})
13
- spec.require_paths = ["lib"]
14
-
15
- spec.add_dependency 'savon', ['~> 2.11.1']
16
- spec.add_dependency 'httpclient', '>= 2.8.3' # To use tcp_keepalive
17
- spec.add_dependency 'httpi', '2.4.2' # To use tcp_keepalive with monky patch (See lib/embulk/input/marketo/base.rb)
18
- spec.add_dependency 'perfect_retry', ["~> 0.5"]
19
- spec.add_development_dependency 'embulk', [">= 0.6.13", "< 1.0"]
20
- spec.add_development_dependency 'bundler', ['~> 1.0']
21
- spec.add_development_dependency 'rake', ['>= 10.0']
22
- spec.add_development_dependency 'pry'
23
- spec.add_development_dependency 'test-unit'
24
- spec.add_development_dependency 'rr', "1.1.2" # 1.2.0 causes strange error.. https://travis-ci.org/treasure-data/embulk-input-marketo/jobs/141537151
25
- spec.add_development_dependency 'test-unit-rr'
26
- spec.add_development_dependency 'codeclimate-test-reporter', "~> 0.5"
27
- spec.add_development_dependency 'everyleaf-embulk_helper'
28
- end
@@ -1,4 +0,0 @@
1
- source 'https://rubygems.org/'
2
- gemspec :path => '../'
3
-
4
- gem "embulk", "> 0.8.7"
@@ -1,4 +0,0 @@
1
- source 'https://rubygems.org/'
2
- gemspec :path => '../'
3
-
4
- gem "embulk", "<%= version %>"
@@ -1,103 +0,0 @@
1
- require "embulk/input/marketo/base"
2
-
3
- module Embulk
4
- module Input
5
- module Marketo
6
- class ActivityLog < Base
7
- BATCH_SIZE_DEFAULT = 100
8
-
9
- Plugin.register_input("marketo/activity_log", self)
10
-
11
- def self.target
12
- :activity_log
13
- end
14
-
15
- def self.resume(task, columns, count, &control)
16
- task_reports = yield(task, columns, count)
17
-
18
- # NOTE: If this plugin supports to run by multi threads, this
19
- # implementation is terrible.
20
- next_config_diff = task_reports.first
21
- return next_config_diff
22
- end
23
-
24
- def self.transaction(config, &control)
25
- endpoint_url = config.param(:endpoint, :string)
26
-
27
- range = format_range(config)
28
-
29
- task = {
30
- endpoint_url: endpoint_url,
31
- wsdl_url: config.param(:wsdl, :string, default: "#{endpoint_url}?WSDL"),
32
- user_id: config.param(:user_id, :string),
33
- encryption_key: config.param(:encryption_key, :string),
34
- from_datetime: range[:from],
35
- to_datetime: range[:to],
36
- retry_initial_wait_sec: config.param(:retry_initial_wait_sec, :integer, default: 1),
37
- retry_limit: config.param(:retry_limit, :integer, default: 5),
38
- columns: config.param(:columns, :array)
39
- }
40
- validate_url(task[:endpoint_url], "endpoint")
41
- validate_url(task[:wsdl_url], "wsdl")
42
-
43
- resume(task, embulk_columns(config), 1, &control)
44
- end
45
-
46
- def self.guess(config)
47
- client = soap_client(config)
48
- range = format_range(config)
49
-
50
- schema = client.metadata(range[:from], batch_size: PREVIEW_COUNT)
51
- columns = schema.map do |c|
52
- column = {name: c.name, type: c.type}
53
- column[:format] = c.format if c.format
54
- column
55
- end
56
-
57
- return {"columns" => columns}
58
- end
59
-
60
- def init
61
- @columns = task[:columns]
62
- @soap = MarketoApi.soap_client(task, target)
63
- @options = {
64
- retry_initial_wait_sec: task[:retry_initial_wait_sec],
65
- retry_limit: task[:retry_limit],
66
- to: task[:to_datetime],
67
- batch_size: (preview? ? PREVIEW_COUNT : BATCH_SIZE_DEFAULT),
68
- }
69
- end
70
-
71
- def run
72
- counter = 0
73
- latest_updated_at = @soap.each(task[:from_datetime], @options) do |activity_log|
74
- page_builder.add(format_record(activity_log))
75
- break if preview? && (counter += 1) >= PREVIEW_COUNT
76
- end
77
-
78
- page_builder.finish
79
-
80
- task_report = {}
81
- if !preview?
82
- from_datetime = latest_updated_at || task[:from_datetime]
83
- if from_datetime
84
- task_report = {
85
- from_datetime: from_datetime
86
- }
87
- end
88
- end
89
-
90
- return task_report
91
- end
92
-
93
- def format_record(activity_log)
94
- @columns.map do |column|
95
- name = column["name"].to_s
96
- value = activity_log[name]
97
- cast_value(column, value)
98
- end
99
- end
100
- end
101
- end
102
- end
103
- end
@@ -1,139 +0,0 @@
1
- require "embulk/input/marketo_api"
2
-
3
- module Embulk
4
- module Input
5
- module Marketo
6
- class Base < InputPlugin
7
- PREVIEW_COUNT = 15
8
-
9
- attr_reader :soap
10
-
11
- def self.target
12
- raise NotImplementedError
13
- end
14
-
15
- def self.soap_client(config)
16
- endpoint_url = config.param(:endpoint, :string)
17
- soap_config = {
18
- endpoint_url: endpoint_url,
19
- wsdl_url: config.param(:wsdl, :string, default: "#{endpoint_url}?WSDL"),
20
- user_id: config.param(:user_id, :string),
21
- encryption_key: config.param(:encryption_key, :string),
22
- }
23
- validate_url(soap_config[:endpoint_url], "endpoint")
24
- validate_url(soap_config[:wsdl_url], "wsdl")
25
-
26
- MarketoApi.soap_client(soap_config, target)
27
- end
28
-
29
- def self.embulk_columns(config)
30
- config.param(:columns, :array).map do |column|
31
- name = column["name"]
32
- type = column["type"].to_sym
33
-
34
- Column.new(nil, name, type, column["format"])
35
- end
36
- end
37
-
38
- def self.format_range(config)
39
- if config.param(:last_updated_at, :string, default: nil)
40
- Embulk.logger.warn "config: last_updated_at is deprecated. Use from_datetime/to_datetime"
41
- end
42
-
43
- from_datetime = config.param(:from_datetime, :string)
44
- to_datetime = config.param(:to_datetime, :string, default: Time.now.to_s)
45
-
46
- # check from/to format to parse
47
- begin
48
- Time.parse(from_datetime)
49
- Time.parse(to_datetime)
50
- rescue => e
51
- # possibly Time.parse fail
52
- raise ConfigError.new e.message
53
- end
54
-
55
- if Time.parse(from_datetime) > Time.parse(to_datetime)
56
- raise ConfigError.new "config: from_datetime '#{from_datetime}' is later than '#{to_datetime}'."
57
- end
58
-
59
- {
60
- from: from_datetime,
61
- to: to_datetime,
62
- }
63
- end
64
-
65
- def self.timeslice(from, to, count)
66
- generate_time_range(from, to).each_slice(count).to_a
67
- end
68
-
69
- def self.generate_time_range(from, to)
70
- # e.g. from = 2010-01-01 15:00, to = 2010-01-03 09:30
71
- # convert to such array:
72
- # [
73
- # {from: 2010-01-01 15:00, to: 2010-01-01 16:00},
74
- # {from: 2010-01-01 16:00, to: 2010-01-01 17:00},
75
- # ...
76
- # {from: 2010-01-03 08:00, to: 2010-01-03 09:00},
77
- # {from: 2010-01-03 09:00, to: 2010-01-03 09:30},
78
- # ]
79
- # to fetch data from Marketo API with each day as
80
- # desribed on official blog:
81
- # http://developers.marketo.com/blog/performance-tuning-api-requests/
82
- to ||= Time.now
83
- from = Time.parse(from) unless from.is_a?(Time)
84
- to = Time.parse(to) unless to.is_a?(Time)
85
-
86
- result = []
87
- since = from
88
- while since < to
89
- next_since = since + 3600
90
- if to < next_since
91
- next_since = to
92
- end
93
- result << {
94
- "from" => since,
95
- "to" => next_since
96
- }
97
- since = next_since
98
- end
99
- result
100
- end
101
-
102
- private
103
-
104
- def self.validate_url(url, key)
105
- URI.parse(url)
106
- rescue URI::InvalidURIError
107
- raise ConfigError.new("#{key}: '#{url}' is not a valid URL.")
108
- end
109
-
110
- def preview?
111
- begin
112
- org.embulk.spi.Exec.isPreview()
113
- rescue java.lang.NullPointerException => e
114
- false
115
- end
116
- end
117
-
118
- def cast_value(column, value)
119
- return if value.to_s.empty? # nil or empty string
120
-
121
- case column["type"].to_s
122
- when "timestamp"
123
- begin
124
- Time.parse(value)
125
- rescue => e
126
- raise ConfigError.new "Can't parse as Time '#{value}' (column is #{column["name"]})"
127
- end
128
- else
129
- value
130
- end
131
- end
132
-
133
- def target
134
- self.class.target
135
- end
136
- end
137
- end
138
- end
139
- end
@@ -1,143 +0,0 @@
1
- require "embulk/input/marketo/base"
2
-
3
- module Embulk
4
- module Input
5
- module Marketo
6
- class Lead < Base
7
- TIMESLICE_COUNT_PER_TASK = 24
8
-
9
- Plugin.register_input("marketo/lead", self)
10
-
11
- def self.target
12
- :lead
13
- end
14
-
15
- def self.guess(config)
16
- if config.param(:last_updated_at, :string, default: nil)
17
- Embulk.logger.warn "config: last_updated_at is deprecated. Use from_datetime/to_datetime"
18
- end
19
-
20
- client = soap_client(config)
21
- metadata = client.metadata
22
-
23
- return {"columns" => generate_columns(metadata)}
24
- end
25
-
26
- def self.resume(task, columns, count, &control)
27
- task_reports = yield(task, columns, count)
28
-
29
- return {}
30
- end
31
-
32
- def self.transaction(config, &control)
33
- endpoint_url = config.param(:endpoint, :string)
34
-
35
- range = format_range(config)
36
- ranges = timeslice(range[:from], range[:to], TIMESLICE_COUNT_PER_TASK)
37
-
38
- append_processed_time_column = config.param(:append_processed_time_column, :bool, default: true)
39
-
40
- task = {
41
- endpoint_url: endpoint_url,
42
- wsdl_url: config.param(:wsdl, :string, default: "#{endpoint_url}?WSDL"),
43
- user_id: config.param(:user_id, :string),
44
- encryption_key: config.param(:encryption_key, :string),
45
- from_datetime: range[:from],
46
- to_datetime: range[:to],
47
- ranges: ranges,
48
- retry_initial_wait_sec: config.param(:retry_initial_wait_sec, :integer, default: 1),
49
- retry_limit: config.param(:retry_limit, :integer, default: 5),
50
- append_processed_time_column: append_processed_time_column,
51
- columns: config.param(:columns, :array),
52
- }
53
- validate_url(task[:endpoint_url], "endpoint")
54
- validate_url(task[:wsdl_url], "wsdl")
55
-
56
- columns = embulk_columns(config)
57
- if append_processed_time_column
58
- processed_time_column = Column.new(nil, :processed_time, :timestamp, "%Y-%m-%dT%H:%M:%S%z")
59
- columns << processed_time_column
60
- end
61
-
62
- resume(task, columns, ranges.size, &control)
63
- end
64
-
65
- def self.generate_columns(metadata)
66
- columns = [
67
- {name: "id", type: "long"},
68
- {name: "email", type: "string"},
69
- ]
70
-
71
- metadata.each do |field|
72
- type =
73
- case field[:data_type]
74
- when "integer"
75
- "long"
76
- when "datetime", "date"
77
- "timestamp"
78
- when "string", "text", "phone", "currency"
79
- "string"
80
- when "boolean"
81
- "boolean"
82
- when "float"
83
- "double"
84
- else
85
- "string"
86
- end
87
-
88
- columns << {name: field[:name], type: type}
89
- end
90
-
91
- columns
92
- end
93
-
94
- def init
95
- @columns = task[:columns]
96
- if preview?
97
- # Try newer date at first to reduce cache miss hit
98
- @ranges = task[:ranges].flatten.sort_by{|range| Time.parse(range["to"])}.reverse
99
- else
100
- @ranges = task[:ranges][index]
101
- end
102
- @soap = MarketoApi.soap_client(task, target)
103
- @append_processed_time_column = task[:append_processed_time_column]
104
- @options = {
105
- retry_initial_wait_sec: task[:retry_initial_wait_sec],
106
- retry_limit: task[:retry_limit],
107
- }
108
- @options[:batch_size] = PREVIEW_COUNT if preview?
109
- end
110
-
111
- def run
112
- counter = 0
113
- catch(:finish) do
114
- @ranges.each do |range|
115
- soap.each(range, @options) do |lead|
116
- page_builder.add(format_record(lead, range))
117
- throw(:finish) if preview? && (counter += 1) >= PREVIEW_COUNT
118
- end
119
- end
120
- end
121
-
122
- page_builder.finish
123
-
124
- task_report = {}
125
- return task_report
126
- end
127
-
128
- def format_record(lead, range)
129
- values = @columns.map do |column|
130
- name = column["name"].to_s
131
- value = (lead[name] || {})[:value]
132
- cast_value(column, value)
133
- end
134
-
135
- if @append_processed_time_column
136
- values << Time.parse(range["from"])
137
- end
138
- values
139
- end
140
- end
141
- end
142
- end
143
- end