embulk-output-influxdb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bd1ac581900456f6a43fcfb3ddd7276526fc2764
4
+ data.tar.gz: 7b3bc69eb29a6bac858236853ae4e09382c6ad57
5
+ SHA512:
6
+ metadata.gz: d0f949ab9a638167771aed41f4c4a8ff338f3b60597ac225562105bdde59926dff07fcc4b71c7b6decd7f1e9fd161c03317c4b4c4be16f0568327aac1f283ca8
7
+ data.tar.gz: 21984a22409343d5c896850fc1f52e0febcd2e766b481df7f494d08edfe0246d9a6305c4f721a19c718d574ccb8f0709c4fa02c3d40529fcaa0d0df78975653f
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
6
+ /jruby
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.0.1.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # InfluxDB output plugin for Embulk
2
+
3
+ ## Overview
4
+
5
+ * **Plugin type**: output
6
+ * **Load all or nothing**: no
7
+ * **Resume supported**: no
8
+ * **Cleanup supported**: yes
9
+ * **Dynamic Database creating**: yes
10
+ * **Dynamic Series creating**: yes
11
+
12
+ ## Configuration
13
+
14
+ - **host**: hostname (string, default: localhost)
15
+ - **port**: port number (integer, default: 8086)
16
+ - **username**: username (string, default: 'root')
17
+ - **password**: password (string, default: 'root')
18
+ - **database**: database name (string, required)
19
+ - **series**: series name (string, required unless `series_per_column` is true) (can use column value placeholders; see the example)
20
+ - **mode**: "insert", or "replace". See below. (string, default: insert)
21
+ - **timestamp_column**: timestamp column (string, default: nil)
22
+ - **ignore_columns**: ignore column names (array[string], default: nil)
23
+ - **default_timezone**: default timezone for column (string, default: 'UTC')
24
+
25
+ ### Modes
26
+
27
+ * **insert**:
28
+ * Behavior: This mode simply inserts rows.
29
+ * **replace**:
30
+ * Behavior: Same as insert mode, except that it drops the target series first.
31
+
32
+ ## Example
33
+
34
+ ```yaml
35
+ out:
36
+ type: influxdb
37
+ username: root
38
+ password: root
39
+ database: dbname
40
+ series: ${key_name}_series
41
+ timestamp_column: day
42
+ mode: replace
43
+ ignore_columns:
44
+ - key_name
45
+ ```
46
+
47
+
48
+ ## Build
49
+
50
+ ```
51
+ $ rake
52
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,23 @@
1
+
2
# Gem specification for the embulk-output-influxdb plugin.
Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name        = "embulk-output-influxdb"
  gem.version     = "0.1.0"
  gem.authors     = ["joker1007"]
  gem.email       = ["kakyoin.hierophant@gmail.com"]
  gem.summary     = "InfluxDB output plugin for Embulk"
  gem.description = "Dumps records to InfluxDB."
  gem.licenses    = ["MIT"]
  gem.homepage    = "https://github.com/joker1007/embulk-output-influxdb"

  # Packaged files: everything tracked by git plus any jars under classpath/.
  tracked_files     = `git ls-files`.split("\n")
  gem.files         = tracked_files + Dir["classpath/*.jar"]
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies (name => version requirements).
  {
    'embulk'  => ['~> 0.7.4'],
    'bundler' => ['~> 1.0'],
    'rake'    => ['>= 10.0'],
    'tapp'    => [],
  }.each do |dep_name, requirements|
    gem.add_development_dependency(dep_name, *requirements)
  end

  # Runtime dependencies.
  gem.add_runtime_dependency 'influxdb', '~> 0.2'
  gem.add_runtime_dependency 'timezone'
end
data/example.csv ADDED
@@ -0,0 +1,13 @@
1
+ key_name,day,value
2
+ new_clients,2015-08-22,1
3
+ new_clients,2015-08-25,2
4
+ new_clients,2015-08-26,3
5
+ new_clients,2015-08-27,4
6
+ new_clients,2015-08-28,10
7
+ new_clients,2015-08-29,11
8
+ new_clients,2015-09-01,12
9
+ new_clients,2015-09-03,13
10
+ new_clients,2015-09-05,21
11
+ new_clients,2015-09-07,22
12
+ new_clients,2015-09-11,23
13
+ new_clients,2015-09-12,24
data/example.yml ADDED
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: file
3
+ path_prefix: example.csv
4
+ parser:
5
+ type: csv
6
+ skip_header_lines: 1
7
+ columns:
8
+ - {name: key_name, type: string}
9
+ - {name: day, type: timestamp, format: '%Y-%m-%d'}
10
+ - {name: new_clients, type: long}
11
+
12
+ out:
13
+ type: influxdb
14
+ username: root
15
+ password: root
16
+ database: dbname
17
+ # series: ${key_name}_series
18
+ series_per_column: true
19
+ timestamp_column: day
20
+ mode: replace
21
+ ignore_columns:
22
+ - key_name
@@ -0,0 +1,175 @@
1
+ require 'influxdb'
2
+ require 'timezone'
3
+
4
+ module Embulk
5
+ module Output
6
+
7
+ class Influxdb < OutputPlugin
8
+ Plugin.register_output("influxdb", self)
9
+
10
# Builds the task hash from the plugin configuration and runs the output
# tasks by yielding that hash to the given block.
#
# config  - configuration object; each setting is read with
#           config.param(name, type, default: ...).
# schema  - input schema (not used here).
# count   - number of tasks (not used here).
# control - block that receives the task hash.
#
# Returns an empty Hash as the next config diff (non-resumable output).
def self.transaction(config, schema, count, &control)
  task = {
    "host" => config.param("host", :string, default: "localhost"),
    # Default is nil rather than the string "localhost": a truthy "hosts"
    # value is expected to take precedence over "host" inside the InfluxDB
    # client, which would make the "host" setting ineffective.
    # NOTE(review): confirm against the influxdb gem's option handling.
    "hosts" => config.param("hosts", :array, default: nil),
    "port" => config.param("port", :integer, default: 8086),
    "username" => config.param("username", :string, default: "root"),
    "password" => config.param("password", :string, default: "root"),
    "database" => config.param("database", :string),
    "series" => config.param("series", :string, default: nil),
    "series_per_column" => config.param("series_per_column", :bool, default: false),
    "timestamp_column" => config.param("timestamp_column", :string, default: nil),
    "ignore_columns" => config.param("ignore_columns", :array, default: []),
    "default_timezone" => config.param("default_timezone", :string, default: "UTC"),
    "mode" => config.param("mode", :string, default: "insert"),
    "use_ssl" => config.param("use_ssl", :bool, default: false),
    # Stored under the same name it is configured with so that the task hash,
    # which init hands to the client as symbol-keyed options, carries
    # :verify_ssl instead of an unrelated :verify key.
    "verify_ssl" => config.param("verify_ssl", :bool, default: true),
    "ssl_ca_cert" => config.param("ssl_ca_cert", :string, default: nil),
    "time_precision" => config.param("time_precision", :string, default: "s"),
    "initial_delay" => config.param("initial_delay", :float, default: 0.01),
    "max_delay" => config.param("max_delay", :float, default: 30),
    "open_timeout" => config.param("open_timeout", :integer, default: 5),
    "read_timeout" => config.param("read_timeout", :integer, default: 300),
    "async" => config.param("async", :bool, default: false),
    "udp" => config.param("udp", :bool, default: false),
    "retry" => config.param("retry", :integer, default: nil),
    "denormalize" => config.param("denormalize", :bool, default: true),
  }

  # Non-resumable output: run the tasks; the returned task reports are not used.
  yield(task)

  # Nothing to carry over to the next run.
  {}
end
47
+
48
+ #def self.resume(task, schema, count, &control)
49
+ # task_reports = yield(task)
50
+ #
51
+ # next_config_diff = {}
52
+ # return next_config_diff
53
+ #end
54
+
55
# Class-level registry keyed by measurement name; lazily created on first
# access and shared by every caller of this method.
def self.replaced_measurements
  @replaced_measurements || (@replaced_measurements = {})
end
58
+
59
# Prepares per-task state from the task hash and opens the InfluxDB client.
#
# Reads database, series settings, timestamp column, ignored columns, time
# precision, write mode and default timezone from +task+, creates the client
# with the whole task hash as symbol-keyed options, and creates the database
# when it is missing.
#
# Raises RuntimeError when neither "series" nor "series_per_column" is set,
# or when "timestamp_column" names a column that is not in the schema.
def init
  @database = task["database"]
  @series = task["series"]
  @series_per_column = task["series_per_column"]
  unless @series || @series_per_column
    raise "Need series or series_per_column parameter"
  end

  timestamp_name = task["timestamp_column"]
  if timestamp_name
    @timestamp_column = schema.find { |col| col.name == timestamp_name }
    # Fail early with a clear message instead of silently continuing with a
    # nil column that only breaks (or drops timestamps) once rows are written.
    unless @timestamp_column
      raise "timestamp_column '#{timestamp_name}' is not found in schema"
    end
  end

  @ignore_columns = task["ignore_columns"]
  @time_precision = task["time_precision"]
  @replace = task["mode"].downcase == "replace"
  @default_timezone = task["default_timezone"]

  # The client takes symbol-keyed options; the task hash uses string keys.
  client_options = Hash[task.map { |key, value| [key.to_sym, value] }]
  @connection = InfluxDB::Client.new(@database, client_options)
  create_database_if_not_exist
end
80
+
81
# No-op: the body is empty, so nothing is done here (the client created in
# init is not explicitly closed by this method).
+ def close
82
+ end
83
+
84
# Turns one page of records into InfluxDB points and writes them.
#
# page - enumerable of records.
#
# Uses build_payload when a series name is configured, otherwise
# build_payload_per_column. Returns whatever the client's write_points returns.
def add(page)
  points =
    if @series
      build_payload(page)
    else
      build_payload_per_column(page)
    end

  logger = Embulk.logger
  logger.info { "embulk-output-influxdb: Writing to #{@database}" }
  logger.debug { "embulk-output-influxdb: #{points}" }

  @connection.write_points(points, @time_precision)
end
92
+
93
# No-op: the body is empty; points are written in add, so nothing is left to
# flush here.
+ def finish
94
+ end
95
+
96
# No-op: the body is empty, so nothing is rolled back or cleaned up here.
+ def abort
97
+ end
98
+
99
# Returns the task report for this task, which is always an empty Hash.
def commit
  {}
end
103
+
104
+ private
105
+
106
# Builds one point per record, all columns of a record going into one series.
#
# page - enumerable of records, indexable by column index.
#
# For each record the series name is resolved from @series (placeholders
# expanded), the measurement is dropped first when required, and every target
# column becomes a field. A :timestamp entry (integer) is added only when a
# timestamp column is configured.
#
# Returns an Array of Hashes with :series, :values and optionally :timestamp.
def build_payload(page)
  page.map do |row|
    measurement = resolve_placeholder(row, @series)
    drop_measurement_if_exist(measurement)

    fields = target_columns.each_with_object({}) do |column, acc|
      acc[column.name] = convert_timezone(row[column.index])
    end

    point = { series: measurement, values: fields }
    if @timestamp_column
      point[:timestamp] = convert_timezone(row[@timestamp_column.index]).to_i
    end
    point
  end
end
120
+
121
# Builds one point per (record, target column) pair, using the column name as
# the series name and storing the cell under the single field :value.
#
# page - enumerable of records, indexable by column index.
#
# Cell values go through convert_timezone, the same way build_payload treats
# its field values, so Time cells are handled consistently in both modes.
# A :timestamp entry (integer) is added only when a timestamp column is
# configured.
#
# Returns a flat Array of Hashes with :series, :values and optionally
# :timestamp.
def build_payload_per_column(page)
  page.flat_map do |record|
    # The timestamp depends only on the record, so compute it once per record
    # rather than once per column.
    timestamp =
      if @timestamp_column
        convert_timezone(record[@timestamp_column.index]).to_i
      end

    target_columns.map do |col|
      series = col.name
      drop_measurement_if_exist(series)
      payload = {
        series: series,
        values: { value: convert_timezone(record[col.index]) },
      }
      payload[:timestamp] = timestamp if @timestamp_column
      payload
    end
  end
end
135
+
136
# In replace mode, drops the measurement named +series+ the first time that
# name is seen; does nothing in any other mode.
#
# series - measurement name.
#
# The name is recorded in the class-level registry on the first check whether
# or not the measurement exists. Otherwise a series that did not exist at the
# start would be found (and dropped) on a later call, after rows have already
# been written to it, and SHOW MEASUREMENTS would be re-run for every record.
def drop_measurement_if_exist(series)
  return unless @replace

  handled = self.class.replaced_measurements
  return unless handled[series].nil?

  handled[series] = true
  return unless find_measurement(series)

  Embulk.logger.info { "embulk-output-influxdb: Drop measurement #{series} from #{@database}" }
  # Quote the identifier so names containing spaces or other special
  # characters (series names can come from record values) form a valid query.
  quoted = series.to_s.gsub('"') { '\\"' }
  @connection.query("DROP MEASUREMENT \"#{quoted}\"")
end
143
+
144
# Looks up a measurement by name in the result of SHOW MEASUREMENTS.
#
# series - measurement name to look for.
#
# Returns the matching entry (truthy) or nil when there is no match,
# including when the query result is empty or has no "values" list.
def find_measurement(series)
  result = @connection.query("SHOW MEASUREMENTS")
  first_series = result && result[0]
  return nil unless first_series

  (first_series["values"] || []).find { |entry| entry["name"] == series }
end
147
+
148
# Creates the configured database unless the server already lists one with
# the same name.
#
# Returns nil when the database already exists, otherwise the result of the
# client's create_database call.
def create_database_if_not_exist
  already_present = @connection.list_databases.any? { |entry| entry["name"] == @database }
  return if already_present

  @connection.create_database(@database)
end
153
+
154
# Expands ${column_name} placeholders in a series template with the value of
# that column in the given record.
#
# record - record indexable by column index.
# series - template String, e.g. "${key_name}_series".
#
# Returns the expanded String.
# Raises RuntimeError when a placeholder names a column that is not in the
# schema (instead of failing on record[nil]).
def resolve_placeholder(record, series)
  series.gsub(/\$\{(.*?)\}/) do
    column_name = Regexp.last_match(1)
    index = schema.index { |col| col.name == column_name }
    if index.nil?
      raise "Unknown column '#{column_name}' in series placeholder: #{series}"
    end

    record[index].to_s
  end
end
160
+
161
# Columns whose values are written as fields: every schema column except the
# timestamp column (when one is configured) and the ignored columns.
#
# Works when no timestamp column is configured (@timestamp_column is nil),
# which is the default.
def target_columns
  timestamp_name = @timestamp_column && @timestamp_column.name
  ignored = @ignore_columns || []

  schema.reject do |col|
    (!timestamp_name.nil? && col.name == timestamp_name) || ignored.include?(col.name)
  end
end
166
+
167
# Passes a Time value through the configured default timezone; any other
# value is returned unchanged.
#
# value - cell value of any type.
#
# Returns the result of Timezone::Zone#time for Time values, otherwise +value+.
def convert_timezone(value)
  return value unless value.is_a?(Time)

  # Building a zone for every converted value is loop-invariant work, so keep
  # one zone object per timezone name.
  @timezones ||= {}
  zone = (@timezones[@default_timezone] ||= Timezone::Zone.new(zone: @default_timezone))
  zone.time(value)
end
173
+ end
174
+ end
175
+ end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-influxdb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - joker1007
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: embulk
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.7.4
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - "~>"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.7.4
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - "~>"
37
+ - !ruby/object:Gem::Version
38
+ version: '1.0'
39
+ prerelease: false
40
+ type: :development
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '10.0'
53
+ prerelease: false
54
+ type: :development
55
+ - !ruby/object:Gem::Dependency
56
+ name: tapp
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ prerelease: false
68
+ type: :development
69
+ - !ruby/object:Gem::Dependency
70
+ name: influxdb
71
+ version_requirements: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.2'
76
+ requirement: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - "~>"
79
+ - !ruby/object:Gem::Version
80
+ version: '0.2'
81
+ prerelease: false
82
+ type: :runtime
83
+ - !ruby/object:Gem::Dependency
84
+ name: timezone
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ prerelease: false
96
+ type: :runtime
97
+ description: Dumps records to InfluxDB.
98
+ email:
99
+ - kakyoin.hierophant@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".ruby-version"
106
+ - Gemfile
107
+ - LICENSE.txt
108
+ - README.md
109
+ - Rakefile
110
+ - embulk-output-influxdb.gemspec
111
+ - example.csv
112
+ - example.yml
113
+ - lib/embulk/output/influxdb.rb
114
+ homepage: https://github.com/joker1007/embulk-output-influxdb
115
+ licenses:
116
+ - MIT
117
+ metadata: {}
118
+ post_install_message:
119
+ rdoc_options: []
120
+ require_paths:
121
+ - lib
122
+ required_ruby_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ requirements: []
133
+ rubyforge_project:
134
+ rubygems_version: 2.4.8
135
+ signing_key:
136
+ specification_version: 4
137
+ summary: InfluxDB output plugin for Embulk
138
+ test_files: []
139
+ has_rdoc: