embulk-filter-insert 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 09934c336dd5ea3b9af1e9e52a7ba4f5647bac8d
4
+ data.tar.gz: afa263b7fbe96133f5149cf271443a0efbcbc60a
5
+ SHA512:
6
+ metadata.gz: 69433687684b90978a8363801d9aded46144f805f7b88ca15bf4f0214408ce452892e9a3269e83e67d3b7b00c1bc22a37f3a0068ceb53b8d385a6fa2731b1d17
7
+ data.tar.gz: 4af757d250752ac14cf85cbcf563ccb245d308a5b57ab82f3b3f2495509e8e9d4d9df4d3abc58dce06d79392b196398a01cd9813d4df0e51cdfa3482eab239f6
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,101 @@
1
+ # Insert filter plugin for Embulk
2
+
3
+ Embulk filter plugin that inserts column(s) at any position (e.g., the top/bottom of the columns, before/after the specified column name)
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: filter
8
+
9
+ ## Configuration
10
+
11
+ ### Column(s)
12
+
13
+ Either "column" or "columns" is required for specifying inserted column(s).
14
+
15
+ - **column**: associative array that contains only one key-value pair (key means a column name, value means a constant value in the column)
16
+ - **columns**: List of the associative arrays
17
+
18
+ The "column" associative array can contain the following optional configuration.
19
+
20
+ - **as**: type of the constant value in the column, i.e. boolean, long, double, string or timestamp (string, default: string)
21
+
22
+ ### Position
23
+
24
+ At most one of the following configurations can be specified to choose the position where the new column(s) is/are inserted.
25
+
26
+ - **at**: "top", "head", "bottom", "tail" or index number where the new column(s) is/are inserted (string)
27
+ - **before**: name of the existing column before which the new column(s) is/are inserted (string)
28
+ - **after**: name of the existing column after which the new column(s) is/are inserted (string)
29
+
30
+ If none of the configurations is specified, the new columns are inserted at the bottom of the existing columns.
31
+
32
+ ## Example
33
+
34
+ Example 1: Insert "host_name" column at the top of the columns
35
+
36
+ ```yaml
37
+ filters:
38
+ - { type: insert, column: { host_name: host01 }, at: top }
39
+ ```
40
+
41
+ Example 2: Insert "host_name" column at the bottom of the columns
42
+
43
+ ```yaml
44
+ filters:
45
+ - { type: insert, column: { host_name: host01 }, at: bottom }
46
+ ```
47
+
48
+ Example 3: Insert "host_name" column after second column
49
+
50
+ ```yaml
51
+ filters:
52
+ - { type: insert, column: { host_name: host01 }, at: 2 }
53
+ ```
54
+
55
+ Example 4: Insert "service_name" column before "host_name" column
56
+
57
+ ```yaml
58
+ filters:
59
+ - { type: insert, column: { service_name: service01 }, before: host_name }
60
+ ```
61
+
62
+ Example 5: Insert "service_name" column after "host_name" column
63
+
64
+ ```yaml
65
+ filters:
66
+ - { type: insert, column: { service_name: service01 }, after: host_name }
67
+ ```
68
+
69
+ Example 6: Insert "user_id" column as integer at the bottom of the columns
70
+
71
+ ```yaml
72
+ filters:
73
+ - { type: insert, column: { user_id: 1234567, as: long } }
74
+ ```
75
+
76
+ Example 7: Insert multiple columns in a row at the bottom of the columns
77
+
78
+ ```yaml
79
+ filters:
80
+ - type: insert
81
+ columns:
82
+ - host_name: host01
83
+ - service_name: service01
84
+ ```
85
+
86
+ Example 8: Combination of the above examples
87
+
88
+ ```yaml
89
+ filters:
90
+ - type: insert
91
+ columns:
92
+ - service_name: service01
93
+ - { user_id: 1234567, as: long }
94
+ after: host_name
95
+ ```
96
+
97
+ ## Build
98
+
99
+ ```
100
+ $ rake
101
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,19 @@
1
+
2
# Gem specification for the embulk-filter-insert plugin.
Gem::Specification.new do |gem|
  # Identity
  gem.name    = "embulk-filter-insert"
  gem.version = "1.0.0"

  # Authorship and licensing
  gem.authors  = %w[muziyoshiz]
  gem.email    = %w[muziyoshiz@gmail.com]
  gem.licenses = %w[MIT]
  gem.homepage = "https://github.com/muziyoshiz/embulk-filter-insert"

  # Descriptions shown by the registry
  gem.summary     = "Embulk filter plugin that inserts column(s) at any position"
  gem.description = "Embulk filter plugin that inserts column(s) at any position (e.g., the top/bottom of the columns, before/after the specified column name)"

  # Packaged files: everything tracked by git plus any jars under classpath/
  tracked_files = `git ls-files`.split("\n")
  bundled_jars  = Dir["classpath/*.jar"]
  gem.files         = tracked_files + bundled_jars
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = %w[lib]

  # No runtime dependencies are declared; template placeholder kept for reference:
  #   gem.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
  gem.add_development_dependency "bundler", "~> 1.0"
  gem.add_development_dependency "rake", ">= 10.0"
end
@@ -0,0 +1,144 @@
1
require "date" # Date.parse is used for "as: timestamp" columns

module Embulk
  module Filter

    # Filter plugin that inserts one or more constant-valued columns into the
    # schema and into every record, at a position chosen by the "at",
    # "before" or "after" option (default: after the last column).
    class InsertFilterPlugin < FilterPlugin
      Plugin.register_filter("insert", self)

      # Type names accepted by the "as" option of a column definition.
      SUPPORTED_TYPES = %w[boolean long double string timestamp].freeze

      # Validates the configuration, builds the task and the output schema,
      # and yields both to the caller's block.
      #
      # config    - object answering param(name, type, default: ...); the keys
      #             read are "column", "columns", "at", "before" and "after".
      # in_schema - enumerable of columns answering name, index and index=.
      #
      # Yields the task hash ("values" and "position") and the output column
      # array. Raises ArgumentError when the configuration is invalid.
      def self.transaction(config, in_schema, &control)
        column = config.param("column", :hash, default: nil)
        columns = config.param("columns", :array, default: nil)
        # ^ = XOR: exactly one of "column" / "columns" must be present
        unless column.nil? ^ columns.nil?
          raise ArgumentError, "Either \"column\" or \"columns\" is needed"
        end

        columns = column ? [get_column(column)] : get_columns(columns)

        at = config.param("at", :string, default: nil)
        before = config.param("before", :string, default: nil)
        after = config.param("after", :string, default: nil)

        # With no position option at all, append after the last column.
        at = "bottom" if at.nil? && before.nil? && after.nil?

        unless [at, before, after].compact.size == 1
          raise ArgumentError, "Either \"at\", \"before\" or \"after\" is needed"
        end

        task = {
          "values" => columns.map { |c| c[:value] },
          "position" => resolve_position(at, before, after, in_schema)
        }

        # modify column definition
        inserted_schema = columns.map { |c| Column.new(0, c[:name], c[:type]) }
        out_columns = in_schema.map { |c| c }
        out_columns.insert(task["position"], *inserted_schema)

        # renumber index (the placeholder 0 given to new columns is replaced here)
        out_columns.each_with_index { |c, idx| c.index = idx }

        yield(task, out_columns)
      end

      # Converts one column definition into a normalized hash.
      #
      # column_hash - hash holding exactly one name => value pair, optionally
      #               accompanied by an "as" => type-name pair.
      #
      # Returns { :name => name, :value => converted_value, :type => type_symbol }.
      # Raises ArgumentError when the hash is empty, has unexpected extra keys,
      # or names an unsupported type.
      def self.get_column(column_hash)
        if column_hash.empty? || column_hash.size > 2
          raise ArgumentError, "Invalid column parameter: #{column_hash.to_s}"
        end

        # default type is string
        type_name = "string"

        if column_hash.size == 2
          unless column_hash.key?("as")
            raise ArgumentError, "Invalid column parameter: #{column_hash.to_s}"
          end
          # to_s keeps a nil "as" from raising NoMethodError; it is rejected below
          type_name = column_hash["as"].to_s
          column_hash = column_hash.reject { |key, _| key == "as" }
        end

        # Validate before to_sym so arbitrary user input is never symbolized.
        unless SUPPORTED_TYPES.include?(type_name)
          raise ArgumentError, "Unknown type #{type_name}: supported types are boolean, long, double, string and timestamp"
        end

        type = type_name.to_sym
        name, value = column_hash.first

        {
          :name => name,
          :value => convert_value(value, type),
          :type => type
        }
      end

      # Converts an array of column definitions with get_column.
      #
      # Returns an array of normalized column hashes.
      def self.get_columns(columns_array)
        columns_array.map { |column_hash| get_column(column_hash) }
      end

      # Works out the insertion index from whichever position option is set.
      # Raises ArgumentError when the referenced column does not exist.
      def self.resolve_position(at, before, after, in_schema)
        return parse_at(at, in_schema.size) if at

        name = before || after
        target = in_schema.find { |c| c.name == name }
        raise ArgumentError, "Column #{name} is not found" if target.nil?

        before ? target.index : target.index + 1
      end

      # Turns the "at" option into an index within 0..size.
      # Negative numbers count from the end like Array#insert (-1 appends).
      # Raises ArgumentError for non-numeric or out-of-range values, which
      # would otherwise be read as 0 or pad the schema with nil entries.
      def self.parse_at(at, size)
        return 0 if %w[top head].include?(at)
        return size if %w[bottom tail].include?(at)

        index = begin
          Integer(at, 10)
        rescue ArgumentError, TypeError
          nil
        end
        index += size + 1 if index && index < 0

        unless index && index >= 0 && index <= size
          raise ArgumentError, "Invalid \"at\" parameter: #{at} (must be top, head, bottom, tail or an index between 0 and #{size})"
        end

        index
      end

      # Casts a configured constant to the Ruby value used for the given type.
      def self.convert_value(value, type)
        case type
        when :boolean
          # An unquoted false in the config arrives as a real boolean, a quoted
          # one as the string "false"; both mean false. Anything else is true.
          value != false && value != "false"
        when :long
          value.to_i
        when :double
          value.to_f
        when :timestamp
          # NOTE(review): the value is kept as a Date, as before; confirm that
          # the page builder accepts it for timestamp columns.
          Date.parse(value)
        else
          value
        end
      end

      private_class_method :resolve_position, :parse_at, :convert_value

      # Caches the constant values and the insertion index from the task.
      def init
        @values = task["values"]
        @position = task["position"]
      end

      def close
      end

      # Inserts the constant values into each record of the page (the record
      # is modified in place) and passes it on to the page builder.
      def add(page)
        page.each do |record|
          record.insert(@position, *@values)
          page_builder.add(record)
        end
      end

      def finish
        page_builder.finish
      end
    end

  end
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-insert
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - muziyoshiz
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: Embulk filter plugin that inserts column(s) at any position (e.g., the
42
+ top/bottom of the columns, before/after the specified column name)
43
+ email:
44
+ - muziyoshiz@gmail.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - .gitignore
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - embulk-filter-insert.gemspec
55
+ - lib/embulk/filter/insert.rb
56
+ homepage: https://github.com/muziyoshiz/embulk-filter-insert
57
+ licenses:
58
+ - MIT
59
+ metadata: {}
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - '>='
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubyforge_project:
76
+ rubygems_version: 2.0.14
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: Embulk filter plugin that inserts column(s) at any position
80
+ test_files: []